lederhosen 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lederhosen/tasks/uniquify.rb +49 -0
 - data/lib/version.rb +1 -1
 - metadata +4 -3
 
| 
         @@ -0,0 +1,49 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ##
         
     | 
| 
      
 2 
     | 
    
         
            +
            # uniquify - uniquify a fasta file, also output table with md5 -> number of reads
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            require 'digest/md5'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            module Lederhosen
              class CLI
                desc 'uniquify',
                  'uniquify a fasta file and generate a table with md5 -> abundance'

                method_option :input, :type     => :string, :required => true
                method_option :output, :type    => :string, :required => true
                method_option :table_out, :type => :string, :required => true

                # Collapse duplicate sequences in a fasta file.
                #
                # Reads records from options[:input], writes the first record seen
                # for each distinct sequence to options[:output], and then writes
                # one "md5,count" line per distinct sequence to options[:table_out],
                # where md5 is the hex MD5 digest of the sequence and count is the
                # number of input records that carried it. Finally reports how many
                # distinct sequences were kept out of the total number of records.
                #
                # NOTE(review): `Dna` and `ohai` are defined elsewhere in the
                # project; this assumes `record.sequence` is a String and that
                # `puts record` emits the record in fasta form -- confirm.
                def uniquify
                  input     = options[:input]
                  output    = options[:output]
                  table_out = options[:table_out]

                  # sequence => number of records carrying that sequence
                  sequence_counts = Hash.new(0)

                  # Open the input first so an unreadable input does not truncate
                  # the output file. Block forms close both handles even when
                  # parsing raises part-way through.
                  File.open(input) do |handle|
                    File.open(output, 'w') do |fasta_out|
                      Dna.new(handle).each do |record|
                        # only the first occurrence of a sequence is written out
                        fasta_out.puts record unless sequence_counts.key? record.sequence
                        sequence_counts[record.sequence] += 1
                      end
                    end
                  end

                  # write table
                  File.open(table_out, 'w') do |table|
                    sequence_counts.each_pair do |sequence, count|
                      table.puts "#{Digest::MD5.hexdigest(sequence)},#{count}"
                    end
                  end

                  kept  = sequence_counts.size
                  # seed with 0 so an empty input yields 0 instead of nil
                  total = sequence_counts.values.inject(0) { |sum, count| sum + count }
                  # avoid 0 / 0.0 (NaN) when the input contained no records
                  percent = total.zero? ? 0.0 : 100 * kept / total.to_f
                  ohai "kept #{kept} out of #{total} reads (#{percent})"
                end
              end
            end
         
     | 
    
        data/lib/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: lederhosen
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              hash:  
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 17
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
              segments: 
         
     | 
| 
       7 
7 
     | 
    
         
             
              - 0
         
     | 
| 
       8 
8 
     | 
    
         
             
              - 2
         
     | 
| 
       9 
     | 
    
         
            -
              - 2
     | 
| 
       10 
     | 
    
         
            -
              version: 0.2.2
     | 
| 
      
 9 
     | 
    
         
            +
              - 3
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 0.2.3
         
     | 
| 
       11 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       12 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       13 
13 
     | 
    
         
             
            - Austin G. Davis-Richardson
         
     | 
| 
         @@ -95,6 +95,7 @@ files: 
     | 
|
| 
       95 
95 
     | 
    
         
             
            - lib/lederhosen/tasks/trim.rb
         
     | 
| 
       96 
96 
     | 
    
         
             
            - lib/lederhosen/tasks/uc_filter.rb
         
     | 
| 
       97 
97 
     | 
    
         
             
            - lib/lederhosen/tasks/uc_stats.rb
         
     | 
| 
      
 98 
     | 
    
         
            +
            - lib/lederhosen/tasks/uniquify.rb
         
     | 
| 
       98 
99 
     | 
    
         
             
            - lib/version.rb
         
     | 
| 
       99 
100 
     | 
    
         
             
            - readme.md
         
     | 
| 
       100 
101 
     | 
    
         
             
            - spec/data/ILT_L_9_B_001_1.txt.gz
         
     |