bio-gadget 0.4.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +10 -20
- data/.travis.yml +5 -0
- data/LICENSE +1 -1
- data/README.org +0 -21
- data/Rakefile +5 -1
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bio-gadget.gemspec +20 -14
- data/exe/bio-gadget +14 -0
- data/exe/fq1l +5 -0
- data/exe/rbg +1 -0
- data/exe/strt +5 -0
- data/ext/bio_gadget/bio_gadget.c +313 -0
- data/ext/bio_gadget/bio_gadget.h +8 -0
- data/ext/bio_gadget/extconf.rb +3 -0
- data/lib/bio/gadget.rb +171 -0
- data/lib/bio/gadget/fq1l.rb +457 -0
- data/lib/bio/gadget/strt.rb +605 -0
- data/lib/bio/gadget/strt/count.rb +53 -0
- data/lib/bio/gadget/strt/depth.rb +124 -0
- data/lib/bio/gadget/strt/prepare_transcriptome.rb +230 -0
- data/lib/bio/gadgets.rb +135 -0
- data/test/bio/gadget_test.rb +11 -0
- data/test/test_helper.rb +4 -0
- metadata +109 -40
- data/Gthorfile +0 -2
- data/bin/bio-gadget +0 -5
- data/lib/bio-gadget.rb +0 -44
- data/lib/bio-gadget/dedup.rb +0 -33
- data/lib/bio-gadget/demlt.rb +0 -149
- data/lib/bio-gadget/femrg.rb +0 -61
- data/lib/bio-gadget/fqxz.rb +0 -30
- data/lib/bio-gadget/peak.rb +0 -94
- data/lib/bio-gadget/qvstat.rb +0 -34
- data/lib/bio-gadget/rgt2mtx.rb +0 -60
- data/lib/bio-gadget/version.rb +0 -9
- data/lib/bio-gadget/wig5p.rb +0 -51
- data/lib/bio-gadget/wigchr.rb +0 -28
    
        data/test/test_helper.rb
    ADDED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,114 +1,181 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: bio-gadget
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.5.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Shintaro Katayama
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 | 
            -
            bindir:  | 
| 9 | 
            +
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2017-02-01 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            -
              name:  | 
| 14 | 
            +
              name: bundler
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - "~>"
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '1.12'
         | 
| 20 | 
            +
              type: :development
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - "~>"
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '1.12'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: rake
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - "~>"
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '10.0'
         | 
| 34 | 
            +
              type: :development
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - "~>"
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '10.0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: rake-compiler
         | 
| 15 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 44 | 
             
                requirements:
         | 
| 17 | 
            -
                - -  | 
| 45 | 
            +
                - - ">="
         | 
| 18 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 19 47 | 
             
                    version: '0'
         | 
| 20 | 
            -
              type: : | 
| 48 | 
            +
              type: :development
         | 
| 21 49 | 
             
              prerelease: false
         | 
| 22 50 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 51 | 
             
                requirements:
         | 
| 24 | 
            -
                - -  | 
| 52 | 
            +
                - - ">="
         | 
| 25 53 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 54 | 
             
                    version: '0'
         | 
| 27 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            -
              name:  | 
| 56 | 
            +
              name: minitest
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - "~>"
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '5.0'
         | 
| 62 | 
            +
              type: :development
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - "~>"
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '5.0'
         | 
| 69 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            +
              name: bio
         | 
| 29 71 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 72 | 
             
                requirements:
         | 
| 31 | 
            -
                - -  | 
| 73 | 
            +
                - - ">="
         | 
| 32 74 | 
             
                  - !ruby/object:Gem::Version
         | 
| 33 75 | 
             
                    version: '0'
         | 
| 34 76 | 
             
              type: :runtime
         | 
| 35 77 | 
             
              prerelease: false
         | 
| 36 78 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 79 | 
             
                requirements:
         | 
| 38 | 
            -
                - -  | 
| 80 | 
            +
                - - ">="
         | 
| 39 81 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 82 | 
             
                    version: '0'
         | 
| 41 83 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            -
              name: levenshtein | 
| 84 | 
            +
              name: damerau-levenshtein
         | 
| 43 85 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 86 | 
             
                requirements:
         | 
| 45 | 
            -
                - -  | 
| 87 | 
            +
                - - ">="
         | 
| 46 88 | 
             
                  - !ruby/object:Gem::Version
         | 
| 47 89 | 
             
                    version: '0'
         | 
| 48 90 | 
             
              type: :runtime
         | 
| 49 91 | 
             
              prerelease: false
         | 
| 50 92 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 93 | 
             
                requirements:
         | 
| 52 | 
            -
                - -  | 
| 94 | 
            +
                - - ">="
         | 
| 53 95 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 96 | 
             
                    version: '0'
         | 
| 55 97 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            -
              name:  | 
| 98 | 
            +
              name: mkfifo
         | 
| 57 99 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 100 | 
             
                requirements:
         | 
| 59 | 
            -
                - -  | 
| 101 | 
            +
                - - ">="
         | 
| 60 102 | 
             
                  - !ruby/object:Gem::Version
         | 
| 61 103 | 
             
                    version: '0'
         | 
| 62 104 | 
             
              type: :runtime
         | 
| 63 105 | 
             
              prerelease: false
         | 
| 64 106 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 107 | 
             
                requirements:
         | 
| 66 | 
            -
                - -  | 
| 108 | 
            +
                - - ">="
         | 
| 67 109 | 
             
                  - !ruby/object:Gem::Version
         | 
| 68 110 | 
             
                    version: '0'
         | 
| 69 111 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            -
              name:  | 
| 112 | 
            +
              name: parallel
         | 
| 71 113 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 114 | 
             
                requirements:
         | 
| 73 | 
            -
                - -  | 
| 115 | 
            +
                - - ">="
         | 
| 74 116 | 
             
                  - !ruby/object:Gem::Version
         | 
| 75 117 | 
             
                    version: '0'
         | 
| 76 118 | 
             
              type: :runtime
         | 
| 77 119 | 
             
              prerelease: false
         | 
| 78 120 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 121 | 
             
                requirements:
         | 
| 80 | 
            -
                - -  | 
| 122 | 
            +
                - - ">="
         | 
| 81 123 | 
             
                  - !ruby/object:Gem::Version
         | 
| 82 124 | 
             
                    version: '0'
         | 
| 125 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 126 | 
            +
              name: thor
         | 
| 127 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 128 | 
            +
                requirements:
         | 
| 129 | 
            +
                - - "~>"
         | 
| 130 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 131 | 
            +
                    version: 0.19.3
         | 
| 132 | 
            +
              type: :runtime
         | 
| 133 | 
            +
              prerelease: false
         | 
| 134 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 135 | 
            +
                requirements:
         | 
| 136 | 
            +
                - - "~>"
         | 
| 137 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 138 | 
            +
                    version: 0.19.3
         | 
| 83 139 | 
             
            description: Gadgets for bioinformatics
         | 
| 84 140 | 
             
            email:
         | 
| 85 141 | 
             
            - shintaro.katayama@gmail.com
         | 
| 86 142 | 
             
            executables:
         | 
| 87 143 | 
             
            - bio-gadget
         | 
| 88 | 
            -
             | 
| 144 | 
            +
            - fq1l
         | 
| 145 | 
            +
            - rbg
         | 
| 146 | 
            +
            - strt
         | 
| 147 | 
            +
            extensions:
         | 
| 148 | 
            +
            - ext/bio_gadget/extconf.rb
         | 
| 89 149 | 
             
            extra_rdoc_files: []
         | 
| 90 150 | 
             
            files:
         | 
| 91 | 
            -
            - .gitignore
         | 
| 151 | 
            +
            - ".gitignore"
         | 
| 152 | 
            +
            - ".travis.yml"
         | 
| 92 153 | 
             
            - Gemfile
         | 
| 93 | 
            -
            - Gthorfile
         | 
| 94 154 | 
             
            - LICENSE
         | 
| 95 155 | 
             
            - README.org
         | 
| 96 156 | 
             
            - Rakefile
         | 
| 97 | 
            -
            - bin/ | 
| 157 | 
            +
            - bin/console
         | 
| 158 | 
            +
            - bin/setup
         | 
| 98 159 | 
             
            - bio-gadget.gemspec
         | 
| 99 | 
            -
            -  | 
| 100 | 
            -
            -  | 
| 101 | 
            -
            -  | 
| 102 | 
            -
            -  | 
| 103 | 
            -
            -  | 
| 104 | 
            -
            -  | 
| 105 | 
            -
            -  | 
| 106 | 
            -
            - lib/bio | 
| 107 | 
            -
            - lib/bio | 
| 108 | 
            -
            - lib/bio | 
| 109 | 
            -
            - lib/bio | 
| 160 | 
            +
            - exe/bio-gadget
         | 
| 161 | 
            +
            - exe/fq1l
         | 
| 162 | 
            +
            - exe/rbg
         | 
| 163 | 
            +
            - exe/strt
         | 
| 164 | 
            +
            - ext/bio_gadget/bio_gadget.c
         | 
| 165 | 
            +
            - ext/bio_gadget/bio_gadget.h
         | 
| 166 | 
            +
            - ext/bio_gadget/extconf.rb
         | 
| 167 | 
            +
            - lib/bio/gadget.rb
         | 
| 168 | 
            +
            - lib/bio/gadget/fq1l.rb
         | 
| 169 | 
            +
            - lib/bio/gadget/strt.rb
         | 
| 170 | 
            +
            - lib/bio/gadget/strt/count.rb
         | 
| 171 | 
            +
            - lib/bio/gadget/strt/depth.rb
         | 
| 172 | 
            +
            - lib/bio/gadget/strt/prepare_transcriptome.rb
         | 
| 173 | 
            +
            - lib/bio/gadgets.rb
         | 
| 174 | 
            +
            - test/bio/gadget_test.rb
         | 
| 175 | 
            +
            - test/test_helper.rb
         | 
| 110 176 | 
             
            homepage: https://github.com/shka/ruby-bio-gadget
         | 
| 111 | 
            -
            licenses: | 
| 177 | 
            +
            licenses:
         | 
| 178 | 
            +
            - MIT
         | 
| 112 179 | 
             
            metadata: {}
         | 
| 113 180 | 
             
            post_install_message: 
         | 
| 114 181 | 
             
            rdoc_options: []
         | 
| @@ -116,18 +183,20 @@ require_paths: | |
| 116 183 | 
             
            - lib
         | 
| 117 184 | 
             
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 118 185 | 
             
              requirements:
         | 
| 119 | 
            -
              - -  | 
| 186 | 
            +
              - - ">="
         | 
| 120 187 | 
             
                - !ruby/object:Gem::Version
         | 
| 121 188 | 
             
                  version: '0'
         | 
| 122 189 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 123 190 | 
             
              requirements:
         | 
| 124 | 
            -
              - -  | 
| 191 | 
            +
              - - ">="
         | 
| 125 192 | 
             
                - !ruby/object:Gem::Version
         | 
| 126 193 | 
             
                  version: '0'
         | 
| 127 194 | 
             
            requirements: []
         | 
| 128 195 | 
             
            rubyforge_project: 
         | 
| 129 | 
            -
            rubygems_version: 2. | 
| 196 | 
            +
            rubygems_version: 2.6.8
         | 
| 130 197 | 
             
            signing_key: 
         | 
| 131 198 | 
             
            specification_version: 4
         | 
| 132 | 
            -
            summary:  | 
| 133 | 
            -
            test_files: | 
| 199 | 
            +
            summary: ''
         | 
| 200 | 
            +
            test_files:
         | 
| 201 | 
            +
            - test/bio/gadget_test.rb
         | 
| 202 | 
            +
            - test/test_helper.rb
         | 
    
        data/Gthorfile
    DELETED
    
    
    
        data/bin/bio-gadget
    DELETED
    
    
    
        data/lib/bio-gadget.rb
    DELETED
    
    | @@ -1,44 +0,0 @@ | |
| 1 | 
            -
            require 'bio-gadget/version'
         | 
| 2 | 
            -
            require 'bio-gadget/dedup'
         | 
| 3 | 
            -
            require 'bio-gadget/demlt'
         | 
| 4 | 
            -
            require 'bio-gadget/femrg'
         | 
| 5 | 
            -
            require 'bio-gadget/fqxz'
         | 
| 6 | 
            -
            require 'bio-gadget/peak'
         | 
| 7 | 
            -
            require 'bio-gadget/qvstat'
         | 
| 8 | 
            -
            require 'bio-gadget/rgt2mtx'
         | 
| 9 | 
            -
            require 'bio-gadget/wig5p'
         | 
| 10 | 
            -
            require 'bio-gadget/wigchr'
         | 
| 11 | 
            -
             | 
| 12 | 
            -
            require 'tempfile'
         | 
| 13 | 
            -
             | 
| 14 | 
            -
            module Bio
         | 
| 15 | 
            -
              class Gadget < Thor
         | 
| 16 | 
            -
             | 
| 17 | 
            -
                private
         | 
| 18 | 
            -
             | 
| 19 | 
            -
                def myopen(file, &block)
         | 
| 20 | 
            -
                  # how to write?
         | 
| 21 | 
            -
                  f = (/\|/ !~ file && /\.gz$/ =~ file) ? "| gunzip -c #{file}" : file
         | 
| 22 | 
            -
                  unless block.nil?
         | 
| 23 | 
            -
                    o = open(f); block.call(o); o.close
         | 
| 24 | 
            -
                  else
         | 
| 25 | 
            -
                    open(f)
         | 
| 26 | 
            -
                  end
         | 
| 27 | 
            -
                end
         | 
| 28 | 
            -
             | 
| 29 | 
            -
                @@mytemppaths = Array.new
         | 
| 30 | 
            -
             | 
| 31 | 
            -
                def mytemppath(basename, tmpdir = Dir::tmpdir)
         | 
| 32 | 
            -
                  fp = Tempfile.open(basename, tmpdir)
         | 
| 33 | 
            -
                  path = fp.path
         | 
| 34 | 
            -
                  @@mytemppaths.push(path)
         | 
| 35 | 
            -
                  fp.close!
         | 
| 36 | 
            -
                  path
         | 
| 37 | 
            -
                end
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                END {
         | 
| 40 | 
            -
                  @@mytemppaths.each { |path| File.unlink(path) if File.exist?(path) }
         | 
| 41 | 
            -
                }
         | 
| 42 | 
            -
             | 
| 43 | 
            -
              end
         | 
| 44 | 
            -
            end
         | 
    
        data/lib/bio-gadget/dedup.rb
    DELETED
    
    | @@ -1,33 +0,0 @@ | |
| 1 | 
            -
            require 'bio-faster'
         | 
| 2 | 
            -
            require 'parallel'
         | 
| 3 | 
            -
             | 
| 4 | 
            -
            module Bio
         | 
| 5 | 
            -
              class Gadget < Thor
         | 
| 6 | 
            -
                namespace :bio
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                desc 'dedup', 'deduplicate fastq (via STDIN)'
         | 
| 9 | 
            -
                def dedup
         | 
| 10 | 
            -
             | 
| 11 | 
            -
                  p1in, p1out = IO.pipe
         | 
| 12 | 
            -
             | 
| 13 | 
            -
                  fork {
         | 
| 14 | 
            -
                    p1in.close
         | 
| 15 | 
            -
                    $stdout.reopen(p1out)
         | 
| 16 | 
            -
                    open("| sort -k 1 -r -S #{sprintf('%2d', 100/(Parallel.processor_count+1))}% -T $TMPDIR | cut -f 2- | uniq -f 2", 'w') { |fp|
         | 
| 17 | 
            -
                      Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
         | 
| 18 | 
            -
                        fp.puts "#{seq}#{qvs}\t#{seqid}\t#{qvs}\t#{seq}"
         | 
| 19 | 
            -
                      end
         | 
| 20 | 
            -
                    }
         | 
| 21 | 
            -
                  }
         | 
| 22 | 
            -
             | 
| 23 | 
            -
                  p1out.close
         | 
| 24 | 
            -
             | 
| 25 | 
            -
                  p1in.each_line { |line|
         | 
| 26 | 
            -
                    seqid, qvs, seq = line.rstrip.split
         | 
| 27 | 
            -
                    puts "@#{seqid}\n#{seq}\n+\n#{qvs}"
         | 
| 28 | 
            -
                  }
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                end
         | 
| 31 | 
            -
             | 
| 32 | 
            -
              end
         | 
| 33 | 
            -
            end
         | 
    
        data/lib/bio-gadget/demlt.rb
    DELETED
    
    | @@ -1,149 +0,0 @@ | |
| 1 | 
            -
            require 'bio-faster'
         | 
| 2 | 
            -
            require 'levenshtein'
         | 
| 3 | 
            -
            require 'mkfifo'
         | 
| 4 | 
            -
            require 'parallel'
         | 
| 5 | 
            -
             | 
| 6 | 
            -
            module Bio
         | 
| 7 | 
            -
              class Gadget < Thor
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                namespace :bio
         | 
| 10 | 
            -
             | 
| 11 | 
            -
                desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
         | 
| 12 | 
            -
                option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
         | 
| 13 | 
            -
                option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
         | 
| 14 | 
            -
                option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCA-amplicon reduction. -1 is no trimming by length.'
         | 
| 15 | 
            -
                option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
         | 
| 16 | 
            -
                option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
         | 
| 17 | 
            -
                option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
         | 
| 18 | 
            -
                def demlt(bcfile, fastq=:stdin)
         | 
| 19 | 
            -
             | 
| 20 | 
            -
                  ofs = options['umi-length']
         | 
| 21 | 
            -
                  clen = options['cdna-length']
         | 
| 22 | 
            -
                  gtrim = options['g-trimming']
         | 
| 23 | 
            -
                  qtrim = options['q-trimming']
         | 
| 24 | 
            -
                  mlen = options['min-length']
         | 
| 25 | 
            -
             | 
| 26 | 
            -
                  wells = Array.new
         | 
| 27 | 
            -
                  bcs = Array.new
         | 
| 28 | 
            -
                  bclens = Array.new
         | 
| 29 | 
            -
                  open(bcfile).each do |line|
         | 
| 30 | 
            -
                    cols = line.rstrip.split
         | 
| 31 | 
            -
                    wells.push(cols[0])
         | 
| 32 | 
            -
                    bcs.push(cols[1])
         | 
| 33 | 
            -
                    bclens.push(cols[1].length)
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                  bclens.uniq!
         | 
| 37 | 
            -
                  if bclens.size != 1
         | 
| 38 | 
            -
                    raise 'Inconsistent barcode sequence lengths'
         | 
| 39 | 
            -
                  end
         | 
| 40 | 
            -
                  bclen = bclens[0]
         | 
| 41 | 
            -
             | 
| 42 | 
            -
                  procs = Parallel.processor_count
         | 
| 43 | 
            -
             | 
| 44 | 
            -
                  fifo1paths = Array.new
         | 
| 45 | 
            -
                  procs.times { |i|
         | 
| 46 | 
            -
                    fifo1path = mytemppath('fifo1-')
         | 
| 47 | 
            -
                    File.mkfifo(fifo1path)
         | 
| 48 | 
            -
                    fifo1paths.push(fifo1path)
         | 
| 49 | 
            -
                  }
         | 
| 50 | 
            -
                  pid = Kernel.fork {
         | 
| 51 | 
            -
                    fifo1s = Array.new
         | 
| 52 | 
            -
                    fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
         | 
| 53 | 
            -
                    total = 0
         | 
| 54 | 
            -
                    Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
         | 
| 55 | 
            -
                      fifo1 = fifo1s[total % procs]
         | 
| 56 | 
            -
                      fifo1.puts(vals.join("\t"))
         | 
| 57 | 
            -
                      total += 1
         | 
| 58 | 
            -
                    end
         | 
| 59 | 
            -
                    fifo1s.each { |fifo1| fifo1.close }
         | 
| 60 | 
            -
                    Kernel.exit!
         | 
| 61 | 
            -
                  }
         | 
| 62 | 
            -
             | 
| 63 | 
            -
                  fifo2paths = Array.new
         | 
| 64 | 
            -
                  procs.times { |i|
         | 
| 65 | 
            -
                    fifo2path = mytemppath('fifo2-')
         | 
| 66 | 
            -
                    File.mkfifo(fifo2path)
         | 
| 67 | 
            -
                    fifo2paths.push(fifo2path)
         | 
| 68 | 
            -
                    pid = Kernel.fork {
         | 
| 69 | 
            -
                      open(fifo2path, 'w') { |fifo2|
         | 
| 70 | 
            -
                        open(fifo1paths[i], 'r').each { |line|
         | 
| 71 | 
            -
                          seqid, seq, qvs = line.rstrip.split(/\t/)
         | 
| 72 | 
            -
                          tmpdists = Hash.new
         | 
| 73 | 
            -
                          bcs.each_index { |bcidx|
         | 
| 74 | 
            -
                            tmpdists[bcidx] = Levenshtein.distance(bcs[bcidx], seq[ofs, bclen])
         | 
| 75 | 
            -
                          }
         | 
| 76 | 
            -
                          dists = tmpdists.sort { |a, b| a[1] <=> b[1] }
         | 
| 77 | 
            -
                          bc = dists[0][1] < 2 && dists[0][1] < dists[1][1] ? dists[0][0] : -1
         | 
| 78 | 
            -
                          fifo2.puts("#{bc}\t#{seqid}\t#{seq}\t#{qvs}")
         | 
| 79 | 
            -
                        }
         | 
| 80 | 
            -
                      }
         | 
| 81 | 
            -
                      Kernel.exit!
         | 
| 82 | 
            -
                    }
         | 
| 83 | 
            -
                  }
         | 
| 84 | 
            -
             | 
| 85 | 
            -
                  tmpwells = wells + ['other']
         | 
| 86 | 
            -
             | 
| 87 | 
            -
                  fifo3paths = Array.new
         | 
| 88 | 
            -
                  tmpwells.each_index { |i|
         | 
| 89 | 
            -
                    fifo3path = mytemppath('fifo3-')
         | 
| 90 | 
            -
                    File.mkfifo(fifo3path)
         | 
| 91 | 
            -
                    fifo3paths.push(fifo3path)
         | 
| 92 | 
            -
                  }
         | 
| 93 | 
            -
                  pid = Kernel.fork {
         | 
| 94 | 
            -
                    fifo2s = Array.new
         | 
| 95 | 
            -
                    fifo2paths.each { |fifo2path| fifo2s.push(open(fifo2path, 'r')) }
         | 
| 96 | 
            -
                    fifo2done = Hash.new
         | 
| 97 | 
            -
                    fifo3s = Array.new
         | 
| 98 | 
            -
                    fifo3paths.each { |fifo3path| fifo3s.push(open(fifo3path, 'w')) }
         | 
| 99 | 
            -
                    fifo2s.cycle { |fifo2|
         | 
| 100 | 
            -
                      unless fifo2done.key?(fifo2)
         | 
| 101 | 
            -
                        line = fifo2.gets
         | 
| 102 | 
            -
                        if line.nil?
         | 
| 103 | 
            -
                          fifo2done[fifo2] = ''
         | 
| 104 | 
            -
                        else
         | 
| 105 | 
            -
                          bcs, seqid, seq, qvs = line.rstrip.split(/\t/)
         | 
| 106 | 
            -
                          fifo3 = fifo3s[bcs.to_i]
         | 
| 107 | 
            -
                          fifo3.puts([seqid, seq, qvs].join("\t"))
         | 
| 108 | 
            -
                        end
         | 
| 109 | 
            -
                      end
         | 
| 110 | 
            -
                      if fifo2done.size == fifo2s.size
         | 
| 111 | 
            -
                        break
         | 
| 112 | 
            -
                      end
         | 
| 113 | 
            -
                    }
         | 
| 114 | 
            -
                    fifo2s.each { |fifo2| fifo2.close }
         | 
| 115 | 
            -
                    fifo3s.each { |fifo3| fifo3.close }
         | 
| 116 | 
            -
                    Kernel.exit!
         | 
| 117 | 
            -
                  }
         | 
| 118 | 
            -
             | 
| 119 | 
            -
                  tmpwells.each_index { |i|
         | 
| 120 | 
            -
                    well = tmpwells[i]
         | 
| 121 | 
            -
                    outpath = "#{options['output-dir']}/#{well}.fq.xz"
         | 
| 122 | 
            -
                    pid = Kernel.fork {
         | 
| 123 | 
            -
                      left = ofs+bclen
         | 
| 124 | 
            -
                      right = clen > -1 ? -1 : ofs+bclen+clen-1
         | 
| 125 | 
            -
                      preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
         | 
| 126 | 
            -
            ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
         | 
| 127 | 
            -
            | sort -k 1 -r | cut -f 2- | uniq -f 2 \\
         | 
| 128 | 
            -
            | ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[2][#{left}..-1], "+", $F[1][#{left}..-1]].join("\\n"))' \\
         | 
| 129 | 
            -
            DEDUPandFORMAT
         | 
| 130 | 
            -
                      : <<"FORMAT"
         | 
| 131 | 
            -
            ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
         | 
| 132 | 
            -
            FORMAT
         | 
| 133 | 
            -
             | 
| 134 | 
            -
                      preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
         | 
| 135 | 
            -
             | 
| 136 | 
            -
                      if qtrim != '~' || mlen > 0
         | 
| 137 | 
            -
                        preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m+1>='+mlen.to_s+'}\''
         | 
| 138 | 
            -
                      end
         | 
| 139 | 
            -
             | 
| 140 | 
            -
                      exec preprocess+"| xz -z -c -e > #{outpath}"
         | 
| 141 | 
            -
                    }
         | 
| 142 | 
            -
                  }
         | 
| 143 | 
            -
             | 
| 144 | 
            -
                  Process.waitall
         | 
| 145 | 
            -
             | 
| 146 | 
            -
                end
         | 
| 147 | 
            -
             | 
| 148 | 
            -
              end
         | 
| 149 | 
            -
            end
         |