bio-gadget 0.4.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +10 -20
- data/.travis.yml +5 -0
- data/LICENSE +1 -1
- data/README.org +0 -21
- data/Rakefile +5 -1
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bio-gadget.gemspec +20 -14
- data/exe/bio-gadget +14 -0
- data/exe/fq1l +5 -0
- data/exe/rbg +1 -0
- data/exe/strt +5 -0
- data/ext/bio_gadget/bio_gadget.c +313 -0
- data/ext/bio_gadget/bio_gadget.h +8 -0
- data/ext/bio_gadget/extconf.rb +3 -0
- data/lib/bio/gadget.rb +171 -0
- data/lib/bio/gadget/fq1l.rb +457 -0
- data/lib/bio/gadget/strt.rb +605 -0
- data/lib/bio/gadget/strt/count.rb +53 -0
- data/lib/bio/gadget/strt/depth.rb +124 -0
- data/lib/bio/gadget/strt/prepare_transcriptome.rb +230 -0
- data/lib/bio/gadgets.rb +135 -0
- data/test/bio/gadget_test.rb +11 -0
- data/test/test_helper.rb +4 -0
- metadata +109 -40
- data/Gthorfile +0 -2
- data/bin/bio-gadget +0 -5
- data/lib/bio-gadget.rb +0 -44
- data/lib/bio-gadget/dedup.rb +0 -33
- data/lib/bio-gadget/demlt.rb +0 -149
- data/lib/bio-gadget/femrg.rb +0 -61
- data/lib/bio-gadget/fqxz.rb +0 -30
- data/lib/bio-gadget/peak.rb +0 -94
- data/lib/bio-gadget/qvstat.rb +0 -34
- data/lib/bio-gadget/rgt2mtx.rb +0 -60
- data/lib/bio-gadget/version.rb +0 -9
- data/lib/bio-gadget/wig5p.rb +0 -51
- data/lib/bio-gadget/wigchr.rb +0 -28
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 8aea0f635b0ee3678108495d6fcd104f1ec7293e
         | 
| 4 | 
            +
              data.tar.gz: 895f8bc130d316f85d376f85647c420b9152409e
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 26ac831558ebfe3e03a826d675ad492245b23556698661b143b83716a61e93d2071c6fb194067db221c4c87b6c4d8cb1079d96c688053ad3b804814ecd54253c
         | 
| 7 | 
            +
              data.tar.gz: 92d48f2548473d0ce0f842c97b30a623973e2a083de34719f43df46206ff6a5319ad13558a166052227ac1dcbbffa2dbefd2a2560ff79bb5a321466a5486b8d2
         | 
    
        data/.gitignore
    CHANGED
    
    | @@ -1,21 +1,11 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
            . | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
            doc/
         | 
| 11 | 
            -
            lib/bundler/man
         | 
| 12 | 
            -
            pkg
         | 
| 13 | 
            -
            rdoc
         | 
| 14 | 
            -
            spec/reports
         | 
| 15 | 
            -
            test/tmp
         | 
| 16 | 
            -
            test/version_tmp
         | 
| 17 | 
            -
            tmp
         | 
| 1 | 
            +
            /.bundle/
         | 
| 2 | 
            +
            /.yardoc
         | 
| 3 | 
            +
            /Gemfile.lock
         | 
| 4 | 
            +
            /_yardoc/
         | 
| 5 | 
            +
            /coverage/
         | 
| 6 | 
            +
            /doc/
         | 
| 7 | 
            +
            /pkg/
         | 
| 8 | 
            +
            /spec/reports/
         | 
| 9 | 
            +
            /tmp/
         | 
| 18 10 | 
             
            *~
         | 
| 19 | 
            -
             | 
| 20 | 
            -
            .fuse*
         | 
| 21 | 
            -
             | 
| 11 | 
            +
            .DS_Store
         | 
    
        data/.travis.yml
    ADDED
    
    
    
        data/LICENSE
    CHANGED
    
    
    
        data/README.org
    CHANGED
    
    | @@ -12,24 +12,3 @@ To check all commands and the usages in this package | |
| 12 12 |  | 
| 13 13 | 
             
            : bio-gadget help
         | 
| 14 14 |  | 
| 15 | 
            -
            Currently available commands are
         | 
| 16 | 
            -
             | 
| 17 | 
            -
            - dedup :: Deduplicate fastq (via STDIN)
         | 
| 18 | 
            -
            - demlt :: Demultiplex fastq by barcodes
         | 
| 19 | 
            -
            - femrg :: Extract and merge first exons
         | 
| 20 | 
            -
            - fqxz :: (Re)compression of *.fq(.gz|.bz2) files
         | 
| 21 | 
            -
            - pead :: Find peak within each exon from wigs by a mojority vote.
         | 
| 22 | 
            -
            - qvstat :: Statistics of quality values in *.qual file
         | 
| 23 | 
            -
            - rgt2mtx :: Convert cuffdiff read group tracking file into tab-separated matrix
         | 
| 24 | 
            -
            - wig5p :: Convert bam-format alignments into wig-format table
         | 
| 25 | 
            -
            - wigchr :: Extract wiggle track on specified chromosome
         | 
| 26 | 
            -
             | 
| 27 | 
            -
            * Contributing
         | 
| 28 | 
            -
             | 
| 29 | 
            -
            1. Fork it
         | 
| 30 | 
            -
            2. Create your feature branch (`git checkout -b my-new-feature`)
         | 
| 31 | 
            -
            3. (optional) Install gems (`bundle`)
         | 
| 32 | 
            -
            4. Add your feature
         | 
| 33 | 
            -
            5. Commit your changes (`git commit -am 'Added some feature'`)
         | 
| 34 | 
            -
            6. Push to the branch (`git push origin my-new-feature`)
         | 
| 35 | 
            -
            7. Create new Pull Request
         | 
    
        data/Rakefile
    CHANGED
    
    
    
        data/bin/console
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby

            # Development console: loads the gem and drops into an IRB session.

            require "bundler/setup"
            # Load this gem's own library (lib/bio/gadget.rb) rather than the
            # placeholder name "test" left over from the project template.
            require "bio/gadget"

            # You can add fixtures and/or initialization code here to make experimenting
            # with your gem easier. You can also use a different console, if you like.

            # (If you use this, don't forget to add pry to your Gemfile!)
            # require "pry"
            # Pry.start

            require "irb"
            IRB.start
         | 
    
        data/bin/setup
    ADDED
    
    
    
        data/bio-gadget.gemspec
    CHANGED
    
    | @@ -1,22 +1,28 @@ | |
| 1 | 
            -
            require File.expand_path('../lib/bio-gadget/version', __FILE__)
         | 
| 2 | 
            -
             | 
| 3 1 | 
             
            Gem::Specification.new do |gem|
         | 
| 4 | 
            -
              gem. | 
| 5 | 
            -
              gem. | 
| 6 | 
            -
              gem. | 
| 2 | 
            +
              gem.name          = 'bio-gadget'
         | 
| 3 | 
            +
              gem.version       = '0.5.0'
         | 
| 4 | 
            +
              gem.licenses      = ['MIT']
         | 
| 7 5 | 
             
              gem.summary       = gem.description
         | 
| 8 | 
            -
              gem. | 
| 6 | 
            +
              gem.description   = %q{Gadgets for bioinformatics}
         | 
| 7 | 
            +
              gem.authors       = ['Shintaro Katayama']
         | 
| 8 | 
            +
              gem.email         = ['shintaro.katayama@gmail.com']
         | 
| 9 | 
            +
              gem.homepage      = 'https://github.com/shka/ruby-bio-gadget'
         | 
| 10 | 
            +
              gem.extensions    = %w[ext/bio_gadget/extconf.rb]
         | 
| 9 11 |  | 
| 10 12 | 
             
              gem.files         = `git ls-files`.split($\)
         | 
| 11 | 
            -
              gem. | 
| 13 | 
            +
              gem.bindir        = 'exe'                                           
         | 
| 14 | 
            +
              gem.executables   = gem.files.grep(%r{^exe/}).map{ |f| File.basename(f) }
         | 
| 12 15 | 
             
              gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
         | 
| 13 | 
            -
              gem. | 
| 14 | 
            -
              gem.require_paths = ["lib"]
         | 
| 15 | 
            -
              gem.version       = Bio::Gadget::VERSION
         | 
| 16 | 
            +
              gem.require_paths = ['lib']
         | 
| 16 17 |  | 
| 17 | 
            -
              gem. | 
| 18 | 
            -
              gem. | 
| 19 | 
            -
              gem. | 
| 20 | 
            -
              gem. | 
| 18 | 
            +
              gem.add_development_dependency 'bundler', '~> 1.12'
         | 
| 19 | 
            +
              gem.add_development_dependency 'rake', '~> 10.0'
         | 
| 20 | 
            +
              gem.add_development_dependency 'rake-compiler'
         | 
| 21 | 
            +
              gem.add_development_dependency 'minitest', '~> 5.0'
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              gem.add_dependency 'bio'
         | 
| 24 | 
            +
              gem.add_dependency 'damerau-levenshtein'
         | 
| 21 25 | 
             
              gem.add_dependency 'mkfifo'
         | 
| 26 | 
            +
              gem.add_dependency 'parallel'
         | 
| 27 | 
            +
              gem.add_dependency 'thor' , '~> 0.19.3'
         | 
| 22 28 | 
             
            end
         | 
    
        data/exe/bio-gadget
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby

            # Command-line entry point: registers the fq1l and strt command groups
            # as subcommands of Bio::Gadgets and starts the command dispatcher.

            require 'bio/gadget'

            module Bio
              class Gadgets < Bio::Gadget

                # Help text fixed: "fq1l is the alias" (was "in"), matching the strt entry.
                register(Bio::Gadget::Fq1l, 'fq1l', 'fq1l [COMMAND]', 'Tools for oneline-fastq; fq1l is the alias')
                register(Bio::Gadget::Strt, 'strt', 'strt [COMMAND]', 'Tools for STRT RNAseq; strt is the alias')

              end
            end

            Bio::Gadgets.start
         | 
    
        data/exe/fq1l
    ADDED
    
    
    
        data/exe/rbg
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            exe/bio-gadget
         | 
    
        data/exe/strt
    ADDED
    
    
| @@ -0,0 +1,313 @@ | |
| 1 | 
            +
            #include <limits.h>
         | 
| 2 | 
            +
            #include <regex.h>
         | 
| 3 | 
            +
            #include <stdio.h>
         | 
| 4 | 
            +
            #include <string.h>
         | 
| 5 | 
            +
            #include "bio_gadget.h"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            /*
             * BioGadget.i2i(first, last)
             *
             * Filter from stdin to stdout. Every input line is split on tabs into
             * four fields (called acc, seq, sep and qual here). Characters
             * first..last (1-based, inclusive) of seq are appended directly to acc;
             * seq, sep and qual are echoed unchanged. qual still carries the line
             * terminator read by fgets(), so no newline is added on output.
             *
             * Raises ArgumentError unless 1 <= first <= last.
             * Lines with fewer than four tab-separated fields are skipped, because
             * strtok() returns NULL for the missing fields. When seq is shorter than
             * the requested range only the characters that exist are appended.
             * Lines longer than BUFSIZE-1 bytes are not handled specially.
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_i2i(VALUE vSelf, VALUE vFirst, VALUE vLast)
            {
              char line[BUFSIZE];
              char *acc;
              char *seq;
              char *sep;
              char *qual;
              const int first = NUM2INT(vFirst);
              const int last = NUM2INT(vLast);
              size_t head;
              size_t length;

              (void)vSelf;
              if (first < 1 || last < first)
                rb_raise(rb_eArgError, "invalid index range %d..%d", first, last);
              head = (size_t)first - 1;
              length = (size_t)(last - first) + 1;

              while (fgets(line, sizeof line, stdin) != NULL) {
                size_t seqlen;
                size_t n;

                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t");
                if (acc == NULL || seq == NULL || sep == NULL || qual == NULL)
                  continue;

                /* Clamp the slice to the part of seq that really exists. */
                seqlen = strlen(seq);
                n = head < seqlen ? seqlen - head : 0;
                if (n > length)
                  n = length;

                printf("%s%.*s\t%s\t%s\t%s",
                       acc, (int)n, n > 0 ? seq + head : "", seq, sep, qual);
              }

              return Qnil;
            }
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            /*
             * BioGadget.nr_deg
             *
             * Filter from stdin to stdout over tab-separated lines with four fields
             * (acc, seq, sep, qual). A line is dropped when its seq is shorter than
             * the seq of the last line that was printed AND the pattern "^<seq>"
             * matches that previous seq; every other line is printed and becomes
             * the new reference. seq is handed to regcomp() as a basic regular
             * expression, exactly as before, so regex metacharacters in seq act as
             * such.
             *
             * The compiled pattern is now freed only when regcomp() succeeded;
             * previously regfree() ran on every iteration, including those where
             * nothing had been compiled. A pattern that fails to compile is treated
             * as "no match", so the record is kept. Lines with fewer than four
             * fields are skipped.
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_nr_deg(VALUE vSelf)
            {
              char line[BUFSIZE];
              char pseq[BUFSIZE] = "";
              char regexs[BUFSIZE];
              char *acc;
              char *seq;
              char *sep;
              char *qual;
              unsigned long pseql = ULONG_MAX;  /* forces the regex path for the first line */
              regex_t regexc;

              (void)vSelf;

              while (fgets(line, sizeof line, stdin) != NULL) {
                unsigned long seql;
                int redundant = 0;

                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t");
                if (acc == NULL || seq == NULL || sep == NULL || qual == NULL)
                  continue;

                seql = strlen(seq);
                if (seql < pseql) {
                  snprintf(regexs, sizeof regexs, "^%s", seq);
                  if (regcomp(&regexc, regexs, REG_NOSUB) == 0) {
                    redundant = (regexec(&regexc, pseq, 0, NULL, 0) != REG_NOMATCH);
                    regfree(&regexc);
                  }
                }

                if (!redundant) {
                  printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
                  strcpy(pseq, seq);  /* seq lives inside line, so it fits in pseq */
                  pseql = seql;
                }
              }

              return Qnil;
            }
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            /*
             * BioGadget.nr_std
             *
             * Filter from stdin to stdout over tab-separated lines with four fields
             * (acc, seq, sep, qual). A line is printed only when its seq differs
             * from the seq of the last printed line, so runs of adjacent identical
             * sequences collapse to their first record.
             *
             * Lines with fewer than four fields are skipped instead of passing the
             * NULL returned by strtok() to strcmp()/printf().
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_nr_std(VALUE vSelf)
            {
              char line[BUFSIZE];
              char pseq[BUFSIZE] = "";
              char *acc;
              char *seq;
              char *sep;
              char *qual;

              (void)vSelf;

              while (fgets(line, sizeof line, stdin) != NULL) {
                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t");
                if (acc == NULL || seq == NULL || sep == NULL || qual == NULL)
                  continue;

                if (strcmp(pseq, seq) == 0)
                  continue;  /* same sequence as the previous printed record */

                printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
                strcpy(pseq, seq);  /* seq lives inside line, so it fits in pseq */
              }

              return Qnil;
            }
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            /*
             * BioGadget.slice(nth, slice)
             *
             * Copies every line of stdin whose zero-based index i satisfies
             * i % slice == nth-1 to stdout, i.e. the nth of every `slice` lines.
             *
             * Raises ArgumentError when slice < 1 (this used to be a modulo by
             * zero). An nth outside 1..slice selects nothing, as before.
             * A line longer than the read buffer arrives in several fgets() chunks;
             * it is now counted once and all of its chunks share one decision.
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_slice(VALUE vSelf, VALUE vNth, VALUE vSlice)
            {
              char line[BUFSIZE];
              const long nth = (long)NUM2INT(vNth) - 1;
              const int slice = NUM2INT(vSlice);
              unsigned long count = 0;
              int line_start = 1;  /* next chunk begins a new input line */
              int emit = 0;

              (void)vSelf;
              if (slice < 1)
                rb_raise(rb_eArgError, "slice must be positive (got %d)", slice);

              while (fgets(line, sizeof line, stdin) != NULL) {
                size_t len = strlen(line);

                if (line_start)
                  emit = ((long)(count % (unsigned long)slice) == nth);
                if (emit)
                  fputs(line, stdout);

                line_start = (len > 0 && line[len - 1] == '\n');
                if (line_start)
                  count += 1;
              }

              return Qnil;
            }
         | 
| 119 | 
            +
             | 
| 120 | 
            +
            /*
             * BioGadget.t3(cmd_in, len, min_len, path_out)
             *
             * Removes the last `len` characters from seq and qual of every
             * tab-separated record (acc, seq, sep, qual). Records are read from the
             * output of the shell command cmd_in, or from stdin when cmd_in is
             * nil/false, and written to path_out, or to stdout when path_out is
             * nil/false. A record is written only when the trimmed sequence is
             * non-empty and at least min_len characters long. Output lines end with
             * a single "\n".
             *
             * Fixes relative to the previous version:
             *  - a seq shorter than len no longer underflows the unsigned length
             *    and writes outside the line buffer;
             *  - popen()/fopen() failures raise IOError instead of passing NULL on;
             *  - the popen() stream is closed with pclose();
             *  - stdin/stdout are no longer closed when they were not opened here
             *    (stdout is flushed instead);
             *  - records with missing fields are skipped.
             *
             * Raises ArgumentError for a negative len or min_len.
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_t3(VALUE vSelf, VALUE vCmdIn, VALUE vLen, VALUE vMinLen, VALUE vPathOut)
            {
              char line[BUFSIZE];
              char *acc;
              char *seq;
              char *sep;
              char *qual;
              /* Convert all arguments first: these calls may raise, and nothing
                 must be open at that point. */
              const char *cmd_in = RTEST(vCmdIn) ? StringValueCStr(vCmdIn) : NULL;
              const char *path_out = RTEST(vPathOut) ? StringValueCStr(vPathOut) : NULL;
              const int len = NUM2INT(vLen);
              const int minlen = NUM2INT(vMinLen);
              FILE *fp_in = stdin;
              FILE *fp_out = stdout;

              (void)vSelf;
              if (len < 0 || minlen < 0)
                rb_raise(rb_eArgError, "len and minimum length must not be negative");

              if (cmd_in != NULL && (fp_in = popen(cmd_in, "r")) == NULL)
                rb_raise(rb_eIOError, "cannot run input command: %s", cmd_in);
              if (path_out != NULL && (fp_out = fopen(path_out, "w")) == NULL) {
                if (cmd_in != NULL)
                  pclose(fp_in);
                rb_raise(rb_eIOError, "cannot open output file: %s", path_out);
              }

              while (fgets(line, sizeof line, fp_in) != NULL) {
                size_t seqlen;
                size_t keep;

                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                if (acc == NULL || seq == NULL)
                  continue;

                seqlen = strlen(seq);
                if (seqlen <= (size_t)len)
                  continue;  /* nothing would be left after trimming */
                keep = seqlen - (size_t)len;
                if (keep < (size_t)minlen)
                  continue;

                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t");
                if (sep == NULL || qual == NULL)
                  continue;

                /* qual is the last field and still holds the line terminator. */
                qual[strcspn(qual, "\n")] = '\0';
                seq[keep] = '\0';
                if (strlen(qual) > keep)
                  qual[keep] = '\0';

                fprintf(fp_out, "%s\t%s\t%s\t%s\n", acc, seq, sep, qual);
              }

              if (cmd_in != NULL)
                pclose(fp_in);
              if (path_out != NULL) {
                if (fclose(fp_out) != 0)
                  rb_raise(rb_eIOError, "error while writing output file: %s", path_out);
              } else {
                fflush(stdout);
              }

              return Qnil;
            }
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            /*
             * BioGadget.t3q(lqs, min_len)
             *
             * Filter from stdin to stdout over tab-separated records (acc, seq,
             * sep, qual). qual is cut at the first character that occurs in the
             * string lqs, and seq is cut at the same position. A record is printed,
             * terminated by "\n", only when the retained length is non-zero and at
             * least min_len.
             *
             * Records with missing fields (including an empty quality field, for
             * which strtok() yields NULL) are skipped. seq is only shortened when
             * it is actually longer than the retained length, so a seq shorter than
             * qual no longer causes a write into a neighbouring field.
             *
             * Raises ArgumentError for a negative min_len.
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_t3q(VALUE vSelf, VALUE vLQs, VALUE vMinLen)
            {
              char line[BUFSIZE];
              char *acc;
              char *seq;
              char *sep;
              char *qual;
              const char *lqs = StringValueCStr(vLQs);
              const int minlen = NUM2INT(vMinLen);

              (void)vSelf;
              if (minlen < 0)
                rb_raise(rb_eArgError, "minimum length must not be negative (got %d)", minlen);

              while (fgets(line, sizeof line, stdin) != NULL) {
                size_t keep;

                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t\n");  /* also strips the line terminator */
                if (acc == NULL || seq == NULL || sep == NULL || qual == NULL)
                  continue;

                /* Length of the leading run of qual free of low-quality characters. */
                keep = strcspn(qual, lqs);
                if (keep == 0 || keep < (size_t)minlen)
                  continue;

                if (keep < strlen(qual)) {
                  qual[keep] = '\0';
                  if (keep < strlen(seq))
                    seq[keep] = '\0';
                }
                printf("%s\t%s\t%s\t%s\n", acc, seq, sep, qual);
              }

              return Qnil;
            }
         | 
| 195 | 
            +
             | 
| 196 | 
            +
            /*
             * BioGadget.t5(pattern, min_len)
             *
             * Filter from stdin to stdout over tab-separated records (acc, seq,
             * sep, qual). The POSIX extended regular expression "^<pattern>" is
             * matched against seq. On a match, the matched prefix is removed from
             * both seq and qual and the record is printed only if the remaining seq
             * has at least min_len characters. Records that do not match are
             * printed unchanged. qual keeps its line terminator, so no newline is
             * added on output.
             *
             * NOTE(review): only "^" is prepended, so in a pattern with a top-level
             * alternation ("A|B") just the first alternative is anchored; confirm
             * that callers group such patterns themselves.
             *
             * Raises ArgumentError when min_len is negative, when the pattern does
             * not fit into the pattern buffer, or when regcomp() rejects it (these
             * cases were previously unchecked). Records with missing fields, or
             * whose qual is shorter than the matched prefix, are skipped.
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_t5(VALUE vSelf, VALUE vPattern, VALUE vMinLen)
            {
              char regexs[BUFSIZE];
              char line[BUFSIZE];
              char errbuf[256];
              regex_t regexc;
              regmatch_t match[1];
              char *acc;
              char *seq;
              char *sep;
              char *qual;
              /* Convert arguments before compiling, so that a conversion error
                 cannot leak a compiled pattern. */
              const char *pattern = StringValueCStr(vPattern);
              const int minlen = NUM2INT(vMinLen);
              int written;
              int rc;

              (void)vSelf;
              if (minlen < 0)
                rb_raise(rb_eArgError, "minimum length must not be negative (got %d)", minlen);

              written = snprintf(regexs, sizeof regexs, "^%s", pattern);
              if (written < 0 || (size_t)written >= sizeof regexs)
                rb_raise(rb_eArgError, "pattern is too long");

              rc = regcomp(&regexc, regexs, REG_EXTENDED);
              if (rc != 0) {
                regerror(rc, &regexc, errbuf, sizeof errbuf);
                rb_raise(rb_eArgError, "invalid pattern: %s", errbuf);
              }

              while (fgets(line, sizeof line, stdin) != NULL) {
                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t");
                if (acc == NULL || seq == NULL || sep == NULL || qual == NULL)
                  continue;

                if (regexec(&regexc, seq, 1, match, 0) != REG_NOMATCH) {
                  size_t cut = (size_t)match[0].rm_eo;

                  if (cut > strlen(qual))
                    continue;  /* qual too short to trim by the same amount */
                  seq += cut;
                  qual += cut;
                  if (strlen(seq) >= (size_t)minlen)
                    printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
                } else {
                  printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
                }
              }

              regfree(&regexc);
              return Qnil;
            }
         | 
| 233 | 
            +
             | 
| 234 | 
            +
            /*
             * BioGadget.to(draw, skip)
             *
             * Copies lines from stdin to stdout in a repeating cycle: the first
             * `draw` lines of every draw+skip lines are written, the following
             * `skip` lines are dropped.
             *
             * Raises ArgumentError when draw or skip is negative or when both are
             * zero (the latter used to be a modulo by zero).
             * A line longer than the read buffer arrives in several fgets() chunks;
             * it is now counted once and all of its chunks share one decision.
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_to(VALUE vSelf, VALUE vDraw, VALUE vSkip)
            {
              char line[BUFSIZE];
              const int draw = NUM2INT(vDraw);
              const int skip = NUM2INT(vSkip);
              unsigned long period;
              unsigned long count = 0;
              int line_start = 1;  /* next chunk begins a new input line */
              int emit = 0;

              (void)vSelf;
              if (draw < 0 || skip < 0 || (draw == 0 && skip == 0))
                rb_raise(rb_eArgError, "invalid draw/skip combination %d/%d", draw, skip);
              period = (unsigned long)draw + (unsigned long)skip;

              while (fgets(line, sizeof line, stdin) != NULL) {
                size_t len = strlen(line);

                if (line_start)
                  emit = (count % period < (unsigned long)draw);
                if (emit)
                  fputs(line, stdout);

                line_start = (len > 0 && line[len - 1] == '\n');
                if (line_start)
                  count += 1;
              }

              return Qnil;
            }
         | 
| 258 | 
            +
             | 
| 259 | 
            +
            /*
             * BioGadget.u2i(first, last)
             *
             * Filter from stdin to stdout. Every input line is split on tabs into
             * four fields (acc, seq, sep, qual). Characters first..last (1-based,
             * inclusive) of seq are inserted into acc: acc is split on spaces and
             * ":<slice>" is appended to its first word; the second word, if any,
             * follows after a single space. Further words of acc are not emitted
             * (unchanged from the previous behaviour). seq, sep and qual are echoed
             * as read; qual keeps its line terminator.
             *
             * Raises ArgumentError unless 1 <= first <= last.
             * Lines with fewer than four tab-separated fields are skipped, because
             * strtok() returns NULL for the missing fields. When seq is shorter than
             * the requested range only the characters that exist are used.
             *
             * Returns nil.
             */
            VALUE bio_gadget_fq1l_u2i(VALUE vSelf, VALUE vFirst, VALUE vLast)
            {
              char line[BUFSIZE];
              char *acc;
              const char *acc1;
              const char *acc2;
              char *seq;
              char *sep;
              char *qual;
              const int first = NUM2INT(vFirst);
              const int last = NUM2INT(vLast);
              size_t head;
              size_t length;

              (void)vSelf;
              if (first < 1 || last < first)
                rb_raise(rb_eArgError, "invalid index range %d..%d", first, last);
              head = (size_t)first - 1;
              length = (size_t)(last - first) + 1;

              while (fgets(line, sizeof line, stdin) != NULL) {
                size_t seqlen;
                size_t n;
                const char *idx;

                acc = strtok(line, "\t");
                seq = strtok(NULL, "\t");
                sep = strtok(NULL, "\t");
                qual = strtok(NULL, "\t");
                if (acc == NULL || seq == NULL || sep == NULL || qual == NULL)
                  continue;

                /* Clamp the slice to the part of seq that really exists. */
                seqlen = strlen(seq);
                n = head < seqlen ? seqlen - head : 0;
                if (n > length)
                  n = length;
                idx = n > 0 ? seq + head : "";

                /* All tab fields are extracted, so strtok() can be reused on acc. */
                acc1 = strtok(acc, " ");
                acc2 = strtok(NULL, " ");
                if (acc1 == NULL)
                  acc1 = "";  /* acc consisted of spaces only */

                if (acc2 == NULL)
                  printf("%s:%.*s\t%s\t%s\t%s", acc1, (int)n, idx, seq, sep, qual);
                else
                  printf("%s:%.*s %s\t%s\t%s\t%s", acc1, (int)n, idx, acc2, seq, sep, qual);
              }

              return Qnil;
            }
         | 
| 296 | 
            +
             | 
| 297 | 
            +
             | 
| 298 | 
            +
            VALUE rb_mBio_Gadget;
         | 
| 299 | 
            +
             | 
| 300 | 
            +
            void
         | 
| 301 | 
            +
            Init_bio_gadget(void)
         | 
| 302 | 
            +
            {
         | 
| 303 | 
            +
              rb_mBio_Gadget = rb_define_module("BioGadget");
         | 
| 304 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "i2i", bio_gadget_fq1l_i2i, 2);
         | 
| 305 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "nr_deg", bio_gadget_fq1l_nr_deg, 0);
         | 
| 306 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "nr_std", bio_gadget_fq1l_nr_std, 0);
         | 
| 307 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "slice", bio_gadget_fq1l_slice, 2);
         | 
| 308 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "t3", bio_gadget_fq1l_t3, 4);
         | 
| 309 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "t3q", bio_gadget_fq1l_t3q, 2);
         | 
| 310 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "t5", bio_gadget_fq1l_t5, 2);
         | 
| 311 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "to", bio_gadget_fq1l_to, 2);
         | 
| 312 | 
            +
              rb_define_module_function(rb_mBio_Gadget, "u2i", bio_gadget_fq1l_u2i, 2);
         | 
| 313 | 
            +
            }
         |