genfrag 0.0.0.1 → 0.0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -2
- data/README.rdoc +304 -3
- data/lib/genfrag.rb +1 -1
- data/lib/genfrag/app.rb +3 -1
- data/lib/genfrag/app/command.rb +2 -2
- data/lib/genfrag/app/search_command.rb +54 -15
- data/lib/genfrag/app/search_command/match.rb +30 -40
- data/spec/genfrag/app/search_command/predictor_spec.rb +24 -0
- data/spec/genfrag/app_spec.rb +2 -3
- data/spec/genfrag_spec.rb +1 -1
- data/tasks/rdoc.rake +4 -3
- data/tasks/setup.rb +11 -19
- metadata +5 -4
    
        data/History.txt
    CHANGED
    
    
    
        data/README.rdoc
    CHANGED
    
    | @@ -1,12 +1,12 @@ | |
| 1 | 
            -
            Genfrag version 0.0.0. | 
| 1 | 
            +
            Genfrag version 0.0.0.2
         | 
| 2 2 | 
             
                by Pjotr Prins and Trevor Wennblom
         | 
| 3 3 | 
             
                http://genfrag.rubyforge.org
         | 
| 4 | 
            -
                 | 
| 4 | 
            +
                http://rubyforge.org/projects/genfrag/
         | 
| 5 5 |  | 
| 6 6 |  | 
| 7 7 | 
             
            == DESCRIPTION:
         | 
| 8 8 |  | 
| 9 | 
            -
            This is a development release.  | 
| 9 | 
            +
            This is a development release. Some features are functional at this time.
         | 
| 10 10 |  | 
| 11 11 | 
             
            Genfrag allows for rapid in-silico searching of fragments cut by
         | 
| 12 12 | 
             
            different restriction enzymes in large nucleic acid databases,
         | 
| @@ -33,6 +33,307 @@ This works | |
| 33 33 | 
             
            * sudo gem install genfrag
         | 
| 34 34 |  | 
| 35 35 |  | 
| 36 | 
            +
            == EXAMPLES:
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            === Index command
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            === Search command
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            ==== Example 1
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            Return all sequences from the file 'example.fasta.tdf' that are referenced by the index 'example.fasta_bstyi_msei_index.tdf'
         | 
| 45 | 
            +
             | 
| 46 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            Only one entry from output is shown below.
         | 
| 49 | 
            +
             | 
| 50 | 
            +
              ---
         | 
| 51 | 
            +
              - sequence
         | 
| 52 | 
            +
                gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctgctaatcgtgaatgcgatccagatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttttaattggggtgcatttacatgggactctcttaaaaagaatgagtatctcggagaatatactggagaactgatcactcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
         | 
| 53 | 
            +
              - sequence size
         | 
| 54 | 
            +
                380
         | 
| 55 | 
            +
              - fragment - primary strand
         | 
| 56 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 57 | 
            +
              - fragment - complement strand
         | 
| 58 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 59 | 
            +
              - fragment with adapters - primary strand
         | 
| 60 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 61 | 
            +
              - fragment with adapters - complement strand
         | 
| 62 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            The first cut is made using RE5 (restriction enzyme with first match in reference to 5') BstYI. BstYI has the cut pattern
         | 
| 65 | 
            +
              5' - r^g a t c y - 3'
         | 
| 66 | 
            +
              3' - y c t a g^r - 5'
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            The first 96bp of the sequence are removed when BstYI makes its cut, starting the strand fragment. The primary strand
         | 
| 69 | 
            +
            fragment begins with 'gatctttgtc'. Four bases are lost from the complement strand due to the cut pattern of BstYI; therefore, 'gatc'
         | 
| 70 | 
            +
            from the primary strand has no hydrogen bonds with the complement strand. These missing nucleotides are represented with a period
         | 
| 71 | 
            +
            ('.').
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            The second cut is made using RE3 (restriction enzyme with first match in reference to 3') MseI. MseI has the cut pattern
         | 
| 74 | 
            +
              5' - t^t a a - 3'
         | 
| 75 | 
            +
              3' - a a t^t - 5'
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            This leaves a final fragment of 136bp. The way MseI cuts will leave the complement strand two nucleotides longer than the primary
         | 
| 78 | 
            +
            strand. This is represented on the primary strand with two periods.
         | 
| 79 | 
            +
             | 
| 80 | 
            +
             | 
| 81 | 
            +
            ==== Example 2
         | 
| 82 | 
            +
             | 
| 83 | 
            +
            This demonstrates using an adapter.
         | 
| 84 | 
            +
             | 
| 85 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v --adapter5 t
         | 
| 86 | 
            +
             | 
| 87 | 
            +
              ---
         | 
| 88 | 
            +
              - sequence
         | 
| 89 | 
            +
                gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctgctaatcgtgaatgcgatccagatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttttaattggggtgcatttacatgggactctcttaaaaagaatgagtatctcggagaatatactggagaactgatcactcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
         | 
| 90 | 
            +
              - sequence size
         | 
| 91 | 
            +
                380
         | 
| 92 | 
            +
              - fragment - primary strand
         | 
| 93 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 94 | 
            +
              - fragment - complement strand
         | 
| 95 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 96 | 
            +
              - fragment with adapters - primary strand
         | 
| 97 | 
            +
                +++++ttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 98 | 
            +
              - fragment with adapters - complement strand
         | 
| 99 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            The adapter can be considered an extension to the restriction enzyme. When searching for a specified adapter, anything that
         | 
| 102 | 
            +
            the restriction enzyme would need to make its match is first ignored before comparing the adapter to the sequence.
         | 
| 103 | 
            +
             | 
| 104 | 
            +
            It was shown previously that BstYI has the cut pattern
         | 
| 105 | 
            +
              5' - r^g a t c y - 3'
         | 
| 106 | 
            +
              3' - y c t a g^r - 5'
         | 
| 107 | 
            +
             | 
| 108 | 
            +
            The 'y' symbol indicates a nucleotide of 't' or 'c'.[Footnote 1] Adapter5 is defined as the nucleotide 't' in this example.
         | 
| 109 | 
            +
            5 nucleotides from the restriction enzyme are matched ('gatct') as indicated by the plus ('+') symbols, then the 1 nucleotide
         | 
| 110 | 
            +
            from the adapter is matched ('t').
         | 
| 111 | 
            +
             | 
| 112 | 
            +
            Note that in this current version of Genfrag only the primary strand has the plus symbols applied. In a future version
         | 
| 113 | 
            +
            the complement strand would have a plus symbol in place of the initial 'a'.
         | 
| 114 | 
            +
             | 
| 115 | 
            +
             | 
| 116 | 
            +
            ==== Example 3
         | 
| 117 | 
            +
             | 
| 118 | 
            +
            The previous example with a longer adapter.
         | 
| 119 | 
            +
             | 
| 120 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v --adapter5 ttgtcg
         | 
| 121 | 
            +
             | 
| 122 | 
            +
              ---
         | 
| 123 | 
            +
              - sequence
         | 
| 124 | 
            +
                gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctgctaatcgtgaatgcgatccagatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttttaattggggtgcatttacatgggactctcttaaaaagaatgagtatctcggagaatatactggagaactgatcactcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
         | 
| 125 | 
            +
              - sequence size
         | 
| 126 | 
            +
                380
         | 
| 127 | 
            +
              - fragment - primary strand
         | 
| 128 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 129 | 
            +
              - fragment - complement strand
         | 
| 130 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 131 | 
            +
              - fragment with adapters - primary strand
         | 
| 132 | 
            +
                +++++ttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 133 | 
            +
              - fragment with adapters - complement strand
         | 
| 134 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 135 | 
            +
              
         | 
| 136 | 
            +
             | 
| 137 | 
            +
            ==== Example 4
         | 
| 138 | 
            +
             | 
| 139 | 
            +
            This demonstrates Adapter3.
         | 
| 140 | 
            +
             | 
| 141 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v --adapter3 aacca
         | 
| 142 | 
            +
             | 
| 143 | 
            +
              ---
         | 
| 144 | 
            +
              - sequence
         | 
| 145 | 
            +
                gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctgctaatcgtgaatgcgatccagatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttttaattggggtgcatttacatgggactctcttaaaaagaatgagtatctcggagaatatactggagaactgatcactcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
         | 
| 146 | 
            +
              - sequence size
         | 
| 147 | 
            +
                380
         | 
| 148 | 
            +
              - fragment - primary strand
         | 
| 149 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 150 | 
            +
              - fragment - complement strand
         | 
| 151 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 152 | 
            +
              - fragment with adapters - primary strand
         | 
| 153 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggtt+..
         | 
| 154 | 
            +
              - fragment with adapters - complement strand
         | 
| 155 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 156 | 
            +
             | 
| 157 | 
            +
            It was shown previously that MseI has the cut pattern
         | 
| 158 | 
            +
              5' - t^t a a - 3'
         | 
| 159 | 
            +
              3' - a a t^t - 5'
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            Looking at the primary strand fragment, the last remaining nucleotide that was also used by the restriction enzyme to
         | 
| 162 | 
            +
            match is 't'. When the Adapter3 filter is made, the restriction enzyme match will replace the 't' with a plus symbol.
         | 
| 163 | 
            +
             | 
| 164 | 
            +
            An end of the primary strand is
         | 
| 165 | 
            +
              5' - atggattcatggtt+.. - 3'
         | 
| 166 | 
            +
             | 
| 167 | 
            +
            If that end is reversed and complemented (ignoring the plus symbol), it begins with 'aacca', the five nucleotides that match Adapter3.
         | 
| 168 | 
            +
             | 
| 169 | 
            +
            Note that in this current version of Genfrag only the primary strand has the plus symbols applied. In a future version
         | 
| 170 | 
            +
            the complement strand would have a plus symbol in place of the final 'aat'.
         | 
| 171 | 
            +
             | 
| 172 | 
            +
             | 
| 173 | 
            +
            ==== Example 5
         | 
| 174 | 
            +
             | 
| 175 | 
            +
            The previous example with Adapter3 using alternate notation.
         | 
| 176 | 
            +
             | 
| 177 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v --adapter3 _tggtt
         | 
| 178 | 
            +
             | 
| 179 | 
            +
              ---
         | 
| 180 | 
            +
              - sequence
         | 
| 181 | 
            +
                gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctgctaatcgtgaatgcgatccagatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttttaattggggtgcatttacatgggactctcttaaaaagaatgagtatctcggagaatatactggagaactgatcactcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
         | 
| 182 | 
            +
              - sequence size
         | 
| 183 | 
            +
                380
         | 
| 184 | 
            +
              - fragment - primary strand
         | 
| 185 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 186 | 
            +
              - fragment - complement strand
         | 
| 187 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 188 | 
            +
              - fragment with adapters - primary strand
         | 
| 189 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggtt+..
         | 
| 190 | 
            +
              - fragment with adapters - complement strand
         | 
| 191 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 192 | 
            +
             | 
| 193 | 
            +
            If Adapter3 is supplied with an initial underscore ('_') in the sequence, the sequence is matched directly without a
         | 
| 194 | 
            +
            reverse complement.
         | 
| 195 | 
            +
             | 
| 196 | 
            +
             | 
| 197 | 
            +
            ==== Example 6
         | 
| 198 | 
            +
             | 
| 199 | 
            +
            Using two adapters together.
         | 
| 200 | 
            +
             | 
| 201 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v --adapter5 ttgtcg --adapter3 aacca
         | 
| 202 | 
            +
             | 
| 203 | 
            +
              ---
         | 
| 204 | 
            +
              - fragment with adapters - primary strand
         | 
| 205 | 
            +
                +++++ttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggtt+..
         | 
| 206 | 
            +
              - fragment with adapters - complement strand
         | 
| 207 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 208 | 
            +
             | 
| 209 | 
            +
            Note that in this current version of Genfrag only the primary strand has the plus symbols applied. In a future version
         | 
| 210 | 
            +
            the complement strand would have a plus symbol in place of the initial 'a' and the final 'aat'.
         | 
| 211 | 
            +
             | 
| 212 | 
            +
             | 
| 213 | 
            +
            ==== Example 7
         | 
| 214 | 
            +
             | 
| 215 | 
            +
            Using an adapter and specifying an adapter sequence.
         | 
| 216 | 
            +
             | 
| 217 | 
            +
            You may have particular adapter sequences that you have used. These can be specified with 'adapter5-sequence' or 'adapter3-sequence'.
         | 
| 218 | 
            +
            Note that 'adapter3-sequence' will be reversed when applied to the primary strand. Any change to the sequence caused
         | 
| 219 | 
            +
            by the adapter sequence will be noted with an equals ('=') symbol.
         | 
| 220 | 
            +
             | 
| 221 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -v --adapter5 ttgtcg --adapter3 aacca --adapter5-sequence NXNXNXNX --adapter3-sequence NZNZNZNZ
         | 
| 222 | 
            +
             | 
| 223 | 
            +
              ---
         | 
| 224 | 
            +
              - sequence
         | 
| 225 | 
            +
                gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctgctaatcgtgaatgcgatccagatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttttaattggggtgcatttacatgggactctcttaaaaagaatgagtatctcggagaatatactggagaactgatcactcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
         | 
| 226 | 
            +
              - sequence size
         | 
| 227 | 
            +
                380
         | 
| 228 | 
            +
              - fragment - primary strand
         | 
| 229 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 230 | 
            +
              - fragment - complement strand
         | 
| 231 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 232 | 
            +
              - fragment with adapters - primary strand
         | 
| 233 | 
            +
                NXNXNXNXttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttZNZNZNZN
         | 
| 234 | 
            +
              - fragment with adapters - complement strand
         | 
| 235 | 
            +
                ===....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat=====
         | 
| 236 | 
            +
             | 
| 237 | 
            +
             | 
| 238 | 
            +
            ==== Example 8
         | 
| 239 | 
            +
             | 
| 240 | 
            +
            The previous example but with short adapter sequences.
         | 
| 241 | 
            +
             | 
| 242 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 ttgtcg --adapter3 aacca --adapter5-sequence X --adapter3-sequence Z
         | 
| 243 | 
            +
             | 
| 244 | 
            +
              ---
         | 
| 245 | 
            +
              - fragment with adapters - primary strand
         | 
| 246 | 
            +
                ====XttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttZ==
         | 
| 247 | 
            +
              - fragment with adapters - complement strand
         | 
| 248 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 249 | 
            +
             | 
| 250 | 
            +
             | 
| 251 | 
            +
            ==== Example 9
         | 
| 252 | 
            +
             | 
| 253 | 
            +
            Using an adapter and specifying an adapter size. The sizes can be specified with 'adapter5-size' or 'adapter3-size'.
         | 
| 254 | 
            +
             | 
| 255 | 
            +
            You may know the size of your adapter, but not have a particular sequence in mind. The unknown nucleotides will be represented
         | 
| 256 | 
            +
            with a question mark character ('?').
         | 
| 257 | 
            +
             | 
| 258 | 
            +
                genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 ttgtcg --adapter3 aacca --adapter5-size 8 --adapter3-size 6
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                ---
         | 
| 261 | 
            +
                - fragment with adapters - primary strand
         | 
| 262 | 
            +
                  ????????ttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggtt??????
         | 
| 263 | 
            +
                - fragment with adapters - complement strand
         | 
| 264 | 
            +
                  ===....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat===
         | 
| 265 | 
            +
             | 
| 266 | 
            +
             | 
| 267 | 
            +
            ==== Example 10
         | 
| 268 | 
            +
             | 
| 269 | 
            +
            The previous example but with short adapter sizes.
         | 
| 270 | 
            +
             | 
| 271 | 
            +
                genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 ttgtcg --adapter3 aacca --adapter5-size 1 --adapter3-size 2
         | 
| 272 | 
            +
             | 
| 273 | 
            +
              ---
         | 
| 274 | 
            +
              - fragment with adapters - primary strand
         | 
| 275 | 
            +
                ====?ttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggtt??=
         | 
| 276 | 
            +
              - fragment with adapters - complement strand
         | 
| 277 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 278 | 
            +
             | 
| 279 | 
            +
             | 
| 280 | 
            +
            ==== Example 11
         | 
| 281 | 
            +
             | 
| 282 | 
            +
            An exact fragmentation length can be searched for with the 'seqsize' argument.
         | 
| 283 | 
            +
             | 
| 284 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -s 136
         | 
| 285 | 
            +
             | 
| 286 | 
            +
              ---
         | 
| 287 | 
            +
              - fragment with adapters - primary strand
         | 
| 288 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 289 | 
            +
              - fragment with adapters - complement strand
         | 
| 290 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 291 | 
            +
             | 
| 292 | 
            +
             | 
| 293 | 
            +
            ==== Example 12
         | 
| 294 | 
            +
             | 
| 295 | 
            +
            The previous example with multiple fragment result sizes accepted. Different sizes can be separated by commas.
         | 
| 296 | 
            +
             | 
| 297 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -s 136,166
         | 
| 298 | 
            +
             | 
| 299 | 
            +
              ---
         | 
| 300 | 
            +
              - fragment with adapters - primary strand
         | 
| 301 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgttcatggatggggtgcatttacatgggactctct..
         | 
| 302 | 
            +
              - fragment with adapters - complement strand
         | 
| 303 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttaaggaggaagtttggttatttttctaagagtaacctttcagactacaagtacctaccccacgtaaatgtaccctgagagaat
         | 
| 304 | 
            +
              ---
         | 
| 305 | 
            +
              - fragment with adapters - primary strand
         | 
| 306 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 307 | 
            +
              - fragment with adapters - complement strand
         | 
| 308 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 309 | 
            +
             | 
| 310 | 
            +
             | 
| 311 | 
            +
            ==== Example 13
         | 
| 312 | 
            +
             | 
| 313 | 
            +
            The previous example with a sequence size range accepted. Since you may only have an approximate idea of the fragment size
         | 
| 314 | 
            +
            you are expecting, you may give a range by using a plus symbol ('+') to indicate a tolerance to a size.
         | 
| 315 | 
            +
             | 
| 316 | 
            +
              genfrag search -f example.fasta --re5 BstYI --re3 MseI -s 144+10,166
         | 
| 317 | 
            +
              
         | 
| 318 | 
            +
              ---
         | 
| 319 | 
            +
              - fragment with adapters - primary strand
         | 
| 320 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgttcatggatggggtgcatttacatgggactctct..
         | 
| 321 | 
            +
              - fragment with adapters - complement strand
         | 
| 322 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttaaggaggaagtttggttatttttctaagagtaacctttcagactacaagtacctaccccacgtaaatgtaccctgagagaat
         | 
| 323 | 
            +
              ---
         | 
| 324 | 
            +
              - fragment with adapters - primary strand
         | 
| 325 | 
            +
                gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacaccagtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggttt..
         | 
| 326 | 
            +
              - fragment with adapters - complement strand
         | 
| 327 | 
            +
                ....aaacagcctcaacaggagaatcgacacctctaccgtgagaaccactctgtggtcacgtttaggttacgttcttgtacgttattatttttctaagagtaacctttcagactacaagtacctaagtaccaaaat
         | 
| 328 | 
            +
             | 
| 329 | 
            +
             | 
| 330 | 
            +
            ==== Footnotes
         | 
| 331 | 
            +
            [1]: 
         | 
| 332 | 
            +
              require 'rubygems'
         | 
| 333 | 
            +
              require 'bio'
         | 
| 334 | 
            +
              puts Bio::Sequence::NA.new('y').to_re   # => (?-mix:[tcy])
         | 
| 335 | 
            +
             | 
| 336 | 
            +
             | 
| 36 337 | 
             
            == LICENSE:
         | 
| 37 338 |  | 
| 38 339 | 
             
            Copyright (c) 2009 Pjotr Prins and Trevor Wennblom
         | 
    
        data/lib/genfrag.rb
    CHANGED
    
    
    
        data/lib/genfrag/app.rb
    CHANGED
    
    
    
        data/lib/genfrag/app/command.rb
    CHANGED
    
    | @@ -89,9 +89,9 @@ class Command | |
| 89 89 | 
             
                      lambda { |value|
         | 
| 90 90 | 
             
                        options[:filefasta] = value
         | 
| 91 91 | 
             
                      }],
         | 
| 92 | 
            -
                  : | 
| 92 | 
            +
                  :seqsize => ['-s', '--seqsize SIZE', Array, '',
         | 
| 93 93 | 
             
                      lambda { |value|
         | 
| 94 | 
            -
                        options[: | 
| 94 | 
            +
                        options[:seqsize] = value
         | 
| 95 95 | 
             
                      }],
         | 
| 96 96 |  | 
| 97 97 | 
             
                  :adapter5 => ['-y', '--adapter5 ADAPTER', String, '',
         | 
| @@ -45,7 +45,7 @@ class SearchCommand < Command | |
| 45 45 | 
             
                ary = [:verbose, :quiet, :tracktime, :indir, :outdir, :sqlite, :re5, :re3,
         | 
| 46 46 | 
             
                  :filelookup, :filefasta, :fileadapters, :adapter5_sequence, :adapter3_sequence,
         | 
| 47 47 | 
             
                  :adapter5_size, :adapter3_size, :named_adapter5, :named_adapter3,
         | 
| 48 | 
            -
                  :adapter5, :adapter3
         | 
| 48 | 
            +
                  :adapter5, :adapter3, :seqsize
         | 
| 49 49 | 
             
                ]
         | 
| 50 50 | 
             
                ary.each { |a| opts.on(*std_opts[a]) }
         | 
| 51 51 |  | 
| @@ -54,10 +54,11 @@ class SearchCommand < Command | |
| 54 54 | 
             
                opts.on( '-h', '--help', 'show this message' ) { @out.puts opts; exit }
         | 
| 55 55 |  | 
| 56 56 | 
             
                opts.separator '  Examples:'
         | 
| 57 | 
            +
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI -v'
         | 
| 57 58 | 
             
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 tt'
         | 
| 58 59 | 
             
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --add 26 --adapter5 ct --adapter3 aa --size 190,215'
         | 
| 59 60 | 
             
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-size 11 --adapter5 tt --adapter3-size 15 --size 168'
         | 
| 60 | 
            -
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-sequence GACTGCGTAGTGATC --adapter5 tt -- | 
| 61 | 
            +
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-sequence GACTGCGTAGTGATC --adapter5 tt --size 168'
         | 
| 61 62 | 
             
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-size 11 --adapter5 ct --adapter3-size 15 --adapter3 aa --size 190,215'
         | 
| 62 63 | 
             
                opts.separator '    genfrag search -f example.fasta --re5 BstYI --re3 MseI --add 26 --named-adapter5 BstYI-T4 --named-adapter3 MseI-21 --size 190,215'
         | 
| 63 64 | 
             
                opts
         | 
| @@ -122,7 +123,7 @@ class SearchCommand < Command | |
| 122 123 | 
             
                @ops.sqlite         ||= false
         | 
| 123 124 | 
             
                @ops.re5            ||= nil
         | 
| 124 125 | 
             
                @ops.re3            ||= nil
         | 
| 125 | 
            -
                @ops. | 
| 126 | 
            +
                @ops.seqsize        ||= [0]
         | 
| 126 127 | 
             
                @ops.adapter5_size  ||= nil
         | 
| 127 128 | 
             
                @ops.adapter3_size  ||= nil
         | 
| 128 129 | 
             
                @ops.adapter5       ||= nil
         | 
| @@ -158,6 +159,37 @@ END | |
| 158 159 | 
             
                  raise ArgumentError, "Must specify --fileadapters when using a named_adapter"
         | 
| 159 160 | 
             
                end
         | 
| 160 161 |  | 
| 162 | 
            +
                if @ops.adapter5_size and @ops.adapter5_sequence
         | 
| 163 | 
            +
                  raise ArgumentError, '--adapter5-sequence and --adapter5-size both supplied, may only have one'
         | 
| 164 | 
            +
                end
         | 
| 165 | 
            +
                if @ops.adapter3_size and @ops.adapter3_sequence
         | 
| 166 | 
            +
                  raise ArgumentError, '--adapter3-sequence and --adapter3-size both supplied, may only have one'
         | 
| 167 | 
            +
                end
         | 
| 168 | 
            +
                
         | 
| 169 | 
            +
                if (@ops.adapter5_sequence or @ops.adapter5_size) and !@ops.adapter5
         | 
| 170 | 
            +
                  raise ArgumentError, '--adapter5 missing in presence of --adapter5-sequence or --adapter5-size'
         | 
| 171 | 
            +
                end
         | 
| 172 | 
            +
                if (@ops.adapter3_sequence or @ops.adapter3_size) and !@ops.adapter3
         | 
| 173 | 
            +
                  raise ArgumentError, '--adapter3 missing in presence of --adapter3-sequence or --adapter3-size'
         | 
| 174 | 
            +
                end
         | 
| 175 | 
            +
                
         | 
| 176 | 
            +
                if [@ops.seqsize].flatten == [0] or [@ops.seqsize].flatten == [nil] or [@ops.seqsize].flatten == ['0']
         | 
| 177 | 
            +
                  @ops.seqsize = nil
         | 
| 178 | 
            +
                else
         | 
| 179 | 
            +
                  h = {:ranges => [], :ints => []}
         | 
| 180 | 
            +
                  @ops.seqsize.flatten.each do |s|
         | 
| 181 | 
            +
                    if s.include?('+')
         | 
| 182 | 
            +
                      a = s.split('+')
         | 
| 183 | 
            +
                      c = a[0].to_i
         | 
| 184 | 
            +
                      r = a[1].to_i
         | 
| 185 | 
            +
                      h[:ranges] << (c-r..c+r)
         | 
| 186 | 
            +
                    else
         | 
| 187 | 
            +
                      h[:ints] << s.to_i
         | 
| 188 | 
            +
                    end
         | 
| 189 | 
            +
                  end
         | 
| 190 | 
            +
                  @ops.seqsize = h
         | 
| 191 | 
            +
                end
         | 
| 192 | 
            +
                
         | 
| 161 193 | 
             
                if processed_adapters
         | 
| 162 194 | 
             
                  adapter_setup_1(processed_adapters)
         | 
| 163 195 | 
             
                else
         | 
| @@ -171,13 +203,6 @@ END | |
| 171 203 | 
             
                  seq3 = Bio::Sequence::NA.new(@adapters[:adapter3_specificity][1..-1]).downcase
         | 
| 172 204 | 
             
                  @adapters[:adapter3_specificity] = seq3.complement.to_s
         | 
| 173 205 | 
             
                end
         | 
| 174 | 
            -
                
         | 
| 175 | 
            -
                if @ops.adapter5_size and @ops.adapter5_sequence and (@ops.adapter5_size != @adapters[:adapter5_size])
         | 
| 176 | 
            -
                  raise ArgumentError, "--adapter5-sequence and --adapter5-size both supplied"
         | 
| 177 | 
            -
                end
         | 
| 178 | 
            -
                if @ops.adapter3_size and @ops.adapter3_sequence and (@ops.adapter3_size != @adapters[:adapter3_size])
         | 
| 179 | 
            -
                  raise ArgumentError, "--adapter3-sequence and --adapter3-size both supplied"
         | 
| 180 | 
            -
                end
         | 
| 181 206 |  | 
| 182 207 | 
             
                @trim = calculate_trim_for_nucleotides(@re5_ds, @re3_ds)
         | 
| 183 208 |  | 
| @@ -185,11 +210,10 @@ END | |
| 185 210 | 
             
              # Start calculations
         | 
| 186 211 | 
             
              #
         | 
| 187 212 | 
             
                left_trim, right_trim = calculate_left_and_right_trims(@trim)
         | 
| 188 | 
            -
             | 
| 189 | 
            -
                matching_fragments = find_matching_fragments(@sizes, left_trim, right_trim)
         | 
| 213 | 
            +
                
         | 
| 190 214 | 
             
                results = []
         | 
| 191 215 |  | 
| 192 | 
            -
                 | 
| 216 | 
            +
                @sizes.values.each do |hit|
         | 
| 193 217 | 
             
                  hit.each do |entry|
         | 
| 194 218 | 
             
                    seq = @sequences[entry[:fasta_id]][:sequence]
         | 
| 195 219 | 
             
                    raw_frag = seq[entry[:offset]..(entry[:offset]+entry[:raw_size]-1)]
         | 
| @@ -199,7 +223,7 @@ END | |
| 199 223 | 
             
                    p = primary_frag.dup
         | 
| 200 224 | 
             
                    c = complement_frag.dup
         | 
| 201 225 |  | 
| 202 | 
            -
                  # note the next two if-statements at this  | 
| 226 | 
            +
                  # note the next two if-statements at this level chain together with 'p' and 'c'
         | 
| 203 227 | 
             
                    if @adapters[:adapter5_specificity]
         | 
| 204 228 | 
             
                      p, c = matches_adapter(5, p, c, raw_frag, @trim)
         | 
| 205 229 | 
             
                      next if !p  # next if returned false -- no match
         | 
| @@ -212,8 +236,23 @@ END | |
| 212 236 |  | 
| 213 237 | 
             
                    primary_frag_with_adapters = p
         | 
| 214 238 | 
             
                    complement_frag_with_adapters = c
         | 
| 239 | 
            +
                            
         | 
| 240 | 
            +
                    if @ops.seqsize
         | 
| 241 | 
            +
                      primary_frag_with_adapters_size = primary_frag_with_adapters.size
         | 
| 242 | 
            +
                      good = false
         | 
| 243 | 
            +
                      if @ops.seqsize[:ints].include?(primary_frag_with_adapters_size)
         | 
| 244 | 
            +
                        good = true
         | 
| 245 | 
            +
                      else
         | 
| 246 | 
            +
                        @ops.seqsize[:ranges].each do |range|
         | 
| 247 | 
            +
                          good = true if range.include?(primary_frag_with_adapters_size)
         | 
| 248 | 
            +
                          break if good
         | 
| 249 | 
            +
                        end
         | 
| 250 | 
            +
                      end
         | 
| 251 | 
            +
                    # next if fragment size not in range
         | 
| 252 | 
            +
                      next if !good
         | 
| 253 | 
            +
                    end
         | 
| 215 254 |  | 
| 216 | 
            -
                    results << {:raw_frag => raw_frag, :primary_frag => primary_frag, :primary_frag_with_adapters => primary_frag_with_adapters, :complement_frag => complement_frag, :complement_frag_with_adapters => complement_frag_with_adapters, :entry => entry, :seq => seq} | 
| 255 | 
            +
                    results << {:raw_frag => raw_frag, :primary_frag => primary_frag, :primary_frag_with_adapters => primary_frag_with_adapters, :complement_frag => complement_frag, :complement_frag_with_adapters => complement_frag_with_adapters, :entry => entry, :seq => seq}
         | 
| 217 256 | 
             
                  end
         | 
| 218 257 | 
             
                end
         | 
| 219 258 |  | 
| @@ -26,6 +26,7 @@ class SearchCommand < Command | |
| 26 26 | 
             
                  primary_frag =~ /(\.*)/
         | 
| 27 27 | 
             
                  dots_on_primary = $1.size
         | 
| 28 28 | 
             
                  lead_in = tail.size + dots_on_primary
         | 
| 29 | 
            +
             | 
| 29 30 | 
             
                  return false if primary_frag[ lead_in .. -1 ].tr('.', '') !~ /^#{adapter_specificity}/i
         | 
| 30 31 |  | 
| 31 32 | 
             
                elsif five_or_three == 3
         | 
| @@ -54,70 +55,41 @@ class SearchCommand < Command | |
| 54 55 | 
             
                  raise "First argument to matches_adapter must be a '5' or a '3'. Received: #{five_or_three.inspect}"
         | 
| 55 56 | 
             
                end
         | 
| 56 57 |  | 
| 57 | 
            -
                #return false if raw_frag[ [trim_primary, trim_complement].max .. -1 ] !~ /^#{adapter_specificity}/i
         | 
| 58 | 
            -
             | 
| 59 | 
            -
                #overhang = [trim_primary, trim_complement].max - [trim_primary, trim_complement].min
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                #lead_in = overhang
         | 
| 62 | 
            -
             | 
| 63 58 | 
             
                if adapter_sequence
         | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
              #      if lead_in >= adapter_sequence.size
         | 
| 67 | 
            -
              #        # need to preserve dots on primary string
         | 
| 68 | 
            -
              #        new_primary_frag = ('.' * (lead_in - adapter_sequence.size)) + adapter_sequence + primary_frag[ lead_in .. -1 ]
         | 
| 69 | 
            -
              #        new_complement_frag = complement_frag
         | 
| 70 | 
            -
              #      else
         | 
| 71 | 
            -
              #        # need to add dots to beginning of complement string
         | 
| 72 | 
            -
              #        new_primary_frag = adapter_sequence + primary_frag[ lead_in .. -1 ]
         | 
| 73 | 
            -
              #        new_complement_frag = ('.' * (adapter_sequence.size - lead_in) ) + complement_frag
         | 
| 74 | 
            -
              #      end
         | 
| 75 | 
            -
             | 
| 59 | 
            +
                # adapter-sequence supplied
         | 
| 60 | 
            +
                  new_primary_frag, new_complement_frag = preserve_or_add(adapter_sequence.size, lead_in, adapter_sequence, primary_frag, complement_frag)
         | 
| 76 61 | 
             
                elsif adapter_size
         | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
              #      # only the size and the specificity of the adapter has been provided
         | 
| 80 | 
            -
              #      size_of_specificity = adapter_specificity.size
         | 
| 81 | 
            -
              #      size_of_sequence    = adapter_size - size_of_specificity
         | 
| 82 | 
            -
              #      if lead_in >= size_of_sequence
         | 
| 83 | 
            -
              #        # need to preserve dots on primary string
         | 
| 84 | 
            -
              #        new_primary_frag = primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
         | 
| 85 | 
            -
              #        new_complement_frag = complement_frag
         | 
| 86 | 
            -
              #      else
         | 
| 87 | 
            -
              #        # need to add dots to beginning of complement string
         | 
| 88 | 
            -
              #        new_primary_frag = ('+' * (size_of_sequence - lead_in) ) + primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
         | 
| 89 | 
            -
              #        new_complement_frag = ('.' * (size_of_sequence - lead_in) ) + complement_frag
         | 
| 90 | 
            -
              #      end
         | 
| 91 | 
            -
             | 
| 62 | 
            +
                # adapter-size supplied
         | 
| 63 | 
            +
                  new_primary_frag, new_complement_frag = preserve_or_add(adapter_size, lead_in, adapter_sequence, primary_frag, complement_frag)
         | 
| 92 64 | 
             
                else
         | 
| 93 65 | 
             
                # only the specificity has been provided
         | 
| 94 66 | 
             
                  new_primary_frag = ('.' * dots_on_primary) + ('+' * tail.size) + primary_frag[ lead_in .. -1 ]
         | 
| 95 67 | 
             
                  new_complement_frag = complement_frag
         | 
| 96 | 
            -
             | 
| 97 68 | 
             
                end
         | 
| 98 69 |  | 
| 99 70 | 
             
                if five_or_three == 3
         | 
| 100 | 
            -
                  new_primary_frag.reverse | 
| 101 | 
            -
             | 
| 71 | 
            +
                  return [new_primary_frag.reverse, new_complement_frag.reverse]
         | 
| 72 | 
            +
                else
         | 
| 73 | 
            +
                  return [new_primary_frag, new_complement_frag]
         | 
| 102 74 | 
             
                end
         | 
| 103 | 
            -
             | 
| 104 | 
            -
                return [new_primary_frag, new_complement_frag]
         | 
| 105 75 | 
             
              end
         | 
| 106 76 |  | 
| 107 77 |  | 
| 78 | 
            +
            =begin
         | 
| 108 79 | 
             
            # Find the fragments that match the search parameters
         | 
| 109 80 | 
             
            #
         | 
| 110 81 | 
             
              def find_matching_fragments(sizes, left, right)
         | 
| 111 82 | 
             
                hits=[]
         | 
| 83 | 
            +
                
         | 
| 112 84 | 
             
                s = (@adapters[:adapter5_size] or 0) + (@adapters[:adapter3_size] or 0)
         | 
| 113 85 |  | 
| 114 | 
            -
                if [@ops. | 
| 86 | 
            +
                if [@ops.seqsize].flatten == [0] or [@ops.seqsize].flatten == [nil] or [@ops.seqsize].flatten == ['0']
         | 
| 115 87 | 
             
                  sizes.each do |raw_size, info|
         | 
| 116 88 | 
             
                    hits << info
         | 
| 117 89 | 
             
                  end
         | 
| 118 90 |  | 
| 119 91 | 
             
                else
         | 
| 120 | 
            -
                  [@ops. | 
| 92 | 
            +
                  [@ops.seqsize].flatten.each do |seek_size|
         | 
| 121 93 | 
             
                    seek_size = seek_size.to_i
         | 
| 122 94 | 
             
                    sizes.each do |raw_size, info|
         | 
| 123 95 | 
             
                      frag_size = raw_size - left[:trim_from_both] - right[:trim_from_both]
         | 
| @@ -130,6 +102,7 @@ class SearchCommand < Command | |
| 130 102 |  | 
| 131 103 | 
             
                return hits
         | 
| 132 104 | 
             
              end
         | 
| 105 | 
            +
            =end
         | 
| 133 106 |  | 
| 134 107 | 
             
              def right_tail_of(s)
         | 
| 135 108 | 
             
              # 'PpiI' => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
         | 
| @@ -157,6 +130,23 @@ class SearchCommand < Command | |
| 157 130 | 
             
                end
         | 
| 158 131 |  | 
| 159 132 | 
             
              end
         | 
| 133 | 
            +
              
         | 
| 134 | 
            +
              def preserve_or_add(size, lead_in, adapter_sequence, primary_frag, complement_frag)
         | 
| 135 | 
            +
                if adapter_sequence.nil? or adapter_sequence.empty?
         | 
| 136 | 
            +
                  adapter_sequence = '?' * size
         | 
| 137 | 
            +
                end
         | 
| 138 | 
            +
                
         | 
| 139 | 
            +
                if lead_in >= size
         | 
| 140 | 
            +
                # need to preserve dots on primary string
         | 
| 141 | 
            +
                  p = ('=' * (lead_in - size)) + adapter_sequence + primary_frag[ lead_in .. -1 ]
         | 
| 142 | 
            +
                  c = complement_frag
         | 
| 143 | 
            +
                else
         | 
| 144 | 
            +
                # need to add dots to beginning of complement string
         | 
| 145 | 
            +
                  p = adapter_sequence + primary_frag[ lead_in .. -1 ]
         | 
| 146 | 
            +
                  c = ('=' * (size - lead_in) ) + complement_frag
         | 
| 147 | 
            +
                end
         | 
| 148 | 
            +
                [p,c]
         | 
| 149 | 
            +
              end
         | 
| 160 150 |  | 
| 161 151 | 
             
            end  # class SearchCommand
         | 
| 162 152 | 
             
            end  # class App
         | 
| @@ -0,0 +1,24 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            require File.expand_path(
         | 
| 3 | 
            +
                File.join(File.dirname(__FILE__), %w[.. .. .. spec_helper]))
         | 
| 4 | 
            +
                
         | 
| 5 | 
            +
            # --------------------------------------------------------------------------
         | 
| 6 | 
            +
            describe "Genfrag::App::PredictorCommand" do
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              it "should receive a resultset from the search command"
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              it "should receive what predictor method to use"
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              describe "test adjusted sizes with a resultset" do
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                it "should calculate a p-value for every match"
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                it "should return p-values and flags for outlier values"
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                it "should optionally return the classifiers"
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
              
         | 
| 22 | 
            +
            end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            # EOF
         | 
    
        data/spec/genfrag/app_spec.rb
    CHANGED
    
    | @@ -58,9 +58,8 @@ describe Genfrag::App do | |
| 58 58 | 
             
              end
         | 
| 59 59 |  | 
| 60 60 | 
             
              it 'should report an error for unrecognized commands' do
         | 
| 61 | 
            -
                 | 
| 62 | 
            -
                @err.readline.should == ' | 
| 63 | 
            -
                @err.readline.should == '    Unknown command "bad_func"'
         | 
| 61 | 
            +
                @app.cli_run %w[bad_func]
         | 
| 62 | 
            +
                @err.readline.should == 'Unknown command "bad_func"'
         | 
| 64 63 | 
             
              end
         | 
| 65 64 |  | 
| 66 65 | 
             
              it 'should report a version number' do
         | 
    
        data/spec/genfrag_spec.rb
    CHANGED
    
    
    
        data/tasks/rdoc.rake
    CHANGED
    
    | @@ -19,10 +19,11 @@ namespace :doc do | |
| 19 19 | 
             
                        end
         | 
| 20 20 | 
             
                rd.rdoc_files.push(*files)
         | 
| 21 21 |  | 
| 22 | 
            -
                 | 
| 23 | 
            -
             | 
| 22 | 
            +
                name    = PROJ.name
         | 
| 24 23 | 
             
                rf_name = PROJ.rubyforge.name
         | 
| 25 | 
            -
             | 
| 24 | 
            +
             | 
| 25 | 
            +
                title = "#{name}-#{PROJ.version} Documentation"
         | 
| 26 | 
            +
                title = "#{rf_name}'s " + title if rf_name.valid? and rf_name != name
         | 
| 26 27 |  | 
| 27 28 | 
             
                rd.options << "-t #{title}"
         | 
| 28 29 | 
             
                rd.options.concat(rdoc.opts)
         | 
    
        data/tasks/setup.rb
    CHANGED
    
    | @@ -6,7 +6,7 @@ require 'fileutils' | |
| 6 6 | 
             
            require 'ostruct'
         | 
| 7 7 | 
             
            require 'find'
         | 
| 8 8 |  | 
| 9 | 
            -
            class OpenStruct; undef :gem; end
         | 
| 9 | 
            +
            class OpenStruct; undef :gem if defined? :gem; end 
         | 
| 10 10 |  | 
| 11 11 | 
             
            # TODO: make my own openstruct type object that includes descriptions
         | 
| 12 12 | 
             
            # TODO: use the descriptions to output help on the available bones options
         | 
| @@ -124,9 +124,7 @@ import(*rakefiles) | |
| 124 124 | 
             
            %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
         | 
| 125 125 |  | 
| 126 126 | 
             
            # Setup some constants
         | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
            DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
         | 
| 127 | 
            +
            DEV_NULL = File.exist?('/dev/null') ? '/dev/null' : 'NUL:'
         | 
| 130 128 |  | 
| 131 129 | 
             
            def quiet( &block )
         | 
| 132 130 | 
             
              io = [STDOUT.dup, STDERR.dup]
         | 
| @@ -139,21 +137,15 @@ ensure | |
| 139 137 | 
             
              $stdout, $stderr = STDOUT, STDERR
         | 
| 140 138 | 
             
            end
         | 
| 141 139 |  | 
| 142 | 
            -
            DIFF = if  | 
| 143 | 
            -
                   else
         | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
                   end unless defined?  | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
                     else '' end
         | 
| 152 | 
            -
                   end
         | 
| 153 | 
            -
             | 
| 154 | 
            -
            RCOV = WIN32 ? 'rcov.bat' : 'rcov'
         | 
| 155 | 
            -
            RDOC = WIN32 ? 'rdoc.bat' : 'rdoc'
         | 
| 156 | 
            -
            GEM  = WIN32 ? 'gem.bat'  : 'gem'
         | 
| 140 | 
            +
            DIFF = if system("gdiff '#{__FILE__}' '#{__FILE__}' > #{DEV_NULL} 2>&1") then 'gdiff'
         | 
| 141 | 
            +
                   else 'diff' end unless defined? DIFF
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            SUDO = if system("which sudo > #{DEV_NULL} 2>&1") then 'sudo'
         | 
| 144 | 
            +
                   else '' end unless defined? SUDO
         | 
| 145 | 
            +
             | 
| 146 | 
            +
            RCOV = "#{RUBY} -S rcov"
         | 
| 147 | 
            +
            RDOC = "#{RUBY} -S rdoc"
         | 
| 148 | 
            +
            GEM  = "#{RUBY} -S gem"
         | 
| 157 149 |  | 
| 158 150 | 
             
            %w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
         | 
| 159 151 | 
             
              begin
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: genfrag
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              version: 0.0.0. | 
| 4 | 
            +
              version: 0.0.0.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors: 
         | 
| 7 7 | 
             
            - Pjotr Prins and Trevor Wennblom
         | 
| @@ -9,7 +9,7 @@ autorequire: | |
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 11 |  | 
| 12 | 
            -
            date: 2009- | 
| 12 | 
            +
            date: 2009-03-16 00:00:00 -05:00
         | 
| 13 13 | 
             
            default_executable: 
         | 
| 14 14 | 
             
            dependencies: 
         | 
| 15 15 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| @@ -40,9 +40,9 @@ dependencies: | |
| 40 40 | 
             
                requirements: 
         | 
| 41 41 | 
             
                - - ">="
         | 
| 42 42 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 43 | 
            -
                    version: 2.4. | 
| 43 | 
            +
                    version: 2.4.2
         | 
| 44 44 | 
             
                version: 
         | 
| 45 | 
            -
            description: This is a development release.  | 
| 45 | 
            +
            description: This is a development release. Some features are functional at this time.  Genfrag allows for rapid in-silico searching of fragments cut by different restriction enzymes in large nucleotide acid databases, followed by matching specificity adapters which allow a further data reduction when looking for differential expression of genes and markers.
         | 
| 46 46 | 
             
            email: trevor@corevx.com
         | 
| 47 47 | 
             
            executables: 
         | 
| 48 48 | 
             
            - genfrag
         | 
| @@ -82,6 +82,7 @@ files: | |
| 82 82 | 
             
            - spec/genfrag/app/command_spec.rb
         | 
| 83 83 | 
             
            - spec/genfrag/app/index_command_spec.rb
         | 
| 84 84 | 
             
            - spec/genfrag/app/search_command/match_spec.rb
         | 
| 85 | 
            +
            - spec/genfrag/app/search_command/predictor_spec.rb
         | 
| 85 86 | 
             
            - spec/genfrag/app/search_command/process_file_spec.rb
         | 
| 86 87 | 
             
            - spec/genfrag/app/search_command/trim_spec.rb
         | 
| 87 88 | 
             
            - spec/genfrag/app/search_command_spec.rb
         |