bio-ngs 0.3.2.alpha.01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/.document +5 -0
  2. data/Gemfile +39 -0
  3. data/Gemfile.lock +81 -0
  4. data/LICENSE.txt +28 -0
  5. data/README.rdoc +240 -0
  6. data/Rakefile +60 -0
  7. data/VERSION +1 -0
  8. data/bin/biongs +35 -0
  9. data/bio-ngs.gemspec +215 -0
  10. data/ext/mkrf_conf.rb +87 -0
  11. data/lib/bio-ngs.rb +54 -0
  12. data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
  13. data/lib/bio/appl/ngs/blast.rb +36 -0
  14. data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
  15. data/lib/bio/appl/ngs/cufflinks.rb +489 -0
  16. data/lib/bio/appl/ngs/fastx.rb +170 -0
  17. data/lib/bio/appl/ngs/samtools.rb +118 -0
  18. data/lib/bio/appl/ngs/sff_extract.rb +23 -0
  19. data/lib/bio/appl/ngs/tophat.rb +158 -0
  20. data/lib/bio/ngs/converter.rb +100 -0
  21. data/lib/bio/ngs/core_ext.rb +12 -0
  22. data/lib/bio/ngs/db.rb +66 -0
  23. data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
  24. data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
  25. data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
  26. data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
  27. data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
  28. data/lib/bio/ngs/db/models.rb +1 -0
  29. data/lib/bio/ngs/db/models/homology.rb +8 -0
  30. data/lib/bio/ngs/db/models/ontology.rb +16 -0
  31. data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
  32. data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
  33. data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
  34. data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
  35. data/lib/bio/ngs/ext/versions.yaml +73 -0
  36. data/lib/bio/ngs/graphics.rb +189 -0
  37. data/lib/bio/ngs/homology.rb +102 -0
  38. data/lib/bio/ngs/ontology.rb +103 -0
  39. data/lib/bio/ngs/quality.rb +64 -0
  40. data/lib/bio/ngs/record.rb +50 -0
  41. data/lib/bio/ngs/task.rb +46 -0
  42. data/lib/bio/ngs/utils.rb +176 -0
  43. data/lib/development_tasks.rb +34 -0
  44. data/lib/enumerable.rb +37 -0
  45. data/lib/tasks/bwa.thor +126 -0
  46. data/lib/tasks/convert.thor +454 -0
  47. data/lib/tasks/history.thor +51 -0
  48. data/lib/tasks/homology.thor +121 -0
  49. data/lib/tasks/ontology.thor +93 -0
  50. data/lib/tasks/project.thor +51 -0
  51. data/lib/tasks/quality.thor +142 -0
  52. data/lib/tasks/rna.thor +126 -0
  53. data/lib/tasks/sff_extract.thor +9 -0
  54. data/lib/templates/README.tt +43 -0
  55. data/lib/templates/db.tt +6 -0
  56. data/lib/wrapper.rb +225 -0
  57. data/spec/converter_qseq_spec.rb +56 -0
  58. data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
  59. data/spec/quality_spec.rb +40 -0
  60. data/spec/sff_extract_spec.rb +98 -0
  61. data/spec/spec_helper.rb +55 -0
  62. data/spec/tophat_spec.rb +99 -0
  63. data/spec/utils_spec.rb +22 -0
  64. data/test/conf/test_db.yml +4 -0
  65. data/test/data/blastoutput.xml +69 -0
  66. data/test/data/gene-GO.json +1 -0
  67. data/test/data/goa_uniprot +27 -0
  68. data/test/data/goslim_goa.obo +1763 -0
  69. data/test/helper.rb +18 -0
  70. data/test/test_bio-ngs.rb +17 -0
  71. data/test/test_db.rb +21 -0
  72. data/test/test_homology.rb +102 -0
  73. data/test/test_ngs.rb +21 -0
  74. data/test/test_ontology.rb +74 -0
  75. data/test/test_utils.rb +29 -0
  76. metadata +460 -0
@@ -0,0 +1,100 @@
1
+ H125 98 1 1108 1586 1989 CGATGT 1 CAGA.C.................A.....GAATGGCATGGATCAAGAAAATCCCCCTTGTGAAGAAGAATCAGCAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
2
+ H125 98 1 1108 1879 1986 CGATGT 1 CACA...................C.....GAACCTTTATGAGCCGGCTGCCATCTAGTTTGACGCGGATTCTCTTG ^^^^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
3
+ H125 98 1 1108 1786 1997 CGATGT 1 CGCAGA.T.G.............C.A.TCTGATATGAAACATTGGCCCTTTATGGTGGTGAATGATGCTGGCAGGC _____\B]B]BBBBBBBBBBBBBTBXBTXZ^]]]^_____`___`__`\`__________^`___Y^^^^`^___B 0
4
+ H125 98 1 1108 2174 1996 CGATGT 1 CTCACA.A...............T.T..ATTCTGTGTTTTATCAAAAAGCAAAGATATTCTCACCTTCACCTTGCT _^_^_UBSBBBBBBBBBBBBBBBYBRBBUVTYQUT___U^^`^______^_________Y_______\___X___Y 0
5
+ H125 98 1 1108 2322 1999 CGATGT 1 GCTGGG.TTAC...........CC.T.GCCTTAGTTCTTATTTCAAGGAAAAGCTGCTAGAAAGGGAAACCATGAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
6
+ H125 98 1 1108 2619 1977 CGATGT 1 CAG...........................CCAGCAGGACACCGCCTGCAGAAAGGACCTGCCCTGATAATGTCCC ]OTBBBBBBBBBBBBBBBBBBBBBBBBBBBXVR[Z_`_____Y__`_____`_____`_`_`________```_Y_ 0
7
+ H125 98 1 1108 2850 1986 CGATGT 1 CAAA...................G.....GGTGTGGCCATGAATCCTGTGGAGCATCCTTTTGGAGGTGGCAACCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
8
+ H125 98 1 1108 2828 1997 CGATGT 1 GTGATA.G.T.............C.G.GGCAAGGGGGGTTGGAGGAGCATGAAGCCCCCAATATTATGTAGGTCAC ]_^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
9
+ H125 98 1 1108 3107 1977 CGATGT 1 CGG...........................GGGCTTAAAAAAAAAAAAAAAAAAAAAACCCATTTTTTTGCTAACG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
10
+ H125 98 1 1108 3138 1994 CGATGT 1 CTCT.C.T...............A....GCAAAGACAGCAGGCAAGTCATTCTCCAAAGAAAGTGTAAGAGTGCCC ^^^^B[B[BBBBBBBBBBBBBBBTBBBBYZ[][X]______________`____`_____^^`_____\_`__^__ 0
11
+ H125 98 1 1108 3326 1979 CGATGT 1 ATT...........................AATGAAAAGATTGCATATAATGCTGCATCTGTTAGAAACAGAACTA WWWBBBBBBBBBBBBBBBBBBBBBBBBBBBPRVTP____^X`______^_______^_______`\`________S 0
12
+ H125 98 1 1108 3444 1988 CGATGT 1 CGAG...................T.....AGAATTGCATCGTGCTCATCGACAGCACACCGTACCGACAGTGGTAC Z]]ZBBBBBBBBBBBBBBBBBBB[BBBBBRQWFXT__^____^_`_______^_______^WZ_]^\^^^W^\V__ 0
13
+ H125 98 1 1108 3529 1974 CGATGT 1 GTA...........................GCGGACACGGACATGGGT.AA.ACAC.CTGCCTCATGGGGAAACCT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
14
+ H125 98 1 1108 3620 1995 CGATGT 1 CTCAGA.G...............A....TTCAAGAGGCTAGCCCCAGCCAAGAAGGCCCCCTCAGCCAAGGGGGCA ``___SBYBBBBBBBBBBBBBBBRBBBBXPUWVPU___YYY^^GZOTSXS_Y__YZH[]^^M]YU^``^T^VZVXW 0
15
+ H125 98 1 1108 3845 1975 CGATGT 1 CAC...........................GCTGAAAGCCTAGGGGATGGA.AAGT.GCAGGCAAGCCCCGGGGCG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
16
+ H125 98 1 1108 4403 1983 CGATGT 1 CTT....................A.....ATGGACATAAACAAACTTTATTGCACACGACTATTGTGAGGATAAAG ]ZWBBBBBBBBBBBBBBBBBBBBPBBBBBPV\VLR_____`_^`^__^__^^___^__Y_`____`_`U`\_____ 0
17
+ H125 98 1 1108 4424 1985 CGATGT 1 CCCA...................G.....GTGGTCAGGATCCACTGTGGCTGTGAGTCCCTCCCATCCTGTGGCAC ]X]YBBBBBBBBBBBBBBBBBBBRBBBBBXTW\U[`\______________^Y^\]VY^^__^^N`]_BBBBBBBB 0
18
+ H125 98 1 1108 4718 1991 CGATGT 1 TCCC.T.................T....CAACAACTGAAAACGGATGAGGCCAGACTGACTGAAGGGCCCAAGCCA ^^Z^B[BBBBBBBBBBBBBBBBBRBBBBWWXZSXU______________Y____^_^^^______^____Y__T_Y 0
19
+ H125 98 1 1108 4958 1978 CGATGT 1 GTG...........................ATTAGCCGGGCATGGTAGCGGGCGCCTGTAGTTACAGCTACTCTGG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
20
+ H125 98 1 1108 4916 1983 CGATGT 1 CCT....................G.....TCCATTTACTTCTTCCCATCCTGGGACCTGCCAGGGCAGCAATCTCT ]][BBBBBBBBBBBBBBBBBBBBRBBBBBRZWWX]_`_______`_______\_______^__\_`____`_`___ 0
21
+ H125 98 1 1108 4759 1998 CGATGT 1 CAAAAT.A.A............CC.C.ACATGGGTCTATTTTGTGCTTAAAAATAATTTAAAAATCATACAATATT ``___]B]B]BBBBBBBBBBBBY]B[B[[U\^^^Z_`___`^^^`_``__`___^^[______________\____ 0
22
+ H125 98 1 1108 5672 1976 CGATGT 1 AAA...........................AAAAGGACTTTGAAGTTTATCAAGTAGGGAAGAAAAATGTTGCTTT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
23
+ H125 98 1 1108 5522 1979 CGATGT 1 AGA...........................TCACCGATGTATCTCGTATGCCGTCTTCTGCTTGAAAAAACAAATT VTWBBBBBBBBBBBBBBBBBBBBBBBBBBBXZY[W_Y___^YY^^]ZZW^\_\_\^^`^O^`___^BBBBBBBBBB 0
24
+ H125 98 1 1108 5874 1983 CGATGT 1 CAGC...................A.....TGCGACTGTTTAGTTCTCAGCTCTTTCATCACCTCCTTGCTGGAGAT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
25
+ H125 98 1 1108 5775 1999 CGATGT 1 GCCTAG.TGCC...........GGAT.AGGATATTTGCTACTTGCAGCCTTTATCAAAATAAGAGACTAGCAAATT _^^YY[B[UXRBBBBBBBBBBBXZMRBPTPTPQWQW]VVMZ_Z]]Y]_ZU]^\\^]\\Z]^^^U_]]]]__^_^O[ 0
26
+ H125 98 1 1108 6560 1978 CGATGT 1 CCA...........................GTCCCCACACCCCATGACACCGCCCGCTCGCCCTCCCTCCATTCTC ]YXBBBBBBBBBBBBBBBBBBBBBBBBBBBXS]YW`_^^___^^__\_______^^_`______`^^^_^^_____ 0
27
+ H125 98 1 1108 6500 1978 CGATGT 1 AGA...........................TCACCGATGTATCTCGTATGCCGTATCATTAGATCGGAAGAGCACA VXUBBBBBBBBBBBBBBBBBBBBBBBBBBBJYUUW_______`___Y___]^^W]____`__YH_^]]\]____\] 0
28
+ H125 98 1 1108 6921 1974 CGATGT 1 AGT...........................AAAATATCAAGGCTCTCA.TG.GAAA.TGTAGACCCCAAATAACTG ][ZBBBBBBBBBBBBBBBBBBBBBBBBBBBX[YXY^`____`^`^^^VBVSBTY]ZB][]^^Y\\_^_____^^__ 0
29
+ H125 98 1 1108 7694 1990 CGATGT 1 TAGG.A.................A.....GCTGACAAGGATACTGATAGAAAAAGTGATTTCTTCTTATTATAAAG ][[[BQBBBBBBBBBBBBBBBBBRBBBBBXUWZWZ_^_`________`__^\]]]________^^```_Y_`____ 0
30
+ H125 98 1 1108 7856 1988 CGATGT 1 CCCA.T.................T.....CCCTGAGGCCCTCCAGAGGTTATCTGCCCATCATCTCACCATCATGG ^^^^B]BBBBBBBBBBBBBBBBBTBBBBBXVWYUZ_______^^__^__`_\`^^__`_____``_______^__` 0
31
+ H125 98 1 1108 8073 1990 CGATGT 1 GAGG.C.................C.....ACATGCTGCTGGTGATGCTGGAGGACATGAACACAGGGACAGAATCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
32
+ H125 98 1 1108 8108 1993 CGATGT 1 AGCA.T.................G....AAGAAGTCAGAATTTGAAACGGAAGAATGAAGAAAAGAAATAAAAATG MVRTBPBBBBBBBBBBBBBBBBBOBBBBROTZQUQ^\\\]]^^`U^ZWY^]WW\]YY^V^__^^^^^^^_]XZ]QV 0
33
+ H125 98 1 1108 8747 1974 CGATGT 1 GGA...........................TTTGTAAATAAAATGCTG.CA.GAAA.TTAGAAGGAAAATTAAATT Z]ZBBBBBBBBBBBBBBBBBBBBBBBBBBBZ]]WX_____`^^^_^X]B]QBRXZWBTZT^VY^^_______``__ 0
34
+ H125 98 1 1108 8581 1984 CGATGT 1 GTCA...................G.....GATGAGGAAGATGATCAGGAGGATGATGAAGGTGAAGAGGGAGATGA ][]UBBBBBBBBBBBBBBBBBBBYBBBBBXQRUQU__________^__Y_^X_`^_^^___Y^^^TZRSX`_`^_W 0
35
+ H125 98 1 1108 8516 1990 CGATGT 1 ATTT.G.................T.....TATCAATGCAGCCTTTTCAGCCAATGAGAGGCCTCGTCACCATCACG VWTXBPBBBBBBBBBBBBBBBBBRBBBBBRTURXW__^\YV]__^_____RPUXV^^\^^U\^\]_____TP]TXS 0
36
+ H125 98 1 1108 8787 1977 CGATGT 1 CAG...........................CTTTCTAGGGACCAGCTGCAGCTCCTTCTCTTGAAGATTGCCACCA ]ZWBBBBBBBBBBBBBBBBBBBBBBBBBBBXV]ZZ_______`____`_^`\___^_\__XZW]Z\]]^\V^TT\^ 0
37
+ H125 98 1 1108 8959 1993 CGATGT 1 GGCA.A.................A....TCTCTCTTCTGGTCTTAGCCCACTTAACCCAGTCTGAAGAGCCAAAAT WTRXBSBBBBBBBBBBBBBBBBBUBBBBXWWZWSWX___Y__\_U[^X[\[[[__^___Y_[]]____W_^Z___\ 0
38
+ H125 98 1 1108 8860 1997 CGATGT 1 AAGAAG.T.A.............C.G.TCCACCCATCATCAGCCATCTGAAACTGACAGACTTTGGTCTGTCCCGC _`___ZBSBZBBBBBBBBBBBBBYBXBY]UY^[[M^_____`________________`__^____________`` 0
39
+ H125 98 1 1108 9205 1979 CGATGT 1 GAA...........................GGTGGCGAAGGCCTACGCTGCTCTTGCTGCCCTAGAAAAGCTTTTC TSOBBBBBBBBBBBBBBBBBBBBBBBBBBBRRQXU_^^Y__\__^_M^V_^]]O]X^_\^`___^W\^W[Y[_TX^ 0
40
+ H125 98 1 1108 9076 1982 CGATGT 1 TGG....................G.....ACATGCCTTGGTTCAAGGGATGGAAAGTCACCCGTAAGGATGGCAAT V\UBBBBBBBBBBBBBBBBBBBBYBBBBBY]X[UU____^__^_______`__`_`^^___``___\_\______U 0
41
+ H125 98 1 1108 9727 1974 CGATGT 1 CAA...........................GCATCACGCCACCTGACT.CA.ACTA.ACTACAAGGCTACAGTAAC YR[BBBBBBBBBBBBBBBBBBBBBBBBBBBRIZRU__`______`^V\BWZBSMTYB]Z]XVTX]___^___O^__ 0
42
+ H125 98 1 1108 9601 1975 CGATGT 1 CAA...........................TATACCAATGACATCATTTCTAGTATATCTTAAAACATTATATTTG ]]VBBBBBBBBBBBBBBBBBBBBBBBBBBBTVW[U`____^_X________`________\`____`^_^_^`___ 0
43
+ H125 98 1 1108 9556 1976 CGATGT 1 GAT...........................CACCGATGTCTCTCCTATGCCGTCTTCTTCTTGACAAAAATATGTA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
44
+ H125 98 1 1108 9837 1974 CGATGT 1 AAA...........................AACTTCTTCCACAGAGAG.TC.GGGG.CGGAGGGACTCTGCTGAAC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
45
+ H125 98 1 1108 10228 1973 CGATGT 1 AGG...........................ATTCTGAAGGGAAAATGG.TC.CTTA.ATTGCAAGTAACCAAAGGA ][]BBBBBBBBBBBBBBBBBBBBBBBBBBBPY\ZX_Y____`___^^]BUZBXZXXBZ[Z^][^]__`__\V]]^^ 0
46
+ H125 98 1 1108 10001 1987 CGATGT 1 CGGC...................C.....CCAGAAAGATTATAGTTGTACCCACACCAAGTTCCCATGGTTGAAGC ]]Z]BBBBBBBBBBBBBBBBBBBTBBBBBP]QVLS^^Y^^T]U]V^XXGV^\^]^__________^_`_____`OZ 0
47
+ H125 98 1 1108 10207 1989 CGATGT 1 CTCG.A.................A.....ACTAGTATATCGCTAACACCTCATATCCTCCCTACTATGCCAAGAAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
48
+ H125 98 1 1108 10270 1999 CGATGT 1 GATTCA.ACAC...........TTCCTAACCAGCAAGGCCCCCATACACCATCTATTCCATAAACCACTCAGGTTA _____]B^]]RBBBBBBBBBBBRYV[ZY][__Y___Y_OY_____TY^YU__\____________^____\U]]\Z 0
49
+ H125 98 1 1108 10617 1987 CGATGT 1 CCAG...................G.....TCTGGATTTTCTTTCTGCCTCTTCTATCCAGAAAGGATACTGGAGAA ]][YBBBBBBBBBBBBBBBBBBBTBBBBBR[T\U[`___^^^`^^_______`_____\__`________Y_____ 0
50
+ H125 98 1 1108 10985 1979 CGATGT 1 CCA...........................CTGGAAGTCCTAGCCAAAGCAATCAGACAAGAGAAAGAAATAAAGG TY[BBBBBBBBBBBBBBBBBBBBBBBBBBBYWWYZ_`_^___^Y_\^___`_V__^__^_^^^O^S^[[]^_X^^\ 0
51
+ H125 98 1 1108 10934 1987 CGATGT 1 CTTT...................T.....CCAAGATGATGCTTGGTGCTCTAGCCATCACTTCCTGGCCTGCAGGG ]URQBBBBBBBBBBBBBBBBBBBVBBBBBRTXYRU________V_Z]ZJ^\]Z]^______\________^Y_Y__ 0
52
+ H125 98 1 1108 11197 1979 CGATGT 1 TTG...........................AAAAGGCCTGCGACCATGTCTGCCACCCAACCATTTATTTTCTACT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
53
+ H125 98 1 1108 11485 1976 CGATGT 1 TGT...........................AGGAACGCAGACACATTCAGATGCCTTTGCAGAAAGAGATGCCAGA ][ZBBBBBBBBBBBBBBBBBBBBBBBBBBBP\YXV____^__`^_____\_______^__``^_^________^\_ 0
54
+ H125 98 1 1108 11450 1977 CGATGT 1 CCT...........................GATGCCAAACGGTGGAGCCTTGAAGATACATTTCCTCTCCTTCTGT ]VVBBBBBBBBBBBBBBBBBBBBBBBBBBBXPSNN_______^_VZZ]]]_`__^\_`___`______^___`W_^ 0
55
+ H125 98 1 1108 11381 1997 CGATGT 1 ATCCAT.C.T............GC.T.CATCCACCCATCCAGCCATGCACCTGCCCATCCATCAATTCACTGATCC _`__`ZB]B]BBBBBBBBBBBBXRBZBZ[W_`_[^^^^_`___`_______^_\`_______`__`_______^`_ 0
56
+ H125 98 1 1108 11507 1973 CGATGT 1 GGA...........................AGGAGACTAGCACAGGGA.TT.TCCT.ATCCTCACCATCACACCTG W[[BBBBBBBBBBBBBBBBBBBBBBBBBBBRXXZQ`^___Y____^X\B\QBPZXOBTVW\X^\^_`VY_Y]^XOY 0
57
+ H125 98 1 1108 12016 1998 CGATGT 1 CCACTC.G.AG...........CC.G.CTGTGGTGATGCCTCAGTTTGGACAGAAACTCAAACCTGACGCACAAAC __``_[B]BRXBBBBBBBBBBBQZB]BURXYY^^^_`_______`___________`____`__\_`_\_^_\__^ 0
58
+ H125 98 1 1108 12414 1984 CGATGT 1 GTTT...................T.....ACTTATAAGGCTGCAGTCTAGGAGGCAGAGCAATGAATCATTACAAT ]VWZBBBBBBBBBBBBBBBBBBB[BBBBBUVRZOX_________`_____`_______\_Y______`______`_ 0
59
+ H125 98 1 1108 12441 1993 CGATGT 1 CAGC.G.T...............T....TAGAATCAGGTCTATTGCACGTCTGGCCAATATTGATGAAGAAATGCT ]U[YBTBYBBBBBBBBBBBBBBBTBBBBPLTSUZP_^___\Y^^^______`_`_`_U________`___U^^___ 0
60
+ H125 98 1 1108 12850 1984 CGATGT 1 CGGG...................T.....TCCCAAGTAGCTGGAATTACAAGCGTGTGCCACCAAGCCTGGCTAAT ]ZZ]BBBBBBBBBBBBBBBBBBBRBBBBBQWPNPQ__`_^____YYZY^TW^T\T]Z\]Z___________^^_^V 0
61
+ H125 98 1 1108 12905 1989 CGATGT 1 GGCC.C.................T.....CGCGGCCGGCGCGCACCTTCGTGTGGTACTGAAACGAGGGGCAGCCC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
62
+ H125 98 1 1108 12929 1991 CGATGT 1 AGAT.G.................A....AGTCACCGATGTATCTCGTATGCCGTATCATTAGATCGGAAGAGCACA YZ[YBSBBBBBBBBBBBBBBBBBXBBBBPZSW[XW_^``_______^__`______^_V_VR\\[\__^]V_^__` 0
63
+ H125 98 1 1108 13719 1980 CGATGT 1 GCG....................T......GGGGACCTTCGGCCGAGTTGTACAATGTGTTGACCATCGCAGGGGT ]]]BBBBBBBBBBBBBBBBBBBBYBBBBBBX[T[U_`__________________`_`___\__``Y___VY_\^V 0
64
+ H125 98 1 1108 13548 1995 CGATGT 1 CCGGCC.C...............T.G..ATAAAGTCTTGCTTTGTTACCCAGATTGGCCTGAAACTCCTGAGCTCA ___Y`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
65
+ H125 98 1 1108 13959 1988 CGATGT 1 TCAA.G.................T.....TCATTCACGAAATAAATACAGTGATCATTCACAGTGTAGTAGATCAT ]XWSBVBBBBBBBBBBBBBBBBBXBBBBBT[TT[VY_^_____^V`__`____\\\_____\_`_^`_^___^__` 0
66
+ H125 98 1 1108 14218 1997 CGATGT 1 TGGGCA.G.A............CA.A.TTGCTATCTGTATACACGTTTTTTCTTTTTTCCTTTCCCAGTTCTAAGG _____ZB]BZBBBBBBBBBBBBXYBZBZ[Z_`^`^_`__\____`_____``^`^`^^`___`_______`__Y_] 0
67
+ H125 98 1 1108 14318 1979 CGATGT 1 GAC...........................AAAGAAAAAAAAGACAAAAGAAGAAAAGGAGCTACCATGCAGTCCA ]XWBBBBBBBBBBBBBBBBBBBBBBBBBBBWVTQR^^^^S^^^^^^__^XPX[V^^^X`_O____^`__YRHT[S_ 0
68
+ H125 98 1 1108 14432 1988 CGATGT 1 AGCA.T.................C.....CTTCTAAGTTATATAAACACTTTTAATGCAAACATTCAATAGGGCAT ^^^^B]BBBBBBBBBBBBBBBBBWBBBBBX[X[ZZ__`__`__`_______`__________________`_____ 0
69
+ H125 98 1 1108 14609 1991 CGATGT 1 AGAA.A.................A....GTGGGATATATTAAAAGCACCTTGGGAGGCTGAGGCGGGCAGATCACG ][[YBXBBBBBBBBBBBBBBBBBRBBBBXXZZYYZ_^______`^__`________^^____^___^^`^_\`___ 0
70
+ H125 98 1 1108 14813 1974 CGATGT 1 AGA...........................TCACCGATGTATCTCGTA.GC.GTCT.CTGCTTGAAAAAACATCCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
71
+ H125 98 1 1108 15180 1989 CGATGT 1 ATTT.G.................C.....TCAATGACACTTTTGTCCATGTCACTGATCTTTCTGGCAAGGAAACC ]Z[[BZBBBBBBBBBBBBBBBBBYBBBBBPTTTZT^_____`__^^___^_^\\____^___`__^\_V\V_\^]Y 0
72
+ H125 98 1 1108 15469 1989 CGATGT 1 ACTG.A.................G.....GCGTCTTCAGAGGGGGGTAGCATGACCTCAAGCCTTCTATAAAAGCC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
73
+ H125 98 1 1108 15599 1981 CGATGT 1 GCT....................A.....CACTAGGTCTCCTCTGGCACCCATGTGTGGGAATGTGAGTGCCTTGT ]YYBBBBBBBBBBBBBBBBBBBBVBBBBBXZUZWV_________________`______^V________`__^`__ 0
74
+ H125 98 1 1108 15785 1987 CGATGT 1 CGCA...................G.....GAGTAGCAGCTCCCGCTGGTGTGCGGCTAGGGGTAGGATCCCATATT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
75
+ H125 98 1 1108 16225 1978 CGATGT 1 CTG...........................GCGGAGTCTCTTCCCTTGCGTGCATAGGTCCCGGTTGGTAGAGGGT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
76
+ H125 98 1 1108 16515 1977 CGATGT 1 CAG...........................TCAATACGGGGAAAAATAAAGTGAGGCGAGTGAAGACCATTTATGC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
77
+ H125 98 1 1108 16914 1985 CGATGT 1 AGAT...................A.....GTCACCGATGTATCTCGTATGCCGTCTTCTGCTTGAAAAAAACAACA ]]]]BBBBBBBBBBBBBBBBBBBZBBBBBTRX\[Z__`__^UU^^___Y__`__`_`_UI^`BBBBBBBBBBBBBB 0
78
+ H125 98 1 1108 17210 1978 CGATGT 1 CCC...........................TGTACCTTCCTGAGCTGAGACATGAGCCACACCTGGGTCCATGTGA ]]]BBBBBBBBBBBBBBBBBBBBBBBBBBBXQMNQ__^_____T__Y______^___YI^WW^^Z_PZZ___^_\] 0
79
+ H125 98 1 1108 17560 1987 CGATGT 1 CACG.G.................C.....CGCGCCCACCCCCCCCCGCCGGGCCCGCACGGGGAGTCACCACCTTG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
80
+ H125 98 1 1108 17716 1987 CGATGT 1 TGGG.G.................T.....GGCTGAGACCGGAGAATCCCTTGAACCCAGGAGGCAGAGGCTGCAGT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
81
+ H125 98 1 1108 17756 1977 CGATGT 1 CTG...........................TCGTGCCATTGCACTCCATCTTGGGTGACAGAGTGAGACATCATCT Z]ZBBBBBBBBBBBBBBBBBBBBBBBBBBBNQQKZ____Y__^_^________^_______^`__`\__^^_Y__^ 0
82
+ H125 98 1 1108 17851 1988 CGATGT 1 CAGA.C.................T.....TAAAAGCCAAAATGGGAAAGGAAAAGACTCATATCAACATTGTCGTC ][][BZBBBBBBBBBBBBBBBBBYBBBBBRZPZNZ__\__^`__^___`_\_\^^^^__\_`^_Y__________\ 0
83
+ H125 98 1 1108 18055 1974 CGATGT 1 GTT...........................CTATAGGAGTGGATGATG.AG.TATT.GTCATTGCATGATAGGATA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
84
+ H125 98 1 1108 18193 1990 CGATGT 1 CTTA.C.................A....AGGGAGCCTCTGGGGTTAGGATCAGTAAGGTTTGCTGTCGTTTTTGGA ]Z]ZBQBBBBBBBBBBBBBBBBBRBBBBRQWWU[W__Y___________Y_\^_\_____U`__^\^_]\O^^^^V 0
85
+ H125 98 1 1108 18377 1977 CGATGT 1 GGC...........................CAAATATAAGGAAATGGCCCAATGAACGTGGTTGTGGGAGGGGAAA WZWBBBBBBBBBBBBBBBBBBBBBBBBBBBRUVLU_`__`__________________\_`_`^___\`_^^^^BB 0
86
+ H125 98 1 1108 18485 1979 CGATGT 1 ATA...........................CGAGATGAGCTCAGATCTACCTTTCCTCTTCATGGACCAGACTGGA VQQBBBBBBBBBBBBBBBBBBBBBBBBBBBRQVWU___`_V`^VXYL^^Z[RW^WI^\^U]]]Y]________`QV 0
87
+ H125 98 1 1108 18436 1988 CGATGT 1 CTTA.A.................A.....CTGTTTGGCTTCAACTCCTACTTTTTTCAGCACGATTCCTTTTGCAT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
88
+ H125 98 1 1108 18330 1992 CGATGT 1 CTTT.A.................T....GTAAAAGGAGAAAAAGATGCGGGGGAAATGGAGCAGGGGGTGGGGAAA ][U[BWBBBBBBBBBBBBBBBBBRBBBBSOSWJWW_V_Y_^ZXZZSNZ^O_`^^__Y__NY^^\YZ]^VVX\YWXV 0
89
+ H125 98 1 1108 19283 1973 CGATGT 1 CTT...........................CACAGCCTGTTTAATCTG.TG.TTGT.GGCTTTAACATCCACAATG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
90
+ H125 98 1 1108 19389 1994 CGATGT 1 TGCT.A.C...............A....TGAGACCAGTTCACTAGAAGAAACAAACATTTCTGCCATGCAGACCAA ^^^^B]B]BBBBBBBBBBBBBBBVBBBBT[W]ZZX_`______`^___`_^^_^^^_`__________^_\_____ 0
91
+ H125 98 1 1108 19966 1977 CGATGT 1 CCT...........................GGAAAACATGTCTTCAGAAATAAATGCTTCCTCTTCCAGGTAGTTC ]RYBBBBBBBBBBBBBBBBBBBBBBBBBBBRUMVS__^^________^__\]]`]___^`^_`____^__Z^\Z^` 0
92
+ H125 98 1 1108 20041 1984 CGATGT 1 CTTT...................G.....GAGGTGCAGGGGGCGCAGGGGGCGGCGTCACCGAGGAGCAGGCGGGC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
93
+ H125 98 1 1108 20230 2000 CGATGT 1 GTGAAG.ATATA....G.....TATTCTCCCCCAGTTTGGCAGAGGCTGGCCCTCTTTTGACTGCCCCTGCTTTCT _____\B\^VOQBBBBXBBBBBHOUUMZR[]]XX]___Y^__Y\X^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
94
+ H125 98 1 1108 20276 1979 CGATGT 1 TCT....................G......GGCGCACATAGACTACTCCAGGAGTAAGTTGTTCTTGTTTTTTTCG VYPBBBBBBBBBBBBBBBBBBBBTBBBBBBYWSTRX\^]]____Y]W]TKUYOSLS[ZXXUV^W^]XT^]^`^^^_ 0
95
+ H125 98 1 1108 20743 1977 CGATGT 1 CTG...........................TTGACAGAGCCAACTCCCTTGAGGGTGGCAGGAAGCACACCCCCCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
96
+ H125 98 1 1108 20532 1980 CGATGT 1 CTA....................A.....TTTTGGTAGAGATGGGGATTTTGCCATGTTGCCCAGGCTGGTCTCCT ]VRBBBBBBBBBBBBBBBBBBBBRBBBBBP]SRR]_______`______^________`\___X___`_^SRP\\^ 0
97
+ H125 98 1 1108 20881 1975 CGATGT 1 CTC...........................ATCAATCATATACAGAATATCTACAAAAAACCTACATCATACTTAA ]X]BBBBBBBBBBBBBBBBBBBBBBBBBBBXVSUN__U__`____^X^V\^]\_]_`^XX^`\_`_`___^_Y`^_ 0
98
+ H125 98 1 1108 21009 2000 CGATGT 1 GTGGCG.CGGGT....G.....GGGTCGAAGTCAAGGCCCTGGCTTGACGCTGGTGCCACCGCGAACGCTCGCACA _BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
99
+ H125 98 1 1108 1188 2036 CGATGT 1 CTTGTATGCAGCATCCCCTTCTTGCCTAGGGACTTGAAGGGCCAGGCTTCCTGTCATTGCCTCACTCAAATGTAGC gggggggggggggegggggffggeggegggeagge^ggdbcgggcdgedegfggffff^ffffefdeeZefccceg 1
100
+ H125 98 1 1108 1147 2046 CGATGT 1 CGGGAAATGGCGGAAATGTGCAAGGATGTTATATGAATTGTGTTGGTTGGCCTAAAACACAGAGCCGGCTTGAAGT [XSZGXPSKPJPP]TWQRFRHXW\WXXX]PUX[W_^R^[XRNRYV]^Y[]UUNRT]X_BBBBBBBBBBBBBBBBBB 0
@@ -0,0 +1,40 @@
1
+ #
2
+ # quality_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
+ describe "When quality is read" do
14
+ it "should give me back the quality scores of first read in Illumina 1.5+ encoding" do  # checks the decoded scores of the first record only
15
+ read = Bio::FlatFile.auto(File.dirname(__FILE__) + "/fixture/test.fastq").first  # NOTE(review): fixture/test.fastq is not in this release's file list - confirm how it is produced
16
+ read.format = :fastq_illumina  # select the Illumina FASTQ variant before asking for scores
17
+ qual = read.quality_scores
18
+ qual.should == [39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 37, 39, 39, 39, 39, 39, 38, 38, 39, 39, 37, 39, 39, 37, 39, 39, 39, 37, 33, 39, 39, 37, 30, 39, 39, 36, 34, 35, 39, 39, 39, 35, 36, 39, 37, 36, 37, 39, 38, 39, 39, 38, 38, 38, 38, 30, 38, 38, 38, 38, 37, 38, 36, 37, 37, 26, 37, 38, 35, 35, 35, 37, 39]
19
+ end
20
+
21
+ it "should give me back the quality scores of last read, with Bs in Illumina 1.5+ encoding" do  # walks every record and checks the scores of the final one
22
+ quals = []
23
+ Bio::FlatFile.auto(File.dirname(__FILE__) + "/fixture/test.fastq").each do |read|
24
+ read.format = :fastq_illumina
25
+ quals = read.quality_scores  # overwritten on every iteration, so only the last read's scores survive the loop
26
+ end
27
+ quals.should == [27, 24, 19, 26, 7, 24, 16, 19, 11, 16, 10, 16, 16, 29, 20, 23, 17, 18, 6, 18, 8, 24, 23, 28, 23, 24, 24, 24, 29, 16, 21, 24, 27, 23, 31, 30, 18, 30, 27, 24, 18, 14, 18, 25, 22, 29, 30, 25, 27, 29, 21, 21, 14, 18, 20, 29, 24, 31, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]  # the trailing run of 2s is presumably the 'B' tail named in the description - TODO confirm the offset
28
+ end
29
+
30
+ it "should tell me how many B are in the sequence with Illumina 1.5+ encoding" do  # NOTE(review): the description promises a B count but the assertion below expects nil - confirm intent
31
+ reads = Bio::Ngs::FastQuality.new(File.dirname(__FILE__) + "/fixture/test.fastq")  # no format argument is passed here, unlike the cumulative-count example
32
+ reads.quality_profile.should == nil
33
+ end
34
+
35
+ it "should return the comulative count of Bs in all the sequences" do
36
+ reads = Bio::Ngs::FastQuality.new(File.dirname(__FILE__) + "/fixture/test.fastq", :fastq_illumina)
37
+ # reads = Bio::Ngs::FastQuality.new("/Users/bonnalraoul/Desktop/s_1_1_1108_qseq.fastq", :fastq_illumina)
38
+ reads.track_b_count.b_profile.should == [[58, 1], [59, 1], [60, 1], [61, 1], [62, 1], [63, 1], [64, 1], [65, 1], [66, 1], [67, 1], [68, 1], [69, 1], [70, 1], [71, 1], [72, 1], [73, 1], [74, 1], [75, 1]]
39
+ end
40
+ end
@@ -0,0 +1,98 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'thor/base'
3
+
4
+ describe "SffExtract" do
5
+
6
+ describe "class" do
7
+ it "should have default options" do  # pins the full class-level option table of the sff_extract wrapper
8
+ Bio::Ngs::SffExtract.options.should == {"append" => {:type => :boolean, :aliases => "-a", :desc =>"append output to existing files"},
9
+ "xml_info" => {:type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"},
10
+ "linker_file" => {:type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"},
11
+ "clip" => {:type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"},
12
+ "upper_case" => {:type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"},
13
+ "min_left_clip" => {:type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"},
14
+ "fastq" => {:type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"},
15
+ "out_basename" => {:type => :string, :aliases => "-o", :desc => "base name for all output files"},
16
+ "seq_file" => {:type => :string, :aliases => "-s", :desc => "output sequence file name"},
17
+ "qual_file" => {:type => :string, :aliases => "-q", :desc => "output quality file name"},
18
+ "xml_file" => {:type => :string, :aliases => "-x", :desc => "output ancillary xml file name"}
19
+ }  # keys are strings; min_left_clip is the only entry without an :aliases key
20
+ end
21
+ it "should have a default program name" do  # the class-level program must equal what Utils.binary resolves for "sff_extract"
22
+ Bio::Ngs::SffExtract.program.should == Bio::Ngs::Utils.binary("sff_extract")
23
+ end
24
+
25
+ end
26
+
27
+ describe "instance" do
28
+ it "has default options" do  # a fresh instance must expose the same option table as the class
29
+ Bio::Ngs::SffExtract.new.options.should == {"append" => {:type => :boolean, :aliases => "-a", :desc =>"append output to existing files"},
30
+ "xml_info" => {:type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"},
31
+ "linker_file" => {:type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"},
32
+ "clip" => {:type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"},
33
+ "upper_case" => {:type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"},
34
+ "min_left_clip" => {:type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"},
35
+ "fastq" => {:type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"},
36
+ "out_basename" => {:type => :string, :aliases => "-o", :desc => "base name for all output files"},
37
+ "seq_file" => {:type => :string, :aliases => "-s", :desc => "output sequence file name"},
38
+ "qual_file" => {:type => :string, :aliases => "-q", :desc => "output quality file name"},
39
+ "xml_file" => {:type => :string, :aliases => "-x", :desc => "output ancillary xml file name"}
40
+ }  # same literal as the class-level example in this file
41
+ end
42
+
43
+ it "has custom name" do  # a path passed to the constructor is returned unchanged by #program
44
+ Bio::Ngs::SffExtract.new("/usr/local/bin/sff_extract").program.should == "/usr/local/bin/sff_extract"
45
+ end
46
+
47
+ it "overwrites specifc option" do
48
+ tophat = Bio::Ngs::SffExtract.new
49
+ tophat.options={:reads=>{:type=>:numeric}}
50
+ tophat.options[:reads][:type].should == :numeric
51
+ end
52
+
53
+ it "add custom option" do
54
+ tophat = Bio::Ngs::SffExtract.new
55
+ tophat.options={:parameter_xxx=>{:type=>:numeric}}
56
+ tophat.options.should == {"append" => {:type => :boolean, :aliases => "-a", :desc =>"append output to existing files"},
57
+ "xml_info" => {:type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"},
58
+ "linker_file" => {:type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"},
59
+ "clip" => {:type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"},
60
+ "upper_case" => {:type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"},
61
+ "min_left_clip" => {:type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"},
62
+ "fastq" => {:type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"},
63
+ "out_basename" => {:type => :string, :aliases => "-o", :desc => "base name for all output files"},
64
+ "seq_file" => {:type => :string, :aliases => "-s", :desc => "output sequence file name"},
65
+ "qual_file" => {:type => :string, :aliases => "-q", :desc => "output quality file name"},
66
+ "xml_file" => {:type => :string, :aliases => "-x", :desc => "output ancillary xml file name"},
67
+ :parameter_xxx=>{:type=>:numeric}
68
+ }
69
+ end
70
+
71
+ it "set a default option to be returned as params" do
72
+ tophat = Bio::Ngs::SffExtract.new
73
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
74
+ tophat.params.should == {:parameter_xxx=>{:type=>:numeric, :default=>10}}
75
+ end
76
+
77
+ it "get normalized options" do
78
+ tophat = Bio::Ngs::SffExtract.new
79
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
80
+ tophat.normalize_params.should == "--parameter_xxx=10"
81
+ end
82
+
83
+ it "does not save a valid parameter/option" do
84
+ tophat = Bio::Ngs::SffExtract.new
85
+ tophat.params={:fake_parameter=>1234567890}
86
+ tophat.normalize_params.should == []
87
+ end
88
+
89
+ it "set a default option and get the parameters for the binary program" do
90
+ tophat = Bio::Ngs::SffExtract.new
91
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
92
+ tophat.params={:fake_parameter=>01}
93
+ tophat.normalize_params.should == "--parameter_xxx=10"
94
+ end
95
+
96
+ end
97
+
98
+ end
@@ -0,0 +1,55 @@
1
+ $TESTING=true
2
+
3
+ # require 'simplecov'
4
+ # SimpleCov.start do
5
+ # add_group 'Libraries', 'lib'
6
+ # add_group 'Specs', 'spec'
7
+ # end
8
+
9
+ require 'thor'
10
+ require 'thor/base'
11
+ require 'stringio'
12
+ require 'rdoc'
13
+ require 'rspec'
14
+ require 'diff/lcs' # You need diff/lcs installed to run specs (but not to run Thor).
15
+ #require 'fakeweb' # You need fakeweb installed to run specs (but not to run Thor).
16
+
17
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
18
+
19
+ require 'bio-ngs'
20
+
21
+ # Set shell to basic
22
+ $0 = "thor"
23
+ $thor_runner = true
24
+ ARGV.clear
25
+ Thor::Base.shell = Thor::Shell::Basic
26
+
27
+ # Load the Thor task files from lib/tasks that the specs exercise
28
+ %w(bwa history project quality rna sff_extract).each do |task|
29
+ load File.join(File.dirname(__FILE__), "..", "lib", "tasks", "#{task}.thor" )
30
+ end
31
+
32
+ RSpec.configure do |config|
33
+ def capture(stream)  # Runs the given block with the named global stream swapped for a StringIO and returns the text written to it.
34
+ begin
35
+ stream = stream.to_s  # accept a symbol or a string, e.g. :stdout
36
+ eval "$#{stream} = StringIO.new"  # rebinds the global of that name (e.g. $stdout) to an in-memory buffer
37
+ yield
38
+ result = eval("$#{stream}").string  # collect what the block wrote; not reached if the block raises
39
+ ensure
40
+ eval("$#{stream} = #{stream.upcase}")  # always restore the global from the upper-cased constant (e.g. STDOUT)
41
+ end
42
+
43
+ result
44
+ end
45
+
46
+ def source_root  # Path of the lib/tasks directory, built relative to this spec file.
47
+ File.join(File.dirname(__FILE__), "..", "lib", "tasks")
48
+ end
49
+
50
+ def destination_root  # Path of a 'sandbox' directory located next to this spec file.
51
+ File.join(File.dirname(__FILE__), 'sandbox')
52
+ end
53
+
54
+ alias :silence :capture
55
+ end
@@ -0,0 +1,99 @@
1
+ #
2
+ # tophat_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
+ describe "Tophat" do
14
+ describe "class" do
15
+ it "default options" do
16
+ Bio::Ngs::Tophat.options.should == {"output-dir"=>{:type=>:string, :aliases=>"-o"}, "min-anchor"=>{:type=>:numeric, :aliases=>"-a"}, "splice-mismatches"=>{:type=>:numeric, :aliases=>"-m"}, "min-intron"=>{:type=>:numeric, :aliases=>"-i"}, "max-intront"=>{:type=>:numeric, :aliases=>"-I"}, "max-multihits"=>{:type=>:numeric, :aliases=>"-g"}, "min-isoform_fraction"=>{:type=>:numeric, :aliases=>"-F"}, "solexa-quals"=>{:type=>:boolean}, "solexa1.3-quals"=>{:type=>:boolean, :aliases=>"--phred64-quals"}, :quals=>{:type=>:boolean, :aliases=>"-Q"}, "integer-quals"=>{:type=>:boolean}, :color=>{:type=>:boolean, :aliases=>"-C"}, "library-type"=>{:type=>:string}, "num-threads"=>{:type=>:numeric, :aliases=>"-p"}, "GTF"=>{:type=>:string, :aliases=>"-G"}, "raw-juncs"=>{:type=>:string, :aliases=>"-j"}, :insertions=>{:type=>:string}, :deletions=>{:type=>:string}, "mate-inner-dist"=>{:type=>:numeric, :aliases=>"-r"}, "mate-std-dev"=>{:type=>:numeric}, "no-novel-juncs"=>{:type=>:boolean}, "no-gtf-juncs"=>{:type=>:boolean}, "no-coverage-search"=>{:type=>:boolean}, "coverage-search"=>{:type=>:boolean}, "no-closure-search"=>{:type=>:boolean}, "closure-search"=>{:type=>:boolean}, "fill-gaps"=>{:type=>:boolean}, "microexon-search"=>{:type=>:boolean}, "butterfly-search"=>{:type=>:boolean}, "no-butterfly-search"=>{:type=>:boolean}, "keep-tmp"=>{:type=>:boolean}, "tmp-dir"=>{:type=>:string}, "segment-mismatches"=>{:type=>:numeric}, "segment-length"=>{:type=>:numeric}, "min-closure-exon"=>{:type=>:numeric}, "min-closure-intron"=>{:type=>:numeric}, "max-closure-intron"=>{:type=>:numeric}, "min-coverage-intron"=>{:type=>:numeric}, "max-coverage-intron"=>{:type=>:numeric}, "min-segment-intron"=>{:type=>:numeric}, "max-segment-intron"=>{:type=>:numeric}, "rg-id"=>{:type=>:string}, "rg-sample"=>{:type=>:string}, "rg-library"=>{:type=>:string}, "rg-description"=>{:type=>:string}, "rg-platform-unit"=>{:type=>:string}, "rg-center"=>{:type=>:string}, "rg-date"=>{:type=>:string}, 
"rg-platform"=>{:type=>:string}}
17
+ end
18
+
19
+ it "has default program name" do
20
+ Bio::Ngs::Tophat.program.should == Bio::Ngs::Utils.binary("tophat/tophat")
21
+ end
22
+ end
23
+
24
+ describe "instance" do
25
+ it "has default options" do
26
+ Bio::Ngs::Tophat.new.options.should == {"output-dir"=>{:type=>:string, :aliases=>"-o"}, "min-anchor"=>{:type=>:numeric, :aliases=>"-a"}, "splice-mismatches"=>{:type=>:numeric, :aliases=>"-m"}, "min-intron"=>{:type=>:numeric, :aliases=>"-i"}, "max-intront"=>{:type=>:numeric, :aliases=>"-I"}, "max-multihits"=>{:type=>:numeric, :aliases=>"-g"}, "min-isoform_fraction"=>{:type=>:numeric, :aliases=>"-F"}, "solexa-quals"=>{:type=>:boolean}, "solexa1.3-quals"=>{:type=>:boolean, :aliases=>"--phred64-quals"}, :quals=>{:type=>:boolean, :aliases=>"-Q"}, "integer-quals"=>{:type=>:boolean}, :color=>{:type=>:boolean, :aliases=>"-C"}, "library-type"=>{:type=>:string}, "num-threads"=>{:type=>:numeric, :aliases=>"-p"}, "GTF"=>{:type=>:string, :aliases=>"-G"}, "raw-juncs"=>{:type=>:string, :aliases=>"-j"}, :insertions=>{:type=>:string}, :deletions=>{:type=>:string}, "mate-inner-dist"=>{:type=>:numeric, :aliases=>"-r"}, "mate-std-dev"=>{:type=>:numeric}, "no-novel-juncs"=>{:type=>:boolean}, "no-gtf-juncs"=>{:type=>:boolean}, "no-coverage-search"=>{:type=>:boolean}, "coverage-search"=>{:type=>:boolean}, "no-closure-search"=>{:type=>:boolean}, "closure-search"=>{:type=>:boolean}, "fill-gaps"=>{:type=>:boolean}, "microexon-search"=>{:type=>:boolean}, "butterfly-search"=>{:type=>:boolean}, "no-butterfly-search"=>{:type=>:boolean}, "keep-tmp"=>{:type=>:boolean}, "tmp-dir"=>{:type=>:string}, "segment-mismatches"=>{:type=>:numeric}, "segment-length"=>{:type=>:numeric}, "min-closure-exon"=>{:type=>:numeric}, "min-closure-intron"=>{:type=>:numeric}, "max-closure-intron"=>{:type=>:numeric}, "min-coverage-intron"=>{:type=>:numeric}, "max-coverage-intron"=>{:type=>:numeric}, "min-segment-intron"=>{:type=>:numeric}, "max-segment-intron"=>{:type=>:numeric}, "rg-id"=>{:type=>:string}, "rg-sample"=>{:type=>:string}, "rg-library"=>{:type=>:string}, "rg-description"=>{:type=>:string}, "rg-platform-unit"=>{:type=>:string}, "rg-center"=>{:type=>:string}, "rg-date"=>{:type=>:string}, 
"rg-platform"=>{:type=>:string}}
27
+ end
28
+
29
+ it "has custom name" do
30
+ Bio::Ngs::Tophat.new("/usr/local/bin/tophat").program.should == "/usr/local/bin/tophat"
31
+ end
32
+
33
+ it "overwrites specifc option" do
34
+ tophat = Bio::Ngs::Tophat.new
35
+ tophat.options={:reads=>{:type=>:numeric}}
36
+ tophat.options[:reads][:type].should == :numeric
37
+ end
38
+
39
+ it "add custom option" do
40
+ tophat = Bio::Ngs::Tophat.new
41
+ tophat.options={:parameter_xxx=>{:type=>:numeric}}
42
+ tophat.options.should == {"output-dir"=>{:type=>:string, :aliases=>"-o"}, "min-anchor"=>{:type=>:numeric, :aliases=>"-a"}, "splice-mismatches"=>{:type=>:numeric, :aliases=>"-m"}, "min-intron"=>{:type=>:numeric, :aliases=>"-i"}, "max-intront"=>{:type=>:numeric, :aliases=>"-I"}, "max-multihits"=>{:type=>:numeric, :aliases=>"-g"}, "min-isoform_fraction"=>{:type=>:numeric, :aliases=>"-F"}, "solexa-quals"=>{:type=>:boolean}, "solexa1.3-quals"=>{:type=>:boolean, :aliases=>"--phred64-quals"}, :quals=>{:type=>:boolean, :aliases=>"-Q"}, "integer-quals"=>{:type=>:boolean}, :color=>{:type=>:boolean, :aliases=>"-C"}, "library-type"=>{:type=>:string}, "num-threads"=>{:type=>:numeric, :aliases=>"-p"}, "GTF"=>{:type=>:string, :aliases=>"-G"}, "raw-juncs"=>{:type=>:string, :aliases=>"-j"}, :insertions=>{:type=>:string}, :deletions=>{:type=>:string}, "mate-inner-dist"=>{:type=>:numeric, :aliases=>"-r"}, "mate-std-dev"=>{:type=>:numeric}, "no-novel-juncs"=>{:type=>:boolean}, "no-gtf-juncs"=>{:type=>:boolean}, "no-coverage-search"=>{:type=>:boolean}, "coverage-search"=>{:type=>:boolean}, "no-closure-search"=>{:type=>:boolean}, "closure-search"=>{:type=>:boolean}, "fill-gaps"=>{:type=>:boolean}, "microexon-search"=>{:type=>:boolean}, "butterfly-search"=>{:type=>:boolean}, "no-butterfly-search"=>{:type=>:boolean}, "keep-tmp"=>{:type=>:boolean}, "tmp-dir"=>{:type=>:string}, "segment-mismatches"=>{:type=>:numeric}, "segment-length"=>{:type=>:numeric}, "min-closure-exon"=>{:type=>:numeric}, "min-closure-intron"=>{:type=>:numeric}, "max-closure-intron"=>{:type=>:numeric}, "min-coverage-intron"=>{:type=>:numeric}, "max-coverage-intron"=>{:type=>:numeric}, "min-segment-intron"=>{:type=>:numeric}, "max-segment-intron"=>{:type=>:numeric}, "rg-id"=>{:type=>:string}, "rg-sample"=>{:type=>:string}, "rg-library"=>{:type=>:string}, "rg-description"=>{:type=>:string}, "rg-platform-unit"=>{:type=>:string}, "rg-center"=>{:type=>:string}, "rg-date"=>{:type=>:string}, "rg-platform"=>{:type=>:string}, 
:parameter_xxx=>{:type=>:numeric}}
43
+ end
44
+
45
+ it "set a default option to be returned as params" do
46
+ tophat = Bio::Ngs::Tophat.new
47
+ # Set an option that carries a default value.
48
+ # TODO: add a check between type and default value in the main class;
49
+ # Thor already does it.
50
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
51
+ tophat.params.should == {:parameter_xxx=>{:type=>:numeric, :default=>10}}
52
+ end
53
+
54
+ it "get normalized options" do
55
+ tophat = Bio::Ngs::Tophat.new
56
+ # Set an option that carries a default value.
57
+ # TODO: add a check between type and default value in the main class;
58
+ # Thor already does it.
59
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
60
+ tophat.normalize_params.should == "--parameter_xxx=10"
61
+ end
62
+
63
+ it "does not save a valid parameter/option" do
64
+ tophat = Bio::Ngs::Tophat.new
65
+ tophat.params={:fake_parameter=>1234567890}
66
+ tophat.normalize_params.should == []
67
+ end
68
+
69
+ it "set a default option and get the parameters for the binary program" do
70
+ tophat = Bio::Ngs::Tophat.new
71
+ #setting a default options
72
+ #TODO: add check between type and default value, in the main class,
73
+ # Thor already does it.
74
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
75
+ tophat.params={:fake_parameter=>01}
76
+ tophat.normalize_params.should == "--parameter_xxx=10"
77
+ end
78
+ end
79
+ end
80
+
81
+
82
+ # describe Tophat do
83
+ # describe "Tophat" do
84
+ # it "the program is " do
85
+ # Bio::Ngs::Tophat.new.program.should == Bio::Ngs::Utils.binary("tophat/tophat")
86
+ # end
87
+ #
88
+ # it "returns the default parameters" do
89
+ # Bio::Ngs::Tophat.new.main.should == ""
90
+ # end
91
+ #
92
+ # it "all the parameters of this application" do
93
+ # first_option = Bio::Ngs::Tophat.tasks["main"].options.first #is an array of name, Thor::Option
94
+ # thor_option = first_option.last
95
+ # thor_option.name.should == "reads"
96
+ # end
97
+ #
98
+ # end
99
+ # end
@@ -0,0 +1,22 @@
1
+ #
2
+ # converter_qseq_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
+ describe "Utils" do # specs for Bio::Ngs::Utils.tag_filename
14
+ it "tags the regular file name with the new tag and extension" do # input already carries an extension (.txt)
15
+ Bio::Ngs::Utils.tag_filename("test_file_name.txt", "report", "csv").should == "test_file_name_report.csv"
16
+ end
17
+
18
+ it "tags the strange file name with the new tag and extension" do # input has no extension at all
19
+ Bio::Ngs::Utils.tag_filename("test_file_name", "report", "csv").should == "test_file_name_report.csv"
20
+ end
21
+
22
+ end
@@ -0,0 +1,4 @@
1
+ adapter: sqlite3
2
+ database: test/data/test.sqlite3
3
+ pool: 5
4
+ timeout: 5000
@@ -0,0 +1,69 @@
1
+ <?xml version="1.0"?>
2
+ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
3
+ <BlastOutput>
4
+ <BlastOutput_program>blastx</BlastOutput_program>
5
+ <BlastOutput_version>blastx 2.2.19 [Nov-02-2008]</BlastOutput_version>
6
+ <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
7
+ <BlastOutput_db>uniprot_sprot</BlastOutput_db>
8
+ <BlastOutput_query-ID>lcl|1_0</BlastOutput_query-ID>
9
+ <BlastOutput_query-def>ENSBTAG00000025113_499_35</BlastOutput_query-def>
10
+ <BlastOutput_query-len>1221</BlastOutput_query-len>
11
+ <BlastOutput_param>
12
+ <Parameters>
13
+ <Parameters_matrix>BLOSUM62</Parameters_matrix>
14
+ <Parameters_expect>10</Parameters_expect>
15
+ <Parameters_gap-open>11</Parameters_gap-open>
16
+ <Parameters_gap-extend>1</Parameters_gap-extend>
17
+ <Parameters_filter>F</Parameters_filter>
18
+ </Parameters>
19
+ </BlastOutput_param>
20
+ <BlastOutput_iterations>
21
+ <Iteration>
22
+ <Iteration_iter-num>239</Iteration_iter-num>
23
+ <Iteration_query-ID>lcl|239_0</Iteration_query-ID>
24
+ <Iteration_query-def>ENSBTAG00000031386_1906_35</Iteration_query-def>
25
+ <Iteration_query-len>2107</Iteration_query-len>
26
+ <Iteration_hits>
27
+ <Hit>
28
+ <Hit_num>1</Hit_num>
29
+ <Hit_id>sp|Q5TTP0|WDY_ANOGA</Hit_id>
30
+ <Hit_def>WD repeat-containing protein on Y chromosome OS=Anopheles gambiae GN=WDY PE=4 SV=4</Hit_def>
31
+ <Hit_accession>Q5TTP0</Hit_accession>
32
+ <Hit_len>1059</Hit_len>
33
+ <Hit_hsps>
34
+ <Hsp>
35
+ <Hsp_num>1</Hsp_num>
36
+ <Hsp_bit-score>224.172</Hsp_bit-score>
37
+ <Hsp_score>570</Hsp_score>
38
+ <Hsp_evalue>2.55108e-57</Hsp_evalue>
39
+ <Hsp_query-from>1</Hsp_query-from>
40
+ <Hsp_query-to>1641</Hsp_query-to>
41
+ <Hsp_hit-from>347</Hsp_hit-from>
42
+ <Hsp_hit-to>897</Hsp_hit-to>
43
+ <Hsp_query-frame>1</Hsp_query-frame>
44
+ <Hsp_identity>167</Hsp_identity>
45
+ <Hsp_positive>271</Hsp_positive>
46
+ <Hsp_gaps>19</Hsp_gaps>
47
+ <Hsp_align-len>566</Hsp_align-len>
48
+ <Hsp_qseq>KGILCFDYCPDRNVLVTGGYDPLIRLWNPFFSRKPVWMMKGHQTSVTHIVVNSKDSSILLSVSKDKNIRVWDMQDYQCLQSFCGFSEV-IKGTWICALGLKWGHGFGIGILKGYLETQGPGRAEEKTT--TYSTPLCAVLYSKVFKQVVSGCLSGMVSVWEVVTGRRLMEFSVTGDQ-------QVELTAMSLDESEGCLLTGLRDGTVKMWNYSVGECLLTFPKADQLEISGIVHMNKVFYTTGWSKRIT-YYTFHKIKPVLLCHHWQTFHTEDVLSMAKYQNQ--FIATSSYNGDILFWNVGTFRPTQRALASDPQPRRPLSPVRGLRDQLKGKKQTSRRSCCCSPMHR--WRRLAMFSQPSGC-HIEVLTFFLIIFLQTRPRLPHSAALLSSCMDGYIYAWSIHGSGGLLGKFPXXXXXXXXXXXXAMATDQNDWILVTGDCKGCIKIWDIKD-YCAHSDK-QTNHPSEINKFRFLISERIQVSLPNYSPPEEKKVEAGQTISLIPPQLLISWKAHLDSVADILYVDSLQLVISAGQDRDVKAWKLSGDAIGTFGLSV-WKRL</Hsp_qseq>
49
+ <Hsp_hseq>RGVTCFAFEPSNELLVSGGPDCDLRLWDIHRPEKPSVVLVGHTSSITFLFLQDAGEKIY-SLDQRKIIKVWDVRNRVLLQTFGQFSTVLVKGVPACAYYNKRARELVVASNKLFVTACCPEIALDRTDGESHTKPVSVLLYNGLYRLVVSCGFDSFIIVWDHRVNRKMTIITEAHTQIRNGVLEPVEITAACFDGKEQMLLTGARNGSLKIWNIGGRTCMRTIQIEEDCEVTGVFWQANRILAMGWNHRVVEFAAFAEQDEYPRGLQWRKQHSDDILCAAVSGSEPGVMATCSYAGELVFWMLETGQPYRRYDATNPRTRLPISFREGRADLMKPRKLTPRRSLFQMPPGQLAHRRLTRILMPSGLEQMRQLSIQALLFLAMRKMLPDRGTLFGSLDNGMVQVWSHHPDGGFKGQF--NGIHMAGDRIITLATDKANRFLFTGTALGYVKTWYIENCWIPNEDKFHVNKPALRILFPFLLNDVV--------PGRAKRSARAQ----VKPWLLNSYQAHRACVTGLTYLDDTGLLLSCSSDRTVRLWTLGGRYIGLLGSPVNWQPL</Hsp_hseq>
50
+ <Hsp_midline>+G+ CF + P +LV+GG D +RLW+ KP ++ GH +S+T + + I S+ + K I+VWD+++ LQ+F FS V +KG CA K + K ++ P A ++T +++ P+ +LY+ +++ VVS + VW+ R++ + Q VE+TA D E LLTG R+G++K+WN C+ T + E++G+ GW+ R+ + F + W+ H++D+L A ++ +AT SY G+++FW + T +P +R A++P+ R P+S G D +K +K T RRS P + RRL PSG + L+ ++FL R LP L S +G + WS H GG G+F +ATD+ + L TG G +K W I++ + + DK N P+ F FL+++ + P K+ Q + P LL S++AH V + Y+D L++S DR V+ W L G IG G V W+ L</Hsp_midline>
51
+ </Hsp>
52
+ </Hit_hsps>
53
+ </Hit>
54
+ </Iteration_hits>
55
+ <Iteration_stat>
56
+ <Statistics>
57
+ <Statistics_db-num>525997</Statistics_db-num>
58
+ <Statistics_db-len>185874894</Statistics_db-len>
59
+ <Statistics_hsp-len>0</Statistics_hsp-len>
60
+ <Statistics_eff-space>0</Statistics_eff-space>
61
+ <Statistics_kappa>0.041</Statistics_kappa>
62
+ <Statistics_lambda>0.267</Statistics_lambda>
63
+ <Statistics_entropy>0.14</Statistics_entropy>
64
+ </Statistics>
65
+ </Iteration_stat>
66
+ </Iteration>
67
+ </BlastOutput_iterations>
68
+ </BlastOutput>
69
+