bio-ngs 0.3.2.alpha.01

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/.document +5 -0
  2. data/Gemfile +39 -0
  3. data/Gemfile.lock +81 -0
  4. data/LICENSE.txt +28 -0
  5. data/README.rdoc +240 -0
  6. data/Rakefile +60 -0
  7. data/VERSION +1 -0
  8. data/bin/biongs +35 -0
  9. data/bio-ngs.gemspec +215 -0
  10. data/ext/mkrf_conf.rb +87 -0
  11. data/lib/bio-ngs.rb +54 -0
  12. data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
  13. data/lib/bio/appl/ngs/blast.rb +36 -0
  14. data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
  15. data/lib/bio/appl/ngs/cufflinks.rb +489 -0
  16. data/lib/bio/appl/ngs/fastx.rb +170 -0
  17. data/lib/bio/appl/ngs/samtools.rb +118 -0
  18. data/lib/bio/appl/ngs/sff_extract.rb +23 -0
  19. data/lib/bio/appl/ngs/tophat.rb +158 -0
  20. data/lib/bio/ngs/converter.rb +100 -0
  21. data/lib/bio/ngs/core_ext.rb +12 -0
  22. data/lib/bio/ngs/db.rb +66 -0
  23. data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
  24. data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
  25. data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
  26. data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
  27. data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
  28. data/lib/bio/ngs/db/models.rb +1 -0
  29. data/lib/bio/ngs/db/models/homology.rb +8 -0
  30. data/lib/bio/ngs/db/models/ontology.rb +16 -0
  31. data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
  32. data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
  33. data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
  34. data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
  35. data/lib/bio/ngs/ext/versions.yaml +73 -0
  36. data/lib/bio/ngs/graphics.rb +189 -0
  37. data/lib/bio/ngs/homology.rb +102 -0
  38. data/lib/bio/ngs/ontology.rb +103 -0
  39. data/lib/bio/ngs/quality.rb +64 -0
  40. data/lib/bio/ngs/record.rb +50 -0
  41. data/lib/bio/ngs/task.rb +46 -0
  42. data/lib/bio/ngs/utils.rb +176 -0
  43. data/lib/development_tasks.rb +34 -0
  44. data/lib/enumerable.rb +37 -0
  45. data/lib/tasks/bwa.thor +126 -0
  46. data/lib/tasks/convert.thor +454 -0
  47. data/lib/tasks/history.thor +51 -0
  48. data/lib/tasks/homology.thor +121 -0
  49. data/lib/tasks/ontology.thor +93 -0
  50. data/lib/tasks/project.thor +51 -0
  51. data/lib/tasks/quality.thor +142 -0
  52. data/lib/tasks/rna.thor +126 -0
  53. data/lib/tasks/sff_extract.thor +9 -0
  54. data/lib/templates/README.tt +43 -0
  55. data/lib/templates/db.tt +6 -0
  56. data/lib/wrapper.rb +225 -0
  57. data/spec/converter_qseq_spec.rb +56 -0
  58. data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
  59. data/spec/quality_spec.rb +40 -0
  60. data/spec/sff_extract_spec.rb +98 -0
  61. data/spec/spec_helper.rb +55 -0
  62. data/spec/tophat_spec.rb +99 -0
  63. data/spec/utils_spec.rb +22 -0
  64. data/test/conf/test_db.yml +4 -0
  65. data/test/data/blastoutput.xml +69 -0
  66. data/test/data/gene-GO.json +1 -0
  67. data/test/data/goa_uniprot +27 -0
  68. data/test/data/goslim_goa.obo +1763 -0
  69. data/test/helper.rb +18 -0
  70. data/test/test_bio-ngs.rb +17 -0
  71. data/test/test_db.rb +21 -0
  72. data/test/test_homology.rb +102 -0
  73. data/test/test_ngs.rb +21 -0
  74. data/test/test_ontology.rb +74 -0
  75. data/test/test_utils.rb +29 -0
  76. metadata +460 -0
@@ -0,0 +1,100 @@
1
+ H125 98 1 1108 1586 1989 CGATGT 1 CAGA.C.................A.....GAATGGCATGGATCAAGAAAATCCCCCTTGTGAAGAAGAATCAGCAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
2
+ H125 98 1 1108 1879 1986 CGATGT 1 CACA...................C.....GAACCTTTATGAGCCGGCTGCCATCTAGTTTGACGCGGATTCTCTTG ^^^^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
3
+ H125 98 1 1108 1786 1997 CGATGT 1 CGCAGA.T.G.............C.A.TCTGATATGAAACATTGGCCCTTTATGGTGGTGAATGATGCTGGCAGGC _____\B]B]BBBBBBBBBBBBBTBXBTXZ^]]]^_____`___`__`\`__________^`___Y^^^^`^___B 0
4
+ H125 98 1 1108 2174 1996 CGATGT 1 CTCACA.A...............T.T..ATTCTGTGTTTTATCAAAAAGCAAAGATATTCTCACCTTCACCTTGCT _^_^_UBSBBBBBBBBBBBBBBBYBRBBUVTYQUT___U^^`^______^_________Y_______\___X___Y 0
5
+ H125 98 1 1108 2322 1999 CGATGT 1 GCTGGG.TTAC...........CC.T.GCCTTAGTTCTTATTTCAAGGAAAAGCTGCTAGAAAGGGAAACCATGAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
6
+ H125 98 1 1108 2619 1977 CGATGT 1 CAG...........................CCAGCAGGACACCGCCTGCAGAAAGGACCTGCCCTGATAATGTCCC ]OTBBBBBBBBBBBBBBBBBBBBBBBBBBBXVR[Z_`_____Y__`_____`_____`_`_`________```_Y_ 0
7
+ H125 98 1 1108 2850 1986 CGATGT 1 CAAA...................G.....GGTGTGGCCATGAATCCTGTGGAGCATCCTTTTGGAGGTGGCAACCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
8
+ H125 98 1 1108 2828 1997 CGATGT 1 GTGATA.G.T.............C.G.GGCAAGGGGGGTTGGAGGAGCATGAAGCCCCCAATATTATGTAGGTCAC ]_^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
9
+ H125 98 1 1108 3107 1977 CGATGT 1 CGG...........................GGGCTTAAAAAAAAAAAAAAAAAAAAAACCCATTTTTTTGCTAACG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
10
+ H125 98 1 1108 3138 1994 CGATGT 1 CTCT.C.T...............A....GCAAAGACAGCAGGCAAGTCATTCTCCAAAGAAAGTGTAAGAGTGCCC ^^^^B[B[BBBBBBBBBBBBBBBTBBBBYZ[][X]______________`____`_____^^`_____\_`__^__ 0
11
+ H125 98 1 1108 3326 1979 CGATGT 1 ATT...........................AATGAAAAGATTGCATATAATGCTGCATCTGTTAGAAACAGAACTA WWWBBBBBBBBBBBBBBBBBBBBBBBBBBBPRVTP____^X`______^_______^_______`\`________S 0
12
+ H125 98 1 1108 3444 1988 CGATGT 1 CGAG...................T.....AGAATTGCATCGTGCTCATCGACAGCACACCGTACCGACAGTGGTAC Z]]ZBBBBBBBBBBBBBBBBBBB[BBBBBRQWFXT__^____^_`_______^_______^WZ_]^\^^^W^\V__ 0
13
+ H125 98 1 1108 3529 1974 CGATGT 1 GTA...........................GCGGACACGGACATGGGT.AA.ACAC.CTGCCTCATGGGGAAACCT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
14
+ H125 98 1 1108 3620 1995 CGATGT 1 CTCAGA.G...............A....TTCAAGAGGCTAGCCCCAGCCAAGAAGGCCCCCTCAGCCAAGGGGGCA ``___SBYBBBBBBBBBBBBBBBRBBBBXPUWVPU___YYY^^GZOTSXS_Y__YZH[]^^M]YU^``^T^VZVXW 0
15
+ H125 98 1 1108 3845 1975 CGATGT 1 CAC...........................GCTGAAAGCCTAGGGGATGGA.AAGT.GCAGGCAAGCCCCGGGGCG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
16
+ H125 98 1 1108 4403 1983 CGATGT 1 CTT....................A.....ATGGACATAAACAAACTTTATTGCACACGACTATTGTGAGGATAAAG ]ZWBBBBBBBBBBBBBBBBBBBBPBBBBBPV\VLR_____`_^`^__^__^^___^__Y_`____`_`U`\_____ 0
17
+ H125 98 1 1108 4424 1985 CGATGT 1 CCCA...................G.....GTGGTCAGGATCCACTGTGGCTGTGAGTCCCTCCCATCCTGTGGCAC ]X]YBBBBBBBBBBBBBBBBBBBRBBBBBXTW\U[`\______________^Y^\]VY^^__^^N`]_BBBBBBBB 0
18
+ H125 98 1 1108 4718 1991 CGATGT 1 TCCC.T.................T....CAACAACTGAAAACGGATGAGGCCAGACTGACTGAAGGGCCCAAGCCA ^^Z^B[BBBBBBBBBBBBBBBBBRBBBBWWXZSXU______________Y____^_^^^______^____Y__T_Y 0
19
+ H125 98 1 1108 4958 1978 CGATGT 1 GTG...........................ATTAGCCGGGCATGGTAGCGGGCGCCTGTAGTTACAGCTACTCTGG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
20
+ H125 98 1 1108 4916 1983 CGATGT 1 CCT....................G.....TCCATTTACTTCTTCCCATCCTGGGACCTGCCAGGGCAGCAATCTCT ]][BBBBBBBBBBBBBBBBBBBBRBBBBBRZWWX]_`_______`_______\_______^__\_`____`_`___ 0
21
+ H125 98 1 1108 4759 1998 CGATGT 1 CAAAAT.A.A............CC.C.ACATGGGTCTATTTTGTGCTTAAAAATAATTTAAAAATCATACAATATT ``___]B]B]BBBBBBBBBBBBY]B[B[[U\^^^Z_`___`^^^`_``__`___^^[______________\____ 0
22
+ H125 98 1 1108 5672 1976 CGATGT 1 AAA...........................AAAAGGACTTTGAAGTTTATCAAGTAGGGAAGAAAAATGTTGCTTT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
23
+ H125 98 1 1108 5522 1979 CGATGT 1 AGA...........................TCACCGATGTATCTCGTATGCCGTCTTCTGCTTGAAAAAACAAATT VTWBBBBBBBBBBBBBBBBBBBBBBBBBBBXZY[W_Y___^YY^^]ZZW^\_\_\^^`^O^`___^BBBBBBBBBB 0
24
+ H125 98 1 1108 5874 1983 CGATGT 1 CAGC...................A.....TGCGACTGTTTAGTTCTCAGCTCTTTCATCACCTCCTTGCTGGAGAT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
25
+ H125 98 1 1108 5775 1999 CGATGT 1 GCCTAG.TGCC...........GGAT.AGGATATTTGCTACTTGCAGCCTTTATCAAAATAAGAGACTAGCAAATT _^^YY[B[UXRBBBBBBBBBBBXZMRBPTPTPQWQW]VVMZ_Z]]Y]_ZU]^\\^]\\Z]^^^U_]]]]__^_^O[ 0
26
+ H125 98 1 1108 6560 1978 CGATGT 1 CCA...........................GTCCCCACACCCCATGACACCGCCCGCTCGCCCTCCCTCCATTCTC ]YXBBBBBBBBBBBBBBBBBBBBBBBBBBBXS]YW`_^^___^^__\_______^^_`______`^^^_^^_____ 0
27
+ H125 98 1 1108 6500 1978 CGATGT 1 AGA...........................TCACCGATGTATCTCGTATGCCGTATCATTAGATCGGAAGAGCACA VXUBBBBBBBBBBBBBBBBBBBBBBBBBBBJYUUW_______`___Y___]^^W]____`__YH_^]]\]____\] 0
28
+ H125 98 1 1108 6921 1974 CGATGT 1 AGT...........................AAAATATCAAGGCTCTCA.TG.GAAA.TGTAGACCCCAAATAACTG ][ZBBBBBBBBBBBBBBBBBBBBBBBBBBBX[YXY^`____`^`^^^VBVSBTY]ZB][]^^Y\\_^_____^^__ 0
29
+ H125 98 1 1108 7694 1990 CGATGT 1 TAGG.A.................A.....GCTGACAAGGATACTGATAGAAAAAGTGATTTCTTCTTATTATAAAG ][[[BQBBBBBBBBBBBBBBBBBRBBBBBXUWZWZ_^_`________`__^\]]]________^^```_Y_`____ 0
30
+ H125 98 1 1108 7856 1988 CGATGT 1 CCCA.T.................T.....CCCTGAGGCCCTCCAGAGGTTATCTGCCCATCATCTCACCATCATGG ^^^^B]BBBBBBBBBBBBBBBBBTBBBBBXVWYUZ_______^^__^__`_\`^^__`_____``_______^__` 0
31
+ H125 98 1 1108 8073 1990 CGATGT 1 GAGG.C.................C.....ACATGCTGCTGGTGATGCTGGAGGACATGAACACAGGGACAGAATCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
32
+ H125 98 1 1108 8108 1993 CGATGT 1 AGCA.T.................G....AAGAAGTCAGAATTTGAAACGGAAGAATGAAGAAAAGAAATAAAAATG MVRTBPBBBBBBBBBBBBBBBBBOBBBBROTZQUQ^\\\]]^^`U^ZWY^]WW\]YY^V^__^^^^^^^_]XZ]QV 0
33
+ H125 98 1 1108 8747 1974 CGATGT 1 GGA...........................TTTGTAAATAAAATGCTG.CA.GAAA.TTAGAAGGAAAATTAAATT Z]ZBBBBBBBBBBBBBBBBBBBBBBBBBBBZ]]WX_____`^^^_^X]B]QBRXZWBTZT^VY^^_______``__ 0
34
+ H125 98 1 1108 8581 1984 CGATGT 1 GTCA...................G.....GATGAGGAAGATGATCAGGAGGATGATGAAGGTGAAGAGGGAGATGA ][]UBBBBBBBBBBBBBBBBBBBYBBBBBXQRUQU__________^__Y_^X_`^_^^___Y^^^TZRSX`_`^_W 0
35
+ H125 98 1 1108 8516 1990 CGATGT 1 ATTT.G.................T.....TATCAATGCAGCCTTTTCAGCCAATGAGAGGCCTCGTCACCATCACG VWTXBPBBBBBBBBBBBBBBBBBRBBBBBRTURXW__^\YV]__^_____RPUXV^^\^^U\^\]_____TP]TXS 0
36
+ H125 98 1 1108 8787 1977 CGATGT 1 CAG...........................CTTTCTAGGGACCAGCTGCAGCTCCTTCTCTTGAAGATTGCCACCA ]ZWBBBBBBBBBBBBBBBBBBBBBBBBBBBXV]ZZ_______`____`_^`\___^_\__XZW]Z\]]^\V^TT\^ 0
37
+ H125 98 1 1108 8959 1993 CGATGT 1 GGCA.A.................A....TCTCTCTTCTGGTCTTAGCCCACTTAACCCAGTCTGAAGAGCCAAAAT WTRXBSBBBBBBBBBBBBBBBBBUBBBBXWWZWSWX___Y__\_U[^X[\[[[__^___Y_[]]____W_^Z___\ 0
38
+ H125 98 1 1108 8860 1997 CGATGT 1 AAGAAG.T.A.............C.G.TCCACCCATCATCAGCCATCTGAAACTGACAGACTTTGGTCTGTCCCGC _`___ZBSBZBBBBBBBBBBBBBYBXBY]UY^[[M^_____`________________`__^____________`` 0
39
+ H125 98 1 1108 9205 1979 CGATGT 1 GAA...........................GGTGGCGAAGGCCTACGCTGCTCTTGCTGCCCTAGAAAAGCTTTTC TSOBBBBBBBBBBBBBBBBBBBBBBBBBBBRRQXU_^^Y__\__^_M^V_^]]O]X^_\^`___^W\^W[Y[_TX^ 0
40
+ H125 98 1 1108 9076 1982 CGATGT 1 TGG....................G.....ACATGCCTTGGTTCAAGGGATGGAAAGTCACCCGTAAGGATGGCAAT V\UBBBBBBBBBBBBBBBBBBBBYBBBBBY]X[UU____^__^_______`__`_`^^___``___\_\______U 0
41
+ H125 98 1 1108 9727 1974 CGATGT 1 CAA...........................GCATCACGCCACCTGACT.CA.ACTA.ACTACAAGGCTACAGTAAC YR[BBBBBBBBBBBBBBBBBBBBBBBBBBBRIZRU__`______`^V\BWZBSMTYB]Z]XVTX]___^___O^__ 0
42
+ H125 98 1 1108 9601 1975 CGATGT 1 CAA...........................TATACCAATGACATCATTTCTAGTATATCTTAAAACATTATATTTG ]]VBBBBBBBBBBBBBBBBBBBBBBBBBBBTVW[U`____^_X________`________\`____`^_^_^`___ 0
43
+ H125 98 1 1108 9556 1976 CGATGT 1 GAT...........................CACCGATGTCTCTCCTATGCCGTCTTCTTCTTGACAAAAATATGTA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
44
+ H125 98 1 1108 9837 1974 CGATGT 1 AAA...........................AACTTCTTCCACAGAGAG.TC.GGGG.CGGAGGGACTCTGCTGAAC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
45
+ H125 98 1 1108 10228 1973 CGATGT 1 AGG...........................ATTCTGAAGGGAAAATGG.TC.CTTA.ATTGCAAGTAACCAAAGGA ][]BBBBBBBBBBBBBBBBBBBBBBBBBBBPY\ZX_Y____`___^^]BUZBXZXXBZ[Z^][^]__`__\V]]^^ 0
46
+ H125 98 1 1108 10001 1987 CGATGT 1 CGGC...................C.....CCAGAAAGATTATAGTTGTACCCACACCAAGTTCCCATGGTTGAAGC ]]Z]BBBBBBBBBBBBBBBBBBBTBBBBBP]QVLS^^Y^^T]U]V^XXGV^\^]^__________^_`_____`OZ 0
47
+ H125 98 1 1108 10207 1989 CGATGT 1 CTCG.A.................A.....ACTAGTATATCGCTAACACCTCATATCCTCCCTACTATGCCAAGAAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
48
+ H125 98 1 1108 10270 1999 CGATGT 1 GATTCA.ACAC...........TTCCTAACCAGCAAGGCCCCCATACACCATCTATTCCATAAACCACTCAGGTTA _____]B^]]RBBBBBBBBBBBRYV[ZY][__Y___Y_OY_____TY^YU__\____________^____\U]]\Z 0
49
+ H125 98 1 1108 10617 1987 CGATGT 1 CCAG...................G.....TCTGGATTTTCTTTCTGCCTCTTCTATCCAGAAAGGATACTGGAGAA ]][YBBBBBBBBBBBBBBBBBBBTBBBBBR[T\U[`___^^^`^^_______`_____\__`________Y_____ 0
50
+ H125 98 1 1108 10985 1979 CGATGT 1 CCA...........................CTGGAAGTCCTAGCCAAAGCAATCAGACAAGAGAAAGAAATAAAGG TY[BBBBBBBBBBBBBBBBBBBBBBBBBBBYWWYZ_`_^___^Y_\^___`_V__^__^_^^^O^S^[[]^_X^^\ 0
51
+ H125 98 1 1108 10934 1987 CGATGT 1 CTTT...................T.....CCAAGATGATGCTTGGTGCTCTAGCCATCACTTCCTGGCCTGCAGGG ]URQBBBBBBBBBBBBBBBBBBBVBBBBBRTXYRU________V_Z]ZJ^\]Z]^______\________^Y_Y__ 0
52
+ H125 98 1 1108 11197 1979 CGATGT 1 TTG...........................AAAAGGCCTGCGACCATGTCTGCCACCCAACCATTTATTTTCTACT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
53
+ H125 98 1 1108 11485 1976 CGATGT 1 TGT...........................AGGAACGCAGACACATTCAGATGCCTTTGCAGAAAGAGATGCCAGA ][ZBBBBBBBBBBBBBBBBBBBBBBBBBBBP\YXV____^__`^_____\_______^__``^_^________^\_ 0
54
+ H125 98 1 1108 11450 1977 CGATGT 1 CCT...........................GATGCCAAACGGTGGAGCCTTGAAGATACATTTCCTCTCCTTCTGT ]VVBBBBBBBBBBBBBBBBBBBBBBBBBBBXPSNN_______^_VZZ]]]_`__^\_`___`______^___`W_^ 0
55
+ H125 98 1 1108 11381 1997 CGATGT 1 ATCCAT.C.T............GC.T.CATCCACCCATCCAGCCATGCACCTGCCCATCCATCAATTCACTGATCC _`__`ZB]B]BBBBBBBBBBBBXRBZBZ[W_`_[^^^^_`___`_______^_\`_______`__`_______^`_ 0
56
+ H125 98 1 1108 11507 1973 CGATGT 1 GGA...........................AGGAGACTAGCACAGGGA.TT.TCCT.ATCCTCACCATCACACCTG W[[BBBBBBBBBBBBBBBBBBBBBBBBBBBRXXZQ`^___Y____^X\B\QBPZXOBTVW\X^\^_`VY_Y]^XOY 0
57
+ H125 98 1 1108 12016 1998 CGATGT 1 CCACTC.G.AG...........CC.G.CTGTGGTGATGCCTCAGTTTGGACAGAAACTCAAACCTGACGCACAAAC __``_[B]BRXBBBBBBBBBBBQZB]BURXYY^^^_`_______`___________`____`__\_`_\_^_\__^ 0
58
+ H125 98 1 1108 12414 1984 CGATGT 1 GTTT...................T.....ACTTATAAGGCTGCAGTCTAGGAGGCAGAGCAATGAATCATTACAAT ]VWZBBBBBBBBBBBBBBBBBBB[BBBBBUVRZOX_________`_____`_______\_Y______`______`_ 0
59
+ H125 98 1 1108 12441 1993 CGATGT 1 CAGC.G.T...............T....TAGAATCAGGTCTATTGCACGTCTGGCCAATATTGATGAAGAAATGCT ]U[YBTBYBBBBBBBBBBBBBBBTBBBBPLTSUZP_^___\Y^^^______`_`_`_U________`___U^^___ 0
60
+ H125 98 1 1108 12850 1984 CGATGT 1 CGGG...................T.....TCCCAAGTAGCTGGAATTACAAGCGTGTGCCACCAAGCCTGGCTAAT ]ZZ]BBBBBBBBBBBBBBBBBBBRBBBBBQWPNPQ__`_^____YYZY^TW^T\T]Z\]Z___________^^_^V 0
61
+ H125 98 1 1108 12905 1989 CGATGT 1 GGCC.C.................T.....CGCGGCCGGCGCGCACCTTCGTGTGGTACTGAAACGAGGGGCAGCCC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
62
+ H125 98 1 1108 12929 1991 CGATGT 1 AGAT.G.................A....AGTCACCGATGTATCTCGTATGCCGTATCATTAGATCGGAAGAGCACA YZ[YBSBBBBBBBBBBBBBBBBBXBBBBPZSW[XW_^``_______^__`______^_V_VR\\[\__^]V_^__` 0
63
+ H125 98 1 1108 13719 1980 CGATGT 1 GCG....................T......GGGGACCTTCGGCCGAGTTGTACAATGTGTTGACCATCGCAGGGGT ]]]BBBBBBBBBBBBBBBBBBBBYBBBBBBX[T[U_`__________________`_`___\__``Y___VY_\^V 0
64
+ H125 98 1 1108 13548 1995 CGATGT 1 CCGGCC.C...............T.G..ATAAAGTCTTGCTTTGTTACCCAGATTGGCCTGAAACTCCTGAGCTCA ___Y`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
65
+ H125 98 1 1108 13959 1988 CGATGT 1 TCAA.G.................T.....TCATTCACGAAATAAATACAGTGATCATTCACAGTGTAGTAGATCAT ]XWSBVBBBBBBBBBBBBBBBBBXBBBBBT[TT[VY_^_____^V`__`____\\\_____\_`_^`_^___^__` 0
66
+ H125 98 1 1108 14218 1997 CGATGT 1 TGGGCA.G.A............CA.A.TTGCTATCTGTATACACGTTTTTTCTTTTTTCCTTTCCCAGTTCTAAGG _____ZB]BZBBBBBBBBBBBBXYBZBZ[Z_`^`^_`__\____`_____``^`^`^^`___`_______`__Y_] 0
67
+ H125 98 1 1108 14318 1979 CGATGT 1 GAC...........................AAAGAAAAAAAAGACAAAAGAAGAAAAGGAGCTACCATGCAGTCCA ]XWBBBBBBBBBBBBBBBBBBBBBBBBBBBWVTQR^^^^S^^^^^^__^XPX[V^^^X`_O____^`__YRHT[S_ 0
68
+ H125 98 1 1108 14432 1988 CGATGT 1 AGCA.T.................C.....CTTCTAAGTTATATAAACACTTTTAATGCAAACATTCAATAGGGCAT ^^^^B]BBBBBBBBBBBBBBBBBWBBBBBX[X[ZZ__`__`__`_______`__________________`_____ 0
69
+ H125 98 1 1108 14609 1991 CGATGT 1 AGAA.A.................A....GTGGGATATATTAAAAGCACCTTGGGAGGCTGAGGCGGGCAGATCACG ][[YBXBBBBBBBBBBBBBBBBBRBBBBXXZZYYZ_^______`^__`________^^____^___^^`^_\`___ 0
70
+ H125 98 1 1108 14813 1974 CGATGT 1 AGA...........................TCACCGATGTATCTCGTA.GC.GTCT.CTGCTTGAAAAAACATCCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
71
+ H125 98 1 1108 15180 1989 CGATGT 1 ATTT.G.................C.....TCAATGACACTTTTGTCCATGTCACTGATCTTTCTGGCAAGGAAACC ]Z[[BZBBBBBBBBBBBBBBBBBYBBBBBPTTTZT^_____`__^^___^_^\\____^___`__^\_V\V_\^]Y 0
72
+ H125 98 1 1108 15469 1989 CGATGT 1 ACTG.A.................G.....GCGTCTTCAGAGGGGGGTAGCATGACCTCAAGCCTTCTATAAAAGCC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
73
+ H125 98 1 1108 15599 1981 CGATGT 1 GCT....................A.....CACTAGGTCTCCTCTGGCACCCATGTGTGGGAATGTGAGTGCCTTGT ]YYBBBBBBBBBBBBBBBBBBBBVBBBBBXZUZWV_________________`______^V________`__^`__ 0
74
+ H125 98 1 1108 15785 1987 CGATGT 1 CGCA...................G.....GAGTAGCAGCTCCCGCTGGTGTGCGGCTAGGGGTAGGATCCCATATT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
75
+ H125 98 1 1108 16225 1978 CGATGT 1 CTG...........................GCGGAGTCTCTTCCCTTGCGTGCATAGGTCCCGGTTGGTAGAGGGT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
76
+ H125 98 1 1108 16515 1977 CGATGT 1 CAG...........................TCAATACGGGGAAAAATAAAGTGAGGCGAGTGAAGACCATTTATGC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
77
+ H125 98 1 1108 16914 1985 CGATGT 1 AGAT...................A.....GTCACCGATGTATCTCGTATGCCGTCTTCTGCTTGAAAAAAACAACA ]]]]BBBBBBBBBBBBBBBBBBBZBBBBBTRX\[Z__`__^UU^^___Y__`__`_`_UI^`BBBBBBBBBBBBBB 0
78
+ H125 98 1 1108 17210 1978 CGATGT 1 CCC...........................TGTACCTTCCTGAGCTGAGACATGAGCCACACCTGGGTCCATGTGA ]]]BBBBBBBBBBBBBBBBBBBBBBBBBBBXQMNQ__^_____T__Y______^___YI^WW^^Z_PZZ___^_\] 0
79
+ H125 98 1 1108 17560 1987 CGATGT 1 CACG.G.................C.....CGCGCCCACCCCCCCCCGCCGGGCCCGCACGGGGAGTCACCACCTTG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
80
+ H125 98 1 1108 17716 1987 CGATGT 1 TGGG.G.................T.....GGCTGAGACCGGAGAATCCCTTGAACCCAGGAGGCAGAGGCTGCAGT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
81
+ H125 98 1 1108 17756 1977 CGATGT 1 CTG...........................TCGTGCCATTGCACTCCATCTTGGGTGACAGAGTGAGACATCATCT Z]ZBBBBBBBBBBBBBBBBBBBBBBBBBBBNQQKZ____Y__^_^________^_______^`__`\__^^_Y__^ 0
82
+ H125 98 1 1108 17851 1988 CGATGT 1 CAGA.C.................T.....TAAAAGCCAAAATGGGAAAGGAAAAGACTCATATCAACATTGTCGTC ][][BZBBBBBBBBBBBBBBBBBYBBBBBRZPZNZ__\__^`__^___`_\_\^^^^__\_`^_Y__________\ 0
83
+ H125 98 1 1108 18055 1974 CGATGT 1 GTT...........................CTATAGGAGTGGATGATG.AG.TATT.GTCATTGCATGATAGGATA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
84
+ H125 98 1 1108 18193 1990 CGATGT 1 CTTA.C.................A....AGGGAGCCTCTGGGGTTAGGATCAGTAAGGTTTGCTGTCGTTTTTGGA ]Z]ZBQBBBBBBBBBBBBBBBBBRBBBBRQWWU[W__Y___________Y_\^_\_____U`__^\^_]\O^^^^V 0
85
+ H125 98 1 1108 18377 1977 CGATGT 1 GGC...........................CAAATATAAGGAAATGGCCCAATGAACGTGGTTGTGGGAGGGGAAA WZWBBBBBBBBBBBBBBBBBBBBBBBBBBBRUVLU_`__`__________________\_`_`^___\`_^^^^BB 0
86
+ H125 98 1 1108 18485 1979 CGATGT 1 ATA...........................CGAGATGAGCTCAGATCTACCTTTCCTCTTCATGGACCAGACTGGA VQQBBBBBBBBBBBBBBBBBBBBBBBBBBBRQVWU___`_V`^VXYL^^Z[RW^WI^\^U]]]Y]________`QV 0
87
+ H125 98 1 1108 18436 1988 CGATGT 1 CTTA.A.................A.....CTGTTTGGCTTCAACTCCTACTTTTTTCAGCACGATTCCTTTTGCAT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
88
+ H125 98 1 1108 18330 1992 CGATGT 1 CTTT.A.................T....GTAAAAGGAGAAAAAGATGCGGGGGAAATGGAGCAGGGGGTGGGGAAA ][U[BWBBBBBBBBBBBBBBBBBRBBBBSOSWJWW_V_Y_^ZXZZSNZ^O_`^^__Y__NY^^\YZ]^VVX\YWXV 0
89
+ H125 98 1 1108 19283 1973 CGATGT 1 CTT...........................CACAGCCTGTTTAATCTG.TG.TTGT.GGCTTTAACATCCACAATG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
90
+ H125 98 1 1108 19389 1994 CGATGT 1 TGCT.A.C...............A....TGAGACCAGTTCACTAGAAGAAACAAACATTTCTGCCATGCAGACCAA ^^^^B]B]BBBBBBBBBBBBBBBVBBBBT[W]ZZX_`______`^___`_^^_^^^_`__________^_\_____ 0
91
+ H125 98 1 1108 19966 1977 CGATGT 1 CCT...........................GGAAAACATGTCTTCAGAAATAAATGCTTCCTCTTCCAGGTAGTTC ]RYBBBBBBBBBBBBBBBBBBBBBBBBBBBRUMVS__^^________^__\]]`]___^`^_`____^__Z^\Z^` 0
92
+ H125 98 1 1108 20041 1984 CGATGT 1 CTTT...................G.....GAGGTGCAGGGGGCGCAGGGGGCGGCGTCACCGAGGAGCAGGCGGGC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
93
+ H125 98 1 1108 20230 2000 CGATGT 1 GTGAAG.ATATA....G.....TATTCTCCCCCAGTTTGGCAGAGGCTGGCCCTCTTTTGACTGCCCCTGCTTTCT _____\B\^VOQBBBBXBBBBBHOUUMZR[]]XX]___Y^__Y\X^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
94
+ H125 98 1 1108 20276 1979 CGATGT 1 TCT....................G......GGCGCACATAGACTACTCCAGGAGTAAGTTGTTCTTGTTTTTTTCG VYPBBBBBBBBBBBBBBBBBBBBTBBBBBBYWSTRX\^]]____Y]W]TKUYOSLS[ZXXUV^W^]XT^]^`^^^_ 0
95
+ H125 98 1 1108 20743 1977 CGATGT 1 CTG...........................TTGACAGAGCCAACTCCCTTGAGGGTGGCAGGAAGCACACCCCCCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
96
+ H125 98 1 1108 20532 1980 CGATGT 1 CTA....................A.....TTTTGGTAGAGATGGGGATTTTGCCATGTTGCCCAGGCTGGTCTCCT ]VRBBBBBBBBBBBBBBBBBBBBRBBBBBP]SRR]_______`______^________`\___X___`_^SRP\\^ 0
97
+ H125 98 1 1108 20881 1975 CGATGT 1 CTC...........................ATCAATCATATACAGAATATCTACAAAAAACCTACATCATACTTAA ]X]BBBBBBBBBBBBBBBBBBBBBBBBBBBXVSUN__U__`____^X^V\^]\_]_`^XX^`\_`_`___^_Y`^_ 0
98
+ H125 98 1 1108 21009 2000 CGATGT 1 GTGGCG.CGGGT....G.....GGGTCGAAGTCAAGGCCCTGGCTTGACGCTGGTGCCACCGCGAACGCTCGCACA _BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0
99
+ H125 98 1 1108 1188 2036 CGATGT 1 CTTGTATGCAGCATCCCCTTCTTGCCTAGGGACTTGAAGGGCCAGGCTTCCTGTCATTGCCTCACTCAAATGTAGC gggggggggggggegggggffggeggegggeagge^ggdbcgggcdgedegfggffff^ffffefdeeZefccceg 1
100
+ H125 98 1 1108 1147 2046 CGATGT 1 CGGGAAATGGCGGAAATGTGCAAGGATGTTATATGAATTGTGTTGGTTGGCCTAAAACACAGAGCCGGCTTGAAGT [XSZGXPSKPJPP]TWQRFRHXW\WXXX]PUX[W_^R^[XRNRYV]^Y[]UUNRT]X_BBBBBBBBBBBBBBBBBB 0
@@ -0,0 +1,40 @@
1
+ #
2
+ # quality_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
+ describe "When quality is read" do
14
+ it "should give me back the quality scores of first read in Illumina 1.5+ encoding" do
15
+ read = Bio::FlatFile.auto(File.dirname(__FILE__) + "/fixture/test.fastq").first
16
+ read.format = :fastq_illumina
17
+ qual = read.quality_scores
18
+ qual.should == [39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 37, 39, 39, 39, 39, 39, 38, 38, 39, 39, 37, 39, 39, 37, 39, 39, 39, 37, 33, 39, 39, 37, 30, 39, 39, 36, 34, 35, 39, 39, 39, 35, 36, 39, 37, 36, 37, 39, 38, 39, 39, 38, 38, 38, 38, 30, 38, 38, 38, 38, 37, 38, 36, 37, 37, 26, 37, 38, 35, 35, 35, 37, 39]
19
+ end
20
+
21
+ it "should give me back the quality scores of last read, with Bs in Illumina 1.5+ encoding" do
22
+ quals = []
23
+ Bio::FlatFile.auto(File.dirname(__FILE__) + "/fixture/test.fastq").each do |read|
24
+ read.format = :fastq_illumina
25
+ quals = read.quality_scores
26
+ end
27
+ quals.should == [27, 24, 19, 26, 7, 24, 16, 19, 11, 16, 10, 16, 16, 29, 20, 23, 17, 18, 6, 18, 8, 24, 23, 28, 23, 24, 24, 24, 29, 16, 21, 24, 27, 23, 31, 30, 18, 30, 27, 24, 18, 14, 18, 25, 22, 29, 30, 25, 27, 29, 21, 21, 14, 18, 20, 29, 24, 31, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
28
+ end
29
+
30
+ it "should tell me how many B are in the sequence with Illumina 1.5+ encoding" do
31
+ reads = Bio::Ngs::FastQuality.new(File.dirname(__FILE__) + "/fixture/test.fastq")
32
+ reads.quality_profile.should == nil
33
+ end
34
+
35
+ it "should return the comulative count of Bs in all the sequences" do
36
+ reads = Bio::Ngs::FastQuality.new(File.dirname(__FILE__) + "/fixture/test.fastq", :fastq_illumina)
37
+ # reads = Bio::Ngs::FastQuality.new("/Users/bonnalraoul/Desktop/s_1_1_1108_qseq.fastq", :fastq_illumina)
38
+ reads.track_b_count.b_profile.should == [[58, 1], [59, 1], [60, 1], [61, 1], [62, 1], [63, 1], [64, 1], [65, 1], [66, 1], [67, 1], [68, 1], [69, 1], [70, 1], [71, 1], [72, 1], [73, 1], [74, 1], [75, 1]]
39
+ end
40
+ end
@@ -0,0 +1,98 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'thor/base'
3
+
4
+ describe "SffExtract" do
5
+
6
+ describe "class" do
7
+ it "should have default options" do
8
+ Bio::Ngs::SffExtract.options.should == {"append" => {:type => :boolean, :aliases => "-a", :desc =>"append output to existing files"},
9
+ "xml_info" => {:type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"},
10
+ "linker_file" => {:type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"},
11
+ "clip" => {:type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"},
12
+ "upper_case" => {:type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"},
13
+ "min_left_clip" => {:type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"},
14
+ "fastq" => {:type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"},
15
+ "out_basename" => {:type => :string, :aliases => "-o", :desc => "base name for all output files"},
16
+ "seq_file" => {:type => :string, :aliases => "-s", :desc => "output sequence file name"},
17
+ "qual_file" => {:type => :string, :aliases => "-q", :desc => "output quality file name"},
18
+ "xml_file" => {:type => :string, :aliases => "-x", :desc => "output ancillary xml file name"}
19
+ }
20
+ end
21
+ it "should have a default program name" do
22
+ Bio::Ngs::SffExtract.program.should == Bio::Ngs::Utils.binary("sff_extract")
23
+ end
24
+
25
+ end
26
+
27
+ describe "instance" do
28
+ it "has default options" do
29
+ Bio::Ngs::SffExtract.new.options.should == {"append" => {:type => :boolean, :aliases => "-a", :desc =>"append output to existing files"},
30
+ "xml_info" => {:type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"},
31
+ "linker_file" => {:type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"},
32
+ "clip" => {:type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"},
33
+ "upper_case" => {:type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"},
34
+ "min_left_clip" => {:type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"},
35
+ "fastq" => {:type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"},
36
+ "out_basename" => {:type => :string, :aliases => "-o", :desc => "base name for all output files"},
37
+ "seq_file" => {:type => :string, :aliases => "-s", :desc => "output sequence file name"},
38
+ "qual_file" => {:type => :string, :aliases => "-q", :desc => "output quality file name"},
39
+ "xml_file" => {:type => :string, :aliases => "-x", :desc => "output ancillary xml file name"}
40
+ }
41
+ end
42
+
43
+ it "has custom name" do
44
+ Bio::Ngs::SffExtract.new("/usr/local/bin/sff_extract").program.should == "/usr/local/bin/sff_extract"
45
+ end
46
+
47
+ it "overwrites specifc option" do
48
+ tophat = Bio::Ngs::SffExtract.new
49
+ tophat.options={:reads=>{:type=>:numeric}}
50
+ tophat.options[:reads][:type].should == :numeric
51
+ end
52
+
53
+ it "add custom option" do
54
+ tophat = Bio::Ngs::SffExtract.new
55
+ tophat.options={:parameter_xxx=>{:type=>:numeric}}
56
+ tophat.options.should == {"append" => {:type => :boolean, :aliases => "-a", :desc =>"append output to existing files"},
57
+ "xml_info" => {:type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"},
58
+ "linker_file" => {:type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"},
59
+ "clip" => {:type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"},
60
+ "upper_case" => {:type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"},
61
+ "min_left_clip" => {:type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"},
62
+ "fastq" => {:type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"},
63
+ "out_basename" => {:type => :string, :aliases => "-o", :desc => "base name for all output files"},
64
+ "seq_file" => {:type => :string, :aliases => "-s", :desc => "output sequence file name"},
65
+ "qual_file" => {:type => :string, :aliases => "-q", :desc => "output quality file name"},
66
+ "xml_file" => {:type => :string, :aliases => "-x", :desc => "output ancillary xml file name"},
67
+ :parameter_xxx=>{:type=>:numeric}
68
+ }
69
+ end
70
+
71
+ it "set a default option to be returned as params" do
72
+ tophat = Bio::Ngs::SffExtract.new
73
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
74
+ tophat.params.should == {:parameter_xxx=>{:type=>:numeric, :default=>10}}
75
+ end
76
+
77
+ it "get normalized options" do
78
+ tophat = Bio::Ngs::SffExtract.new
79
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
80
+ tophat.normalize_params.should == "--parameter_xxx=10"
81
+ end
82
+
83
+ it "does not save a valid parameter/option" do
84
+ tophat = Bio::Ngs::SffExtract.new
85
+ tophat.params={:fake_parameter=>1234567890}
86
+ tophat.normalize_params.should == []
87
+ end
88
+
89
+ it "set a default option and get the parameters for the binary program" do
90
+ tophat = Bio::Ngs::SffExtract.new
91
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
92
+ tophat.params={:fake_parameter=>01}
93
+ tophat.normalize_params.should == "--parameter_xxx=10"
94
+ end
95
+
96
+ end
97
+
98
+ end
@@ -0,0 +1,55 @@
1
+ $TESTING=true
2
+
3
+ # require 'simplecov'
4
+ # SimpleCov.start do
5
+ # add_group 'Libraries', 'lib'
6
+ # add_group 'Specs', 'spec'
7
+ # end
8
+
9
+ require 'thor'
10
+ require 'thor/base'
11
+ require 'stringio'
12
+ require 'rdoc'
13
+ require 'rspec'
14
+ require 'diff/lcs' # You need diff/lcs installed to run specs (but not to run Thor).
15
+ #require 'fakeweb' # You need fakeweb installed to run specs (but not to run Thor).
16
+
17
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
18
+
19
+ require 'bio-ngs'
20
+
21
+ # Set shell to basic
22
+ $0 = "thor"
23
+ $thor_runner = true
24
+ ARGV.clear
25
+ Thor::Base.shell = Thor::Shell::Basic
26
+
27
+ # Load fixtures
28
+ %w(bwa history project quality rna sff_extract).each do |task|
29
+ load File.join(File.dirname(__FILE__), "..", "lib", "tasks", "#{task}.thor" )
30
+ end
31
+
32
+ RSpec.configure do |config|
33
+ def capture(stream) # Runs the given block with the global $<stream> swapped for a StringIO and returns what was written to it.
34
+ begin
35
+ stream = stream.to_s # accept a symbol or string; the name is interpolated into eval below
36
+ eval "$#{stream} = StringIO.new" # NOTE(review): eval on the caller-supplied name - only pass trusted stream names
37
+ yield
38
+ result = eval("$#{stream}").string # collect everything written while the block ran
39
+ ensure
40
+ eval("$#{stream} = #{stream.upcase}") # always restore the global from the upper-cased constant of the same name
41
+ end
42
+
43
+ result
44
+ end
45
+
46
+ def source_root
47
+ File.join(File.dirname(__FILE__), "..", "lib", "tasks")
48
+ end
49
+
50
+ def destination_root
51
+ File.join(File.dirname(__FILE__), 'sandbox')
52
+ end
53
+
54
+ alias :silence :capture
55
+ end
@@ -0,0 +1,99 @@
1
+ #
2
+ # tophat_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
+ describe "Tophat" do
14
+ describe "class" do
15
+ it "default options" do
16
+ Bio::Ngs::Tophat.options.should == {"output-dir"=>{:type=>:string, :aliases=>"-o"}, "min-anchor"=>{:type=>:numeric, :aliases=>"-a"}, "splice-mismatches"=>{:type=>:numeric, :aliases=>"-m"}, "min-intron"=>{:type=>:numeric, :aliases=>"-i"}, "max-intront"=>{:type=>:numeric, :aliases=>"-I"}, "max-multihits"=>{:type=>:numeric, :aliases=>"-g"}, "min-isoform_fraction"=>{:type=>:numeric, :aliases=>"-F"}, "solexa-quals"=>{:type=>:boolean}, "solexa1.3-quals"=>{:type=>:boolean, :aliases=>"--phred64-quals"}, :quals=>{:type=>:boolean, :aliases=>"-Q"}, "integer-quals"=>{:type=>:boolean}, :color=>{:type=>:boolean, :aliases=>"-C"}, "library-type"=>{:type=>:string}, "num-threads"=>{:type=>:numeric, :aliases=>"-p"}, "GTF"=>{:type=>:string, :aliases=>"-G"}, "raw-juncs"=>{:type=>:string, :aliases=>"-j"}, :insertions=>{:type=>:string}, :deletions=>{:type=>:string}, "mate-inner-dist"=>{:type=>:numeric, :aliases=>"-r"}, "mate-std-dev"=>{:type=>:numeric}, "no-novel-juncs"=>{:type=>:boolean}, "no-gtf-juncs"=>{:type=>:boolean}, "no-coverage-search"=>{:type=>:boolean}, "coverage-search"=>{:type=>:boolean}, "no-closure-search"=>{:type=>:boolean}, "closure-search"=>{:type=>:boolean}, "fill-gaps"=>{:type=>:boolean}, "microexon-search"=>{:type=>:boolean}, "butterfly-search"=>{:type=>:boolean}, "no-butterfly-search"=>{:type=>:boolean}, "keep-tmp"=>{:type=>:boolean}, "tmp-dir"=>{:type=>:string}, "segment-mismatches"=>{:type=>:numeric}, "segment-length"=>{:type=>:numeric}, "min-closure-exon"=>{:type=>:numeric}, "min-closure-intron"=>{:type=>:numeric}, "max-closure-intron"=>{:type=>:numeric}, "min-coverage-intron"=>{:type=>:numeric}, "max-coverage-intron"=>{:type=>:numeric}, "min-segment-intron"=>{:type=>:numeric}, "max-segment-intron"=>{:type=>:numeric}, "rg-id"=>{:type=>:string}, "rg-sample"=>{:type=>:string}, "rg-library"=>{:type=>:string}, "rg-description"=>{:type=>:string}, "rg-platform-unit"=>{:type=>:string}, "rg-center"=>{:type=>:string}, "rg-date"=>{:type=>:string}, 
"rg-platform"=>{:type=>:string}}
17
+ end
18
+
19
+ it "has default program name" do
20
+ Bio::Ngs::Tophat.program.should == Bio::Ngs::Utils.binary("tophat/tophat") # class-level program is expected to equal what Utils.binary returns for "tophat/tophat"
21
+ end
22
+ end
23
+
24
+ describe "instance" do
25
+ it "has default options" do
26
+ Bio::Ngs::Tophat.new.options.should == {"output-dir"=>{:type=>:string, :aliases=>"-o"}, "min-anchor"=>{:type=>:numeric, :aliases=>"-a"}, "splice-mismatches"=>{:type=>:numeric, :aliases=>"-m"}, "min-intron"=>{:type=>:numeric, :aliases=>"-i"}, "max-intront"=>{:type=>:numeric, :aliases=>"-I"}, "max-multihits"=>{:type=>:numeric, :aliases=>"-g"}, "min-isoform_fraction"=>{:type=>:numeric, :aliases=>"-F"}, "solexa-quals"=>{:type=>:boolean}, "solexa1.3-quals"=>{:type=>:boolean, :aliases=>"--phred64-quals"}, :quals=>{:type=>:boolean, :aliases=>"-Q"}, "integer-quals"=>{:type=>:boolean}, :color=>{:type=>:boolean, :aliases=>"-C"}, "library-type"=>{:type=>:string}, "num-threads"=>{:type=>:numeric, :aliases=>"-p"}, "GTF"=>{:type=>:string, :aliases=>"-G"}, "raw-juncs"=>{:type=>:string, :aliases=>"-j"}, :insertions=>{:type=>:string}, :deletions=>{:type=>:string}, "mate-inner-dist"=>{:type=>:numeric, :aliases=>"-r"}, "mate-std-dev"=>{:type=>:numeric}, "no-novel-juncs"=>{:type=>:boolean}, "no-gtf-juncs"=>{:type=>:boolean}, "no-coverage-search"=>{:type=>:boolean}, "coverage-search"=>{:type=>:boolean}, "no-closure-search"=>{:type=>:boolean}, "closure-search"=>{:type=>:boolean}, "fill-gaps"=>{:type=>:boolean}, "microexon-search"=>{:type=>:boolean}, "butterfly-search"=>{:type=>:boolean}, "no-butterfly-search"=>{:type=>:boolean}, "keep-tmp"=>{:type=>:boolean}, "tmp-dir"=>{:type=>:string}, "segment-mismatches"=>{:type=>:numeric}, "segment-length"=>{:type=>:numeric}, "min-closure-exon"=>{:type=>:numeric}, "min-closure-intron"=>{:type=>:numeric}, "max-closure-intron"=>{:type=>:numeric}, "min-coverage-intron"=>{:type=>:numeric}, "max-coverage-intron"=>{:type=>:numeric}, "min-segment-intron"=>{:type=>:numeric}, "max-segment-intron"=>{:type=>:numeric}, "rg-id"=>{:type=>:string}, "rg-sample"=>{:type=>:string}, "rg-library"=>{:type=>:string}, "rg-description"=>{:type=>:string}, "rg-platform-unit"=>{:type=>:string}, "rg-center"=>{:type=>:string}, "rg-date"=>{:type=>:string}, 
"rg-platform"=>{:type=>:string}}
27
+ end
28
+
29
+ it "has custom name" do
30
+ Bio::Ngs::Tophat.new("/usr/local/bin/tophat").program.should == "/usr/local/bin/tophat" # a path given to the constructor is expected back from #program unchanged
31
+ end
32
+
33
+ it "overwrites specifc option" do
34
+ tophat = Bio::Ngs::Tophat.new
35
+ tophat.options={:reads=>{:type=>:numeric}}
36
+ tophat.options[:reads][:type].should == :numeric
37
+ end
38
+
39
+ it "add custom option" do
40
+ tophat = Bio::Ngs::Tophat.new
41
+ tophat.options={:parameter_xxx=>{:type=>:numeric}}
42
+ tophat.options.should == {"output-dir"=>{:type=>:string, :aliases=>"-o"}, "min-anchor"=>{:type=>:numeric, :aliases=>"-a"}, "splice-mismatches"=>{:type=>:numeric, :aliases=>"-m"}, "min-intron"=>{:type=>:numeric, :aliases=>"-i"}, "max-intront"=>{:type=>:numeric, :aliases=>"-I"}, "max-multihits"=>{:type=>:numeric, :aliases=>"-g"}, "min-isoform_fraction"=>{:type=>:numeric, :aliases=>"-F"}, "solexa-quals"=>{:type=>:boolean}, "solexa1.3-quals"=>{:type=>:boolean, :aliases=>"--phred64-quals"}, :quals=>{:type=>:boolean, :aliases=>"-Q"}, "integer-quals"=>{:type=>:boolean}, :color=>{:type=>:boolean, :aliases=>"-C"}, "library-type"=>{:type=>:string}, "num-threads"=>{:type=>:numeric, :aliases=>"-p"}, "GTF"=>{:type=>:string, :aliases=>"-G"}, "raw-juncs"=>{:type=>:string, :aliases=>"-j"}, :insertions=>{:type=>:string}, :deletions=>{:type=>:string}, "mate-inner-dist"=>{:type=>:numeric, :aliases=>"-r"}, "mate-std-dev"=>{:type=>:numeric}, "no-novel-juncs"=>{:type=>:boolean}, "no-gtf-juncs"=>{:type=>:boolean}, "no-coverage-search"=>{:type=>:boolean}, "coverage-search"=>{:type=>:boolean}, "no-closure-search"=>{:type=>:boolean}, "closure-search"=>{:type=>:boolean}, "fill-gaps"=>{:type=>:boolean}, "microexon-search"=>{:type=>:boolean}, "butterfly-search"=>{:type=>:boolean}, "no-butterfly-search"=>{:type=>:boolean}, "keep-tmp"=>{:type=>:boolean}, "tmp-dir"=>{:type=>:string}, "segment-mismatches"=>{:type=>:numeric}, "segment-length"=>{:type=>:numeric}, "min-closure-exon"=>{:type=>:numeric}, "min-closure-intron"=>{:type=>:numeric}, "max-closure-intron"=>{:type=>:numeric}, "min-coverage-intron"=>{:type=>:numeric}, "max-coverage-intron"=>{:type=>:numeric}, "min-segment-intron"=>{:type=>:numeric}, "max-segment-intron"=>{:type=>:numeric}, "rg-id"=>{:type=>:string}, "rg-sample"=>{:type=>:string}, "rg-library"=>{:type=>:string}, "rg-description"=>{:type=>:string}, "rg-platform-unit"=>{:type=>:string}, "rg-center"=>{:type=>:string}, "rg-date"=>{:type=>:string}, "rg-platform"=>{:type=>:string}, 
:parameter_xxx=>{:type=>:numeric}}
43
+ end
44
+
45
+ it "set a default option to be returned as params" do
46
+ tophat = Bio::Ngs::Tophat.new
47
+ # Define a custom option that carries a default value.
48
+ # TODO: check the default value against the declared type in the main class;
49
+ # Thor already does this.
50
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
51
+ tophat.params.should == {:parameter_xxx=>{:type=>:numeric, :default=>10}}
52
+ end
53
+
54
+ it "get normalized options" do
55
+ tophat = Bio::Ngs::Tophat.new
56
+ # Define a custom option that carries a default value.
57
+ # TODO: check the default value against the declared type in the main class;
58
+ # Thor already does this.
59
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
60
+ tophat.normalize_params.should == "--parameter_xxx=10"
61
+ end
62
+
63
+ it "does not save a valid parameter/option" do
64
+ tophat = Bio::Ngs::Tophat.new
65
+ tophat.params={:fake_parameter=>1234567890}
66
+ tophat.normalize_params.should == []
67
+ end
68
+
69
+ it "set a default option and get the parameters for the binary program" do
70
+ tophat = Bio::Ngs::Tophat.new
71
+ #setting a default options
72
+ #TODO: add check between type and default value, in the main class,
73
+ # Thor already does it.
74
+ tophat.options={:parameter_xxx=>{:type=>:numeric, :default=>10}}
75
+ tophat.params={:fake_parameter=>01}
76
+ tophat.normalize_params.should == "--parameter_xxx=10"
77
+ end
78
+ end
79
+ end
80
+
81
+
82
+ # describe Tophat do
83
+ # describe "Tophat" do
84
+ # it "the program is " do
85
+ # Bio::Ngs::Tophat.new.program.should == Bio::Ngs::Utils.binary("tophat/tophat")
86
+ # end
87
+ #
88
+ # it "returns the default parameters" do
89
+ # Bio::Ngs::Tophat.new.main.should == ""
90
+ # end
91
+ #
92
+ # it "all the parameters of this application" do
93
+ # first_option = Bio::Ngs::Tophat.tasks["main"].options.first #is an array of name, Thor::Option
94
+ # thor_option = first_option.last
95
+ # thor_option.name.should == "reads"
96
+ # end
97
+ #
98
+ # end
99
+ # end
@@ -0,0 +1,22 @@
1
+ #
2
+ # utils_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
+ describe "Utils" do
14
+ it "tags the regular file name with the new tag and extension" do
15
+ Bio::Ngs::Utils.tag_filename("test_file_name.txt", "report", "csv").should == "test_file_name_report.csv" # the .txt extension is replaced and the tag is appended to the base name
16
+ end
17
+
18
+ it "tags the strange file name with the new tag and extension" do
19
+ Bio::Ngs::Utils.tag_filename("test_file_name", "report", "csv").should == "test_file_name_report.csv" # input without an extension: tag and extension are appended
20
+ end
21
+
22
+ end
@@ -0,0 +1,4 @@
1
+ adapter: sqlite3
2
+ database: test/data/test.sqlite3
3
+ pool: 5
4
+ timeout: 5000
@@ -0,0 +1,69 @@
1
+ <?xml version="1.0"?>
2
+ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
3
+ <BlastOutput>
4
+ <BlastOutput_program>blastx</BlastOutput_program>
5
+ <BlastOutput_version>blastx 2.2.19 [Nov-02-2008]</BlastOutput_version>
6
+ <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
7
+ <BlastOutput_db>uniprot_sprot</BlastOutput_db>
8
+ <BlastOutput_query-ID>lcl|1_0</BlastOutput_query-ID>
9
+ <BlastOutput_query-def>ENSBTAG00000025113_499_35</BlastOutput_query-def>
10
+ <BlastOutput_query-len>1221</BlastOutput_query-len>
11
+ <BlastOutput_param>
12
+ <Parameters>
13
+ <Parameters_matrix>BLOSUM62</Parameters_matrix>
14
+ <Parameters_expect>10</Parameters_expect>
15
+ <Parameters_gap-open>11</Parameters_gap-open>
16
+ <Parameters_gap-extend>1</Parameters_gap-extend>
17
+ <Parameters_filter>F</Parameters_filter>
18
+ </Parameters>
19
+ </BlastOutput_param>
20
+ <BlastOutput_iterations>
21
+ <Iteration>
22
+ <Iteration_iter-num>239</Iteration_iter-num>
23
+ <Iteration_query-ID>lcl|239_0</Iteration_query-ID>
24
+ <Iteration_query-def>ENSBTAG00000031386_1906_35</Iteration_query-def>
25
+ <Iteration_query-len>2107</Iteration_query-len>
26
+ <Iteration_hits>
27
+ <Hit>
28
+ <Hit_num>1</Hit_num>
29
+ <Hit_id>sp|Q5TTP0|WDY_ANOGA</Hit_id>
30
+ <Hit_def>WD repeat-containing protein on Y chromosome OS=Anopheles gambiae GN=WDY PE=4 SV=4</Hit_def>
31
+ <Hit_accession>Q5TTP0</Hit_accession>
32
+ <Hit_len>1059</Hit_len>
33
+ <Hit_hsps>
34
+ <Hsp>
35
+ <Hsp_num>1</Hsp_num>
36
+ <Hsp_bit-score>224.172</Hsp_bit-score>
37
+ <Hsp_score>570</Hsp_score>
38
+ <Hsp_evalue>2.55108e-57</Hsp_evalue>
39
+ <Hsp_query-from>1</Hsp_query-from>
40
+ <Hsp_query-to>1641</Hsp_query-to>
41
+ <Hsp_hit-from>347</Hsp_hit-from>
42
+ <Hsp_hit-to>897</Hsp_hit-to>
43
+ <Hsp_query-frame>1</Hsp_query-frame>
44
+ <Hsp_identity>167</Hsp_identity>
45
+ <Hsp_positive>271</Hsp_positive>
46
+ <Hsp_gaps>19</Hsp_gaps>
47
+ <Hsp_align-len>566</Hsp_align-len>
48
+ <Hsp_qseq>KGILCFDYCPDRNVLVTGGYDPLIRLWNPFFSRKPVWMMKGHQTSVTHIVVNSKDSSILLSVSKDKNIRVWDMQDYQCLQSFCGFSEV-IKGTWICALGLKWGHGFGIGILKGYLETQGPGRAEEKTT--TYSTPLCAVLYSKVFKQVVSGCLSGMVSVWEVVTGRRLMEFSVTGDQ-------QVELTAMSLDESEGCLLTGLRDGTVKMWNYSVGECLLTFPKADQLEISGIVHMNKVFYTTGWSKRIT-YYTFHKIKPVLLCHHWQTFHTEDVLSMAKYQNQ--FIATSSYNGDILFWNVGTFRPTQRALASDPQPRRPLSPVRGLRDQLKGKKQTSRRSCCCSPMHR--WRRLAMFSQPSGC-HIEVLTFFLIIFLQTRPRLPHSAALLSSCMDGYIYAWSIHGSGGLLGKFPXXXXXXXXXXXXAMATDQNDWILVTGDCKGCIKIWDIKD-YCAHSDK-QTNHPSEINKFRFLISERIQVSLPNYSPPEEKKVEAGQTISLIPPQLLISWKAHLDSVADILYVDSLQLVISAGQDRDVKAWKLSGDAIGTFGLSV-WKRL</Hsp_qseq>
49
+ <Hsp_hseq>RGVTCFAFEPSNELLVSGGPDCDLRLWDIHRPEKPSVVLVGHTSSITFLFLQDAGEKIY-SLDQRKIIKVWDVRNRVLLQTFGQFSTVLVKGVPACAYYNKRARELVVASNKLFVTACCPEIALDRTDGESHTKPVSVLLYNGLYRLVVSCGFDSFIIVWDHRVNRKMTIITEAHTQIRNGVLEPVEITAACFDGKEQMLLTGARNGSLKIWNIGGRTCMRTIQIEEDCEVTGVFWQANRILAMGWNHRVVEFAAFAEQDEYPRGLQWRKQHSDDILCAAVSGSEPGVMATCSYAGELVFWMLETGQPYRRYDATNPRTRLPISFREGRADLMKPRKLTPRRSLFQMPPGQLAHRRLTRILMPSGLEQMRQLSIQALLFLAMRKMLPDRGTLFGSLDNGMVQVWSHHPDGGFKGQF--NGIHMAGDRIITLATDKANRFLFTGTALGYVKTWYIENCWIPNEDKFHVNKPALRILFPFLLNDVV--------PGRAKRSARAQ----VKPWLLNSYQAHRACVTGLTYLDDTGLLLSCSSDRTVRLWTLGGRYIGLLGSPVNWQPL</Hsp_hseq>
50
+ <Hsp_midline>+G+ CF + P +LV+GG D +RLW+ KP ++ GH +S+T + + I S+ + K I+VWD+++ LQ+F FS V +KG CA K + K ++ P A ++T +++ P+ +LY+ +++ VVS + VW+ R++ + Q VE+TA D E LLTG R+G++K+WN C+ T + E++G+ GW+ R+ + F + W+ H++D+L A ++ +AT SY G+++FW + T +P +R A++P+ R P+S G D +K +K T RRS P + RRL PSG + L+ ++FL R LP L S +G + WS H GG G+F +ATD+ + L TG G +K W I++ + + DK N P+ F FL+++ + P K+ Q + P LL S++AH V + Y+D L++S DR V+ W L G IG G V W+ L</Hsp_midline>
51
+ </Hsp>
52
+ </Hit_hsps>
53
+ </Hit>
54
+ </Iteration_hits>
55
+ <Iteration_stat>
56
+ <Statistics>
57
+ <Statistics_db-num>525997</Statistics_db-num>
58
+ <Statistics_db-len>185874894</Statistics_db-len>
59
+ <Statistics_hsp-len>0</Statistics_hsp-len>
60
+ <Statistics_eff-space>0</Statistics_eff-space>
61
+ <Statistics_kappa>0.041</Statistics_kappa>
62
+ <Statistics_lambda>0.267</Statistics_lambda>
63
+ <Statistics_entropy>0.14</Statistics_entropy>
64
+ </Statistics>
65
+ </Iteration_stat>
66
+ </Iteration>
67
+ </BlastOutput_iterations>
68
+ </BlastOutput>
69
+