bio 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. data/ChangeLog +1712 -0
  2. data/KNOWN_ISSUES.rdoc +11 -1
  3. data/README.rdoc +3 -2
  4. data/RELEASE_NOTES.rdoc +65 -127
  5. data/bioruby.gemspec +38 -2
  6. data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
  7. data/doc/Tutorial.rd +74 -16
  8. data/doc/Tutorial.rd.html +68 -16
  9. data/lib/bio.rb +2 -0
  10. data/lib/bio/appl/clustalw/report.rb +18 -0
  11. data/lib/bio/appl/paml/codeml/report.rb +579 -21
  12. data/lib/bio/command.rb +149 -21
  13. data/lib/bio/db/aaindex.rb +11 -1
  14. data/lib/bio/db/embl/sptr.rb +1 -1
  15. data/lib/bio/db/fasta/defline.rb +7 -2
  16. data/lib/bio/db/fasta/qual.rb +24 -0
  17. data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
  18. data/lib/bio/db/fastq.rb +15 -0
  19. data/lib/bio/db/go.rb +2 -2
  20. data/lib/bio/db/kegg/common.rb +109 -5
  21. data/lib/bio/db/kegg/genes.rb +61 -15
  22. data/lib/bio/db/kegg/genome.rb +43 -38
  23. data/lib/bio/db/kegg/module.rb +158 -0
  24. data/lib/bio/db/kegg/orthology.rb +40 -1
  25. data/lib/bio/db/kegg/pathway.rb +254 -0
  26. data/lib/bio/db/medline.rb +6 -2
  27. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  28. data/lib/bio/location.rb +39 -0
  29. data/lib/bio/reference.rb +24 -0
  30. data/lib/bio/sequence.rb +2 -0
  31. data/lib/bio/sequence/adapter.rb +1 -0
  32. data/lib/bio/sequence/format.rb +14 -0
  33. data/lib/bio/sequence/sequence_masker.rb +95 -0
  34. data/lib/bio/tree.rb +4 -4
  35. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
  36. data/lib/bio/version.rb +1 -1
  37. data/setup.rb +5 -0
  38. data/test/data/KEGG/K02338.orthology +180 -52
  39. data/test/data/KEGG/M00118.module +44 -0
  40. data/test/data/KEGG/T00005.genome +140 -0
  41. data/test/data/KEGG/T00070.genome +34 -0
  42. data/test/data/KEGG/b0529.gene +47 -0
  43. data/test/data/KEGG/ec00072.pathway +23 -0
  44. data/test/data/KEGG/hsa00790.pathway +59 -0
  45. data/test/data/KEGG/ko00312.pathway +16 -0
  46. data/test/data/KEGG/map00030.pathway +37 -0
  47. data/test/data/KEGG/map00052.pathway +13 -0
  48. data/test/data/KEGG/rn00250.pathway +114 -0
  49. data/test/data/clustalw/example1.aln +58 -0
  50. data/test/data/go/selected_component.ontology +12 -0
  51. data/test/data/go/selected_gene_association.sgd +31 -0
  52. data/test/data/go/selected_wikipedia2go +13 -0
  53. data/test/data/medline/20146148_modified.medline +54 -0
  54. data/test/data/paml/codeml/models/aa.aln +26 -0
  55. data/test/data/paml/codeml/models/aa.dnd +13 -0
  56. data/test/data/paml/codeml/models/aa.ph +13 -0
  57. data/test/data/paml/codeml/models/alignment.phy +49 -0
  58. data/test/data/paml/codeml/models/results0-3.txt +312 -0
  59. data/test/data/paml/codeml/models/results7-8.txt +340 -0
  60. data/test/functional/bio/io/test_togows.rb +8 -8
  61. data/test/functional/bio/test_command.rb +7 -6
  62. data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
  63. data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
  64. data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
  65. data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
  66. data/test/unit/bio/db/embl/test_sptr.rb +1 -1
  67. data/test/unit/bio/db/fasta/test_defline.rb +160 -0
  68. data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
  69. data/test/unit/bio/db/kegg/test_genes.rb +281 -1
  70. data/test/unit/bio/db/kegg/test_genome.rb +408 -0
  71. data/test/unit/bio/db/kegg/test_module.rb +246 -0
  72. data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
  73. data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
  74. data/test/unit/bio/db/test_aaindex.rb +8 -7
  75. data/test/unit/bio/db/test_fastq.rb +36 -0
  76. data/test/unit/bio/db/test_go.rb +171 -0
  77. data/test/unit/bio/db/test_medline.rb +148 -0
  78. data/test/unit/bio/db/test_qual.rb +9 -2
  79. data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
  80. data/test/unit/bio/test_tree.rb +260 -1
  81. data/test/unit/bio/util/test_contingency_table.rb +7 -7
  82. metadata +53 -6
@@ -0,0 +1,490 @@
1
+ #
2
+ # test/unit/bio/db/fasta/test_defline_misc.rb - Unit test for Bio::FastaDefline
3
+ #
4
+ # Copyright:: Copyright (C) 2010
5
+ # John Prince <jtprince@byu.edu>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id:$
10
+ #
11
+
12
+ # loading helper routine for testing bioruby
13
+ require 'pathname'
14
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
15
+ 'bioruby_test_helper.rb')).cleanpath.to_s
16
+
17
+ # libraries needed for the tests
18
+ require 'test/unit'
19
+ require 'bio/db/fasta/defline'
20
+
21
+ module Bio
22
+
23
# Tests FastaDefline against a single NCBI-style definition line that
# carries a "gi" number and an "emb" accession.version, followed by a
# free-text description ending in a bracketed name.
class TestFastaDeflineGI1 < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]'
    @defline = FastaDefline.new(definition_line)
  end

  # The entry ID is expected to be the leading "gi|number" pair.
  def test_entry_id
    assert_equal('gi|671595', @defline.entry_id)
  end

  # The database tag is usable directly as a reader method.
  def test_emb
    assert_equal('CAA85678.1', @defline.emb)
  end

  # Same lookup as test_emb, through the generic #get accessor.
  def test_get_emb
    assert_equal('CAA85678.1', @defline.get('emb'))
  end

  def test_gi
    assert_equal('671595', @defline.gi)
  end

  # The accession is expected without its ".1" version suffix.
  def test_accession
    assert_equal('CAA85678', @defline.accession)
  end

  def test_accessions
    assert_equal(['CAA85678'], @defline.accessions)
  end

  # The versioned accession keeps the ".1" suffix.
  def test_acc_version
    assert_equal('CAA85678.1', @defline.acc_version)
  end

  # No locus is expected for this definition line.
  def test_locus
    assert_nil(@defline.locus)
  end

  # The bracketed name at the end of the description is expected as the
  # final ID entry.
  def test_list_ids
    assert_equal([["gi", "671595"], ["emb", "CAA85678.1", nil], ["Perovskia abrotanoides"]], @defline.list_ids)
  end

  def test_description
    assert_equal('rubisco large subunit [Perovskia abrotanoides]', @defline.description)
  end

  def test_descriptions
    assert_equal(['rubisco large subunit [Perovskia abrotanoides]'], @defline.descriptions)
  end

  # Words are expected lower-cased and in sorted order.
  def test_words
    assert_equal(["abrotanoides", "large", "perovskia", "rubisco", "subunit"], @defline.words)
  end

  def test_id_strings
    assert_equal(["671595", "CAA85678.1"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
86
+
87
# Tests FastaDefline against a definition line that concatenates three
# NCBI-style entries (sp, pir and gb), separated by the \001 control
# character.
class TestFastaDeflineGIMultiple < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  # The literal must stay double-quoted so that \001 is interpreted as the
  # control character rather than as four literal characters.
  def setup
    definition_line = ">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]"
    @defline = FastaDefline.new(definition_line)
  end

  # The entry ID is expected to come from the first entry only.
  def test_entry_id
    assert_equal("gi|2495000", @defline.entry_id)
  end

  # The gi number of the first entry is expected.
  def test_gi
    assert_equal("2495000", @defline.gi)
  end

  # The expected accession is the gb one (third entry), without version.
  def test_accession
    assert_equal("AAB29504", @defline.accession)
  end

  def test_accessions
    assert_equal(["Q63931", "AAB29504"], @defline.accessions)
  end

  def test_acc_version
    assert_equal("AAB29504.1", @defline.acc_version)
  end

  # No locus is expected for this definition line.
  def test_locus
    assert_nil(@defline.locus)
  end

  # IDs of all three entries are expected in order of appearance; the empty
  # pir field is expected as nil.
  def test_list_ids
    assert_equal([["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"], ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"], ["gb", "AAB29504.1", nil], ["Cavia"]], @defline.list_ids)
  end

  # Only the first entry's description is expected here.
  def test_description
    assert_equal("CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", @defline.description)
  end

  # One description per concatenated entry.
  def test_descriptions
    assert_equal(["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", "cholecystokinin A receptor - guinea pig", "cholecystokinin A receptor; CCK-A receptor [Cavia]"], @defline.descriptions)
  end

  # Words are expected lower-cased, de-duplicated and in sorted order.
  def test_words
    assert_equal(["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig", "receptor", "type"], @defline.words)
  end

  def test_id_strings
    assert_equal(["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898", "544724", "AAB29504.1", "Cavia"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
142
+
143
# Tests FastaDefline against a single NCBI-style definition line that
# carries a "gi" number and an "sp" accession plus entry name.
class TestFastaDeflineGI2 < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>gi|9910844|sp|Q9UWG2|RL3_METVA 50S ribosomal protein L3P'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("gi|9910844", @defline.entry_id)
  end

  def test_gi
    assert_equal("9910844", @defline.gi)
  end

  # The sp reader is expected to give the entry name, not the accession.
  def test_sp
    assert_equal('RL3_METVA', @defline.sp)
  end

  def test_accession
    assert_equal("Q9UWG2", @defline.accession)
  end

  def test_accessions
    assert_equal(["Q9UWG2"], @defline.accessions)
  end

  # The accession in this line has no version suffix, and nil is expected.
  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  # No locus is expected for this definition line.
  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["gi", "9910844"], ["sp", "Q9UWG2", "RL3_METVA"]], @defline.list_ids)
  end

  def test_description
    assert_equal("50S ribosomal protein L3P", @defline.description)
  end

  def test_descriptions
    assert_equal(["50S ribosomal protein L3P"], @defline.descriptions)
  end

  # Only these two lower-cased words are expected; "L3P" is expected among
  # the ID strings instead (see test_id_strings).
  def test_words
    assert_equal(["50s", "ribosomal"], @defline.words)
  end

  def test_id_strings
    assert_equal(["9910844", "Q9UWG2", "RL3_METVA", "L3P"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
202
# Tests FastaDefline against a "db:entry" style definition line
# ("sce:YBR160W") whose description carries bracketed "EC:" and "SP:"
# cross-references.
class TestFastaDeflineSce < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("sce:YBR160W", @defline.entry_id)
  end

  # The line has no gi field.
  def test_gi
    assert_nil(@defline.gi)
  end

  # No accession is expected for this definition line.
  def test_accession
    assert_nil(@defline.accession)
  end

  def test_accessions
    assert_equal([], @defline.accessions)
  end

  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  # The bracketed cross-references are expected as additional ID pairs.
  def test_list_ids
    assert_equal([["sce", "YBR160W"], ["EC", "2.7.1.-"], ["SP", "CC28_YEAST"]], @defline.list_ids)
  end

  # The description is expected to keep the bracketed cross-references.
  def test_description
    assert_equal("CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]", @defline.description)
  end

  def test_descriptions
    assert_equal(["CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]"], @defline.descriptions)
  end

  def test_words
    assert_equal(["catalytic", "cyclin-dependent", "kinase", "srm5", "subunit"], @defline.words)
  end

  # "CC28_YEAST" is expected twice in the result.
  def test_id_strings
    assert_equal(["YBR160W", "2.7.1.-", "CC28_YEAST", "CC28_YEAST", "CDC28"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
257
+
258
# Tests FastaDefline against a "db:entry" style definition line
# ("emb:CACDC28") whose description starts with a bracketed identifier.
class TestFastaDeflineEmb < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>emb:CACDC28 [X80034] C.albicans CDC28 gene'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("emb:CACDC28", @defline.entry_id)
  end

  # The line has no gi field.
  def test_gi
    assert_nil(@defline.gi)
  end

  def test_accession
    assert_equal("CACDC28", @defline.accession)
  end

  def test_accessions
    assert_equal(["CACDC28"], @defline.accessions)
  end

  # With no version suffix in the line, the same string as the accession
  # is expected.
  def test_acc_version
    assert_equal("CACDC28", @defline.acc_version)
  end

  # No locus is expected for this definition line.
  def test_locus
    assert_nil(@defline.locus)
  end

  # The bracketed identifier is expected as a one-element ID entry.
  def test_list_ids
    assert_equal([["emb", "CACDC28"], ["X80034"]], @defline.list_ids)
  end

  def test_description
    assert_equal("[X80034] C.albicans CDC28 gene", @defline.description)
  end

  def test_descriptions
    assert_equal(["[X80034] C.albicans CDC28 gene"], @defline.descriptions)
  end

  def test_words
    assert_equal(["albicans"], @defline.words)
  end

  # "X80034" is expected twice in the result.
  def test_id_strings
    assert_equal(["CACDC28", "X80034", "CDC28", "X80034"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
313
+
314
# Tests FastaDefline against a plain definition line: a bare identifier
# followed by free text, with no database tag or separator characters.
class TestFastaDeflineSimple < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>ABC12345 this is test'
    @defline = FastaDefline.new(definition_line)
  end

  # The first whitespace-delimited token is expected as the entry ID.
  def test_entry_id
    assert_equal("ABC12345", @defline.entry_id)
  end

  # The line has no gi field.
  def test_gi
    assert_nil(@defline.gi)
  end

  # No accession is expected for a bare identifier.
  def test_accession
    assert_nil(@defline.accession)
  end

  def test_accessions
    assert_equal([], @defline.accessions)
  end

  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["ABC12345"]], @defline.list_ids)
  end

  def test_description
    assert_equal("this is test", @defline.description)
  end

  def test_descriptions
    assert_equal(["this is test"], @defline.descriptions)
  end

  # Of the three description words, only "test" is expected.
  def test_words
    assert_equal(["test"], @defline.words)
  end

  def test_id_strings
    assert_equal(["ABC12345"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
369
+
370
# Tests FastaDefline against an "sp|accession|entry_name" definition line
# whose description carries OS=, GN=, PE= and SV= fields.
class TestFastaDeflineSwissProt < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>sp|P05100|3MG1_ECOLI DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1'
    @defline = FastaDefline.new(definition_line)
  end

  # The whole "sp|accession|entry_name" token is expected as the entry ID.
  def test_entry_id
    assert_equal('sp|P05100|3MG1_ECOLI', @defline.entry_id)
  end

  # The generic accessor is expected to give the entry name, not the
  # accession.
  def test_get
    assert_equal('3MG1_ECOLI', @defline.get('sp'))
  end

  # Same lookup as test_get, through the tag-named reader method.
  def test_sp
    assert_equal('3MG1_ECOLI', @defline.sp)
  end

  def test_accession
    assert_equal("P05100", @defline.accession)
  end

  def test_accessions
    assert_equal(["P05100"], @defline.accessions)
  end

  # The accession in this line has no version suffix, and nil is expected.
  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  # No locus is expected for this definition line.
  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["sp", "P05100", "3MG1_ECOLI"]], @defline.list_ids)
  end

  # The key=value fields are expected to stay in the description.
  def test_description
    assert_equal("DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1", @defline.description)
  end

  def test_descriptions
    assert_equal(["DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1"], @defline.descriptions)
  end

  # The key=value fields are expected as single lower-cased words.
  def test_words
    assert_equal(["coli", "dna-3-methyladenine", "glycosylase", "gn=tag", "os=escherichia", "pe=1", "sv=1"], @defline.words)
  end

  def test_id_strings
    assert_equal(["P05100", "3MG1_ECOLI", "K12"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
429
+
430
# Tests FastaDefline against a "tr|accession|entry_name" definition line
# whose description carries OS=, GN=, PE= and SV= fields.
class TestFastaDeflineTrembl < Test::Unit::TestCase

  # Builds the defline object shared by every test in this class.
  def setup
    definition_line = '>tr|C8URF0|C8URF0_ECO1A Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1'
    @defline = Bio::FastaDefline.new(definition_line)
  end

  # The whole "tr|accession|entry_name" token is expected as the entry ID.
  def test_entry_id
    assert_equal('tr|C8URF0|C8URF0_ECO1A', @defline.entry_id)
  end

  # The generic accessor is expected to give the entry name, not the
  # accession.
  def test_get
    assert_equal('C8URF0_ECO1A', @defline.get('tr'))
  end

  # Same lookup as test_get, through the tag-named reader method.
  def test_tr
    assert_equal('C8URF0_ECO1A', @defline.tr)
  end

  def test_accession
    assert_equal("C8URF0", @defline.accession)
  end

  def test_accessions
    assert_equal(["C8URF0"], @defline.accessions)
  end

  # The accession in this line has no version suffix, and nil is expected.
  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  # No locus is expected for this definition line.
  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["tr", "C8URF0", "C8URF0_ECO1A"]], @defline.list_ids)
  end

  # The key=value fields are expected to stay in the description.
  def test_description
    assert_equal("Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1", @defline.description)
  end

  def test_descriptions
    assert_equal(["Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1"], @defline.descriptions)
  end

  # The key=value fields are expected as single lower-cased words.
  def test_words
    assert_equal(["11128", "coli", "conserved", "ehec", "gn=eco111_p3-39", "os=escherichia", "pe=4", "plasmid", "predicted", "sv=1"], @defline.words)
  end

  def test_id_strings
    assert_equal(["C8URF0", "C8URF0_ECO1A", "ECsL50", "O111"], @defline.id_strings)
  end

  # Called without any type argument; an empty array is expected.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
489
+ end
490
+