bio 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. data/ChangeLog +1712 -0
  2. data/KNOWN_ISSUES.rdoc +11 -1
  3. data/README.rdoc +3 -2
  4. data/RELEASE_NOTES.rdoc +65 -127
  5. data/bioruby.gemspec +38 -2
  6. data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
  7. data/doc/Tutorial.rd +74 -16
  8. data/doc/Tutorial.rd.html +68 -16
  9. data/lib/bio.rb +2 -0
  10. data/lib/bio/appl/clustalw/report.rb +18 -0
  11. data/lib/bio/appl/paml/codeml/report.rb +579 -21
  12. data/lib/bio/command.rb +149 -21
  13. data/lib/bio/db/aaindex.rb +11 -1
  14. data/lib/bio/db/embl/sptr.rb +1 -1
  15. data/lib/bio/db/fasta/defline.rb +7 -2
  16. data/lib/bio/db/fasta/qual.rb +24 -0
  17. data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
  18. data/lib/bio/db/fastq.rb +15 -0
  19. data/lib/bio/db/go.rb +2 -2
  20. data/lib/bio/db/kegg/common.rb +109 -5
  21. data/lib/bio/db/kegg/genes.rb +61 -15
  22. data/lib/bio/db/kegg/genome.rb +43 -38
  23. data/lib/bio/db/kegg/module.rb +158 -0
  24. data/lib/bio/db/kegg/orthology.rb +40 -1
  25. data/lib/bio/db/kegg/pathway.rb +254 -0
  26. data/lib/bio/db/medline.rb +6 -2
  27. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  28. data/lib/bio/location.rb +39 -0
  29. data/lib/bio/reference.rb +24 -0
  30. data/lib/bio/sequence.rb +2 -0
  31. data/lib/bio/sequence/adapter.rb +1 -0
  32. data/lib/bio/sequence/format.rb +14 -0
  33. data/lib/bio/sequence/sequence_masker.rb +95 -0
  34. data/lib/bio/tree.rb +4 -4
  35. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
  36. data/lib/bio/version.rb +1 -1
  37. data/setup.rb +5 -0
  38. data/test/data/KEGG/K02338.orthology +180 -52
  39. data/test/data/KEGG/M00118.module +44 -0
  40. data/test/data/KEGG/T00005.genome +140 -0
  41. data/test/data/KEGG/T00070.genome +34 -0
  42. data/test/data/KEGG/b0529.gene +47 -0
  43. data/test/data/KEGG/ec00072.pathway +23 -0
  44. data/test/data/KEGG/hsa00790.pathway +59 -0
  45. data/test/data/KEGG/ko00312.pathway +16 -0
  46. data/test/data/KEGG/map00030.pathway +37 -0
  47. data/test/data/KEGG/map00052.pathway +13 -0
  48. data/test/data/KEGG/rn00250.pathway +114 -0
  49. data/test/data/clustalw/example1.aln +58 -0
  50. data/test/data/go/selected_component.ontology +12 -0
  51. data/test/data/go/selected_gene_association.sgd +31 -0
  52. data/test/data/go/selected_wikipedia2go +13 -0
  53. data/test/data/medline/20146148_modified.medline +54 -0
  54. data/test/data/paml/codeml/models/aa.aln +26 -0
  55. data/test/data/paml/codeml/models/aa.dnd +13 -0
  56. data/test/data/paml/codeml/models/aa.ph +13 -0
  57. data/test/data/paml/codeml/models/alignment.phy +49 -0
  58. data/test/data/paml/codeml/models/results0-3.txt +312 -0
  59. data/test/data/paml/codeml/models/results7-8.txt +340 -0
  60. data/test/functional/bio/io/test_togows.rb +8 -8
  61. data/test/functional/bio/test_command.rb +7 -6
  62. data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
  63. data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
  64. data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
  65. data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
  66. data/test/unit/bio/db/embl/test_sptr.rb +1 -1
  67. data/test/unit/bio/db/fasta/test_defline.rb +160 -0
  68. data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
  69. data/test/unit/bio/db/kegg/test_genes.rb +281 -1
  70. data/test/unit/bio/db/kegg/test_genome.rb +408 -0
  71. data/test/unit/bio/db/kegg/test_module.rb +246 -0
  72. data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
  73. data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
  74. data/test/unit/bio/db/test_aaindex.rb +8 -7
  75. data/test/unit/bio/db/test_fastq.rb +36 -0
  76. data/test/unit/bio/db/test_go.rb +171 -0
  77. data/test/unit/bio/db/test_medline.rb +148 -0
  78. data/test/unit/bio/db/test_qual.rb +9 -2
  79. data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
  80. data/test/unit/bio/test_tree.rb +260 -1
  81. data/test/unit/bio/util/test_contingency_table.rb +7 -7
  82. metadata +53 -6
@@ -0,0 +1,490 @@
1
+ #
2
+ # test/unit/bio/db/fasta/test_defline_misc.rb - Unit test for Bio::FastaDefline
3
+ #
4
+ # Copyright:: Copyright (C) 2010
5
+ # John Prince <jtprince@byu.edu>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id:$
10
+ #
11
+
12
+ # loading helper routine for testing bioruby
13
+ require 'pathname'
14
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
15
+ 'bioruby_test_helper.rb')).cleanpath.to_s
16
+
17
+ # libraries needed for the tests
18
+ require 'test/unit'
19
+ require 'bio/db/fasta/defline'
20
+
21
+ module Bio
22
+
23
# Exercises the FastaDefline accessors against a single-entry defline whose
# identifier part is "gi|671595|emb|CAA85678.1|" and whose description ends
# with a bracketed name.
class TestFastaDeflineGI1 < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  def setup
    definition_line = '>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal('gi|671595', @defline.entry_id)
  end

  def test_emb
    assert_equal('CAA85678.1', @defline.emb)
  end

  def test_get_emb
    assert_equal('CAA85678.1', @defline.get('emb'))
  end

  def test_gi
    assert_equal('671595', @defline.gi)
  end

  def test_accession
    assert_equal('CAA85678', @defline.accession)
  end

  def test_accessions
    assert_equal(['CAA85678'], @defline.accessions)
  end

  def test_acc_version
    assert_equal('CAA85678.1', @defline.acc_version)
  end

  # assert_nil rather than assert_equal(nil, ...): states the intent
  # directly and avoids the nil-expected deprecation in minitest-backed
  # test/unit.
  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["gi", "671595"], ["emb", "CAA85678.1", nil], ["Perovskia abrotanoides"]], @defline.list_ids)
  end

  def test_description
    assert_equal('rubisco large subunit [Perovskia abrotanoides]', @defline.description)
  end

  def test_descriptions
    assert_equal(['rubisco large subunit [Perovskia abrotanoides]'], @defline.descriptions)
  end

  def test_words
    assert_equal(["abrotanoides", "large", "perovskia", "rubisco", "subunit"], @defline.words)
  end

  def test_id_strings
    assert_equal(["671595", "CAA85678.1"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
86
+
87
# Exercises the FastaDefline accessors against a defline that packs three
# "gi|..." entries into one line, separated by \001 characters.
class TestFastaDeflineGIMultiple < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test. The string must stay
  # double-quoted so that the \001 escapes are interpreted.
  def setup
    definition_line = ">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]"
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("gi|2495000", @defline.entry_id)
  end

  def test_gi
    assert_equal("2495000", @defline.gi)
  end

  def test_accession
    assert_equal("AAB29504", @defline.accession)
  end

  def test_accessions
    assert_equal(["Q63931", "AAB29504"], @defline.accessions)
  end

  def test_acc_version
    assert_equal("AAB29504.1", @defline.acc_version)
  end

  # assert_nil rather than assert_equal(nil, ...): states the intent
  # directly and avoids the nil-expected deprecation in minitest-backed
  # test/unit.
  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"], ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"], ["gb", "AAB29504.1", nil], ["Cavia"]], @defline.list_ids)
  end

  # Only the description of the first entry is expected here; the full
  # list is checked in test_descriptions.
  def test_description
    assert_equal("CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", @defline.description)
  end

  def test_descriptions
    assert_equal(["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", "cholecystokinin A receptor - guinea pig", "cholecystokinin A receptor; CCK-A receptor [Cavia]"], @defline.descriptions)
  end

  def test_words
    assert_equal(["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig", "receptor", "type"], @defline.words)
  end

  def test_id_strings
    assert_equal(["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898", "544724", "AAB29504.1", "Cavia"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
142
+
143
# Exercises the FastaDefline accessors against a single-entry defline whose
# identifier part is "gi|9910844|sp|Q9UWG2|RL3_METVA".
class TestFastaDeflineGI2 < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  def setup
    definition_line = '>gi|9910844|sp|Q9UWG2|RL3_METVA 50S ribosomal protein L3P'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("gi|9910844", @defline.entry_id)
  end

  def test_gi
    assert_equal("9910844", @defline.gi)
  end

  def test_sp
    assert_equal('RL3_METVA', @defline.sp)
  end

  def test_accession
    assert_equal("Q9UWG2", @defline.accession)
  end

  def test_accessions
    assert_equal(["Q9UWG2"], @defline.accessions)
  end

  # The nil checks below use assert_nil rather than assert_equal(nil, ...):
  # it states the intent directly and avoids the nil-expected deprecation
  # in minitest-backed test/unit.
  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["gi", "9910844"], ["sp", "Q9UWG2", "RL3_METVA"]], @defline.list_ids)
  end

  def test_description
    assert_equal("50S ribosomal protein L3P", @defline.description)
  end

  def test_descriptions
    assert_equal(["50S ribosomal protein L3P"], @defline.descriptions)
  end

  def test_words
    assert_equal(["50s", "ribosomal"], @defline.words)
  end

  # "L3P" from the description is expected among the id strings.
  def test_id_strings
    assert_equal(["9910844", "Q9UWG2", "RL3_METVA", "L3P"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
202
# Exercises the FastaDefline accessors against a defline of the form
# "sce:YBR160W ..." whose description carries bracketed "[EC:...]" and
# "[SP:...]" tags.
class TestFastaDeflineSce < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  def setup
    definition_line = '>sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("sce:YBR160W", @defline.entry_id)
  end

  # The nil checks below use assert_nil rather than assert_equal(nil, ...):
  # it states the intent directly and avoids the nil-expected deprecation
  # in minitest-backed test/unit.
  def test_gi
    assert_nil(@defline.gi)
  end

  def test_accession
    assert_nil(@defline.accession)
  end

  def test_accessions
    assert_equal([], @defline.accessions)
  end

  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["sce", "YBR160W"], ["EC", "2.7.1.-"], ["SP", "CC28_YEAST"]], @defline.list_ids)
  end

  def test_description
    assert_equal("CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]", @defline.description)
  end

  def test_descriptions
    assert_equal(["CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]"], @defline.descriptions)
  end

  def test_words
    assert_equal(["catalytic", "cyclin-dependent", "kinase", "srm5", "subunit"], @defline.words)
  end

  # "CC28_YEAST" is expected twice in the result; the duplicate is part of
  # the asserted value, not a typo.
  def test_id_strings
    assert_equal(["YBR160W", "2.7.1.-", "CC28_YEAST", "CC28_YEAST", "CDC28"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
257
+
258
# Exercises the FastaDefline accessors against a defline of the form
# "emb:CACDC28 [X80034] ...", i.e. a colon-separated identifier followed by
# a bracketed token at the start of the description.
class TestFastaDeflineEmb < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  def setup
    definition_line = '>emb:CACDC28 [X80034] C.albicans CDC28 gene'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("emb:CACDC28", @defline.entry_id)
  end

  # The nil checks below use assert_nil rather than assert_equal(nil, ...):
  # it states the intent directly and avoids the nil-expected deprecation
  # in minitest-backed test/unit.
  def test_gi
    assert_nil(@defline.gi)
  end

  def test_accession
    assert_equal("CACDC28", @defline.accession)
  end

  def test_accessions
    assert_equal(["CACDC28"], @defline.accessions)
  end

  def test_acc_version
    assert_equal("CACDC28", @defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["emb", "CACDC28"], ["X80034"]], @defline.list_ids)
  end

  def test_description
    assert_equal("[X80034] C.albicans CDC28 gene", @defline.description)
  end

  def test_descriptions
    assert_equal(["[X80034] C.albicans CDC28 gene"], @defline.descriptions)
  end

  def test_words
    assert_equal(["albicans"], @defline.words)
  end

  # "X80034" is expected twice in the result; the duplicate is part of the
  # asserted value, not a typo.
  def test_id_strings
    assert_equal(["CACDC28", "X80034", "CDC28", "X80034"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
313
+
314
# Exercises the FastaDefline accessors against a minimal defline: a bare
# identifier ("ABC12345") followed by free text, with no "|" or ":"
# separators.
class TestFastaDeflineSimple < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  def setup
    definition_line = '>ABC12345 this is test'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("ABC12345", @defline.entry_id)
  end

  # The nil checks below use assert_nil rather than assert_equal(nil, ...):
  # it states the intent directly and avoids the nil-expected deprecation
  # in minitest-backed test/unit.
  def test_gi
    assert_nil(@defline.gi)
  end

  def test_accession
    assert_nil(@defline.accession)
  end

  def test_accessions
    assert_equal([], @defline.accessions)
  end

  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["ABC12345"]], @defline.list_ids)
  end

  def test_description
    assert_equal("this is test", @defline.description)
  end

  def test_descriptions
    assert_equal(["this is test"], @defline.descriptions)
  end

  # Only "test" is expected to survive from the description "this is test".
  def test_words
    assert_equal(["test"], @defline.words)
  end

  def test_id_strings
    assert_equal(["ABC12345"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
369
+
370
# Exercises the FastaDefline accessors against a defline whose identifier
# part is "sp|P05100|3MG1_ECOLI" and whose description carries OS=, GN=,
# PE= and SV= fields.
class TestFastaDeflineSwissProt < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  def setup
    definition_line = '>sp|P05100|3MG1_ECOLI DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal('sp|P05100|3MG1_ECOLI', @defline.entry_id)
  end

  def test_get
    assert_equal('3MG1_ECOLI', @defline.get('sp'))
  end

  def test_sp
    assert_equal('3MG1_ECOLI', @defline.sp)
  end

  def test_accession
    assert_equal("P05100", @defline.accession)
  end

  def test_accessions
    assert_equal(["P05100"], @defline.accessions)
  end

  # The nil checks below use assert_nil rather than assert_equal(nil, ...):
  # it states the intent directly and avoids the nil-expected deprecation
  # in minitest-backed test/unit.
  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["sp", "P05100", "3MG1_ECOLI"]], @defline.list_ids)
  end

  def test_description
    assert_equal("DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1", @defline.description)
  end

  def test_descriptions
    assert_equal(["DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1"], @defline.descriptions)
  end

  # The KEY=value fields are expected to come back as single lower-cased
  # words (for example "gn=tag").
  def test_words
    assert_equal(["coli", "dna-3-methyladenine", "glycosylase", "gn=tag", "os=escherichia", "pe=1", "sv=1"], @defline.words)
  end

  def test_id_strings
    assert_equal(["P05100", "3MG1_ECOLI", "K12"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
429
+
430
# Exercises the FastaDefline accessors against a defline whose identifier
# part is "tr|C8URF0|C8URF0_ECO1A" and whose description carries OS=, GN=,
# PE= and SV= fields.
class TestFastaDeflineTrembl < Test::Unit::TestCase

  # Builds a fresh FastaDefline for every test from the fixed sample line.
  # The constant is referenced unqualified, as in the sibling test classes;
  # it resolves through the enclosing Bio module.
  def setup
    definition_line = '>tr|C8URF0|C8URF0_ECO1A Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal('tr|C8URF0|C8URF0_ECO1A', @defline.entry_id)
  end

  def test_get
    assert_equal('C8URF0_ECO1A', @defline.get('tr'))
  end

  def test_tr
    assert_equal('C8URF0_ECO1A', @defline.tr)
  end

  def test_accession
    assert_equal("C8URF0", @defline.accession)
  end

  def test_accessions
    assert_equal(["C8URF0"], @defline.accessions)
  end

  # The nil checks below use assert_nil rather than assert_equal(nil, ...):
  # it states the intent directly and avoids the nil-expected deprecation
  # in minitest-backed test/unit.
  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["tr", "C8URF0", "C8URF0_ECO1A"]], @defline.list_ids)
  end

  def test_description
    assert_equal("Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1", @defline.description)
  end

  def test_descriptions
    assert_equal(["Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1"], @defline.descriptions)
  end

  # The KEY=value fields are expected to come back as single lower-cased
  # words (for example "gn=eco111_p3-39").
  def test_words
    assert_equal(["11128", "coli", "conserved", "ehec", "gn=eco111_p3-39", "os=escherichia", "pe=4", "plasmid", "predicted", "sv=1"], @defline.words)
  end

  def test_id_strings
    assert_equal(["C8URF0", "C8URF0_ECO1A", "ECsL50", "O111"], @defline.id_strings)
  end

  # Called without arguments, get_all_by_type is expected to return an
  # empty array.
  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
489
+ end
490
+