bio 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +29 -0
  3. data/.gitignore +32 -0
  4. data/ChangeLog +433 -0
  5. data/Gemfile +3 -0
  6. data/LEGAL +2 -0
  7. data/README.rdoc +1 -1
  8. data/RELEASE_NOTES.rdoc +64 -0
  9. data/appveyor.yml +14 -13
  10. data/bioruby.gemspec +9 -10
  11. data/doc/Tutorial.md +1274 -0
  12. data/doc/Tutorial_ja.md +2595 -0
  13. data/lib/bio/appl/blast/genomenet.rb +2 -1
  14. data/lib/bio/appl/clustalw/report.rb +3 -2
  15. data/lib/bio/appl/iprscan/report.rb +2 -1
  16. data/lib/bio/appl/meme/mast.rb +2 -1
  17. data/lib/bio/appl/paml/common.rb +2 -1
  18. data/lib/bio/appl/pts1.rb +1 -1
  19. data/lib/bio/db/embl/common.rb +5 -2
  20. data/lib/bio/db/embl/uniprotkb.rb +52 -19
  21. data/lib/bio/db/fastq.rb +3 -2
  22. data/lib/bio/db/gff.rb +14 -8
  23. data/lib/bio/db/newick.rb +6 -5
  24. data/lib/bio/db/pdb/chain.rb +2 -1
  25. data/lib/bio/db/pdb/pdb.rb +2 -1
  26. data/lib/bio/db/prosite.rb +2 -0
  27. data/lib/bio/db.rb +5 -4
  28. data/lib/bio/io/flatfile/buffer.rb +2 -1
  29. data/lib/bio/io/flatfile/splitter.rb +2 -1
  30. data/lib/bio/pathway.rb +2 -1
  31. data/lib/bio/sequence/common.rb +2 -1
  32. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +3 -2
  33. data/lib/bio/util/sirna.rb +3 -2
  34. data/lib/bio/version.rb +1 -1
  35. data/test/data/uniprot/P03589.uniprot +127 -0
  36. data/test/data/uniprot/P49144.uniprot +232 -0
  37. data/test/functional/bio/test_command.rb +2 -0
  38. data/test/network/bio/db/kegg/test_genes_hsa7422.rb +29 -17
  39. data/test/unit/bio/appl/iprscan/test_report.rb +3 -2
  40. data/test/unit/bio/db/embl/test_uniprotkb_P03589.rb +378 -0
  41. data/test/unit/bio/db/embl/test_uniprotkb_P49144.rb +359 -0
  42. data/test/unit/bio/io/flatfile/test_splitter.rb +7 -4
  43. data/test/unit/bio/sequence/test_common.rb +3 -2
  44. data/test/unit/bio/test_alignment.rb +17 -16
  45. data/test/unit/bio/test_sequence.rb +3 -2
  46. metadata +11 -15
  47. data/.travis.yml +0 -71
  48. data/gemfiles/Gemfile.travis-jruby1.8 +0 -6
  49. data/gemfiles/Gemfile.travis-jruby1.9 +0 -5
  50. data/gemfiles/Gemfile.travis-rbx +0 -10
  51. data/gemfiles/Gemfile.travis-ruby1.8 +0 -6
  52. data/gemfiles/Gemfile.travis-ruby1.9 +0 -5
  53. data/gemfiles/Gemfile.windows +0 -6
  54. data/gemfiles/modify-Gemfile.rb +0 -28
  55. data/gemfiles/prepare-gemspec.rb +0 -29
@@ -0,0 +1,378 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # test/unit/bio/db/embl/test_uniprotkb_P03589.rb - Unit tests for Bio::UniProtKB
4
+ #
5
+ # Copyright:: Copyright (C) 2023 BioRuby Project <staff@bioruby.org>
6
+ # License:: The Ruby License
7
+ # Contributor:: 2005 Mitsuteru Nakao <n@bioruby.org>
8
+ # 2023 Naohisa Goto <ng@bioruby.org>
9
+ #
10
+
11
+ # loading helper routine for testing bioruby
12
+ require 'pathname'
13
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
14
+ 'bioruby_test_helper.rb')).cleanpath.to_s
15
+
16
+ # libraries needed for the tests
17
+ require 'test/unit'
18
+ require 'bio/db/embl/uniprotkb'
19
+
20
module Bio
  # Unit tests for Bio::UniProtKB, run against the single entry stored in
  # the test data file uniprot/P03589.uniprot (under BioRubyTestDataPath).
  #
  # Each test pins the value the parser is required to return for that
  # fixture; the expected literals below must match the fixture exactly.
  class TestUniProtKB_P03589 < Test::Unit::TestCase

    # Reads the fixture and parses it into a fresh Bio::UniProtKB object
    # before every test.
    def setup
      path = File.join(BioRubyTestDataPath, 'uniprot', 'P03589.uniprot')
      @obj = Bio::UniProtKB.new(File.read(path))
    end

    # --- ID line ---------------------------------------------------------

    def test_id_line
      assert(@obj.id_line)
    end

    def test_id_line_entry_name
      assert_equal('1A_AMVLE', @obj.id_line('ENTRY_NAME'))
    end

    def test_id_line_data_class
      assert_equal('Reviewed', @obj.id_line('DATA_CLASS'))
    end

    # NOTE(review): the following test is disabled in the original file;
    # the reason is not recorded here.
    #def test_id_line_molecule_type
    #  assert_equal('PRT', @obj.id_line('MOLECULE_TYPE'))
    #end

    def test_id_line_sequence_length
      assert_equal(1126, @obj.id_line('SEQUENCE_LENGTH'))
    end

    # entry, entry_name and entry_id must all return the same value.
    def test_entry
      expected = '1A_AMVLE'
      assert_equal(expected, @obj.entry)
      assert_equal(expected, @obj.entry_name)
      assert_equal(expected, @obj.entry_id)
    end

    # NOTE(review): the following test is disabled in the original file;
    # the reason is not recorded here.
    #def test_molecule
    #  assert_equal('PRT', @obj.molecule)
    #  assert_equal('PRT', @obj.molecule_type)
    #end

    # sequence_length and aalen must both return the same value.
    def test_sequence_length
      expected = 1126
      assert_equal(expected, @obj.sequence_length)
      assert_equal(expected, @obj.aalen)
    end

    # --- AC line ---------------------------------------------------------

    # ac and accessions must both return the same list.
    def test_ac
      expected = ["P03589"].freeze
      assert_equal(expected, @obj.ac)
      assert_equal(expected, @obj.accessions)
    end

    def test_accession
      assert_equal('P03589', @obj.accession)
    end

    # --- DR lines --------------------------------------------------------

    # Without an argument, dr is indexed by database name.
    def test_dr
      assert_equal(13, @obj.dr.size)
      assert_equal(8, @obj.dr['GO'].size)
      interpro = [["IPR027351", "(+)RNA_virus_helicase_core_dom"],
                  ["IPR002588", "Alphavirus-like_MT_dom"],
                  ["IPR027417", "P-loop_NTPase"]]
      assert_equal(interpro, @obj.dr['InterPro'])
    end

    # With a database name, dr returns one hash per cross-reference.
    def test_dr_with_key
      pfam = [
        { " "              => "1",
          "Version"        => "Viral_helicase1",
          "Accession"      => "PF01443",
          "Molecular Type" => nil },
        { " "              => "1",
          "Version"        => "Vmethyltransf",
          "Accession"      => "PF01660",
          "Molecular Type" => nil }
      ].freeze
      assert_equal(pfam, @obj.dr('Pfam'))

      embl = [
        { "Accession"      => "L00163",
          "Version"        => "AAA46289.1",
          " "              => "-",
          "Molecular Type" => "Genomic_RNA" }
      ].freeze
      assert_equal(embl, @obj.dr('EMBL'))
    end

    # A database name that has no cross-references yields an empty array.
    def test_dr_with_key_empty
      assert_equal([], @obj.dr('NOT_A_DATABASE'))
    end

    # --- DT lines --------------------------------------------------------

    def test_dt
      assert(@obj.dt)
    end

    def test_dt_created
      assert_equal('21-JUL-1986, integrated into UniProtKB/Swiss-Prot.',
                   @obj.dt('created'))
    end

    def test_dt_sequence
      assert_equal('21-JUL-1986, sequence version 1.',
                   @obj.dt('sequence'))
    end

    def test_dt_annotation
      assert_equal('22-FEB-2023, entry version 78.',
                   @obj.dt('annotation'))
    end

    # --- DE lines --------------------------------------------------------

    def test_de
      assert(@obj.de)
    end

    def test_protein_name
      assert_equal("Replication protein 1a", @obj.protein_name)
    end

    def test_synonyms
      assert_equal([], @obj.synonyms)
    end

    # protein_name must give the same answer when de has been called first.
    def test_protein_name_after_calling_de
      assert(@obj.de)
      assert_equal("Replication protein 1a", @obj.protein_name)
    end

    # --- GN lines --------------------------------------------------------

    def test_gn
      expected = [{ :orfs => ["ORF1a"], :synonyms => [],
                    :name => "", :loci => [] }]
      assert_equal(expected, @obj.gn)
    end

    # __send__ (like the instance_eval it replaces) does not depend on the
    # parser method being public, and avoids evaluating a string as code.
    def test_gn_uniprot_parser
      expected = [{ :orfs => ["ORF1a"], :loci => [],
                    :name => "", :synonyms => [] }]
      assert_equal(expected, @obj.__send__(:gn_uniprot_parser))
    end

    # See test_gn_uniprot_parser for why __send__ is used.
    def test_gn_old_parser
      assert_equal([["ORFNames=ORF1a;"]], @obj.__send__(:gn_old_parser))
    end

    def test_gene_names
      assert_equal([""], @obj.gene_names)
    end

    def test_gene_name
      assert_equal('', @obj.gene_name)
    end

    # --- OS / OC / OX lines ----------------------------------------------

    def test_os
      assert(@obj.os)
    end

    # os(index) returns a single formatted string.
    def test_os_access
      assert_equal("Alfalfa mosaic virus (strain 425 / isolate Leiden)",
                   @obj.os(0))
    end

    # os[index] returns the hash form of the same record.
    def test_os_access2
      expected = { "name" => "(strain 425 / isolate Leiden)",
                   "os"   => "Alfalfa mosaic virus" }
      assert_equal(expected, @obj.os[0])
    end

    def test_oc
      lineage = ["Viruses",
                 "Riboviria",
                 "Orthornavirae",
                 "Kitrinoviricota",
                 "Alsuviricetes",
                 "Martellivirales",
                 "Bromoviridae",
                 "Alfamovirus"]
      assert_equal(lineage, @obj.oc)
    end

    def test_ox
      assert_equal({ "NCBI_TaxID" => ["12322"] }, @obj.ox)
    end

    # --- References and comments -----------------------------------------

    def test_ref # Bio::UniProtKB#ref
      assert_instance_of(Array, @obj.ref)
    end

    def test_cc
      assert_instance_of(Hash, @obj.cc)
    end

    # The three topics below are expected to be nil for this entry.
    def test_cc_database
      assert_nil(@obj.cc('DATABASE'))
    end

    def test_cc_alternative_products
      assert_nil(@obj.cc('ALTERNATIVE PRODUCTS'))
    end

    def test_cc_mass_spectrometry
      assert_nil(@obj.cc('MASS SPECTROMETRY'))
    end

    # --- KW lines --------------------------------------------------------

    def test_kw
      keywords = ["ATP-binding",
                  "Helicase",
                  "Host endoplasmic reticulum",
                  "Host membrane",
                  "Hydrolase",
                  "Membrane",
                  "Methyltransferase",
                  "Nucleotide-binding",
                  "Reference proteome",
                  "Transferase"]
      assert_equal(keywords, @obj.kw)
    end

    # --- FT lines --------------------------------------------------------

    # The whole feature table is compared at once, keyed by feature type.
    def test_ft
      assert(@obj.ft)
      expected = {
        "CHAIN" => [
          { "From" => 1, "To" => 1126, "diff" => [],
            "original" => ["CHAIN", "1", "1126",
                           [["note", "Replication protein 1a"],
                            ["id", "PRO_0000083254"]]],
            "note" => "Replication protein 1a",
            "id" => "PRO_0000083254",
            "FTId" => "PRO_0000083254" }
        ],
        "DOMAIN" => [
          { "From" => 90, "To" => 278, "diff" => [],
            "original" => ["DOMAIN", "90", "278",
                           [["note", "Alphavirus-like MT"],
                            ["evidence",
                             "ECO:0000255|PROSITE-ProRule:PRU01079"]]],
            "note" => "Alphavirus-like MT",
            "evidence" => "ECO:0000255|PROSITE-ProRule:PRU01079" },
          { "From" => 806, "To" => 963, "diff" => [],
            "original" => ["DOMAIN", "806", "963",
                           [["note", "(+)RNA virus helicase ATP-binding"]]],
            "note" => "(+)RNA virus helicase ATP-binding" },
          { "From" => 964, "To" => 1125, "diff" => [],
            "original" => ["DOMAIN", "964", "1125",
                           [["note", "(+)RNA virus helicase C-terminal"]]],
            "note" => "(+)RNA virus helicase C-terminal" }
        ],
        "REGION" => [
          { "From" => 69, "To" => 406, "diff" => [],
            "original" => ["REGION", "69", "406",
                           [["note", "Methyltransferase"]]],
            "note" => "Methyltransferase" },
          { "From" => 834, "To" => 1094, "diff" => [],
            "original" => ["REGION", "834", "1094",
                           [["note", "ATP-dependent helicase"]]],
            "note" => "ATP-dependent helicase" }
        ],
        "BINDING" => [
          { "From" => 838, "To" => 845, "diff" => [],
            "original" => ["BINDING", "838", "845",
                           [["ligand", "ATP"],
                            ["ligand_id", "ChEBI:CHEBI:30616"],
                            ["evidence", "ECO:0000255"]]],
            "ligand" => "ATP", "ligand_id" => "ChEBI:CHEBI:30616",
            "evidence" => "ECO:0000255" }
        ]
      }
      assert_equal(expected, @obj.ft)
    end

    # --- SQ line and sequence --------------------------------------------

    def test_sq
      expected = { "CRC64" => "BF5A8019B47D4CBF",
                   "aalen" => 1126,
                   "MW"    => 125828 }
      assert_equal(expected, @obj.sq)
    end

    def test_sq_crc64
      assert_equal("BF5A8019B47D4CBF", @obj.sq('CRC64'))
    end

    # 'mw', 'molecular' and 'weight' must all return the same value.
    def test_sq_mw
      expected = 125828
      assert_equal(expected, @obj.sq('mw'))
      assert_equal(expected, @obj.sq('molecular'))
      assert_equal(expected, @obj.sq('weight'))
    end

    # 'len', 'length' and 'AA' must all return the same value.
    def test_sq_len
      expected = 1126
      assert_equal(expected, @obj.sq('len'))
      assert_equal(expected, @obj.sq('length'))
      assert_equal(expected, @obj.sq('AA'))
    end

    # seq and aaseq must both return the full amino acid sequence.
    def test_seq
      expected =
        ("MNADAQSTDASLSMREPLSHASIQEMLRRVVEKQAADDTTAIGKVFSEAGRAYAQDALPS" +
         "DKGEVLKISFSLDATQQNILRANFPGRRTVFSNSSSSSHCFAAAHRLLETDFVYRCFGNT" +
         "VDSIIDLGGNFVSHMKVKRHNVHCCCPILDARDGARLTERILSLKSYVRKHPEIVGEADY" +
         "CMDTFQKCSRRADYAFAIHSTSDLDVGELACSLDQKGVMKFICTMMVDADMLIHNEGEIP" +
         "NFNVRWEIDRKKDLIHFDFIDEPNLGYSHRFSLLKHYLTYNAVDLGHAAYRIERKQDFGG" +
         "VMVIDLTYSLGFVPKMPHSNGRSCAWYNRVKGQMVVHTVNEGYYHHSYQTAVRRKVLVDK" +
         "KVLTRVTEVAFRQFRPNADAHSAIQSIATMLSSSTNHTIIGGVTLISGKPLSPDDYIPVA" +
         "TTIYYRVKKLYNAIPEMLSLLDKGERLSTDAVLKGSEGPMWYSGPTFLSALDKVNVPGDF" +
         "VAKALLSLPKRDLKSLFSRSATSHSERTPVRDESPIRCTDGVFYPIRMLLKCLGSDKFES" +
         "VTITDPRSNTETTVDLYQSFQKKIETVFSFILGKIDGPSPLISDPVYFQSLEDVYYAEWH" +
         "QGNAIDASNYARTLLDDIRKQKEESLKAKAKEVEDAQKLNRAILQVHAYLEAHPDGGKIE" +
         "GLGLSSQFIAKIPELAIPTPKPLPEFEKNAETGEILRINPHSDAILEAIDYLKSTSANSI" +
         "ITLNKLGDHCQWTTKGLDVVWAGDDKRRAFIPKKNTWVGPTARSYPLAKYERAMSKDGYV" +
         "TLRWDGEVLDANCVRSLSQYEIVFVDQSCVFASAEAIIPSLEKALGLEAHFSVTIVDGVA" +
         "GCGKTTNIKQIARSSGRDVDLILTSNRSSADELKETIDCSPLTKLHYIRTCDSYLMSASA" +
         "VKAQRLIFDECFLQHAGLVYAAATLAGCSEVIGFGDTEQIPFVSRNPSFVFRHHKLTGKV" +
         "ERKLITWRSPADATYCLEKYFYKNKKPVKTNSRVLRSIEVVPINSPVSVERNTNALYLCH" +
         "TQAEKAVLKAQTHLKGCDNIFTTHEAQGKTFDNVYFCRLTRTSTSLATGRDPINGPCNGL" +
         "VALSRHKKTFKYFTIAHDSDDVIYNACRDAGNTDDSILARSYNHNF").freeze
      assert_equal(expected, @obj.seq)
      assert_equal(expected, @obj.aaseq)
    end

    # --- OH lines --------------------------------------------------------

    # One hash per host record, in the order the assertion requires.
    def test_oh
      hosts = [
        { "NCBI_TaxID" => "4045",
          "HostName" => "Apium graveolens (Celery)" },
        { "NCBI_TaxID" => "83862",
          "HostName" => "Astragalus glycyphyllos (Wild liquorice)" },
        { "NCBI_TaxID" => "4072",
          "HostName" => "Capsicum annuum (Capsicum pepper)" },
        { "NCBI_TaxID" => "41386",
          "HostName" => "Caryopteris incana" },
        { "NCBI_TaxID" => "3827",
          "HostName" => "Cicer arietinum (Chickpea) (Garbanzo)" },
        { "NCBI_TaxID" => "3847",
          "HostName" => "Glycine max (Soybean) (Glycine hispida)" },
        { "NCBI_TaxID" => "35936",
          "HostName" => "Lablab purpureus (Hyacinth bean) (Dolichos lablab)" },
        { "NCBI_TaxID" => "4236",
          "HostName" => "Lactuca sativa (Garden lettuce)" },
        { "NCBI_TaxID" => "3864",
          "HostName" => "Lens culinaris (Lentil) (Cicer lens)" },
        { "NCBI_TaxID" => "3869",
          "HostName" => "Lupinus" },
        { "NCBI_TaxID" => "145753",
          "HostName" => "Malva parviflora (Little mallow) (Cheeseweed mallow)" },
        { "NCBI_TaxID" => "3879",
          "HostName" => "Medicago sativa (Alfalfa)" },
        { "NCBI_TaxID" => "4097",
          "HostName" => "Nicotiana tabacum (Common tobacco)" },
        { "NCBI_TaxID" => "3885",
          "HostName" => "Phaseolus vulgaris (Kidney bean) (French bean)" },
        { "NCBI_TaxID" => "23113",
          "HostName" => "Philadelphus" },
        { "NCBI_TaxID" => "3888",
          "HostName" => "Pisum sativum (Garden pea)" },
        { "NCBI_TaxID" => "4081",
          "HostName" =>
            "Solanum lycopersicum (Tomato) (Lycopersicon esculentum)" },
        { "NCBI_TaxID" => "4113",
          "HostName" => "Solanum tuberosum (Potato)" },
        { "NCBI_TaxID" => "157662",
          "HostName" => "Teramnus repens" },
        { "NCBI_TaxID" => "60916",
          "HostName" => "Trifolium incarnatum (Crimson clover)" },
        { "NCBI_TaxID" => "85293",
          "HostName" => "Viburnum opulus (High-bush cranberry)" },
        { "NCBI_TaxID" => "3916",
          "HostName" =>
            "Vigna radiata var. radiata (Mung bean) (Phaseolus aureus)" },
        { "NCBI_TaxID" => "3917",
          "HostName" => "Vigna unguiculata (Cowpea)" }
      ]
      assert_equal(hosts, @obj.oh)
    end

  end # class TestUniProtKB_P03589
end # module Bio
378
+