bio 2.0.1 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +313 -0
- data/KNOWN_ISSUES.rdoc +1 -1
- data/LEGAL +9 -0
- data/README.rdoc +3 -3
- data/RELEASE_NOTES.rdoc +59 -0
- data/bioruby.gemspec +4 -2
- data/bioruby.gemspec.erb +0 -1
- data/lib/bio/appl/iprscan/report.rb +3 -3
- data/lib/bio/appl/sosui/report.rb +1 -1
- data/lib/bio/db/embl/uniprotkb.rb +137 -12
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/sequence/common.rb +112 -0
- data/lib/bio/sequence/format.rb +1 -0
- data/lib/bio/tree.rb +1 -1
- data/lib/bio/version.rb +1 -1
- data/test/data/uniprot/P28907.uniprot +551 -0
- data/test/unit/bio/db/embl/test_uniprotkb_P28907.rb +325 -0
- data/test/unit/bio/sequence/test_ruby3.rb +462 -0
- metadata +6 -4
@@ -0,0 +1,325 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
3
|
+
# test/unit/bio/db/embl/test_uniprotkb_P28907.rb - Unit tests for Bio::UniProtKB
|
4
|
+
#
|
5
|
+
# Copyright:: Copyright (C) 2022 BioRuby Project <staff@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
# Contributor:: 2005 Mitsuteru Nakao <n@bioruby.org>
|
8
|
+
# 2022 Naohisa Goto <ng@bioruby.org>
|
9
|
+
#
|
10
|
+
|
11
|
+
# loading helper routine for testing bioruby
|
12
|
+
require 'pathname'
|
13
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
14
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
15
|
+
|
16
|
+
# libraries needed for the tests
|
17
|
+
require 'test/unit'
|
18
|
+
require 'bio/db/embl/uniprotkb'
|
19
|
+
|
20
|
+
module Bio
|
21
|
+
class TestUniProtKB_P28907 < Test::Unit::TestCase
|
22
|
+
|
23
|
+
def setup
|
24
|
+
data = File.read(File.join(BioRubyTestDataPath,
|
25
|
+
'uniprot', 'P28907.uniprot'))
|
26
|
+
@obj = Bio::UniProtKB.new(data)
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_id_line
|
30
|
+
assert(@obj.id_line)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_id_line_entry_name
|
34
|
+
assert_equal('CD38_HUMAN', @obj.id_line('ENTRY_NAME'))
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_id_line_data_class
|
38
|
+
assert_equal('Reviewed', @obj.id_line('DATA_CLASS'))
|
39
|
+
end
|
40
|
+
|
41
|
+
#def test_id_line_molecule_type
|
42
|
+
# assert_equal('PRT', @obj.id_line('MOLECULE_TYPE'))
|
43
|
+
#end
|
44
|
+
|
45
|
+
def test_id_line_sequence_length
|
46
|
+
assert_equal(300, @obj.id_line('SEQUENCE_LENGTH'))
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_entry
|
50
|
+
entry = 'CD38_HUMAN'
|
51
|
+
assert_equal(entry, @obj.entry)
|
52
|
+
assert_equal(entry, @obj.entry_name)
|
53
|
+
assert_equal(entry, @obj.entry_id)
|
54
|
+
end
|
55
|
+
|
56
|
+
#def test_molecule
|
57
|
+
# assert_equal('PRT', @obj.molecule)
|
58
|
+
# assert_equal('PRT', @obj.molecule_type)
|
59
|
+
#end
|
60
|
+
|
61
|
+
def test_sequence_length
|
62
|
+
seqlen = 300
|
63
|
+
assert_equal(seqlen, @obj.sequence_length)
|
64
|
+
assert_equal(seqlen, @obj.aalen)
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_ac
|
68
|
+
acs = ["P28907", "O00121", "O00122", "Q96HY4"].freeze
|
69
|
+
assert_equal(acs, @obj.ac)
|
70
|
+
assert_equal(acs, @obj.accessions)
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_accession
|
74
|
+
assert_equal('P28907', @obj.accession)
|
75
|
+
end
|
76
|
+
|
77
|
+
def test_dr
|
78
|
+
assert_equal(81, @obj.dr.size)
|
79
|
+
assert_equal(39, @obj.dr['GO'].size)
|
80
|
+
assert_equal([["IPR003193", "ADP-ribosyl_cyclase"],
|
81
|
+
["IPR033567", "CD38"]],
|
82
|
+
@obj.dr['InterPro'])
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_dr_with_key
|
86
|
+
pfam = [{" " => "1",
|
87
|
+
"Version" => "Rib_hydrolayse",
|
88
|
+
"Accession" => "PF02267",
|
89
|
+
"Molecular Type" => nil
|
90
|
+
}].freeze
|
91
|
+
assert_equal(pfam, @obj.dr('Pfam'))
|
92
|
+
embl = [{"Accession" => "M34461",
|
93
|
+
"Version" => "AAA68482.1",
|
94
|
+
" " => "-",
|
95
|
+
"Molecular Type" => "mRNA"},
|
96
|
+
{"Accession" => "D84276",
|
97
|
+
"Version" => "BAA18964.1",
|
98
|
+
" " => "-",
|
99
|
+
"Molecular Type" => "mRNA"},
|
100
|
+
{"Accession" => "D84277",
|
101
|
+
"Version" => "BAA18965.1",
|
102
|
+
" " => "-",
|
103
|
+
"Molecular Type" => "mRNA"},
|
104
|
+
{"Accession" => "D84284",
|
105
|
+
"Version" => "BAA18966.1",
|
106
|
+
" " => "-",
|
107
|
+
"Molecular Type" => "Genomic_DNA"},
|
108
|
+
{"Accession" => "BC007964",
|
109
|
+
"Version" => "AAH07964.1",
|
110
|
+
" " => "-",
|
111
|
+
"Molecular Type" => "mRNA"}].freeze
|
112
|
+
assert_equal(embl, @obj.dr('EMBL'))
|
113
|
+
end
|
114
|
+
|
115
|
+
def test_dr_with_key_empty
|
116
|
+
assert_equal([], @obj.dr('NOT_A_DATABASE'))
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_dt
|
120
|
+
assert(@obj.dt)
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_dt_created
|
124
|
+
assert_equal('01-DEC-1992, integrated into UniProtKB/Swiss-Prot.',
|
125
|
+
@obj.dt('created'))
|
126
|
+
end
|
127
|
+
|
128
|
+
def test_dt_sequence
|
129
|
+
assert_equal('23-NOV-2004, sequence version 2.',
|
130
|
+
@obj.dt('sequence'))
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_dt_annotation
|
134
|
+
assert_equal('03-AUG-2022, entry version 213.',
|
135
|
+
@obj.dt('annotation'))
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_de
|
139
|
+
assert(@obj.de)
|
140
|
+
end
|
141
|
+
|
142
|
+
def test_protein_name
|
143
|
+
assert_equal("ADP-ribosyl cyclase/cyclic ADP-ribose hydrolase 1",
|
144
|
+
@obj.protein_name)
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_synonyms
|
148
|
+
ary = [
|
149
|
+
"EC 3.2.2.6",
|
150
|
+
"2'-phospho-ADP-ribosyl cyclase",
|
151
|
+
"2'-phospho-ADP-ribosyl cyclase/2'-phospho-cyclic-ADP-ribose transferase",
|
152
|
+
"EC 2.4.99.20",
|
153
|
+
"2'-phospho-cyclic-ADP-ribose transferase",
|
154
|
+
"ADP-ribosyl cyclase 1",
|
155
|
+
"ADPRC 1",
|
156
|
+
"Cyclic ADP-ribose hydrolase 1",
|
157
|
+
"cADPr hydrolase 1",
|
158
|
+
"T10",
|
159
|
+
"CD_antigen=CD38"
|
160
|
+
].freeze
|
161
|
+
assert_equal(ary, @obj.synonyms)
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_protein_name_after_calling_de
|
165
|
+
assert(@obj.de)
|
166
|
+
assert_equal("ADP-ribosyl cyclase/cyclic ADP-ribose hydrolase 1",
|
167
|
+
@obj.protein_name)
|
168
|
+
end
|
169
|
+
|
170
|
+
def test_synonyms_after_calling_de
|
171
|
+
assert(@obj.de)
|
172
|
+
assert_equal(11, @obj.synonyms.size)
|
173
|
+
end
|
174
|
+
|
175
|
+
def test_gn
|
176
|
+
assert_equal([{:orfs=>[], :synonyms=>[], :name=>"CD38", :loci=>[]}],
|
177
|
+
@obj.gn)
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_gn_uniprot_parser
|
181
|
+
assert_equal([{:orfs=>[], :loci=>[], :name=>"CD38", :synonyms=>[]}],
|
182
|
+
@obj.instance_eval("gn_uniprot_parser"))
|
183
|
+
end
|
184
|
+
|
185
|
+
def test_gn_old_parser
|
186
|
+
assert_equal([["Name=CD38;"]],
|
187
|
+
@obj.instance_eval("gn_old_parser"))
|
188
|
+
end
|
189
|
+
|
190
|
+
def test_gene_names
|
191
|
+
assert_equal(["CD38"], @obj.gene_names)
|
192
|
+
end
|
193
|
+
|
194
|
+
def test_gene_name
|
195
|
+
assert_equal('CD38', @obj.gene_name)
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_os
|
199
|
+
assert(@obj.os)
|
200
|
+
end
|
201
|
+
|
202
|
+
def test_os_access
|
203
|
+
assert_equal("Homo sapiens (Human)", @obj.os(0))
|
204
|
+
end
|
205
|
+
|
206
|
+
def test_os_access2
|
207
|
+
assert_equal({"name"=>"(Human)", "os"=>"Homo sapiens"}, @obj.os[0])
|
208
|
+
end
|
209
|
+
|
210
|
+
def test_oc
|
211
|
+
assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata",
|
212
|
+
"Vertebrata", "Euteleostomi", "Mammalia", "Eutheria",
|
213
|
+
"Euarchontoglires", "Primates",
|
214
|
+
"Haplorrhini", "Catarrhini", "Hominidae", "Homo"],
|
215
|
+
@obj.oc)
|
216
|
+
end
|
217
|
+
|
218
|
+
def test_ox
|
219
|
+
assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
|
220
|
+
end
|
221
|
+
|
222
|
+
def test_ref # Bio::UniProtKB#ref
|
223
|
+
assert_equal(Array, @obj.ref.class)
|
224
|
+
end
|
225
|
+
|
226
|
+
def test_cc
|
227
|
+
assert_equal(Hash, @obj.cc.class)
|
228
|
+
end
|
229
|
+
|
230
|
+
def test_cc_database
|
231
|
+
assert_equal(nil, @obj.cc('DATABASE'))
|
232
|
+
end
|
233
|
+
|
234
|
+
def test_cc_alternative_products
|
235
|
+
ap = { "Event"=>["Alternative splicing"],
|
236
|
+
"Named isoforms"=>"2",
|
237
|
+
"Comment"=>"",
|
238
|
+
"Variants"=>
|
239
|
+
[{"Name"=>"1",
|
240
|
+
"Synonyms"=>[],
|
241
|
+
"IsoId"=>["P28907-1"],
|
242
|
+
"Sequence"=>["Displayed"]},
|
243
|
+
{"Name"=>"2",
|
244
|
+
"Synonyms"=>[],
|
245
|
+
"IsoId"=>["P28907-2"],
|
246
|
+
"Sequence"=>["VSP_000707", "VSP_000708"]}]}
|
247
|
+
assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS'))
|
248
|
+
end
|
249
|
+
|
250
|
+
def test_cc_mass_spectrometry
|
251
|
+
assert_equal(nil, @obj.cc('MASS SPECTROMETRY'))
|
252
|
+
end
|
253
|
+
|
254
|
+
|
255
|
+
def test_kw
|
256
|
+
keywords = ["3D-structure", "Alternative splicing",
|
257
|
+
"Diabetes mellitus", "Disulfide bond",
|
258
|
+
"Glycoprotein", "Hydrolase", "Membrane",
|
259
|
+
"NAD", "NADP", "Receptor", "Reference proteome",
|
260
|
+
"Signal-anchor", "Transferase", "Transmembrane",
|
261
|
+
"Transmembrane helix"]
|
262
|
+
assert_equal(keywords, @obj.kw)
|
263
|
+
end
|
264
|
+
|
265
|
+
def test_ft
|
266
|
+
assert(@obj.ft)
|
267
|
+
name = 'TOPO_DOM'
|
268
|
+
data = [{"From"=>1,
|
269
|
+
"To"=>21,
|
270
|
+
"diff"=>[],
|
271
|
+
"original"=>
|
272
|
+
["TOPO_DOM",
|
273
|
+
"1",
|
274
|
+
"21",
|
275
|
+
[["note", "Cytoplasmic"],
|
276
|
+
["evidence", "ECO:0000255"]]],
|
277
|
+
"note"=>"Cytoplasmic",
|
278
|
+
"evidence"=>"ECO:0000255"},
|
279
|
+
{"From"=>43,
|
280
|
+
"To"=>300,
|
281
|
+
"diff"=>[],
|
282
|
+
"original"=>
|
283
|
+
["TOPO_DOM",
|
284
|
+
"43",
|
285
|
+
"300",
|
286
|
+
[["note", "Extracellular"],
|
287
|
+
["evidence", "ECO:0000255"]]],
|
288
|
+
"note"=>"Extracellular",
|
289
|
+
"evidence"=>"ECO:0000255"}].freeze
|
290
|
+
|
291
|
+
assert_equal(data, @obj.ft[name])
|
292
|
+
end
|
293
|
+
|
294
|
+
def test_sq
|
295
|
+
assert_equal({"CRC64"=>"47BBE38C3DE3E6AA", "aalen"=>300, "MW"=>34328},
|
296
|
+
@obj.sq)
|
297
|
+
end
|
298
|
+
|
299
|
+
def test_sq_crc64
|
300
|
+
assert_equal("47BBE38C3DE3E6AA", @obj.sq('CRC64'))
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_sq_mw
|
304
|
+
mw = 34328
|
305
|
+
assert_equal(mw, @obj.sq('mw'))
|
306
|
+
assert_equal(mw, @obj.sq('molecular'))
|
307
|
+
assert_equal(mw, @obj.sq('weight'))
|
308
|
+
end
|
309
|
+
|
310
|
+
def test_sq_len
|
311
|
+
length = 300
|
312
|
+
assert_equal(length, @obj.sq('len'))
|
313
|
+
assert_equal(length, @obj.sq('length'))
|
314
|
+
assert_equal(length, @obj.sq('AA'))
|
315
|
+
end
|
316
|
+
|
317
|
+
def test_seq
|
318
|
+
seq = "MANCEFSPVSGDKPCCRLSRRAQLCLGVSILVLILVVVLAVVVPRWRQQWSGPGTTKRFPETVLARCVKYTEIHPEMRHVDCQSVWDAFKGAFISKHPCNITEEDYQPLMKLGTQTVPCNKILLWSRIKDLAHQFTQVQRDMFTLEDTLLGYLADDLTWCGEFNTSKINYQSCPDWRKDCSNNPVSVFWKTVSRRFAEAACDVVHVMLNGSRSKIFDKNSTFGSVEVHNLQPEKVQTLEAWVIHGGREDSRDLCQDPTIKELESIISKRNIQFSCKNIYRPDKFLQCVKNPEDSSCTSEI"
|
319
|
+
assert_equal(seq, @obj.seq)
|
320
|
+
assert_equal(seq, @obj.aaseq)
|
321
|
+
end
|
322
|
+
|
323
|
+
end # class TestUniProtKB_P28907
|
324
|
+
end # module Bio
|
325
|
+
|