bio 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +954 -0
- data/KNOWN_ISSUES.rdoc +40 -5
- data/README.rdoc +36 -35
- data/RELEASE_NOTES.rdoc +87 -59
- data/bioruby.gemspec +24 -2
- data/doc/RELEASE_NOTES-1.4.1.rdoc +104 -0
- data/doc/Tutorial.rd +162 -200
- data/doc/Tutorial.rd.html +149 -146
- data/lib/bio.rb +1 -0
- data/lib/bio/appl/blast.rb +1 -1
- data/lib/bio/appl/blast/ddbj.rb +26 -34
- data/lib/bio/appl/blast/genomenet.rb +21 -11
- data/lib/bio/db/embl/sptr.rb +193 -21
- data/lib/bio/db/fasta.rb +1 -1
- data/lib/bio/db/fastq.rb +14 -0
- data/lib/bio/db/fastq/format_fastq.rb +2 -2
- data/lib/bio/db/genbank/ddbj.rb +1 -2
- data/lib/bio/db/genbank/format_genbank.rb +1 -1
- data/lib/bio/db/medline.rb +1 -0
- data/lib/bio/db/newick.rb +3 -1
- data/lib/bio/db/pdb/pdb.rb +9 -9
- data/lib/bio/db/pdb/residue.rb +2 -2
- data/lib/bio/io/ddbjrest.rb +344 -0
- data/lib/bio/io/ncbirest.rb +121 -1
- data/lib/bio/location.rb +2 -2
- data/lib/bio/reference.rb +3 -4
- data/lib/bio/shell/plugin/entry.rb +7 -3
- data/lib/bio/shell/plugin/ncbirest.rb +5 -1
- data/lib/bio/util/restriction_enzyme.rb +3 -0
- data/lib/bio/util/restriction_enzyme/dense_int_array.rb +195 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +7 -7
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +57 -18
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +2 -2
- data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +219 -0
- data/lib/bio/version.rb +1 -1
- data/sample/test_restriction_enzyme_long.rb +4403 -0
- data/test/data/fasta/EFTU_BACSU.fasta +8 -0
- data/test/data/genbank/CAA35997.gp +48 -0
- data/test/data/genbank/SCU49845.gb +167 -0
- data/test/data/litdb/1717226.litdb +13 -0
- data/test/data/pir/CRAB_ANAPL.pir +6 -0
- data/test/functional/bio/appl/blast/test_remote.rb +93 -0
- data/test/functional/bio/appl/test_blast.rb +61 -0
- data/test/functional/bio/io/test_ddbjrest.rb +47 -0
- data/test/functional/bio/test_command.rb +3 -3
- data/test/unit/bio/db/embl/test_sptr.rb +6 -6
- data/test/unit/bio/db/embl/test_uniprot_new_part.rb +208 -0
- data/test/unit/bio/db/genbank/test_common.rb +274 -0
- data/test/unit/bio/db/genbank/test_genbank.rb +401 -0
- data/test/unit/bio/db/genbank/test_genpept.rb +81 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +3287 -11
- data/test/unit/bio/db/test_fasta.rb +34 -12
- data/test/unit/bio/db/test_fastq.rb +26 -0
- data/test/unit/bio/db/test_litdb.rb +95 -0
- data/test/unit/bio/db/test_medline.rb +1 -0
- data/test/unit/bio/db/test_nbrf.rb +82 -0
- data/test/unit/bio/db/test_newick.rb +22 -4
- data/test/unit/bio/test_reference.rb +35 -0
- data/test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb +201 -0
- data/test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb +281 -0
- metadata +44 -38
data/lib/bio.rb
CHANGED
data/lib/bio/appl/blast.rb
CHANGED
@@ -30,7 +30,7 @@ module Bio
|
|
30
30
|
#
|
31
31
|
# # To run an actual BLAST analysis:
|
32
32
|
# # 1. create a BLAST factory
|
33
|
-
# remote_blast_factory = Bio::Blast.remote('blastp', '
|
33
|
+
# remote_blast_factory = Bio::Blast.remote('blastp', 'swissprot',
|
34
34
|
# '-e 0.0001', 'genomenet')
|
35
35
|
# #or:
|
36
36
|
# local_blast_factory = Bio::Blast.local('blastn','/path/to/db')
|
data/lib/bio/appl/blast/ddbj.rb
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/appl/blast/remote'
|
11
|
-
require 'bio/io/
|
11
|
+
require 'bio/io/ddbjrest'
|
12
12
|
|
13
13
|
module Bio::Blast::Remote
|
14
14
|
|
@@ -37,32 +37,43 @@ module Bio::Blast::Remote
|
|
37
37
|
if defined? @parse_databases
|
38
38
|
return nil if @parse_databases
|
39
39
|
end
|
40
|
-
drv = Bio::DDBJ::
|
40
|
+
drv = Bio::DDBJ::REST::Blast.new
|
41
41
|
str = drv.getSupportDatabaseList
|
42
42
|
|
43
43
|
databases = {}
|
44
44
|
dbdescs = {}
|
45
|
-
|
45
|
+
keys = [ 'blastn', 'blastp' ]
|
46
|
+
keys.each do |key|
|
47
|
+
databases[key] ||= []
|
48
|
+
dbdescs[key] ||= {}
|
49
|
+
end
|
46
50
|
prefix = ''
|
47
|
-
|
48
|
-
dbdescs[key] ||= {}
|
51
|
+
prefix_count = 0
|
49
52
|
str.each_line do |line|
|
50
53
|
a = line.strip.split(/\s*\-\s*/, 2)
|
51
54
|
case a.size
|
52
55
|
when 1
|
53
56
|
prefix = a[0].to_s.strip
|
54
57
|
prefix += ': ' unless prefix.empty?
|
55
|
-
|
58
|
+
prefix_count = 0
|
56
59
|
next #each_line
|
57
60
|
when 0
|
58
|
-
prefix = ''
|
59
|
-
key = 'blastp'
|
60
|
-
databases[key] ||= []
|
61
|
-
dbdescs[key] ||= {}
|
61
|
+
prefix = '' if prefix_count > 0
|
62
62
|
next #each_line
|
63
63
|
end
|
64
64
|
name = a[0].to_s.strip.freeze
|
65
|
-
desc =
|
65
|
+
desc = a[1].to_s.strip
|
66
|
+
key = case desc
|
67
|
+
when /\(NT\)\s*$/
|
68
|
+
'blastn'
|
69
|
+
when /\(AA\)\s*$/
|
70
|
+
'blastp'
|
71
|
+
else
|
72
|
+
warn "DDBJ BLAST: could not determine the database is NT or AA: #{line.chomp}" if $VERBOSE
|
73
|
+
next #each_line
|
74
|
+
end
|
75
|
+
desc = (prefix + desc).freeze
|
76
|
+
prefix_count += 1
|
66
77
|
databases[key].push name
|
67
78
|
dbdescs[key][name] = desc
|
68
79
|
end
|
@@ -96,11 +107,9 @@ module Bio::Blast::Remote
|
|
96
107
|
options = make_command_line_options
|
97
108
|
opt = Bio::Blast::NCBIOptions.new(options)
|
98
109
|
|
99
|
-
#
|
100
|
-
@ddbj_remote_blast ||= Bio::DDBJ::
|
101
|
-
|
102
|
-
# always use REST version to prevent warning messages
|
103
|
-
@ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager::REST.new
|
110
|
+
# REST objects are cached
|
111
|
+
@ddbj_remote_blast ||= Bio::DDBJ::REST::Blast.new
|
112
|
+
@ddbj_request_manager ||= Bio::DDBJ::REST::RequestManager.new
|
104
113
|
|
105
114
|
program = opt.delete('-p')
|
106
115
|
db = opt.delete('-d')
|
@@ -110,24 +119,7 @@ module Bio::Blast::Remote
|
|
110
119
|
qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
|
111
120
|
@output = qid
|
112
121
|
|
113
|
-
|
114
|
-
flag = true
|
115
|
-
while flag
|
116
|
-
if $VERBOSE then
|
117
|
-
$stderr.puts "DDBJ BLAST: ID: #{qid} -- waitng #{sleeptime} sec."
|
118
|
-
end
|
119
|
-
sleep(sleeptime)
|
120
|
-
|
121
|
-
result = @ddbj_request_manager.getAsyncResult(qid)
|
122
|
-
case result.to_s
|
123
|
-
when /The search and analysis service by WWW is very busy now/
|
124
|
-
raise result.to_s.strip + '(Alternatively, wrong options may be given.)'
|
125
|
-
when /Your job has not completed yet/
|
126
|
-
sleeptime = 5
|
127
|
-
else
|
128
|
-
flag = false
|
129
|
-
end
|
130
|
-
end while flag
|
122
|
+
result = @ddbj_request_manager.wait_getAsyncResult(qid)
|
131
123
|
|
132
124
|
@output = result
|
133
125
|
return @output
|
@@ -69,11 +69,11 @@ module Bio::Blast::Remote
|
|
69
69
|
#
|
70
70
|
# * http://www.ncbi.nlm.nih.gov/blast/
|
71
71
|
# * http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/similarity.html
|
72
|
-
# * http://
|
72
|
+
# * http://www.genome.jp/tools/blast/
|
73
73
|
#
|
74
74
|
module GenomeNet
|
75
75
|
|
76
|
-
Host = "
|
76
|
+
Host = "www.genome.jp".freeze
|
77
77
|
|
78
78
|
# Creates a remote BLAST factory using GenomeNet.
|
79
79
|
# Returns Bio::Blast object.
|
@@ -100,7 +100,7 @@ module Bio::Blast::Remote
|
|
100
100
|
key = nil
|
101
101
|
host = Bio::Blast::Remote::Genomenet::Host
|
102
102
|
http = Bio::Command.new_http(host)
|
103
|
-
result = http.get('/')
|
103
|
+
result = http.get('/tools/blast/')
|
104
104
|
#p result.body
|
105
105
|
result.body.each_line do |line|
|
106
106
|
case line
|
@@ -166,11 +166,20 @@ module Bio::Blast::Remote
|
|
166
166
|
program = opt.delete('-p')
|
167
167
|
db = opt.delete('-d')
|
168
168
|
|
169
|
+
# When database name starts with mine-aa or mine-nt,
|
170
|
+
# space-separated list of KEGG organism codes can be given.
|
171
|
+
# For example, "mine-aa eco bsu hsa".
|
172
|
+
if /\A(mine-(aa|nt))\s+/ =~ db.to_s then
|
173
|
+
db = $1
|
174
|
+
myspecies = {}
|
175
|
+
myspecies["myspecies-#{$2}"] = $'
|
176
|
+
end
|
177
|
+
|
169
178
|
matrix = opt.delete('-M') || 'blosum62'
|
170
179
|
filter = opt.delete('-F') || 'T'
|
171
180
|
|
172
|
-
|
173
|
-
|
181
|
+
opt_v = opt.delete('-v') || 500 # default value for GenomeNet
|
182
|
+
opt_b = opt.delete('-b') || 250 # default value for GenomeNet
|
174
183
|
|
175
184
|
# format, not for form parameters, but included in option string
|
176
185
|
opt_m = opt.get('-m') || '7' # default of BioRuby GenomeNet factory
|
@@ -186,11 +195,13 @@ module Bio::Blast::Remote
|
|
186
195
|
'other_param' => optstr,
|
187
196
|
'matrix' => matrix,
|
188
197
|
'filter' => filter,
|
189
|
-
'V_value' =>
|
190
|
-
'B_value' =>
|
198
|
+
'V_value' => opt_v,
|
199
|
+
'B_value' => opt_b,
|
191
200
|
'alignment_view' => 0,
|
192
201
|
}
|
193
202
|
|
203
|
+
form.merge!(myspecies) if myspecies
|
204
|
+
|
194
205
|
form.keys.each do |k|
|
195
206
|
form.delete(k) unless form[k]
|
196
207
|
end
|
@@ -227,10 +238,9 @@ module Bio::Blast::Remote
|
|
227
238
|
end
|
228
239
|
end
|
229
240
|
|
230
|
-
# workaround 2005.08.12
|
231
|
-
if /\<A +HREF=\"(http\:\/\/
|
232
|
-
|
233
|
-
@output = result.body
|
241
|
+
# workaround 2005.08.12 + 2011.01.27
|
242
|
+
if /\<A +HREF=\"(http\:\/\/[\-\.a-z0-9]+\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
|
243
|
+
@output = Bio::Command.read_uri($1)
|
234
244
|
txt = @output.to_s.split(/\<pre\>/)[1]
|
235
245
|
raise 'cannot understand response' unless txt
|
236
246
|
txt.sub!(/\<\/pre\>.*\z/m, '')
|
data/lib/bio/db/embl/sptr.rb
CHANGED
@@ -50,7 +50,18 @@ class SPTR < EMBLDB
|
|
50
50
|
# returns a content (Int or String) of the ID line by a given key.
|
51
51
|
# Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MOLECULE_TYPE', 'SEQUENCE_LENGTH']
|
52
52
|
#
|
53
|
-
# === ID Line
|
53
|
+
# === ID Line (since UniProtKB release 9.0 of 31-Oct-2006)
|
54
|
+
# ID P53_HUMAN Reviewed; 393 AA.
|
55
|
+
# #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."
|
56
|
+
#
|
57
|
+
# === Examples
|
58
|
+
# obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed",
|
59
|
+
# "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}
|
60
|
+
#
|
61
|
+
# obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
|
62
|
+
#
|
63
|
+
#
|
64
|
+
# === ID Line (older style)
|
54
65
|
# ID P53_HUMAN STANDARD; PRT; 393 AA.
|
55
66
|
# #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
|
56
67
|
#
|
@@ -65,11 +76,20 @@ class SPTR < EMBLDB
|
|
65
76
|
return @data['ID'] if @data['ID']
|
66
77
|
|
67
78
|
part = @orig['ID'].split(/ +/)
|
79
|
+
if part[4].to_s.chomp == 'AA.' then
|
80
|
+
# after UniProtKB release 9.0 of 31-Oct-2006
|
81
|
+
# (http://www.uniprot.org/docs/sp_news.htm)
|
82
|
+
molecule_type = nil
|
83
|
+
sequence_length = part[3].to_i
|
84
|
+
else
|
85
|
+
molecule_type = part[3].sub(/;/,'')
|
86
|
+
sequence_length = part[4].to_i
|
87
|
+
end
|
68
88
|
@data['ID'] = {
|
69
89
|
'ENTRY_NAME' => part[1],
|
70
90
|
'DATA_CLASS' => part[2].sub(/;/,''),
|
71
|
-
'MOLECULE_TYPE' =>
|
72
|
-
'SEQUENCE_LENGTH' =>
|
91
|
+
'MOLECULE_TYPE' => molecule_type,
|
92
|
+
'SEQUENCE_LENGTH' => sequence_length
|
73
93
|
}
|
74
94
|
end
|
75
95
|
|
@@ -111,12 +131,27 @@ class SPTR < EMBLDB
|
|
111
131
|
# returns a Hash of information in the DT lines.
|
112
132
|
# hash keys:
|
113
133
|
# ['created', 'sequence', 'annotation']
|
134
|
+
#--
|
114
135
|
# also Symbols acceptable (ASAP):
|
115
136
|
# [:created, :sequence, :annotation]
|
137
|
+
#++
|
116
138
|
#
|
117
|
-
#
|
139
|
+
# Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is
|
140
|
+
# changed, and the word "annotation" is no longer used in DT lines.
|
141
|
+
# Despite the change, the word "annotation" is still used for keeping
|
142
|
+
# compatibility.
|
143
|
+
#
|
144
|
+
# returns a String of information in the DT lines by a given key.
|
118
145
|
#
|
119
146
|
# === DT Line; date (3/entry)
|
147
|
+
# DT DD-MMM-YYY (integrated into UniProtKB/XXXXX.)
|
148
|
+
# DT DD-MMM-YYY (sequence version NN)
|
149
|
+
# DT DD-MMM-YYY (entry version NN)
|
150
|
+
#
|
151
|
+
# The format has been changed in UniProtKB release 7.0 of 07-Feb-2006.
|
152
|
+
# Below is the older format.
|
153
|
+
#
|
154
|
+
# === Old format of DT Line; date (3/entry)
|
120
155
|
# DT DD-MMM-YYY (rel. NN, Created)
|
121
156
|
# DT DD-MMM-YYY (rel. NN, Last sequence update)
|
122
157
|
# DT DD-MMM-YYY (rel. NN, Last annotation update)
|
@@ -133,7 +168,79 @@ class SPTR < EMBLDB
|
|
133
168
|
end
|
134
169
|
|
135
170
|
|
171
|
+
# (private) parses DE line (description lines)
|
172
|
+
# since UniProtKB release 14.0 of 22-Jul-2008
|
173
|
+
#
|
174
|
+
# Returns an Array of Arrays.
|
175
|
+
#
|
176
|
+
# http://www.uniprot.org/docs/sp_news.htm
|
177
|
+
def parse_DE_line_rel14(str)
|
178
|
+
# Returns nil if it is not the new format since Rel.14
|
179
|
+
return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
|
180
|
+
ret = []
|
181
|
+
cur = nil
|
182
|
+
str.each_line do |line|
|
183
|
+
case line
|
184
|
+
when /^DE (Includes|Contains)\: *$/
|
185
|
+
cur = [ $1 ]
|
186
|
+
ret.push cur
|
187
|
+
cur = nil
|
188
|
+
#subcat_and_desc = nil
|
189
|
+
next
|
190
|
+
when /^DE *(RecName|AltName|SubName)\: +(.*)/
|
191
|
+
category = $1
|
192
|
+
subcat_and_desc = $2
|
193
|
+
cur = [ category ]
|
194
|
+
ret.push cur
|
195
|
+
when /^DE *(Flags)\: +(.*)/
|
196
|
+
category = $1
|
197
|
+
desc = $2
|
198
|
+
flags = desc.strip.split(/\s*\;\s*/) || []
|
199
|
+
cur = [ category, flags ]
|
200
|
+
ret.push cur
|
201
|
+
cur = nil
|
202
|
+
#subcat_and_desc = nil
|
203
|
+
next
|
204
|
+
when /^DE *(.*)/
|
205
|
+
subcat_and_desc = $1
|
206
|
+
else
|
207
|
+
warn "Warning: skipped DE line in unknown format: #{line.inspect}"
|
208
|
+
#subcat_and_desc = nil
|
209
|
+
next
|
210
|
+
end
|
211
|
+
case subcat_and_desc
|
212
|
+
when nil
|
213
|
+
# does nothing
|
214
|
+
when /\A([^\=]+)\=(.*)/
|
215
|
+
subcat = $1
|
216
|
+
desc = $2
|
217
|
+
desc.sub!(/\;\s*\z/, '')
|
218
|
+
unless cur
|
219
|
+
warn "Warning: unknown category in DE line: #{line.inspect}"
|
220
|
+
cur = [ '' ]
|
221
|
+
ret.push cur
|
222
|
+
end
|
223
|
+
cur.push [ subcat, desc ]
|
224
|
+
else
|
225
|
+
warn "Warning: skipped DE line description in unknown format: #{line.inspect}"
|
226
|
+
end
|
227
|
+
end
|
228
|
+
ret
|
229
|
+
end
|
230
|
+
private :parse_DE_line_rel14
|
231
|
+
|
136
232
|
# returns the proposed official name of the protein.
|
233
|
+
# Returns a String.
|
234
|
+
#
|
235
|
+
# Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has
|
236
|
+
# been changed. The method returns the full name which is taken from
|
237
|
+
# "RecName: Full=" or "SubName: Full=" line normally in the beginning of
|
238
|
+
# the DE lines.
|
239
|
+
# Unlike parser for old format, no special treatments for fragment or
|
240
|
+
# precursor.
|
241
|
+
#
|
242
|
+
# For old format, the method parses the DE lines and returns the protein
|
243
|
+
# name as a String.
|
137
244
|
#
|
138
245
|
# === DE Line; description (>=1)
|
139
246
|
# "DE #{OFFICIAL_NAME} (#{SYNONYM})"
|
@@ -142,27 +249,83 @@ class SPTR < EMBLDB
|
|
142
249
|
# SYNONYM >=0
|
143
250
|
# CONTAINS >=0
|
144
251
|
def protein_name
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
name
|
252
|
+
@data['DE'] ||= parse_DE_line_rel14(get('DE'))
|
253
|
+
parsed_de_line = @data['DE']
|
254
|
+
if parsed_de_line then
|
255
|
+
# since UniProtKB release 14.0 of 22-Jul-2008
|
256
|
+
name = nil
|
257
|
+
parsed_de_line.each do |a|
|
258
|
+
case a[0]
|
259
|
+
when 'RecName', 'SubName'
|
260
|
+
if name_pair = a[1..-1].find { |b| b[0] == 'Full' } then
|
261
|
+
name = name_pair[1]
|
262
|
+
break
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|
266
|
+
name = name.to_s
|
267
|
+
else
|
268
|
+
# old format (before Rel. 13.x)
|
269
|
+
name = ""
|
270
|
+
if de_line = fetch('DE') then
|
271
|
+
str = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
|
272
|
+
name = str[/^[^(]*/].strip
|
273
|
+
name << ' (Fragment)' if str =~ /fragment/i
|
274
|
+
end
|
150
275
|
end
|
151
276
|
return name
|
152
277
|
end
|
153
278
|
|
154
279
|
|
155
|
-
# returns
|
280
|
+
# returns synonyms (unofficial and/or alternative names).
|
281
|
+
# Returns an Array containing String objects.
|
282
|
+
#
|
283
|
+
# Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has
|
284
|
+
# been changed. The method returns the full or short names which are
|
285
|
+
# taken from "RecName: Short=", "RecName: EC=", and AltName lines,
|
286
|
+
# except after "Contains:" or "Includes:".
|
287
|
+
# For keeping compatibility with old format parser, "RecName: EC=N.N.N.N"
|
288
|
+
# is reported as "EC N.N.N.N".
|
289
|
+
# In addition, to prevent confusion, "Allergen=" and "CD_antigen="
|
290
|
+
# prefixes are added for the corresponding fields.
|
156
291
|
#
|
292
|
+
# For old format, the method parses the DE lines and returns synonyms.
|
157
293
|
# synonyms are each placed in () following the official name on the DE line.
|
158
294
|
def synonyms
|
159
295
|
ary = Array.new
|
160
|
-
|
161
|
-
|
296
|
+
@data['DE'] ||= parse_DE_line_rel14(get('DE'))
|
297
|
+
parsed_de_line = @data['DE']
|
298
|
+
if parsed_de_line then
|
299
|
+
# since UniProtKB release 14.0 of 22-Jul-2008
|
300
|
+
parsed_de_line.each do |a|
|
301
|
+
case a[0]
|
302
|
+
when 'Includes', 'Contains'
|
303
|
+
break #the each loop
|
304
|
+
when 'RecName', 'SubName', 'AltName'
|
305
|
+
a[1..-1].each do |b|
|
306
|
+
if name = b[1] and b[1] != self.protein_name then
|
307
|
+
case b[0]
|
308
|
+
when 'EC'
|
309
|
+
name = "EC " + b[1]
|
310
|
+
when 'Allergen', 'CD_antigen'
|
311
|
+
name = b[0] + '=' + b[1]
|
312
|
+
else
|
313
|
+
name = b[1]
|
314
|
+
end
|
315
|
+
ary.push name
|
316
|
+
end
|
317
|
+
end
|
318
|
+
end #case a[0]
|
319
|
+
end #parsed_de_line.each
|
320
|
+
else
|
321
|
+
# old format (before Rel. 13.x)
|
322
|
+
if de_line = fetch('DE') then
|
323
|
+
line = de_line.sub(/\[.*\]/,'') # ignore stuff between [ and ]. That's the "contains" part
|
162
324
|
line.scan(/\([^)]+/) do |synonym|
|
163
325
|
unless synonym =~ /fragment/i then
|
164
326
|
ary << synonym[1..-1].strip # index to remove the leading (
|
165
327
|
end
|
328
|
+
end
|
166
329
|
end
|
167
330
|
end
|
168
331
|
return ary
|
@@ -919,25 +1082,34 @@ class SPTR < EMBLDB
|
|
919
1082
|
end
|
920
1083
|
private :cc_subcellular_location
|
921
1084
|
|
922
|
-
|
923
|
-
|
1085
|
+
|
1086
|
+
#--
|
1087
|
+
# Since UniProtKB release 12.2 of 11-Sep-2007:
|
1088
|
+
# CC -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress]. # Old format:
|
1089
|
+
# CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
|
1090
|
+
#++
|
1091
|
+
|
924
1092
|
def cc_web_resource(data)
|
925
1093
|
data.map {|x|
|
926
|
-
entry = {'
|
1094
|
+
entry = {'Name' => nil, 'Note' => nil, 'URL' => nil}
|
927
1095
|
x.split(';').each do |y|
|
928
1096
|
case y
|
929
|
-
when /
|
930
|
-
|
931
|
-
|
932
|
-
entry[
|
933
|
-
when /
|
1097
|
+
when /(Name|Note)\=(.+)/
|
1098
|
+
key = $1
|
1099
|
+
val = $2.strip
|
1100
|
+
entry[key] = val
|
1101
|
+
when /(NAME|NOTE)\=(.+)/
|
1102
|
+
key = $1.downcase.capitalize
|
1103
|
+
val = $2.strip
|
1104
|
+
entry[key] = val
|
1105
|
+
when /URL\=\"(.+)\"/
|
934
1106
|
entry['URL'] = $1.strip
|
935
1107
|
end
|
936
1108
|
end
|
937
1109
|
entry
|
938
1110
|
}
|
939
1111
|
end
|
940
|
-
|
1112
|
+
private :cc_web_resource
|
941
1113
|
|
942
1114
|
# returns databases cross-references in the DR lines.
|
943
1115
|
# * Bio::SPTR#dr -> Hash w/in Array
|