bio 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +954 -0
- data/KNOWN_ISSUES.rdoc +40 -5
- data/README.rdoc +36 -35
- data/RELEASE_NOTES.rdoc +87 -59
- data/bioruby.gemspec +24 -2
- data/doc/RELEASE_NOTES-1.4.1.rdoc +104 -0
- data/doc/Tutorial.rd +162 -200
- data/doc/Tutorial.rd.html +149 -146
- data/lib/bio.rb +1 -0
- data/lib/bio/appl/blast.rb +1 -1
- data/lib/bio/appl/blast/ddbj.rb +26 -34
- data/lib/bio/appl/blast/genomenet.rb +21 -11
- data/lib/bio/db/embl/sptr.rb +193 -21
- data/lib/bio/db/fasta.rb +1 -1
- data/lib/bio/db/fastq.rb +14 -0
- data/lib/bio/db/fastq/format_fastq.rb +2 -2
- data/lib/bio/db/genbank/ddbj.rb +1 -2
- data/lib/bio/db/genbank/format_genbank.rb +1 -1
- data/lib/bio/db/medline.rb +1 -0
- data/lib/bio/db/newick.rb +3 -1
- data/lib/bio/db/pdb/pdb.rb +9 -9
- data/lib/bio/db/pdb/residue.rb +2 -2
- data/lib/bio/io/ddbjrest.rb +344 -0
- data/lib/bio/io/ncbirest.rb +121 -1
- data/lib/bio/location.rb +2 -2
- data/lib/bio/reference.rb +3 -4
- data/lib/bio/shell/plugin/entry.rb +7 -3
- data/lib/bio/shell/plugin/ncbirest.rb +5 -1
- data/lib/bio/util/restriction_enzyme.rb +3 -0
- data/lib/bio/util/restriction_enzyme/dense_int_array.rb +195 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +7 -7
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +57 -18
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +2 -2
- data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +219 -0
- data/lib/bio/version.rb +1 -1
- data/sample/test_restriction_enzyme_long.rb +4403 -0
- data/test/data/fasta/EFTU_BACSU.fasta +8 -0
- data/test/data/genbank/CAA35997.gp +48 -0
- data/test/data/genbank/SCU49845.gb +167 -0
- data/test/data/litdb/1717226.litdb +13 -0
- data/test/data/pir/CRAB_ANAPL.pir +6 -0
- data/test/functional/bio/appl/blast/test_remote.rb +93 -0
- data/test/functional/bio/appl/test_blast.rb +61 -0
- data/test/functional/bio/io/test_ddbjrest.rb +47 -0
- data/test/functional/bio/test_command.rb +3 -3
- data/test/unit/bio/db/embl/test_sptr.rb +6 -6
- data/test/unit/bio/db/embl/test_uniprot_new_part.rb +208 -0
- data/test/unit/bio/db/genbank/test_common.rb +274 -0
- data/test/unit/bio/db/genbank/test_genbank.rb +401 -0
- data/test/unit/bio/db/genbank/test_genpept.rb +81 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +3287 -11
- data/test/unit/bio/db/test_fasta.rb +34 -12
- data/test/unit/bio/db/test_fastq.rb +26 -0
- data/test/unit/bio/db/test_litdb.rb +95 -0
- data/test/unit/bio/db/test_medline.rb +1 -0
- data/test/unit/bio/db/test_nbrf.rb +82 -0
- data/test/unit/bio/db/test_newick.rb +22 -4
- data/test/unit/bio/test_reference.rb +35 -0
- data/test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb +201 -0
- data/test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb +281 -0
- metadata +44 -38
data/lib/bio.rb
CHANGED
data/lib/bio/appl/blast.rb
CHANGED
@@ -30,7 +30,7 @@ module Bio
|
|
30
30
|
#
|
31
31
|
# # To run an actual BLAST analysis:
|
32
32
|
# # 1. create a BLAST factory
|
33
|
-
# remote_blast_factory = Bio::Blast.remote('blastp', '
|
33
|
+
# remote_blast_factory = Bio::Blast.remote('blastp', 'swissprot',
|
34
34
|
# '-e 0.0001', 'genomenet')
|
35
35
|
# #or:
|
36
36
|
# local_blast_factory = Bio::Blast.local('blastn','/path/to/db')
|
data/lib/bio/appl/blast/ddbj.rb
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/appl/blast/remote'
|
11
|
-
require 'bio/io/
|
11
|
+
require 'bio/io/ddbjrest'
|
12
12
|
|
13
13
|
module Bio::Blast::Remote
|
14
14
|
|
@@ -37,32 +37,43 @@ module Bio::Blast::Remote
|
|
37
37
|
if defined? @parse_databases
|
38
38
|
return nil if @parse_databases
|
39
39
|
end
|
40
|
-
drv = Bio::DDBJ::
|
40
|
+
drv = Bio::DDBJ::REST::Blast.new
|
41
41
|
str = drv.getSupportDatabaseList
|
42
42
|
|
43
43
|
databases = {}
|
44
44
|
dbdescs = {}
|
45
|
-
|
45
|
+
keys = [ 'blastn', 'blastp' ]
|
46
|
+
keys.each do |key|
|
47
|
+
databases[key] ||= []
|
48
|
+
dbdescs[key] ||= {}
|
49
|
+
end
|
46
50
|
prefix = ''
|
47
|
-
|
48
|
-
dbdescs[key] ||= {}
|
51
|
+
prefix_count = 0
|
49
52
|
str.each_line do |line|
|
50
53
|
a = line.strip.split(/\s*\-\s*/, 2)
|
51
54
|
case a.size
|
52
55
|
when 1
|
53
56
|
prefix = a[0].to_s.strip
|
54
57
|
prefix += ': ' unless prefix.empty?
|
55
|
-
|
58
|
+
prefix_count = 0
|
56
59
|
next #each_line
|
57
60
|
when 0
|
58
|
-
prefix = ''
|
59
|
-
key = 'blastp'
|
60
|
-
databases[key] ||= []
|
61
|
-
dbdescs[key] ||= {}
|
61
|
+
prefix = '' if prefix_count > 0
|
62
62
|
next #each_line
|
63
63
|
end
|
64
64
|
name = a[0].to_s.strip.freeze
|
65
|
-
desc =
|
65
|
+
desc = a[1].to_s.strip
|
66
|
+
key = case desc
|
67
|
+
when /\(NT\)\s*$/
|
68
|
+
'blastn'
|
69
|
+
when /\(AA\)\s*$/
|
70
|
+
'blastp'
|
71
|
+
else
|
72
|
+
warn "DDBJ BLAST: could not determine the database is NT or AA: #{line.chomp}" if $VERBOSE
|
73
|
+
next #each_line
|
74
|
+
end
|
75
|
+
desc = (prefix + desc).freeze
|
76
|
+
prefix_count += 1
|
66
77
|
databases[key].push name
|
67
78
|
dbdescs[key][name] = desc
|
68
79
|
end
|
@@ -96,11 +107,9 @@ module Bio::Blast::Remote
|
|
96
107
|
options = make_command_line_options
|
97
108
|
opt = Bio::Blast::NCBIOptions.new(options)
|
98
109
|
|
99
|
-
#
|
100
|
-
@ddbj_remote_blast ||= Bio::DDBJ::
|
101
|
-
|
102
|
-
# always use REST version to prevent warning messages
|
103
|
-
@ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager::REST.new
|
110
|
+
# REST objects are cached
|
111
|
+
@ddbj_remote_blast ||= Bio::DDBJ::REST::Blast.new
|
112
|
+
@ddbj_request_manager ||= Bio::DDBJ::REST::RequestManager.new
|
104
113
|
|
105
114
|
program = opt.delete('-p')
|
106
115
|
db = opt.delete('-d')
|
@@ -110,24 +119,7 @@ module Bio::Blast::Remote
|
|
110
119
|
qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
|
111
120
|
@output = qid
|
112
121
|
|
113
|
-
|
114
|
-
flag = true
|
115
|
-
while flag
|
116
|
-
if $VERBOSE then
|
117
|
-
$stderr.puts "DDBJ BLAST: ID: #{qid} -- waitng #{sleeptime} sec."
|
118
|
-
end
|
119
|
-
sleep(sleeptime)
|
120
|
-
|
121
|
-
result = @ddbj_request_manager.getAsyncResult(qid)
|
122
|
-
case result.to_s
|
123
|
-
when /The search and analysis service by WWW is very busy now/
|
124
|
-
raise result.to_s.strip + '(Alternatively, wrong options may be given.)'
|
125
|
-
when /Your job has not completed yet/
|
126
|
-
sleeptime = 5
|
127
|
-
else
|
128
|
-
flag = false
|
129
|
-
end
|
130
|
-
end while flag
|
122
|
+
result = @ddbj_request_manager.wait_getAsyncResult(qid)
|
131
123
|
|
132
124
|
@output = result
|
133
125
|
return @output
|
@@ -69,11 +69,11 @@ module Bio::Blast::Remote
|
|
69
69
|
#
|
70
70
|
# * http://www.ncbi.nlm.nih.gov/blast/
|
71
71
|
# * http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/similarity.html
|
72
|
-
# * http://
|
72
|
+
# * http://www.genome.jp/tools/blast/
|
73
73
|
#
|
74
74
|
module GenomeNet
|
75
75
|
|
76
|
-
Host = "
|
76
|
+
Host = "www.genome.jp".freeze
|
77
77
|
|
78
78
|
# Creates a remote BLAST factory using GenomeNet.
|
79
79
|
# Returns Bio::Blast object.
|
@@ -100,7 +100,7 @@ module Bio::Blast::Remote
|
|
100
100
|
key = nil
|
101
101
|
host = Bio::Blast::Remote::Genomenet::Host
|
102
102
|
http = Bio::Command.new_http(host)
|
103
|
-
result = http.get('/')
|
103
|
+
result = http.get('/tools/blast/')
|
104
104
|
#p result.body
|
105
105
|
result.body.each_line do |line|
|
106
106
|
case line
|
@@ -166,11 +166,20 @@ module Bio::Blast::Remote
|
|
166
166
|
program = opt.delete('-p')
|
167
167
|
db = opt.delete('-d')
|
168
168
|
|
169
|
+
# When database name starts with mine-aa or mine-nt,
|
170
|
+
# space-separated list of KEGG organism codes can be given.
|
171
|
+
# For example, "mine-aa eco bsu hsa".
|
172
|
+
if /\A(mine-(aa|nt))\s+/ =~ db.to_s then
|
173
|
+
db = $1
|
174
|
+
myspecies = {}
|
175
|
+
myspecies["myspecies-#{$2}"] = $'
|
176
|
+
end
|
177
|
+
|
169
178
|
matrix = opt.delete('-M') || 'blosum62'
|
170
179
|
filter = opt.delete('-F') || 'T'
|
171
180
|
|
172
|
-
|
173
|
-
|
181
|
+
opt_v = opt.delete('-v') || 500 # default value for GenomeNet
|
182
|
+
opt_b = opt.delete('-b') || 250 # default value for GenomeNet
|
174
183
|
|
175
184
|
# format, not for form parameters, but included in option string
|
176
185
|
opt_m = opt.get('-m') || '7' # default of BioRuby GenomeNet factory
|
@@ -186,11 +195,13 @@ module Bio::Blast::Remote
|
|
186
195
|
'other_param' => optstr,
|
187
196
|
'matrix' => matrix,
|
188
197
|
'filter' => filter,
|
189
|
-
'V_value' =>
|
190
|
-
'B_value' =>
|
198
|
+
'V_value' => opt_v,
|
199
|
+
'B_value' => opt_b,
|
191
200
|
'alignment_view' => 0,
|
192
201
|
}
|
193
202
|
|
203
|
+
form.merge!(myspecies) if myspecies
|
204
|
+
|
194
205
|
form.keys.each do |k|
|
195
206
|
form.delete(k) unless form[k]
|
196
207
|
end
|
@@ -227,10 +238,9 @@ module Bio::Blast::Remote
|
|
227
238
|
end
|
228
239
|
end
|
229
240
|
|
230
|
-
# workaround 2005.08.12
|
231
|
-
if /\<A +HREF=\"(http\:\/\/
|
232
|
-
|
233
|
-
@output = result.body
|
241
|
+
# workaround 2005.08.12 + 2011.01.27
|
242
|
+
if /\<A +HREF=\"(http\:\/\/[\-\.a-z0-9]+\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
|
243
|
+
@output = Bio::Command.read_uri($1)
|
234
244
|
txt = @output.to_s.split(/\<pre\>/)[1]
|
235
245
|
raise 'cannot understand response' unless txt
|
236
246
|
txt.sub!(/\<\/pre\>.*\z/m, '')
|
data/lib/bio/db/embl/sptr.rb
CHANGED
@@ -50,7 +50,18 @@ class SPTR < EMBLDB
|
|
50
50
|
# returns a content (Int or String) of the ID line by a given key.
|
51
51
|
# Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MOLECULE_TYPE', 'SEQUENCE_LENGTH']
|
52
52
|
#
|
53
|
-
# === ID Line
|
53
|
+
# === ID Line (since UniProtKB release 9.0 of 31-Oct-2006)
|
54
|
+
# ID P53_HUMAN Reviewed; 393 AA.
|
55
|
+
# #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."
|
56
|
+
#
|
57
|
+
# === Examples
|
58
|
+
# obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed",
|
59
|
+
# "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}
|
60
|
+
#
|
61
|
+
# obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
|
62
|
+
#
|
63
|
+
#
|
64
|
+
# === ID Line (older style)
|
54
65
|
# ID P53_HUMAN STANDARD; PRT; 393 AA.
|
55
66
|
# #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
|
56
67
|
#
|
@@ -65,11 +76,20 @@ class SPTR < EMBLDB
|
|
65
76
|
return @data['ID'] if @data['ID']
|
66
77
|
|
67
78
|
part = @orig['ID'].split(/ +/)
|
79
|
+
if part[4].to_s.chomp == 'AA.' then
|
80
|
+
# after UniProtKB release 9.0 of 31-Oct-2006
|
81
|
+
# (http://www.uniprot.org/docs/sp_news.htm)
|
82
|
+
molecule_type = nil
|
83
|
+
sequence_length = part[3].to_i
|
84
|
+
else
|
85
|
+
molecule_type = part[3].sub(/;/,'')
|
86
|
+
sequence_length = part[4].to_i
|
87
|
+
end
|
68
88
|
@data['ID'] = {
|
69
89
|
'ENTRY_NAME' => part[1],
|
70
90
|
'DATA_CLASS' => part[2].sub(/;/,''),
|
71
|
-
'MOLECULE_TYPE' =>
|
72
|
-
'SEQUENCE_LENGTH' =>
|
91
|
+
'MOLECULE_TYPE' => molecule_type,
|
92
|
+
'SEQUENCE_LENGTH' => sequence_length
|
73
93
|
}
|
74
94
|
end
|
75
95
|
|
@@ -111,12 +131,27 @@ class SPTR < EMBLDB
|
|
111
131
|
# returns a Hash of information in the DT lines.
|
112
132
|
# hash keys:
|
113
133
|
# ['created', 'sequence', 'annotation']
|
134
|
+
#--
|
114
135
|
# also Symbols acceptable (ASAP):
|
115
136
|
# [:created, :sequence, :annotation]
|
137
|
+
#++
|
116
138
|
#
|
117
|
-
#
|
139
|
+
# Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is
|
140
|
+
# changed, and the word "annotation" is no longer used in DT lines.
|
141
|
+
# Despite the change, the word "annotation" is still used for keeping
|
142
|
+
# compatibility.
|
143
|
+
#
|
144
|
+
# returns a String of information in the DT lines by a given key.
|
118
145
|
#
|
119
146
|
# === DT Line; date (3/entry)
|
147
|
+
# DT DD-MMM-YYY (integrated into UniProtKB/XXXXX.)
|
148
|
+
# DT DD-MMM-YYY (sequence version NN)
|
149
|
+
# DT DD-MMM-YYY (entry version NN)
|
150
|
+
#
|
151
|
+
# The format was changed in UniProtKB release 7.0 of 07-Feb-2006.
|
152
|
+
# Below is the older format.
|
153
|
+
#
|
154
|
+
# === Old format of DT Line; date (3/entry)
|
120
155
|
# DT DD-MMM-YYY (rel. NN, Created)
|
121
156
|
# DT DD-MMM-YYY (rel. NN, Last sequence update)
|
122
157
|
# DT DD-MMM-YYY (rel. NN, Last annotation update)
|
@@ -133,7 +168,79 @@ class SPTR < EMBLDB
|
|
133
168
|
end
|
134
169
|
|
135
170
|
|
171
|
+
# (private) parses DE line (description lines)
|
172
|
+
# since UniProtKB release 14.0 of 22-Jul-2008
|
173
|
+
#
|
174
|
+
# Return array containing array.
|
175
|
+
#
|
176
|
+
# http://www.uniprot.org/docs/sp_news.htm
|
177
|
+
def parse_DE_line_rel14(str)
|
178
|
+
# Returns nil if it is not the new format since Rel.14
|
179
|
+
return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
|
180
|
+
ret = []
|
181
|
+
cur = nil
|
182
|
+
str.each_line do |line|
|
183
|
+
case line
|
184
|
+
when /^DE (Includes|Contains)\: *$/
|
185
|
+
cur = [ $1 ]
|
186
|
+
ret.push cur
|
187
|
+
cur = nil
|
188
|
+
#subcat_and_desc = nil
|
189
|
+
next
|
190
|
+
when /^DE *(RecName|AltName|SubName)\: +(.*)/
|
191
|
+
category = $1
|
192
|
+
subcat_and_desc = $2
|
193
|
+
cur = [ category ]
|
194
|
+
ret.push cur
|
195
|
+
when /^DE *(Flags)\: +(.*)/
|
196
|
+
category = $1
|
197
|
+
desc = $2
|
198
|
+
flags = desc.strip.split(/\s*\;\s*/) || []
|
199
|
+
cur = [ category, flags ]
|
200
|
+
ret.push cur
|
201
|
+
cur = nil
|
202
|
+
#subcat_and_desc = nil
|
203
|
+
next
|
204
|
+
when /^DE *(.*)/
|
205
|
+
subcat_and_desc = $1
|
206
|
+
else
|
207
|
+
warn "Warning: skipped DE line in unknown format: #{line.inspect}"
|
208
|
+
#subcat_and_desc = nil
|
209
|
+
next
|
210
|
+
end
|
211
|
+
case subcat_and_desc
|
212
|
+
when nil
|
213
|
+
# does nothing
|
214
|
+
when /\A([^\=]+)\=(.*)/
|
215
|
+
subcat = $1
|
216
|
+
desc = $2
|
217
|
+
desc.sub!(/\;\s*\z/, '')
|
218
|
+
unless cur
|
219
|
+
warn "Warning: unknown category in DE line: #{line.inspect}"
|
220
|
+
cur = [ '' ]
|
221
|
+
ret.push cur
|
222
|
+
end
|
223
|
+
cur.push [ subcat, desc ]
|
224
|
+
else
|
225
|
+
warn "Warning: skipped DE line description in unknown format: #{line.inspect}"
|
226
|
+
end
|
227
|
+
end
|
228
|
+
ret
|
229
|
+
end
|
230
|
+
private :parse_DE_line_rel14
|
231
|
+
|
136
232
|
# returns the proposed official name of the protein.
|
233
|
+
# Returns a String.
|
234
|
+
#
|
235
|
+
# Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has
|
236
|
+
# been changed. The method returns the full name which is taken from
|
237
|
+
# "RecName: Full=" or "SubName: Full=" line normally in the beginning of
|
238
|
+
# the DE lines.
|
239
|
+
# Unlike parser for old format, no special treatments for fragment or
|
240
|
+
# precursor.
|
241
|
+
#
|
242
|
+
# For old format, the method parses the DE lines and returns the protein
|
243
|
+
# name as a String.
|
137
244
|
#
|
138
245
|
# === DE Line; description (>=1)
|
139
246
|
# "DE #{OFFICIAL_NAME} (#{SYNONYM})"
|
@@ -142,27 +249,83 @@ class SPTR < EMBLDB
|
|
142
249
|
# SYNONYM >=0
|
143
250
|
# CONTAINS >=0
|
144
251
|
def protein_name
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
name
|
252
|
+
@data['DE'] ||= parse_DE_line_rel14(get('DE'))
|
253
|
+
parsed_de_line = @data['DE']
|
254
|
+
if parsed_de_line then
|
255
|
+
# since UniProtKB release 14.0 of 22-Jul-2008
|
256
|
+
name = nil
|
257
|
+
parsed_de_line.each do |a|
|
258
|
+
case a[0]
|
259
|
+
when 'RecName', 'SubName'
|
260
|
+
if name_pair = a[1..-1].find { |b| b[0] == 'Full' } then
|
261
|
+
name = name_pair[1]
|
262
|
+
break
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|
266
|
+
name = name.to_s
|
267
|
+
else
|
268
|
+
# old format (before Rel. 13.x)
|
269
|
+
name = ""
|
270
|
+
if de_line = fetch('DE') then
|
271
|
+
str = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
|
272
|
+
name = str[/^[^(]*/].strip
|
273
|
+
name << ' (Fragment)' if str =~ /fragment/i
|
274
|
+
end
|
150
275
|
end
|
151
276
|
return name
|
152
277
|
end
|
153
278
|
|
154
279
|
|
155
|
-
# returns
|
280
|
+
# returns synonyms (unofficial and/or alternative names).
|
281
|
+
# Returns an Array containing String objects.
|
282
|
+
#
|
283
|
+
# Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has
|
284
|
+
# been changed. The method returns the full or short names which are
|
285
|
+
# taken from "RecName: Short=", "RecName: EC=", and AltName lines,
|
286
|
+
# except after "Contains:" or "Includes:".
|
287
|
+
# For keeping compatibility with old format parser, "RecName: EC=N.N.N.N"
|
288
|
+
# is reported as "EC N.N.N.N".
|
289
|
+
# In addition, to prevent confusion, "Allergen=" and "CD_antigen="
|
290
|
+
# prefixes are added for the corresponding fields.
|
156
291
|
#
|
292
|
+
# For old format, the method parses the DE lines and returns synonyms.
|
157
293
|
# synonyms are each placed in () following the official name on the DE line.
|
158
294
|
def synonyms
|
159
295
|
ary = Array.new
|
160
|
-
|
161
|
-
|
296
|
+
@data['DE'] ||= parse_DE_line_rel14(get('DE'))
|
297
|
+
parsed_de_line = @data['DE']
|
298
|
+
if parsed_de_line then
|
299
|
+
# since UniProtKB release 14.0 of 22-Jul-2008
|
300
|
+
parsed_de_line.each do |a|
|
301
|
+
case a[0]
|
302
|
+
when 'Includes', 'Contains'
|
303
|
+
break #the each loop
|
304
|
+
when 'RecName', 'SubName', 'AltName'
|
305
|
+
a[1..-1].each do |b|
|
306
|
+
if name = b[1] and b[1] != self.protein_name then
|
307
|
+
case b[0]
|
308
|
+
when 'EC'
|
309
|
+
name = "EC " + b[1]
|
310
|
+
when 'Allergen', 'CD_antigen'
|
311
|
+
name = b[0] + '=' + b[1]
|
312
|
+
else
|
313
|
+
name = b[1]
|
314
|
+
end
|
315
|
+
ary.push name
|
316
|
+
end
|
317
|
+
end
|
318
|
+
end #case a[0]
|
319
|
+
end #parsed_de_line.each
|
320
|
+
else
|
321
|
+
# old format (before Rel. 13.x)
|
322
|
+
if de_line = fetch('DE') then
|
323
|
+
line = de_line.sub(/\[.*\]/,'') # ignore stuff between [ and ]. That's the "contains" part
|
162
324
|
line.scan(/\([^)]+/) do |synonym|
|
163
325
|
unless synonym =~ /fragment/i then
|
164
326
|
ary << synonym[1..-1].strip # index to remove the leading (
|
165
327
|
end
|
328
|
+
end
|
166
329
|
end
|
167
330
|
end
|
168
331
|
return ary
|
@@ -919,25 +1082,34 @@ class SPTR < EMBLDB
|
|
919
1082
|
end
|
920
1083
|
private :cc_subcellular_location
|
921
1084
|
|
922
|
-
|
923
|
-
|
1085
|
+
|
1086
|
+
#--
|
1087
|
+
# Since UniProtKB release 12.2 of 11-Sep-2007:
|
1088
|
+
# CC -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress]. # Old format:
|
1089
|
+
# CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
|
1090
|
+
#++
|
1091
|
+
|
924
1092
|
def cc_web_resource(data)
|
925
1093
|
data.map {|x|
|
926
|
-
entry = {'
|
1094
|
+
entry = {'Name' => nil, 'Note' => nil, 'URL' => nil}
|
927
1095
|
x.split(';').each do |y|
|
928
1096
|
case y
|
929
|
-
when /
|
930
|
-
|
931
|
-
|
932
|
-
entry[
|
933
|
-
when /
|
1097
|
+
when /(Name|Note)\=(.+)/
|
1098
|
+
key = $1
|
1099
|
+
val = $2.strip
|
1100
|
+
entry[key] = val
|
1101
|
+
when /(NAME|NOTE)\=(.+)/
|
1102
|
+
key = $1.downcase.capitalize
|
1103
|
+
val = $2.strip
|
1104
|
+
entry[key] = val
|
1105
|
+
when /URL\=\"(.+)\"/
|
934
1106
|
entry['URL'] = $1.strip
|
935
1107
|
end
|
936
1108
|
end
|
937
1109
|
entry
|
938
1110
|
}
|
939
1111
|
end
|
940
|
-
|
1112
|
+
private :cc_web_resource
|
941
1113
|
|
942
1114
|
# returns databases cross-references in the DR lines.
|
943
1115
|
# * Bio::SPTR#dr -> Hash w/in Array
|