bio 2.0.1 → 2.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +313 -0
- data/KNOWN_ISSUES.rdoc +1 -1
- data/LEGAL +9 -0
- data/README.rdoc +3 -3
- data/RELEASE_NOTES.rdoc +59 -0
- data/bioruby.gemspec +4 -2
- data/bioruby.gemspec.erb +0 -1
- data/lib/bio/appl/iprscan/report.rb +3 -3
- data/lib/bio/appl/sosui/report.rb +1 -1
- data/lib/bio/db/embl/uniprotkb.rb +137 -12
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/sequence/common.rb +112 -0
- data/lib/bio/sequence/format.rb +1 -0
- data/lib/bio/tree.rb +1 -1
- data/lib/bio/version.rb +1 -1
- data/test/data/uniprot/P28907.uniprot +551 -0
- data/test/unit/bio/db/embl/test_uniprotkb_P28907.rb +325 -0
- data/test/unit/bio/sequence/test_ruby3.rb +462 -0
- metadata +6 -4
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
|
|
174
174
|
#
|
175
175
|
# http://www.uniprot.org/docs/sp_news.htm
|
176
176
|
def parse_DE_line_rel14(str)
|
177
|
-
#
|
177
|
+
# Returns nil if it is not the new format since Rel.14
|
178
178
|
return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
|
179
179
|
ret = []
|
180
180
|
cur = nil
|
@@ -248,9 +248,8 @@ class UniProtKB < EMBLDB
|
|
248
248
|
# SYNONYM >=0
|
249
249
|
# CONTAINS >=0
|
250
250
|
def protein_name
|
251
|
-
|
252
|
-
parsed_de_line
|
253
|
-
if parsed_de_line then
|
251
|
+
parsed_de_line = self.de
|
252
|
+
if parsed_de_line.kind_of?(Array) then
|
254
253
|
# since UniProtKB release 14.0 of 22-Jul-2008
|
255
254
|
name = nil
|
256
255
|
parsed_de_line.each do |a|
|
@@ -275,7 +274,6 @@ class UniProtKB < EMBLDB
|
|
275
274
|
return name
|
276
275
|
end
|
277
276
|
|
278
|
-
|
279
277
|
# returns synonyms (unofficial and/or alternative names).
|
280
278
|
# Returns an Array containing String objects.
|
281
279
|
#
|
@@ -292,9 +290,8 @@ class UniProtKB < EMBLDB
|
|
292
290
|
# synonyms are each placed in () following the official name on the DE line.
|
293
291
|
def synonyms
|
294
292
|
ary = Array.new
|
295
|
-
|
296
|
-
parsed_de_line
|
297
|
-
if parsed_de_line then
|
293
|
+
parsed_de_line = self.de
|
294
|
+
if parsed_de_line.kind_of?(Array) then
|
298
295
|
# since UniProtKB release 14.0 of 22-Jul-2008
|
299
296
|
parsed_de_line.each do |a|
|
300
297
|
case a[0]
|
@@ -330,6 +327,20 @@ class UniProtKB < EMBLDB
|
|
330
327
|
return ary
|
331
328
|
end
|
332
329
|
|
330
|
+
# Returns an Array (for new format since rel 14)
|
331
|
+
# or a String (for old format before rel 14) for the DE line.
|
332
|
+
#
|
333
|
+
def de
|
334
|
+
return @data['DE'] if @data['DE']
|
335
|
+
parsed_de_line = parse_DE_line_rel14(get('DE'))
|
336
|
+
case parsed_de_line
|
337
|
+
when Array # new format since rel14
|
338
|
+
@data['DE'] ||= parsed_de_line
|
339
|
+
else
|
340
|
+
super
|
341
|
+
end
|
342
|
+
@data['DE']
|
343
|
+
end
|
333
344
|
|
334
345
|
# returns gene names in the GN line.
|
335
346
|
#
|
@@ -1197,9 +1208,124 @@ class UniProtKB < EMBLDB
|
|
1197
1208
|
return ft[feature_key] if feature_key
|
1198
1209
|
return @data['FT'] if @data['FT']
|
1199
1210
|
|
1211
|
+
ftstr = get('FT')
|
1212
|
+
ftlines = ftstr.split("\n")
|
1213
|
+
for i in 0..10 do
|
1214
|
+
if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ ftlines[i] &&
|
1215
|
+
/^FT +\/([^\s\=]+)(?:\=(\")?(.+)(\")?)?\s*$/ =~ ftlines[i+1] then
|
1216
|
+
fmt_2019_11 = true
|
1217
|
+
break #for i
|
1218
|
+
end
|
1219
|
+
end #for i
|
1220
|
+
|
1221
|
+
hash = if fmt_2019_11 then
|
1222
|
+
ft_2019_11_parser(ftlines)
|
1223
|
+
else
|
1224
|
+
ft_legacy_parser(ftlines)
|
1225
|
+
end
|
1226
|
+
@data['FT'] = hash
|
1227
|
+
end
|
1228
|
+
|
1229
|
+
# FT parser since UniProt release 2019_11
|
1230
|
+
# https://www.uniprot.org/release-notes/2019-12-18-release#text%5Fft
|
1231
|
+
def ft_2019_11_parser(ftlines)
|
1232
|
+
table = []
|
1233
|
+
cur_ft = nil
|
1234
|
+
cont = false
|
1235
|
+
begin
|
1236
|
+
ftlines.each do |line|
|
1237
|
+
if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ line
|
1238
|
+
cur_ft = [$1.to_s, # Feature Name
|
1239
|
+
"#{$2}#{$4}", # From
|
1240
|
+
$5.to_s, # To
|
1241
|
+
[] # Qualifiers
|
1242
|
+
]
|
1243
|
+
table.push cur_ft
|
1244
|
+
cont = false
|
1245
|
+
elsif cont && /^FT {19}/ =~ line
|
1246
|
+
str = $'
|
1247
|
+
str.rstrip!
|
1248
|
+
orig = cur_ft[3][-1][1].to_s
|
1249
|
+
if orig.size > 0 && orig[-1] != ' ' &&
|
1250
|
+
str.length > 0 && str[0] != ' ' then
|
1251
|
+
orig.concat ' '
|
1252
|
+
end
|
1253
|
+
orig.concat str
|
1254
|
+
cur_ft[3][-1][1] = orig
|
1255
|
+
if cont && orig[-1] == "\""
|
1256
|
+
orig.chop!
|
1257
|
+
cont = false
|
1258
|
+
end
|
1259
|
+
elsif /^FT +\/([^\s\=]+)(?:\=(\")?(.+))?\s*$/ =~ line
|
1260
|
+
key = $1
|
1261
|
+
val = $3
|
1262
|
+
val.rstrip!
|
1263
|
+
cur_ft[3].push [ key, val ]
|
1264
|
+
cont = false
|
1265
|
+
if $2 == "\""
|
1266
|
+
if val.to_s[-1] == "\""
|
1267
|
+
val.chop!
|
1268
|
+
else
|
1269
|
+
cont = true
|
1270
|
+
end
|
1271
|
+
end
|
1272
|
+
else
|
1273
|
+
raise "FT parse error: #{line.inspect}"
|
1274
|
+
end
|
1275
|
+
end
|
1276
|
+
|
1277
|
+
hash = {}
|
1278
|
+
table.each do |feature|
|
1279
|
+
cur_h = {
|
1280
|
+
# Removing '<', '>' or '?' in FROM/TO endpoint.
|
1281
|
+
'From' => feature[1].sub(/\D/, '').to_i,
|
1282
|
+
'To' => feature[2].sub(/\D/, '').to_i,
|
1283
|
+
'diff' => [],
|
1284
|
+
'original' => feature
|
1285
|
+
}
|
1286
|
+
hash[feature[0]] ||= []
|
1287
|
+
hash[feature[0]].push cur_h
|
1288
|
+
feature[3].each do |a|
|
1289
|
+
case a[0]
|
1290
|
+
when 'From', 'To', 'Description', 'FTId', 'diff', 'original'
|
1291
|
+
; # do nothing
|
1292
|
+
else
|
1293
|
+
cur_h[a[0]] = a[1]
|
1294
|
+
end
|
1295
|
+
end
|
1296
|
+
if cur_h["id"] then
|
1297
|
+
cur_h['FTId'] = cur_h['id']
|
1298
|
+
end
|
1299
|
+
|
1300
|
+
case feature[0]
|
1301
|
+
when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
|
1302
|
+
case cur_h['note'].to_s
|
1303
|
+
when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
|
1304
|
+
original_res = $1
|
1305
|
+
changed_res = $2
|
1306
|
+
original_res = original_res.gsub(/ /,'').strip
|
1307
|
+
chenged_res = changed_res.gsub(/ /,'').strip
|
1308
|
+
when /Missing/i
|
1309
|
+
original_res = seq.subseq(cur_h['From'],
|
1310
|
+
cur_h['To'])
|
1311
|
+
changed_res = ''
|
1312
|
+
end
|
1313
|
+
cur_h['diff'] = [original_res, chenged_res]
|
1314
|
+
end
|
1315
|
+
end
|
1316
|
+
rescue
|
1317
|
+
raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
|
1318
|
+
end
|
1319
|
+
|
1320
|
+
hash
|
1321
|
+
end
|
1322
|
+
private :ft_2019_11_parser
|
1323
|
+
|
1324
|
+
# FT parser for the format before Uniprot release 2019_11
|
1325
|
+
def ft_legacy_parser(ftlines)
|
1200
1326
|
table = []
|
1201
1327
|
begin
|
1202
|
-
|
1328
|
+
ftlines.each do |line|
|
1203
1329
|
if line =~ /^FT \w/
|
1204
1330
|
feature = line.chomp.ljust(74)
|
1205
1331
|
table << [feature[ 5..12].strip, # Feature Name
|
@@ -1256,10 +1382,9 @@ class UniProtKB < EMBLDB
|
|
1256
1382
|
raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
|
1257
1383
|
end
|
1258
1384
|
|
1259
|
-
|
1385
|
+
hash
|
1260
1386
|
end
|
1261
|
-
|
1262
|
-
|
1387
|
+
private :ft_legacy_parser
|
1263
1388
|
|
1264
1389
|
# returns a Hash of contents in the SQ lines.
|
1265
1390
|
# * Bio::UniProtKB#sq -> hsh
|
data/lib/bio/db/go.rb
CHANGED
@@ -193,7 +193,7 @@ class GO
|
|
193
193
|
# Delimiter
|
194
194
|
RS = DELIMITER
|
195
195
|
|
196
|
-
#
|
196
|
+
# Returns an Array of parsed gene_association flatfile.
|
197
197
|
# Block is acceptable.
|
198
198
|
def self.parser(str)
|
199
199
|
if block_given?
|
@@ -226,7 +226,7 @@ class GO
|
|
226
226
|
# Returns Db_Reference variable.
|
227
227
|
attr_reader :db_reference # -> []
|
228
228
|
|
229
|
-
#
|
229
|
+
# Returns Evidence code variable.
|
230
230
|
attr_reader :evidence
|
231
231
|
|
232
232
|
# Returns the entry is associated with this value.
|
data/lib/bio/sequence/common.rb
CHANGED
@@ -303,6 +303,118 @@ module Common
|
|
303
303
|
end
|
304
304
|
alias splicing splice
|
305
305
|
|
306
|
+
#--
|
307
|
+
# Workaround for Ruby 3.0.0 incompatible changes
|
308
|
+
if ::RUBY_VERSION > "3"
|
309
|
+
|
310
|
+
# Acts almost the same as String#split.
|
311
|
+
def split(*arg)
|
312
|
+
if block_given?
|
313
|
+
super
|
314
|
+
else
|
315
|
+
ret = super(*arg)
|
316
|
+
ret.collect! { |x| self.class.new('').replace(x) }
|
317
|
+
ret
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
%w( * ljust rjust center ).each do |w|
|
322
|
+
module_eval %Q{
|
323
|
+
def #{w}(*arg)
|
324
|
+
self.class.new('').replace(super)
|
325
|
+
end
|
326
|
+
}
|
327
|
+
end
|
328
|
+
|
329
|
+
%w( chomp chop
|
330
|
+
delete delete_prefix delete_suffix
|
331
|
+
lstrip rstrip strip
|
332
|
+
reverse
|
333
|
+
squeeze
|
334
|
+
succ next
|
335
|
+
tr tr_s
|
336
|
+
capitalize upcase downcase swapcase
|
337
|
+
).each do |w|
|
338
|
+
module_eval %Q{
|
339
|
+
def #{w}(*arg)
|
340
|
+
s = self.dup
|
341
|
+
s.#{w}!(*arg)
|
342
|
+
s
|
343
|
+
end
|
344
|
+
}
|
345
|
+
end
|
346
|
+
|
347
|
+
%w( sub gsub ).each do |w|
|
348
|
+
module_eval %Q{
|
349
|
+
def #{w}(*arg, &block)
|
350
|
+
s = self.dup
|
351
|
+
s.#{w}!(*arg, &block)
|
352
|
+
s
|
353
|
+
end
|
354
|
+
}
|
355
|
+
end
|
356
|
+
|
357
|
+
#Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
|
358
|
+
#(Title: Is it possible to implement gsub wrapper?)
|
359
|
+
%w( sub! gsub! ).each do |w|
|
360
|
+
module_eval %Q{
|
361
|
+
def #{w}(*arg, &block)
|
362
|
+
if block_given? then
|
363
|
+
super(*arg) do |m|
|
364
|
+
b = Thread.current[:_backref]
|
365
|
+
Thread.current[:_backref] = ::Regexp.last_match
|
366
|
+
block.binding.eval("$~ = Thread.current[:_backref]")
|
367
|
+
Thread.current[:_backref] = b
|
368
|
+
block.call(self.class.new('').replace(m))
|
369
|
+
end
|
370
|
+
else
|
371
|
+
super
|
372
|
+
end
|
373
|
+
end
|
374
|
+
}
|
375
|
+
end
|
376
|
+
|
377
|
+
%w( each_char each_grapheme_cluster each_line ).each do |w|
|
378
|
+
module_eval %Q{
|
379
|
+
def #{w}
|
380
|
+
if block_given?
|
381
|
+
super { |c| yield(self.class.new('').replace(c)) }
|
382
|
+
else
|
383
|
+
enum_for(:#{w})
|
384
|
+
end
|
385
|
+
end
|
386
|
+
}
|
387
|
+
end
|
388
|
+
|
389
|
+
%w( slice [] slice! ).each do |w|
|
390
|
+
module_eval %Q{
|
391
|
+
def #{w}(*arg)
|
392
|
+
r = super
|
393
|
+
r ? self.class.new('').replace(r) : r
|
394
|
+
end
|
395
|
+
}
|
396
|
+
end
|
397
|
+
|
398
|
+
%w( partition rpartition ).each do |w|
|
399
|
+
module_eval %Q{
|
400
|
+
def #{w}(sep)
|
401
|
+
r = super
|
402
|
+
if r.kind_of?(Array)
|
403
|
+
r[1] == sep ?
|
404
|
+
[ self.class.new('').replace(r[0]),
|
405
|
+
r[1],
|
406
|
+
self.class.new('').replace(r[2]) ] :
|
407
|
+
r.collect { |x| self.class.new('').replace(x) }
|
408
|
+
else
|
409
|
+
r
|
410
|
+
end
|
411
|
+
end
|
412
|
+
}
|
413
|
+
end
|
414
|
+
#++
|
415
|
+
|
416
|
+
end # if ::RUBY_VERSION > "3"
|
417
|
+
|
306
418
|
end # Common
|
307
419
|
|
308
420
|
end # Sequence
|
data/lib/bio/sequence/format.rb
CHANGED
data/lib/bio/tree.rb
CHANGED
@@ -605,7 +605,7 @@ module Bio
|
|
605
605
|
end
|
606
606
|
|
607
607
|
# Gets path from node1 to node2.
|
608
|
-
#
|
608
|
+
# Returns an array of nodes, including node1 and node2.
|
609
609
|
# If node1 and/or node2 do not exist, IndexError is raised.
|
610
610
|
# If node1 and node2 are not connected, NoPathError is raised.
|
611
611
|
# The result is unspecified for cyclic trees.
|
data/lib/bio/version.rb
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
module Bio
|
11
11
|
|
12
12
|
# BioRuby version (Array containing Integer)
|
13
|
-
BIORUBY_VERSION = [2, 0,
|
13
|
+
BIORUBY_VERSION = [2, 0, 4].extend(Comparable).freeze
|
14
14
|
|
15
15
|
# Extra version specifier (String or nil).
|
16
16
|
# Existence of the value indicates development version.
|