bio 2.0.1 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
174
174
  #
175
175
  # http://www.uniprot.org/docs/sp_news.htm
176
176
  def parse_DE_line_rel14(str)
177
- # Retruns if it is not the new format since Rel.14
177
+ # Returns if it is not the new format since Rel.14
178
178
  return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
179
179
  ret = []
180
180
  cur = nil
@@ -248,9 +248,8 @@ class UniProtKB < EMBLDB
248
248
  # SYNONYM >=0
249
249
  # CONTAINS >=0
250
250
  def protein_name
251
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
252
- parsed_de_line = @data['DE']
253
- if parsed_de_line then
251
+ parsed_de_line = self.de
252
+ if parsed_de_line.kind_of?(Array) then
254
253
  # since UniProtKB release 14.0 of 22-Jul-2008
255
254
  name = nil
256
255
  parsed_de_line.each do |a|
@@ -275,7 +274,6 @@ class UniProtKB < EMBLDB
275
274
  return name
276
275
  end
277
276
 
278
-
279
277
  # returns synonyms (unofficial and/or alternative names).
280
278
  # Returns an Array containing String objects.
281
279
  #
@@ -292,9 +290,8 @@ class UniProtKB < EMBLDB
292
290
  # synonyms are each placed in () following the official name on the DE line.
293
291
  def synonyms
294
292
  ary = Array.new
295
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
296
- parsed_de_line = @data['DE']
297
- if parsed_de_line then
293
+ parsed_de_line = self.de
294
+ if parsed_de_line.kind_of?(Array) then
298
295
  # since UniProtKB release 14.0 of 22-Jul-2008
299
296
  parsed_de_line.each do |a|
300
297
  case a[0]
@@ -330,6 +327,20 @@ class UniProtKB < EMBLDB
330
327
  return ary
331
328
  end
332
329
 
330
+ # Returns an Array (for new format since rel 14)
331
+ # or a String (for old format before rel 14) for the DE line.
332
+ #
333
+ def de
334
+ return @data['DE'] if @data['DE']
335
+ parsed_de_line = parse_DE_line_rel14(get('DE'))
336
+ case parsed_de_line
337
+ when Array # new format since rel14
338
+ @data['DE'] ||= parsed_de_line
339
+ else
340
+ super
341
+ end
342
+ @data['DE']
343
+ end
333
344
 
334
345
  # returns gene names in the GN line.
335
346
  #
@@ -1197,9 +1208,124 @@ class UniProtKB < EMBLDB
1197
1208
  return ft[feature_key] if feature_key
1198
1209
  return @data['FT'] if @data['FT']
1199
1210
 
1211
+ ftstr = get('FT')
1212
+ ftlines = ftstr.split("\n")
1213
+ for i in 0..10 do
1214
+ if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ ftlines[i] &&
1215
+ /^FT +\/([^\s\=]+)(?:\=(\")?(.+)(\")?)?\s*$/ =~ ftlines[i+1] then
1216
+ fmt_2019_11 = true
1217
+ break #for i
1218
+ end
1219
+ end #for i
1220
+
1221
+ hash = if fmt_2019_11 then
1222
+ ft_2019_11_parser(ftlines)
1223
+ else
1224
+ ft_legacy_parser(ftlines)
1225
+ end
1226
+ @data['FT'] = hash
1227
+ end
1228
+
1229
+ # FT parser since UniProt release 2019_11
1230
+ # https://www.uniprot.org/release-notes/2019-12-18-release#text%5Fft
1231
+ def ft_2019_11_parser(ftlines)
1232
+ table = []
1233
+ cur_ft = nil
1234
+ cont = false
1235
+ begin
1236
+ ftlines.each do |line|
1237
+ if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ line
1238
+ cur_ft = [$1.to_s, # Feature Name
1239
+ "#{$2}#{$4}", # From
1240
+ $5.to_s, # To
1241
+ [] # Qualifiers
1242
+ ]
1243
+ table.push cur_ft
1244
+ cont = false
1245
+ elsif cont && /^FT {19}/ =~ line
1246
+ str = $'
1247
+ str.rstrip!
1248
+ orig = cur_ft[3][-1][1].to_s
1249
+ if orig.size > 0 && orig[-1] != ' ' &&
1250
+ str.length > 0 && str[0] != ' ' then
1251
+ orig.concat ' '
1252
+ end
1253
+ orig.concat str
1254
+ cur_ft[3][-1][1] = orig
1255
+ if cont && orig[-1] == "\""
1256
+ orig.chop!
1257
+ cont = false
1258
+ end
1259
+ elsif /^FT +\/([^\s\=]+)(?:\=(\")?(.+))?\s*$/ =~ line
1260
+ key = $1
1261
+ val = $3
1262
+ val.rstrip!
1263
+ cur_ft[3].push [ key, val ]
1264
+ cont = false
1265
+ if $2 == "\""
1266
+ if val.to_s[-1] == "\""
1267
+ val.chop!
1268
+ else
1269
+ cont = true
1270
+ end
1271
+ end
1272
+ else
1273
+ raise "FT parse error: #{line.inspect}"
1274
+ end
1275
+ end
1276
+
1277
+ hash = {}
1278
+ table.each do |feature|
1279
+ cur_h = {
1280
+ # Removing '<', '>' or '?' in FROM/TO endpoint.
1281
+ 'From' => feature[1].sub(/\D/, '').to_i,
1282
+ 'To' => feature[2].sub(/\D/, '').to_i,
1283
+ 'diff' => [],
1284
+ 'original' => feature
1285
+ }
1286
+ hash[feature[0]] ||= []
1287
+ hash[feature[0]].push cur_h
1288
+ feature[3].each do |a|
1289
+ case a[0]
1290
+ when 'From', 'To', 'Description', 'FTId', 'diff', 'original'
1291
+ ; # do nothing
1292
+ else
1293
+ cur_h[a[0]] = a[1]
1294
+ end
1295
+ end
1296
+ if cur_h["id"] then
1297
+ cur_h['FTId'] = cur_h['id']
1298
+ end
1299
+
1300
+ case feature[0]
1301
+ when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
1302
+ case cur_h['note'].to_s
1303
+ when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
1304
+ original_res = $1
1305
+ changed_res = $2
1306
+ original_res = original_res.gsub(/ /,'').strip
1307
+ chenged_res = changed_res.gsub(/ /,'').strip
1308
+ when /Missing/i
1309
+ original_res = seq.subseq(cur_h['From'],
1310
+ cur_h['To'])
1311
+ changed_res = ''
1312
+ end
1313
+ cur_h['diff'] = [original_res, chenged_res]
1314
+ end
1315
+ end
1316
+ rescue
1317
+ raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
1318
+ end
1319
+
1320
+ hash
1321
+ end
1322
+ private :ft_2019_11_parser
1323
+
1324
+ # FT parser for the format before Uniprot release 2019_11
1325
+ def ft_legacy_parser(ftlines)
1200
1326
  table = []
1201
1327
  begin
1202
- get('FT').split("\n").each do |line|
1328
+ ftlines.each do |line|
1203
1329
  if line =~ /^FT \w/
1204
1330
  feature = line.chomp.ljust(74)
1205
1331
  table << [feature[ 5..12].strip, # Feature Name
@@ -1256,10 +1382,9 @@ class UniProtKB < EMBLDB
1256
1382
  raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
1257
1383
  end
1258
1384
 
1259
- @data['FT'] = hash
1385
+ hash
1260
1386
  end
1261
-
1262
-
1387
+ private :ft_legacy_parser
1263
1388
 
1264
1389
  # returns a Hash of contents in the SQ lines.
1265
1390
  # * Bio::UniProtKBL#sq -> hsh
data/lib/bio/db/go.rb CHANGED
@@ -193,7 +193,7 @@ class GO
193
193
  # Delimiter
194
194
  RS = DELIMITER
195
195
 
196
- # Retruns an Array of parsed gene_association flatfile.
196
+ # Returns an Array of parsed gene_association flatfile.
197
197
  # Block is acceptable.
198
198
  def self.parser(str)
199
199
  if block_given?
@@ -226,7 +226,7 @@ class GO
226
226
  # Returns Db_Reference variable.
227
227
  attr_reader :db_reference # -> []
228
228
 
229
- # Retruns Evidence code variable.
229
+ # Returns Evidence code variable.
230
230
  attr_reader :evidence
231
231
 
232
232
  # Returns the entry is associated with this value.
@@ -303,6 +303,118 @@ module Common
303
303
  end
304
304
  alias splicing splice
305
305
 
306
+ #--
307
+ # Workaround for Ruby 3.0.0 incompatible changes
308
+ if ::RUBY_VERSION > "3"
309
+
310
+ # Acts almost the same as String#split.
311
+ def split(*arg)
312
+ if block_given?
313
+ super
314
+ else
315
+ ret = super(*arg)
316
+ ret.collect! { |x| self.class.new('').replace(x) }
317
+ ret
318
+ end
319
+ end
320
+
321
+ %w( * ljust rjust center ).each do |w|
322
+ module_eval %Q{
323
+ def #{w}(*arg)
324
+ self.class.new('').replace(super)
325
+ end
326
+ }
327
+ end
328
+
329
+ %w( chomp chop
330
+ delete delete_prefix delete_suffix
331
+ lstrip rstrip strip
332
+ reverse
333
+ squeeze
334
+ succ next
335
+ tr tr_s
336
+ capitalize upcase downcase swapcase
337
+ ).each do |w|
338
+ module_eval %Q{
339
+ def #{w}(*arg)
340
+ s = self.dup
341
+ s.#{w}!(*arg)
342
+ s
343
+ end
344
+ }
345
+ end
346
+
347
+ %w( sub gsub ).each do |w|
348
+ module_eval %Q{
349
+ def #{w}(*arg, &block)
350
+ s = self.dup
351
+ s.#{w}!(*arg, &block)
352
+ s
353
+ end
354
+ }
355
+ end
356
+
357
+ #Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
358
+ #(Title: Is it possible to implement gsub wrapper?)
359
+ %w( sub! gsub! ).each do |w|
360
+ module_eval %Q{
361
+ def #{w}(*arg, &block)
362
+ if block_given? then
363
+ super(*arg) do |m|
364
+ b = Thread.current[:_backref]
365
+ Thread.current[:_backref] = ::Regexp.last_match
366
+ block.binding.eval("$~ = Thread.current[:_backref]")
367
+ Thread.current[:_backref] = b
368
+ block.call(self.class.new('').replace(m))
369
+ end
370
+ else
371
+ super
372
+ end
373
+ end
374
+ }
375
+ end
376
+
377
+ %w( each_char each_grapheme_cluster each_line ).each do |w|
378
+ module_eval %Q{
379
+ def #{w}
380
+ if block_given?
381
+ super { |c| yield(self.class.new('').replace(c)) }
382
+ else
383
+ enum_for(:#{w})
384
+ end
385
+ end
386
+ }
387
+ end
388
+
389
+ %w( slice [] slice! ).each do |w|
390
+ module_eval %Q{
391
+ def #{w}(*arg)
392
+ r = super
393
+ r ? self.class.new('').replace(r) : r
394
+ end
395
+ }
396
+ end
397
+
398
+ %w( partition rpartition ).each do |w|
399
+ module_eval %Q{
400
+ def #{w}(sep)
401
+ r = super
402
+ if r.kind_of?(Array)
403
+ r[1] == sep ?
404
+ [ self.class.new('').replace(r[0]),
405
+ r[1],
406
+ self.class.new('').replace(r[2]) ] :
407
+ r.collect { |x| self.class.new('').replace(x) }
408
+ else
409
+ r
410
+ end
411
+ end
412
+ }
413
+ end
414
+ #++
415
+
416
+ end # if ::RUBY_VERSION > "3"
417
+
306
418
  end # Common
307
419
 
308
420
  end # Sequence
@@ -10,6 +10,7 @@
10
10
  #
11
11
 
12
12
  require 'erb'
13
+ require 'date'
13
14
 
14
15
  module Bio
15
16
 
data/lib/bio/tree.rb CHANGED
@@ -605,7 +605,7 @@ module Bio
605
605
  end
606
606
 
607
607
  # Gets path from node1 to node2.
608
- # Retruns an array of nodes, including node1 and node2.
608
+ # Returns an array of nodes, including node1 and node2.
609
609
  # If node1 and/or node2 do not exist, IndexError is raised.
610
610
  # If node1 and node2 are not connected, NoPathError is raised.
611
611
  # The result is unspecified for cyclic trees.
data/lib/bio/version.rb CHANGED
@@ -10,7 +10,7 @@
10
10
  module Bio
11
11
 
12
12
  # BioRuby version (Array containing Integer)
13
- BIORUBY_VERSION = [2, 0, 1].extend(Comparable).freeze
13
+ BIORUBY_VERSION = [2, 0, 4].extend(Comparable).freeze
14
14
 
15
15
  # Extra version specifier (String or nil).
16
16
  # Existence of the value indicates development version.