bio 2.0.1 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
174
174
  #
175
175
  # http://www.uniprot.org/docs/sp_news.htm
176
176
  def parse_DE_line_rel14(str)
177
- # Retruns if it is not the new format since Rel.14
177
+ # Returns if it is not the new format since Rel.14
178
178
  return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
179
179
  ret = []
180
180
  cur = nil
@@ -248,9 +248,8 @@ class UniProtKB < EMBLDB
248
248
  # SYNONYM >=0
249
249
  # CONTAINS >=0
250
250
  def protein_name
251
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
252
- parsed_de_line = @data['DE']
253
- if parsed_de_line then
251
+ parsed_de_line = self.de
252
+ if parsed_de_line.kind_of?(Array) then
254
253
  # since UniProtKB release 14.0 of 22-Jul-2008
255
254
  name = nil
256
255
  parsed_de_line.each do |a|
@@ -275,7 +274,6 @@ class UniProtKB < EMBLDB
275
274
  return name
276
275
  end
277
276
 
278
-
279
277
  # returns synonyms (unofficial and/or alternative names).
280
278
  # Returns an Array containing String objects.
281
279
  #
@@ -292,9 +290,8 @@ class UniProtKB < EMBLDB
292
290
  # synonyms are each placed in () following the official name on the DE line.
293
291
  def synonyms
294
292
  ary = Array.new
295
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
296
- parsed_de_line = @data['DE']
297
- if parsed_de_line then
293
+ parsed_de_line = self.de
294
+ if parsed_de_line.kind_of?(Array) then
298
295
  # since UniProtKB release 14.0 of 22-Jul-2008
299
296
  parsed_de_line.each do |a|
300
297
  case a[0]
@@ -330,6 +327,20 @@ class UniProtKB < EMBLDB
330
327
  return ary
331
328
  end
332
329
 
330
+ # Returns an Array (for new format since rel 14)
331
+ # or a String (for old format before rel 14) for the DE line.
332
+ #
333
+ def de
334
+ return @data['DE'] if @data['DE']
335
+ parsed_de_line = parse_DE_line_rel14(get('DE'))
336
+ case parsed_de_line
337
+ when Array # new format since rel14
338
+ @data['DE'] ||= parsed_de_line
339
+ else
340
+ super
341
+ end
342
+ @data['DE']
343
+ end
333
344
 
334
345
  # returns gene names in the GN line.
335
346
  #
@@ -1197,9 +1208,124 @@ class UniProtKB < EMBLDB
1197
1208
  return ft[feature_key] if feature_key
1198
1209
  return @data['FT'] if @data['FT']
1199
1210
 
1211
+ ftstr = get('FT')
1212
+ ftlines = ftstr.split("\n")
1213
+ for i in 0..10 do
1214
+ if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ ftlines[i] &&
1215
+ /^FT +\/([^\s\=]+)(?:\=(\")?(.+)(\")?)?\s*$/ =~ ftlines[i+1] then
1216
+ fmt_2019_11 = true
1217
+ break #for i
1218
+ end
1219
+ end #for i
1220
+
1221
+ hash = if fmt_2019_11 then
1222
+ ft_2019_11_parser(ftlines)
1223
+ else
1224
+ ft_legacy_parser(ftlines)
1225
+ end
1226
+ @data['FT'] = hash
1227
+ end
1228
+
1229
+ # FT parser since UniProt release 2019_11
1230
+ # https://www.uniprot.org/release-notes/2019-12-18-release#text%5Fft
1231
+ def ft_2019_11_parser(ftlines)
1232
+ table = []
1233
+ cur_ft = nil
1234
+ cont = false
1235
+ begin
1236
+ ftlines.each do |line|
1237
+ if /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ line
1238
+ cur_ft = [$1.to_s, # Feature Name
1239
+ "#{$2}#{$4}", # From
1240
+ $5.to_s, # To
1241
+ [] # Qualifiers
1242
+ ]
1243
+ table.push cur_ft
1244
+ cont = false
1245
+ elsif cont && /^FT {19}/ =~ line
1246
+ str = $'
1247
+ str.rstrip!
1248
+ orig = cur_ft[3][-1][1].to_s
1249
+ if orig.size > 0 && orig[-1] != ' ' &&
1250
+ str.length > 0 && str[0] != ' ' then
1251
+ orig.concat ' '
1252
+ end
1253
+ orig.concat str
1254
+ cur_ft[3][-1][1] = orig
1255
+ if cont && orig[-1] == "\""
1256
+ orig.chop!
1257
+ cont = false
1258
+ end
1259
+ elsif /^FT +\/([^\s\=]+)(?:\=(\")?(.+))?\s*$/ =~ line
1260
+ key = $1
1261
+ val = $3
1262
+ val.rstrip!
1263
+ cur_ft[3].push [ key, val ]
1264
+ cont = false
1265
+ if $2 == "\""
1266
+ if val.to_s[-1] == "\""
1267
+ val.chop!
1268
+ else
1269
+ cont = true
1270
+ end
1271
+ end
1272
+ else
1273
+ raise "FT parse error: #{line.inspect}"
1274
+ end
1275
+ end
1276
+
1277
+ hash = {}
1278
+ table.each do |feature|
1279
+ cur_h = {
1280
+ # Removing '<', '>' or '?' in FROM/TO endopoint.
1281
+ 'From' => feature[1].sub(/\D/, '').to_i,
1282
+ 'To' => feature[2].sub(/\D/, '').to_i,
1283
+ 'diff' => [],
1284
+ 'original' => feature
1285
+ }
1286
+ hash[feature[0]] ||= []
1287
+ hash[feature[0]].push cur_h
1288
+ feature[3].each do |a|
1289
+ case a[0]
1290
+ when 'From', 'To', 'Description', 'FTId', 'diff', 'original'
1291
+ ; # do nothing
1292
+ else
1293
+ cur_h[a[0]] = a[1]
1294
+ end
1295
+ end
1296
+ if cur_h["id"] then
1297
+ cur_h['FTId'] = cur_h['id']
1298
+ end
1299
+
1300
+ case feature[0]
1301
+ when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
1302
+ case cur_h['note'].to_s
1303
+ when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
1304
+ original_res = $1
1305
+ changed_res = $2
1306
+ original_res = original_res.gsub(/ /,'').strip
1307
+ chenged_res = changed_res.gsub(/ /,'').strip
1308
+ when /Missing/i
1309
+ original_res = seq.subseq(cur_h['From'],
1310
+ cur_h['To'])
1311
+ changed_res = ''
1312
+ end
1313
+ cur_h['diff'] = [original_res, chenged_res]
1314
+ end
1315
+ end
1316
+ rescue
1317
+ raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
1318
+ end
1319
+
1320
+ hash
1321
+ end
1322
+ private :ft_2019_11_parser
1323
+
1324
+ # FT parser for the format before Uniprot release 2019_11
1325
+ def ft_legacy_parser(ftlines)
1200
1326
  table = []
1201
1327
  begin
1202
- get('FT').split("\n").each do |line|
1328
+ ftlines.each do |line|
1203
1329
  if line =~ /^FT \w/
1204
1330
  feature = line.chomp.ljust(74)
1205
1331
  table << [feature[ 5..12].strip, # Feature Name
@@ -1256,10 +1382,9 @@ class UniProtKB < EMBLDB
1256
1382
  raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
1257
1383
  end
1258
1384
 
1259
- @data['FT'] = hash
1385
+ hash
1260
1386
  end
1261
-
1262
-
1387
+ private :ft_legacy_parser
1263
1388
 
1264
1389
  # returns a Hash of contents in the SQ lines.
1265
1390
  # * Bio::UniProtKB#sq -> hsh
data/lib/bio/db/go.rb CHANGED
@@ -193,7 +193,7 @@ class GO
193
193
  # Delimiter
194
194
  RS = DELIMITER
195
195
 
196
- # Retruns an Array of parsed gene_association flatfile.
196
+ # Returns an Array of parsed gene_association flatfile.
197
197
  # Block is acceptable.
198
198
  def self.parser(str)
199
199
  if block_given?
@@ -226,7 +226,7 @@ class GO
226
226
  # Returns Db_Reference variable.
227
227
  attr_reader :db_reference # -> []
228
228
 
229
- # Retruns Evidence code variable.
229
+ # Returns Evidence code variable.
230
230
  attr_reader :evidence
231
231
 
232
232
  # Returns the entry is associated with this value.
@@ -303,6 +303,118 @@ module Common
303
303
  end
304
304
  alias splicing splice
305
305
 
306
+ #--
307
+ # Workaround for Ruby 3.0.0 incompatible changes
308
+ if ::RUBY_VERSION > "3"
309
+
310
+ # Acts almost the same as String#split.
311
+ def split(*arg)
312
+ if block_given?
313
+ super
314
+ else
315
+ ret = super(*arg)
316
+ ret.collect! { |x| self.class.new('').replace(x) }
317
+ ret
318
+ end
319
+ end
320
+
321
+ %w( * ljust rjust center ).each do |w|
322
+ module_eval %Q{
323
+ def #{w}(*arg)
324
+ self.class.new('').replace(super)
325
+ end
326
+ }
327
+ end
328
+
329
+ %w( chomp chop
330
+ delete delete_prefix delete_suffix
331
+ lstrip rstrip strip
332
+ reverse
333
+ squeeze
334
+ succ next
335
+ tr tr_s
336
+ capitalize upcase downcase swapcase
337
+ ).each do |w|
338
+ module_eval %Q{
339
+ def #{w}(*arg)
340
+ s = self.dup
341
+ s.#{w}!(*arg)
342
+ s
343
+ end
344
+ }
345
+ end
346
+
347
+ %w( sub gsub ).each do |w|
348
+ module_eval %Q{
349
+ def #{w}(*arg, &block)
350
+ s = self.dup
351
+ s.#{w}!(*arg, &block)
352
+ s
353
+ end
354
+ }
355
+ end
356
+
357
+ #Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
358
+ #(Title: Is it possible to implement gsub wrapper?)
359
+ %w( sub! gsub! ).each do |w|
360
+ module_eval %Q{
361
+ def #{w}(*arg, &block)
362
+ if block_given? then
363
+ super(*arg) do |m|
364
+ b = Thread.current[:_backref]
365
+ Thread.current[:_backref] = ::Regexp.last_match
366
+ block.binding.eval("$~ = Thread.current[:_backref]")
367
+ Thread.current[:_backref] = b
368
+ block.call(self.class.new('').replace(m))
369
+ end
370
+ else
371
+ super
372
+ end
373
+ end
374
+ }
375
+ end
376
+
377
+ %w( each_char each_grapheme_cluster each_line ).each do |w|
378
+ module_eval %Q{
379
+ def #{w}
380
+ if block_given?
381
+ super { |c| yield(self.class.new('').replace(c)) }
382
+ else
383
+ enum_for(:#{w})
384
+ end
385
+ end
386
+ }
387
+ end
388
+
389
+ %w( slice [] slice! ).each do |w|
390
+ module_eval %Q{
391
+ def #{w}(*arg)
392
+ r = super
393
+ r ? self.class.new('').replace(r) : r
394
+ end
395
+ }
396
+ end
397
+
398
+ %w( partition rpartition ).each do |w|
399
+ module_eval %Q{
400
+ def #{w}(sep)
401
+ r = super
402
+ if r.kind_of?(Array)
403
+ r[1] == sep ?
404
+ [ self.class.new('').replace(r[0]),
405
+ r[1],
406
+ self.class.new('').replace(r[2]) ] :
407
+ r.collect { |x| self.class.new('').replace(x) }
408
+ else
409
+ r
410
+ end
411
+ end
412
+ }
413
+ end
414
+ #++
415
+
416
+ end # if ::RUBY_VERSION > "3"
417
+
306
418
  end # Common
307
419
 
308
420
  end # Sequence
@@ -10,6 +10,7 @@
10
10
  #
11
11
 
12
12
  require 'erb'
13
+ require 'date'
13
14
 
14
15
  module Bio
15
16
 
data/lib/bio/tree.rb CHANGED
@@ -605,7 +605,7 @@ module Bio
605
605
  end
606
606
 
607
607
  # Gets path from node1 to node2.
608
- # Retruns an array of nodes, including node1 and node2.
608
+ # Returns an array of nodes, including node1 and node2.
609
609
  # If node1 and/or node2 do not exist, IndexError is raised.
610
610
  # If node1 and node2 are not connected, NoPathError is raised.
611
611
  # The result is unspecified for cyclic trees.
data/lib/bio/version.rb CHANGED
@@ -10,7 +10,7 @@
10
10
  module Bio
11
11
 
12
12
  # BioRuby version (Array containing Integer)
13
- BIORUBY_VERSION = [2, 0, 1].extend(Comparable).freeze
13
+ BIORUBY_VERSION = [2, 0, 4].extend(Comparable).freeze
14
14
 
15
15
  # Extra version specifier (String or nil).
16
16
  # Existence of the value indicates development version.