bio 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. data/ChangeLog +1712 -0
  2. data/KNOWN_ISSUES.rdoc +11 -1
  3. data/README.rdoc +3 -2
  4. data/RELEASE_NOTES.rdoc +65 -127
  5. data/bioruby.gemspec +38 -2
  6. data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
  7. data/doc/Tutorial.rd +74 -16
  8. data/doc/Tutorial.rd.html +68 -16
  9. data/lib/bio.rb +2 -0
  10. data/lib/bio/appl/clustalw/report.rb +18 -0
  11. data/lib/bio/appl/paml/codeml/report.rb +579 -21
  12. data/lib/bio/command.rb +149 -21
  13. data/lib/bio/db/aaindex.rb +11 -1
  14. data/lib/bio/db/embl/sptr.rb +1 -1
  15. data/lib/bio/db/fasta/defline.rb +7 -2
  16. data/lib/bio/db/fasta/qual.rb +24 -0
  17. data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
  18. data/lib/bio/db/fastq.rb +15 -0
  19. data/lib/bio/db/go.rb +2 -2
  20. data/lib/bio/db/kegg/common.rb +109 -5
  21. data/lib/bio/db/kegg/genes.rb +61 -15
  22. data/lib/bio/db/kegg/genome.rb +43 -38
  23. data/lib/bio/db/kegg/module.rb +158 -0
  24. data/lib/bio/db/kegg/orthology.rb +40 -1
  25. data/lib/bio/db/kegg/pathway.rb +254 -0
  26. data/lib/bio/db/medline.rb +6 -2
  27. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  28. data/lib/bio/location.rb +39 -0
  29. data/lib/bio/reference.rb +24 -0
  30. data/lib/bio/sequence.rb +2 -0
  31. data/lib/bio/sequence/adapter.rb +1 -0
  32. data/lib/bio/sequence/format.rb +14 -0
  33. data/lib/bio/sequence/sequence_masker.rb +95 -0
  34. data/lib/bio/tree.rb +4 -4
  35. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
  36. data/lib/bio/version.rb +1 -1
  37. data/setup.rb +5 -0
  38. data/test/data/KEGG/K02338.orthology +180 -52
  39. data/test/data/KEGG/M00118.module +44 -0
  40. data/test/data/KEGG/T00005.genome +140 -0
  41. data/test/data/KEGG/T00070.genome +34 -0
  42. data/test/data/KEGG/b0529.gene +47 -0
  43. data/test/data/KEGG/ec00072.pathway +23 -0
  44. data/test/data/KEGG/hsa00790.pathway +59 -0
  45. data/test/data/KEGG/ko00312.pathway +16 -0
  46. data/test/data/KEGG/map00030.pathway +37 -0
  47. data/test/data/KEGG/map00052.pathway +13 -0
  48. data/test/data/KEGG/rn00250.pathway +114 -0
  49. data/test/data/clustalw/example1.aln +58 -0
  50. data/test/data/go/selected_component.ontology +12 -0
  51. data/test/data/go/selected_gene_association.sgd +31 -0
  52. data/test/data/go/selected_wikipedia2go +13 -0
  53. data/test/data/medline/20146148_modified.medline +54 -0
  54. data/test/data/paml/codeml/models/aa.aln +26 -0
  55. data/test/data/paml/codeml/models/aa.dnd +13 -0
  56. data/test/data/paml/codeml/models/aa.ph +13 -0
  57. data/test/data/paml/codeml/models/alignment.phy +49 -0
  58. data/test/data/paml/codeml/models/results0-3.txt +312 -0
  59. data/test/data/paml/codeml/models/results7-8.txt +340 -0
  60. data/test/functional/bio/io/test_togows.rb +8 -8
  61. data/test/functional/bio/test_command.rb +7 -6
  62. data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
  63. data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
  64. data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
  65. data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
  66. data/test/unit/bio/db/embl/test_sptr.rb +1 -1
  67. data/test/unit/bio/db/fasta/test_defline.rb +160 -0
  68. data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
  69. data/test/unit/bio/db/kegg/test_genes.rb +281 -1
  70. data/test/unit/bio/db/kegg/test_genome.rb +408 -0
  71. data/test/unit/bio/db/kegg/test_module.rb +246 -0
  72. data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
  73. data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
  74. data/test/unit/bio/db/test_aaindex.rb +8 -7
  75. data/test/unit/bio/db/test_fastq.rb +36 -0
  76. data/test/unit/bio/db/test_go.rb +171 -0
  77. data/test/unit/bio/db/test_medline.rb +148 -0
  78. data/test/unit/bio/db/test_qual.rb +9 -2
  79. data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
  80. data/test/unit/bio/test_tree.rb +260 -1
  81. data/test/unit/bio/util/test_contingency_table.rb +7 -7
  82. metadata +53 -6
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # = bio/command.rb - general methods for external command execution
3
3
  #
4
- # Copyright:: Copyright (C) 2003-2008
4
+ # Copyright:: Copyright (C) 2003-2010
5
5
  # Naohisa Goto <ng@bioruby.org>,
6
6
  # Toshiaki Katayama <k@bioruby.org>
7
7
  # License:: The Ruby License
@@ -34,6 +34,59 @@ module Command
34
34
 
35
35
  module_function
36
36
 
37
+ # *CAUTION* Bio::Command INTERNAL USE ONLY.
38
+ # Users must NOT use the method.
39
+ # The method will be removed when it is not needed.
40
+ #
41
+ # Checks if the program is running on Microsoft Windows.
42
+ # If Windows, returns true. Otherwise, returns false.
43
+ # Note that Cygwin is not treated as Windows.
44
+ #
45
+ # Known issues:
46
+ # * It might make a mistake in minor platforms/architectures/interpreters.
47
+ # * When running JRuby on Cygwin, the result is unknown.
48
+ # ---
49
+ # *Returns*:: true or false
50
+ def windows_platform?
51
+ case RUBY_PLATFORM
52
+ when /(?:mswin|bccwin|mingw)(?:32|64)/i
53
+ true
54
+ when /java/i
55
+ # Reference: Redmine's platform.rb
56
+ # http://www.redmine.org/projects/redmine/repository/revisions/1753/entry/trunk/lib/redmine/platform.rb
57
+ if /windows/i =~ (ENV['OS'] || ENV['os']).to_s then
58
+ true
59
+ else
60
+ false
61
+ end
62
+ else
63
+ false
64
+ end
65
+ end
66
+ private_class_method :windows_platform?
67
+
68
+ # *CAUTION* Bio::Command INTERNAL USE ONLY.
69
+ # Users must NOT use the method.
70
+ # The method will be removed when it is not needed.
71
+ #
72
+ # Checks if the OS does not support fork(2) system call.
73
+ # When not supported, it returns true.
74
+ # When supported or unknown, it returns false or nil.
75
+ #
76
+ # Known issues:
77
+ # * It might make a mistake in minor platforms/architectures/interpreters.
78
+ # ---
79
+ # *Returns*:: true, false or nil.
80
+ def no_fork?
81
+ if (defined?(@@no_fork) && @@no_fork) or
82
+ windows_platform? or /java/i =~ RUBY_PLATFORM then
83
+ true
84
+ else
85
+ false
86
+ end
87
+ end
88
+ private_class_method :no_fork?
89
+
37
90
  # Escape special characters in command line string for cmd.exe on Windows.
38
91
  # ---
39
92
  # *Arguments*:
@@ -66,8 +119,7 @@ module Command
66
119
  # * (required) _str_: String
67
120
  # *Returns*:: String object
68
121
  def escape_shell(str)
69
- case RUBY_PLATFORM
70
- when /mswin32|bccwin32/
122
+ if windows_platform? then
71
123
  escape_shell_windows(str)
72
124
  else
73
125
  escape_shell_unix(str)
@@ -80,8 +132,7 @@ module Command
80
132
  # * (required) _ary_: Array containing String objects
81
133
  # *Returns*:: String object
82
134
  def make_command_line(ary)
83
- case RUBY_PLATFORM
84
- when /mswin32|bccwin32/
135
+ if windows_platform? then
85
136
  make_command_line_windows(ary)
86
137
  else
87
138
  make_command_line_unix(ary)
@@ -130,8 +181,8 @@ module Command
130
181
  [ arg0 ]
131
182
  end
132
183
 
133
- # Executes the program. Automatically select popen for Windows
134
- # environment and fork for the others.
184
+ # Executes the program. Automatically select popen for Ruby 1.9 or
185
+ # Windows environment and fork for the others.
135
186
  # A block must be given. An IO object is passed to the block.
136
187
  #
137
188
  # Available options:
@@ -143,27 +194,62 @@ module Command
143
194
  # * (optional) _options_: Hash
144
195
  # *Returns*:: (undefined)
145
196
  def call_command(cmd, options = {}, &block) #:yields: io
146
- case RUBY_PLATFORM
147
- when /mswin32|bccwin32/
197
+ if RUBY_VERSION >= "1.9.0" then
198
+ return call_command_popen(cmd, options, &block)
199
+ elsif no_fork? then
148
200
  call_command_popen(cmd, options, &block)
149
201
  else
150
- call_command_fork(cmd, options, &block)
202
+ begin
203
+ call_command_fork(cmd, options, &block)
204
+ rescue NotImplementedError
205
+ # fork(2) not implemented
206
+ @@no_fork = true
207
+ call_command_popen(cmd, options, &block)
208
+ end
151
209
  end
152
210
  end
153
211
 
212
+ # This method is internally called from the call_command method.
213
+ # In normal case, use call_command, and do not call this method directly.
214
+ #
154
215
  # Executes the program via IO.popen for OS which doesn't support fork.
155
216
  # A block must be given. An IO object is passed to the block.
217
+ #
218
+ # See the document of call_command for available options.
219
+ #
220
+ # Note for Ruby 1.8:
221
+ # In Ruby 1.8, shell unsafe characters are escaped.
222
+ # If inescapable characters exist, it raises RuntimeError.
223
+ # So, call_command_fork is normally recommended.
224
+ #
225
+ # Note for Ruby 1.9:
226
+ # In Ruby 1.9, call_command_popen is safe and robust enough, and is the
227
+ # recommended way, because IO.popen is improved to get a command-line
228
+ # as an array without calling shell.
229
+ #
156
230
  # ---
157
231
  # *Arguments*:
158
232
  # * (required) _cmd_: Array containing String objects
159
233
  # * (optional) _options_: Hash
160
234
  # *Returns*:: (undefined)
161
235
  def call_command_popen(cmd, options = {})
236
+ if RUBY_VERSION >= "1.9.0" then
237
+ # For Ruby 1.9 or later, using command line array with options.
238
+ dir = options[:chdir]
239
+ cmd = safe_command_line_array(cmd)
240
+ if dir then
241
+ cmd = cmd + [ { :chdir => dir } ]
242
+ end
243
+ r = IO.popen(cmd, "r+") do |io|
244
+ yield io
245
+ end
246
+ return r
247
+ end
248
+ # For Ruby 1.8, using command line string.
162
249
  str = make_command_line(cmd)
163
250
  # processing options
164
251
  if dir = options[:chdir] then
165
- case RUBY_PLATFORM
166
- when /mswin32|bccwin32/
252
+ if windows_platform?
167
253
  # Unix-like dir separator is changed to Windows dir separator
168
254
  # by using String#gsub.
169
255
  dirstr = dir.gsub(/\//, "\\")
@@ -182,11 +268,24 @@ module Command
182
268
  end
183
269
  end
184
270
 
271
+ # This method is internally called from the call_command method.
272
+ # In normal case, use call_command, and do not call this method directly.
273
+ #
185
274
  # Executes the program via fork (by using IO.popen("-")) and exec.
186
275
  # A block must be given. An IO object is passed to the block.
187
276
  #
188
- # From the view point of security, this method is recommended
189
- # rather than call_command_popen.
277
+ # See the document of call_command for available options.
278
+ #
279
+ # Note for Ruby 1.8:
280
+ # In Ruby 1.8, from the view point of security, this method is recommended
281
+ # rather than call_command_popen. However, this method might have problems
282
+ # with multi-threads.
283
+ #
284
+ # Note for Ruby 1.9:
285
+ # In Ruby 1.9, this method can not be used, because Thread.critical is
286
+ # removed. In Ruby 1.9, call_command_popen is safe and robust enough, and
287
+ # is the recommended way, because IO.popen is improved to get a
288
+ # command-line as an array without calling shell.
190
289
  #
191
290
  # ---
192
291
  # *Arguments*:
@@ -196,12 +295,17 @@ module Command
196
295
  def call_command_fork(cmd, options = {})
197
296
  dir = options[:chdir]
198
297
  cmd = safe_command_line_array(cmd)
298
+ begin
299
+ tc, Thread.critical, flag0, flag1 = Thread.critical, true, true, true
199
300
  IO.popen("-", "r+") do |io|
200
301
  if io then
201
302
  # parent
303
+ flag0, Thread.critical, flag1 = false, tc, false
202
304
  yield io
203
305
  else
204
306
  # child
307
+ Thread.critical = true # for safety, though already true
308
+ GC.disable
205
309
  # chdir to options[:chdir] if available
206
310
  begin
207
311
  Dir.chdir(dir) if dir
@@ -218,6 +322,11 @@ module Command
218
322
  Process.exit!(1)
219
323
  end
220
324
  end
325
+ ensure
326
+ # When IO.popen("-") raises error, Thread.critical will be set here.
327
+ Thread.critical = tc if flag0 or flag1
328
+ #warn 'Thread.critical might have wrong value.' if flag0 != flag1
329
+ end
221
330
  end
222
331
 
223
332
  # Executes the program via Open3.popen3
@@ -240,7 +349,8 @@ module Command
240
349
  # waits the program termination, and returns the output data printed to the
241
350
  # standard output as a string.
242
351
  #
243
- # Automatically select popen for Windows environment and fork for the others.
352
+ # Automatically select popen for Ruby 1.9 or Windows environment and
353
+ # fork for the others.
244
354
  #
245
355
  # Available options:
246
356
  # :chdir => "path" : changes working directory to the specified path.
@@ -252,19 +362,32 @@ module Command
252
362
  # * (optional) _options_: Hash
253
363
  # *Returns*:: String or nil
254
364
  def query_command(cmd, query = nil, options = {})
255
- case RUBY_PLATFORM
256
- when /mswin32|bccwin32/
365
+ if RUBY_VERSION >= "1.9.0" then
366
+ return query_command_popen(cmd, query, options)
367
+ elsif no_fork? then
257
368
  query_command_popen(cmd, query, options)
258
369
  else
259
- query_command_fork(cmd, query, options)
370
+ begin
371
+ query_command_fork(cmd, query, options)
372
+ rescue NotImplementedError
373
+ # fork(2) not implemented
374
+ @@no_fork = true
375
+ query_command_fork(cmd, query, options)
376
+ end
260
377
  end
261
378
  end
262
379
 
380
+ # This method is internally called from the query_command method.
381
+ # In normal case, use query_command, and do not call this method directly.
382
+ #
263
383
  # Executes the program with the query (String) given to the standard input,
264
384
  # waits the program termination, and returns the output data printed to the
265
385
  # standard output as a string.
266
386
  #
267
- # IO.popen is used for OS which doesn't support fork.
387
+ # See the document of query_command for available options.
388
+ #
389
+ # See the document of call_command_popen for the security and Ruby
390
+ # version specific issues.
268
391
  #
269
392
  # ---
270
393
  # *Arguments*:
@@ -283,14 +406,19 @@ module Command
283
406
  ret
284
407
  end
285
408
 
409
+ # This method is internally called from the query_command method.
410
+ # In normal case, use query_command, and do not call this method directly.
411
+ #
286
412
  # Executes the program with the query (String) given to the standard input,
287
413
  # waits the program termination, and returns the output data printed to the
288
414
  # standard output as a string.
289
415
  #
290
416
  # Fork (by using IO.popen("-")) and exec is used to execute the program.
291
417
  #
292
- # From the view point of security, this method is recommended
293
- # rather than query_command_popen.
418
+ # See the document of query_command for available options.
419
+ #
420
+ # See the document of call_command_fork for the security and Ruby
421
+ # version specific issues.
294
422
  #
295
423
  # ---
296
424
  # *Arguments*:
@@ -254,7 +254,17 @@ module Bio
254
254
  label_data.each_line do |line|
255
255
  ma << line.strip.split(/\s+/).map {|x| x.to_f }
256
256
  end
257
- @data['matrix'] = Matrix[*ma]
257
+ ma_len = ma.size
258
+ ma.each do |row|
259
+ row_size = row.size
260
+ if row_size < ma_len
261
+ (row_size..ma_len-1).each do |i|
262
+ row[i] = ma[i][row_size-1]
263
+ end
264
+ end
265
+ end
266
+ mat = Matrix[*ma]
267
+ @data['matrix'] = mat
258
268
  end
259
269
  end
260
270
 
@@ -188,7 +188,7 @@ class SPTR < EMBLDB
188
188
  def gn
189
189
  unless @data['GN']
190
190
  case fetch('GN')
191
- when /Name=/,/ORFNames=/
191
+ when /Name=/,/ORFNames=/,/OrderedLocusNames=/,/Synonyms=/
192
192
  @data['GN'] = gn_uniprot_parser
193
193
  else
194
194
  @data['GN'] = gn_old_parser
@@ -126,10 +126,14 @@ module Bio
126
126
  # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
127
127
  #
128
128
  # * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
129
+ # (Dead link. Please find in http://web.archive.org/ ).
129
130
  # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
130
131
  #
131
- # * README.formatdb
132
- # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
132
+ # * Program Parameters for formatdb and fastacmd (by Tao Tao)
133
+ # http://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/formatdb_fastacmd.html#t1.1
134
+ #
135
+ # * Formatdb README
136
+ # ftp://ftp.ncbi.nih.gov/blast/documents/formatdb.html
133
137
  #
134
138
  class FastaDefline
135
139
 
@@ -140,6 +144,7 @@ module Bio
140
144
  'emb' => [ 'acc_version', 'locus' ], # EMBL
141
145
  'dbj' => [ 'acc_version', 'locus' ], # DDBJ
142
146
  'sp' => [ 'accession', 'entry_id' ], # SWISS-PROT
147
+ 'tr' => [ 'accession', 'entry_id' ], # TREMBL
143
148
  'pdb' => [ 'entry_id', 'chain' ], # PDB
144
149
  'bbs' => [ 'number' ], # GenInfo Backbone Id
145
150
  'gnl' => [ 'database' , 'entry_id' ], # General database identifier
@@ -95,6 +95,30 @@ module Bio
95
95
  data[n]
96
96
  end
97
97
 
98
+ # Returns the data as a Bio::Sequence object.
99
+ # In the returned sequence object, the length of the sequence is zero,
100
+ # and the numeric data is stored to the Bio::Sequence#quality_scores
101
+ # attribute.
102
+ #
103
+ # Because the meaning of the numeric data is unclear,
104
+ # Bio::Sequence#quality_score_type is not set by default.
105
+ #
106
+ # Note: If you modify the returned Bio::Sequence object,
107
+ # the sequence or definition in this FastaNumericFormat object
108
+ # might also be changed (but not always be changed)
109
+ # because of efficiency.
110
+ #
111
+ # ---
112
+ # *Arguments*:
113
+ # *Returns*:: (Bio::Sequence) sequence object
114
+ def to_biosequence
115
+ s = Bio::Sequence.adapter(self,
116
+ Bio::Sequence::Adapter::FastaNumericFormat)
117
+ s.seq = Bio::Sequence::Generic.new('')
118
+ s
119
+ end
120
+ alias to_seq to_biosequence
121
+
98
122
  undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen
99
123
 
100
124
  end #class FastaNumericFormat
@@ -0,0 +1,29 @@
1
+ #
2
+ # = bio/db/fasta/qual_to_biosequence.rb - Bio::FastaNumericFormat to Bio::Sequence adapter module
3
+ #
4
+ # Copyright:: Copyright (C) 2010
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+
9
+ require 'bio/sequence'
10
+ require 'bio/sequence/adapter'
11
+ require 'bio/db/fasta/fasta_to_biosequence'
12
+
13
+ # Internal use only. Normal users should not use this module.
14
+ #
15
+ # Bio::FastaNumericFormat to Bio::Sequence adapter module.
16
+ # It is internally used in Bio::FastaNumericFormat#to_biosequence.
17
+ #
18
+ module Bio::Sequence::Adapter::FastaNumericFormat
19
+
20
+ extend Bio::Sequence::Adapter
21
+
22
+ include Bio::Sequence::Adapter::FastaFormat
23
+
24
+ private
25
+
26
+ def_biosequence_adapter :quality_scores, :data
27
+
28
+ end #module Bio::Sequence::Adapter::FastaNumericFormat
29
+
@@ -640,6 +640,21 @@ class Fastq
640
640
  Bio::Sequence.adapter(self, Bio::Sequence::Adapter::Fastq)
641
641
  end
642
642
 
643
+ # Masks low quality sequence regions.
644
+ # For each sequence position, if the quality score is smaller than
645
+ # the threshold, the sequence in the position is replaced with
646
+ # <em>mask_char</em>.
647
+ #
648
+ # Note: This method does not take quality_score_type into account.
649
+ # ---
650
+ # *Arguments*:
651
+ # * (required) <em>threshold</em> : (Numeric) threshold
652
+ # * (optional) <em>mask_char</em> : (String) character used for masking
653
+ # *Returns*:: Bio::Sequence object
654
+ def mask(threshold, mask_char = 'n')
655
+ to_biosequence.mask_with_quality_score(threshold, mask_char)
656
+ end
657
+
643
658
  end #class Fastq
644
659
 
645
660
  end #module Bio
@@ -293,8 +293,8 @@ class GO
293
293
 
294
294
  # Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
295
295
  def to_str
296
- return [@db, @db_object_id, @db_object_symbol, @quialifier, @goid,
297
- @qualifier.join("|"), @evidence, @with.join("|"), @aspect,
296
+ return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
297
+ @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
298
298
  @db_object_name, @db_object_synonym.join("|"), @db_object_type,
299
299
  @taxon, @date, @assigned_by].join("\t")
300
300
  end
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # = bio/db/kegg/common.rb - Common methods for KEGG database classes
3
3
  #
4
- # Copyright:: Copyright (C) 2003-2007 Toshiaki Katayama <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001-2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
6
6
  # Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
7
7
  # License:: The Ruby License
@@ -23,6 +23,69 @@ class KEGG
23
23
  # Namespace for methods commonly used in the Bio::KEGG::* classes.
24
24
  module Common
25
25
 
26
+ # The module provides references method.
27
+ module References
28
+ # REFERENCE -- Returns contents of the REFERENCE records as an Array of
29
+ # Bio::Reference objects.
30
+ def references
31
+ unless @data['REFERENCE']
32
+ ary = []
33
+ toptag2array(get('REFERENCE')).each do |ref|
34
+ hash = Hash.new
35
+ subtag2array(ref).each do |field|
36
+ case tag_get(field)
37
+ when /REFERENCE/
38
+ cmnt = tag_cut(field).chomp
39
+ if /^\s*PMID\:(\d+)\s*/ =~ cmnt then
40
+ hash['pubmed'] = $1
41
+ cmnt = $'
42
+ end
43
+ if cmnt and !cmnt.empty? then
44
+ hash['comments'] ||= []
45
+ hash['comments'].push(cmnt)
46
+ end
47
+ when /AUTHORS/
48
+ authors = truncate(tag_cut(field))
49
+ authors = authors.split(/\, /)
50
+ authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
51
+ authors = authors.flatten.map { |a| a.sub(',', ', ') }
52
+ hash['authors'] = authors
53
+ when /TITLE/
54
+ hash['title'] = truncate(tag_cut(field))
55
+ when /JOURNAL/
56
+ journal = truncate(tag_cut(field))
57
+ case journal
58
+ # KEGG style
59
+ when /(.*) (\d*(?:\([^\)]+\))?)\:(\d+\-\d+) \((\d+)\)$/
60
+ hash['journal'] = $1
61
+ hash['volume'] = $2
62
+ hash['pages'] = $3
63
+ hash['year'] = $4
64
+ # old KEGG style
65
+ when /(.*) (\d+):(\d+\-\d+) \((\d+)\) \[UI:(\d+)\]$/
66
+ hash['journal'] = $1
67
+ hash['volume'] = $2
68
+ hash['pages'] = $3
69
+ hash['year'] = $4
70
+ hash['medline'] = $5
71
+ # Only journal name and year are available
72
+ when /(.*) \((\d+)\)$/
73
+ hash['journal'] = $1
74
+ hash['year'] = $2
75
+ else
76
+ hash['journal'] = journal
77
+ end
78
+ end
79
+ end
80
+ ary.push(Reference.new(hash))
81
+ end
82
+ @data['REFERENCE'] = ary #.extend(Bio::References::BackwardCompatibility)
83
+
84
+ end
85
+ @data['REFERENCE']
86
+ end
87
+ end #module References
88
+
26
89
  # The module providing dblinks_as_hash methods.
27
90
  #
28
91
  # Bio::KEGG::* internal use only.
@@ -54,7 +117,8 @@ class KEGG
54
117
  unless defined? @pathways_as_hash then
55
118
  hash = {}
56
119
  pathways_as_strings.each do |line|
57
- sign, entry_id, name = line.split(/\s+/, 3)
120
+ line = line.sub(/\APATH\:\s+/, '')
121
+ entry_id, name = line.split(/\s+/, 2)
58
122
  hash[entry_id] = name
59
123
  end
60
124
  @pathways_as_hash = hash
@@ -72,9 +136,9 @@ class KEGG
72
136
  def orthologs_as_hash
73
137
  unless defined? @orthologs_as_hash
74
138
  kos = {}
75
- orthologs_as_strings.each do |ko|
76
- entry = ko.scan(/K[0-9]{5}/)[0]
77
- sign, entry_id, definition = ko.split(/\s+/, 3)
139
+ orthologs_as_strings.each do |line|
140
+ ko = line.sub(/\AKO\:\s+/, '')
141
+ entry_id, definition = ko.split(/\s+/, 2)
78
142
  kos[entry_id] = definition
79
143
  end
80
144
  @orthologs_as_hash = kos
@@ -106,6 +170,46 @@ class KEGG
106
170
  end
107
171
  end #module GenesAsHash
108
172
 
173
+ # This module provides modules_as_hash method.
174
+ #
175
+ # Bio::KEGG::* internal use only.
176
+ module ModulesAsHash
177
+ # Returns MODULE field as a Hash.
178
+ # Each key of the hash is KEGG MODULE ID,
179
+ # and each value is the name of the Pathway Module.
180
+ # ---
181
+ # *Returns*:: Hash
182
+ def modules_as_hash
183
+ unless defined? @modules_s_as_hash then
184
+ hash = {}
185
+ modules_as_strings.each do |line|
186
+ entry_id, name = line.split(/\s+/, 2)
187
+ hash[entry_id] = name
188
+ end
189
+ @modules_as_hash = hash
190
+ end
191
+ @modules_as_hash
192
+ end
193
+ end #module ModulesAsHash
194
+
195
+ # This module provides strings_as_hash private method.
196
+ #
197
+ # Bio::KEGG::* internal use only.
198
+ module StringsAsHash
199
+ # (Private) Creates a hash from lines.
200
+ # Each line consists of two components, ID and description,
201
+ # separated with spaces. IDs must be unique with each other.
202
+ def strings_as_hash(lines)
203
+ hash = {}
204
+ lines.each do |line|
205
+ entry_id, definition = line.split(/\s+/, 2)
206
+ hash[entry_id] = definition
207
+ end
208
+ return hash
209
+ end
210
+ private :strings_as_hash
211
+ end #module StringsAsHash
212
+
109
213
  end #module Common
110
214
  end #class KEGG
111
215
  end #module Bio