bio 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. data/ChangeLog +1712 -0
  2. data/KNOWN_ISSUES.rdoc +11 -1
  3. data/README.rdoc +3 -2
  4. data/RELEASE_NOTES.rdoc +65 -127
  5. data/bioruby.gemspec +38 -2
  6. data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
  7. data/doc/Tutorial.rd +74 -16
  8. data/doc/Tutorial.rd.html +68 -16
  9. data/lib/bio.rb +2 -0
  10. data/lib/bio/appl/clustalw/report.rb +18 -0
  11. data/lib/bio/appl/paml/codeml/report.rb +579 -21
  12. data/lib/bio/command.rb +149 -21
  13. data/lib/bio/db/aaindex.rb +11 -1
  14. data/lib/bio/db/embl/sptr.rb +1 -1
  15. data/lib/bio/db/fasta/defline.rb +7 -2
  16. data/lib/bio/db/fasta/qual.rb +24 -0
  17. data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
  18. data/lib/bio/db/fastq.rb +15 -0
  19. data/lib/bio/db/go.rb +2 -2
  20. data/lib/bio/db/kegg/common.rb +109 -5
  21. data/lib/bio/db/kegg/genes.rb +61 -15
  22. data/lib/bio/db/kegg/genome.rb +43 -38
  23. data/lib/bio/db/kegg/module.rb +158 -0
  24. data/lib/bio/db/kegg/orthology.rb +40 -1
  25. data/lib/bio/db/kegg/pathway.rb +254 -0
  26. data/lib/bio/db/medline.rb +6 -2
  27. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  28. data/lib/bio/location.rb +39 -0
  29. data/lib/bio/reference.rb +24 -0
  30. data/lib/bio/sequence.rb +2 -0
  31. data/lib/bio/sequence/adapter.rb +1 -0
  32. data/lib/bio/sequence/format.rb +14 -0
  33. data/lib/bio/sequence/sequence_masker.rb +95 -0
  34. data/lib/bio/tree.rb +4 -4
  35. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
  36. data/lib/bio/version.rb +1 -1
  37. data/setup.rb +5 -0
  38. data/test/data/KEGG/K02338.orthology +180 -52
  39. data/test/data/KEGG/M00118.module +44 -0
  40. data/test/data/KEGG/T00005.genome +140 -0
  41. data/test/data/KEGG/T00070.genome +34 -0
  42. data/test/data/KEGG/b0529.gene +47 -0
  43. data/test/data/KEGG/ec00072.pathway +23 -0
  44. data/test/data/KEGG/hsa00790.pathway +59 -0
  45. data/test/data/KEGG/ko00312.pathway +16 -0
  46. data/test/data/KEGG/map00030.pathway +37 -0
  47. data/test/data/KEGG/map00052.pathway +13 -0
  48. data/test/data/KEGG/rn00250.pathway +114 -0
  49. data/test/data/clustalw/example1.aln +58 -0
  50. data/test/data/go/selected_component.ontology +12 -0
  51. data/test/data/go/selected_gene_association.sgd +31 -0
  52. data/test/data/go/selected_wikipedia2go +13 -0
  53. data/test/data/medline/20146148_modified.medline +54 -0
  54. data/test/data/paml/codeml/models/aa.aln +26 -0
  55. data/test/data/paml/codeml/models/aa.dnd +13 -0
  56. data/test/data/paml/codeml/models/aa.ph +13 -0
  57. data/test/data/paml/codeml/models/alignment.phy +49 -0
  58. data/test/data/paml/codeml/models/results0-3.txt +312 -0
  59. data/test/data/paml/codeml/models/results7-8.txt +340 -0
  60. data/test/functional/bio/io/test_togows.rb +8 -8
  61. data/test/functional/bio/test_command.rb +7 -6
  62. data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
  63. data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
  64. data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
  65. data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
  66. data/test/unit/bio/db/embl/test_sptr.rb +1 -1
  67. data/test/unit/bio/db/fasta/test_defline.rb +160 -0
  68. data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
  69. data/test/unit/bio/db/kegg/test_genes.rb +281 -1
  70. data/test/unit/bio/db/kegg/test_genome.rb +408 -0
  71. data/test/unit/bio/db/kegg/test_module.rb +246 -0
  72. data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
  73. data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
  74. data/test/unit/bio/db/test_aaindex.rb +8 -7
  75. data/test/unit/bio/db/test_fastq.rb +36 -0
  76. data/test/unit/bio/db/test_go.rb +171 -0
  77. data/test/unit/bio/db/test_medline.rb +148 -0
  78. data/test/unit/bio/db/test_qual.rb +9 -2
  79. data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
  80. data/test/unit/bio/test_tree.rb +260 -1
  81. data/test/unit/bio/util/test_contingency_table.rb +7 -7
  82. metadata +53 -6
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # = bio/command.rb - general methods for external command execution
3
3
  #
4
- # Copyright:: Copyright (C) 2003-2008
4
+ # Copyright:: Copyright (C) 2003-2010
5
5
  # Naohisa Goto <ng@bioruby.org>,
6
6
  # Toshiaki Katayama <k@bioruby.org>
7
7
  # License:: The Ruby License
@@ -34,6 +34,59 @@ module Command
34
34
 
35
35
  module_function
36
36
 
37
# *CAUTION* Bio::Command INTERNAL USE ONLY.
# Users must NOT call this method; it will be removed once it is
# no longer needed.
#
# Tells whether the interpreter is running on Microsoft Windows,
# judging from RUBY_PLATFORM and, when RUBY_PLATFORM mentions "java",
# from the OS environment variable.
# Note that Cygwin is not treated as Windows.
#
# Known issues:
# * The result may be wrong on minor platforms/architectures/interpreters.
# * When running JRuby on Cygwin, the result is unknown.
# ---
# *Returns*:: true or false
def windows_platform?
  # Native Windows builds of Ruby (mswin, bccwin, mingw; 32 or 64).
  return true if /(?:mswin|bccwin|mingw)(?:32|64)/i =~ RUBY_PLATFORM
  # Anything that is not a "java" platform is regarded as non-Windows.
  return false unless /java/i =~ RUBY_PLATFORM
  # On a "java" platform the OS environment variable decides.
  # Reference: Redmine's platform.rb
  # http://www.redmine.org/projects/redmine/repository/revisions/1753/entry/trunk/lib/redmine/platform.rb
  os_name = (ENV['OS'] || ENV['os']).to_s
  /windows/i =~ os_name ? true : false
end
66
+ private_class_method :windows_platform?
67
+
68
# *CAUTION* Bio::Command INTERNAL USE ONLY.
# Users must NOT call this method; it will be removed once it is
# no longer needed.
#
# Tells whether the OS is regarded as lacking the fork(2) system call.
# Returns true when @@no_fork has been set to a true value, when
# windows_platform? is true, or when RUBY_PLATFORM mentions "java".
# Otherwise (supported or unknown) returns false.
#
# Known issues:
# * The result may be wrong on minor platforms/architectures/interpreters.
# ---
# *Returns*:: true or false
def no_fork?
  # @@no_fork is set once a fork attempt raised NotImplementedError.
  return true if defined?(@@no_fork) && @@no_fork
  return true if windows_platform?
  /java/i =~ RUBY_PLATFORM ? true : false
end
88
+ private_class_method :no_fork?
89
+
37
90
  # Escape special characters in command line string for cmd.exe on Windows.
38
91
  # ---
39
92
  # *Arguments*:
@@ -66,8 +119,7 @@ module Command
66
119
  # * (required) _str_: String
67
120
  # *Returns*:: String object
68
121
  def escape_shell(str)
69
- case RUBY_PLATFORM
70
- when /mswin32|bccwin32/
122
+ if windows_platform? then
71
123
  escape_shell_windows(str)
72
124
  else
73
125
  escape_shell_unix(str)
@@ -80,8 +132,7 @@ module Command
80
132
  # * (required) _ary_: Array containing String objects
81
133
  # *Returns*:: String object
82
134
  def make_command_line(ary)
83
- case RUBY_PLATFORM
84
- when /mswin32|bccwin32/
135
+ if windows_platform? then
85
136
  make_command_line_windows(ary)
86
137
  else
87
138
  make_command_line_unix(ary)
@@ -130,8 +181,8 @@ module Command
130
181
  [ arg0 ]
131
182
  end
132
183
 
133
- # Executes the program. Automatically select popen for Windows
134
- # environment and fork for the others.
184
+ # Executes the program. Automatically select popen for Ruby 1.9 or
185
+ # Windows environment and fork for the others.
135
186
  # A block must be given. An IO object is passed to the block.
136
187
  #
137
188
  # Available options:
@@ -143,27 +194,62 @@ module Command
143
194
  # * (optional) _options_: Hash
144
195
  # *Returns*:: (undefined)
145
196
  def call_command(cmd, options = {}, &block) #:yields: io
146
- case RUBY_PLATFORM
147
- when /mswin32|bccwin32/
197
+ if RUBY_VERSION >= "1.9.0" then
198
+ return call_command_popen(cmd, options, &block)
199
+ elsif no_fork? then
148
200
  call_command_popen(cmd, options, &block)
149
201
  else
150
- call_command_fork(cmd, options, &block)
202
+ begin
203
+ call_command_fork(cmd, options, &block)
204
+ rescue NotImplementedError
205
+ # fork(2) not implemented
206
+ @@no_fork = true
207
+ call_command_popen(cmd, options, &block)
208
+ end
151
209
  end
152
210
  end
153
211
 
212
+ # This method is internally called from the call_command method.
213
+ # In normal case, use call_command, and do not call this method directly.
214
+ #
154
215
  # Executes the program via IO.popen for OS which doesn't support fork.
155
216
  # A block must be given. An IO object is passed to the block.
217
+ #
218
+ # See the document of call_command for available options.
219
+ #
220
+ # Note for Ruby 1.8:
221
+ # In Ruby 1.8, shell unsafe characters are escaped, but
222
+ # if inescapable characters exist, it raises RuntimeError.
223
+ # So, call_command_fork is normally recommended.
224
+ #
225
+ # Note for Ruby 1.9:
226
+ # In Ruby 1.9, call_command_popen is safe and robust enough, and is the
227
+ # recommended way, because IO.popen is improved to get a command-line
228
+ # as an array without calling shell.
229
+ #
156
230
  # ---
157
231
  # *Arguments*:
158
232
  # * (required) _cmd_: Array containing String objects
159
233
  # * (optional) _options_: Hash
160
234
  # *Returns*:: (undefined)
161
235
  def call_command_popen(cmd, options = {})
236
+ if RUBY_VERSION >= "1.9.0" then
237
+ # For Ruby 1.9 or later, using command line array with options.
238
+ dir = options[:chdir]
239
+ cmd = safe_command_line_array(cmd)
240
+ if dir then
241
+ cmd = cmd + [ { :chdir => dir } ]
242
+ end
243
+ r = IO.popen(cmd, "r+") do |io|
244
+ yield io
245
+ end
246
+ return r
247
+ end
248
+ # For Ruby 1.8, using command line string.
162
249
  str = make_command_line(cmd)
163
250
  # processing options
164
251
  if dir = options[:chdir] then
165
- case RUBY_PLATFORM
166
- when /mswin32|bccwin32/
252
+ if windows_platform?
167
253
  # Unix-like dir separator is changed to Windows dir separator
168
254
  # by using String#gsub.
169
255
  dirstr = dir.gsub(/\//, "\\")
@@ -182,11 +268,24 @@ module Command
182
268
  end
183
269
  end
184
270
 
271
+ # This method is internally called from the call_command method.
272
+ # In normal case, use call_command, and do not call this method directly.
273
+ #
185
274
  # Executes the program via fork (by using IO.popen("-")) and exec.
186
275
  # A block must be given. An IO object is passed to the block.
187
276
  #
188
- # From the view point of security, this method is recommended
189
- # rather than call_command_popen.
277
+ # See the document of call_command for available options.
278
+ #
279
+ # Note for Ruby 1.8:
280
+ # In Ruby 1.8, from the view point of security, this method is recommended
281
+ # rather than call_command_popen. However, this method might have problems
282
+ # with multi-threads.
283
+ #
284
+ # Note for Ruby 1.9:
285
+ # In Ruby 1.9, this method can not be used, because Thread.critical is
286
+ # removed. In Ruby 1.9, call_command_popen is safe and robust enough, and
287
+ # is the recommended way, because IO.popen is improved to get a
288
+ # command-line as an array without calling shell.
190
289
  #
191
290
  # ---
192
291
  # *Arguments*:
@@ -196,12 +295,17 @@ module Command
196
295
  def call_command_fork(cmd, options = {})
197
296
  dir = options[:chdir]
198
297
  cmd = safe_command_line_array(cmd)
298
+ begin
299
+ tc, Thread.critical, flag0, flag1 = Thread.critical, true, true, true
199
300
  IO.popen("-", "r+") do |io|
200
301
  if io then
201
302
  # parent
303
+ flag0, Thread.critical, flag1 = false, tc, false
202
304
  yield io
203
305
  else
204
306
  # child
307
+ Thread.critical = true # for safety, though already true
308
+ GC.disable
205
309
  # chdir to options[:chdir] if available
206
310
  begin
207
311
  Dir.chdir(dir) if dir
@@ -218,6 +322,11 @@ module Command
218
322
  Process.exit!(1)
219
323
  end
220
324
  end
325
+ ensure
326
+ # When IO.popen("-") raises error, Thread.critical will be set here.
327
+ Thread.critical = tc if flag0 or flag1
328
+ #warn 'Thread.critical might have wrong value.' if flag0 != flag1
329
+ end
221
330
  end
222
331
 
223
332
  # Executes the program via Open3.popen3
@@ -240,7 +349,8 @@ module Command
240
349
  # waits the program termination, and returns the output data printed to the
241
350
  # standard output as a string.
242
351
  #
243
- # Automatically select popen for Windows environment and fork for the others.
352
+ # Automatically select popen for Ruby 1.9 or Windows environment and
353
+ # fork for the others.
244
354
  #
245
355
  # Available options:
246
356
  # :chdir => "path" : changes working directory to the specified path.
@@ -252,19 +362,32 @@ module Command
252
362
  # * (optional) _options_: Hash
253
363
  # *Returns*:: String or nil
254
364
  def query_command(cmd, query = nil, options = {})
255
- case RUBY_PLATFORM
256
- when /mswin32|bccwin32/
365
+ if RUBY_VERSION >= "1.9.0" then
366
+ return query_command_popen(cmd, query, options)
367
+ elsif no_fork? then
257
368
  query_command_popen(cmd, query, options)
258
369
  else
259
- query_command_fork(cmd, query, options)
370
+ begin
371
+ query_command_fork(cmd, query, options)
372
+ rescue NotImplementedError
373
+ # fork(2) not implemented
374
+ @@no_fork = true
375
+ query_command_fork(cmd, query, options)
376
+ end
260
377
  end
261
378
  end
262
379
 
380
+ # This method is internally called from the query_command method.
381
+ # In normal case, use query_command, and do not call this method directly.
382
+ #
263
383
  # Executes the program with the query (String) given to the standard input,
264
384
  # waits the program termination, and returns the output data printed to the
265
385
  # standard output as a string.
266
386
  #
267
- # IO.popen is used for OS which doesn't support fork.
387
+ # See the document of query_command for available options.
388
+ #
389
+ # See the document of call_command_popen for the security and Ruby
390
+ # version specific issues.
268
391
  #
269
392
  # ---
270
393
  # *Arguments*:
@@ -283,14 +406,19 @@ module Command
283
406
  ret
284
407
  end
285
408
 
409
+ # This method is internally called from the query_command method.
410
+ # In normal case, use query_command, and do not call this method directly.
411
+ #
286
412
  # Executes the program with the query (String) given to the standard input,
287
413
  # waits the program termination, and returns the output data printed to the
288
414
  # standard output as a string.
289
415
  #
290
416
  # Fork (by using IO.popen("-")) and exec is used to execute the program.
291
417
  #
292
- # From the view point of security, this method is recommended
293
- # rather than query_command_popen.
418
+ # See the document of query_command for available options.
419
+ #
420
+ # See the document of call_command_fork for the security and Ruby
421
+ # version specific issues.
294
422
  #
295
423
  # ---
296
424
  # *Arguments*:
@@ -254,7 +254,17 @@ module Bio
254
254
  label_data.each_line do |line|
255
255
  ma << line.strip.split(/\s+/).map {|x| x.to_f }
256
256
  end
257
- @data['matrix'] = Matrix[*ma]
257
+ ma_len = ma.size
258
+ ma.each do |row|
259
+ row_size = row.size
260
+ if row_size < ma_len
261
+ (row_size..ma_len-1).each do |i|
262
+ row[i] = ma[i][row_size-1]
263
+ end
264
+ end
265
+ end
266
+ mat = Matrix[*ma]
267
+ @data['matrix'] = mat
258
268
  end
259
269
  end
260
270
 
@@ -188,7 +188,7 @@ class SPTR < EMBLDB
188
188
  def gn
189
189
  unless @data['GN']
190
190
  case fetch('GN')
191
- when /Name=/,/ORFNames=/
191
+ when /Name=/,/ORFNames=/,/OrderedLocusNames=/,/Synonyms=/
192
192
  @data['GN'] = gn_uniprot_parser
193
193
  else
194
194
  @data['GN'] = gn_old_parser
@@ -126,10 +126,14 @@ module Bio
126
126
  # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
127
127
  #
128
128
  # * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
129
+ # (Dead link. Please find in http://web.archive.org/ ).
129
130
  # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
130
131
  #
131
- # * README.formatdb
132
- # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
132
+ # * Program Parameters for formatdb and fastacmd (by Tao Tao)
133
+ # http://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/formatdb_fastacmd.html#t1.1
134
+ #
135
+ # * Formatdb README
136
+ # ftp://ftp.ncbi.nih.gov/blast/documents/formatdb.html
133
137
  #
134
138
  class FastaDefline
135
139
 
@@ -140,6 +144,7 @@ module Bio
140
144
  'emb' => [ 'acc_version', 'locus' ], # EMBL
141
145
  'dbj' => [ 'acc_version', 'locus' ], # DDBJ
142
146
  'sp' => [ 'accession', 'entry_id' ], # SWISS-PROT
147
+ 'tr' => [ 'accession', 'entry_id' ], # TREMBL
143
148
  'pdb' => [ 'entry_id', 'chain' ], # PDB
144
149
  'bbs' => [ 'number' ], # GenInfo Backbone Id
145
150
  'gnl' => [ 'database' , 'entry_id' ], # General database identifier
@@ -95,6 +95,30 @@ module Bio
95
95
  data[n]
96
96
  end
97
97
 
98
# Returns the data as a Bio::Sequence object.
# In the returned sequence object, the length of the sequence is zero,
# and the numeric data is stored to the Bio::Sequence#quality_scores
# attribute.
#
# Because the meaning of the numeric data is unclear,
# Bio::Sequence#quality_score_type is not set by default.
#
# Note: For efficiency, modifying the returned Bio::Sequence object
# might (but does not always) also change the sequence or definition
# of this FastaNumericFormat object.
#
# ---
# *Arguments*: (none)
# *Returns*:: (Bio::Sequence) sequence object
def to_biosequence
  adapter_module = Bio::Sequence::Adapter::FastaNumericFormat
  bioseq = Bio::Sequence.adapter(self, adapter_module)
  # Give the result an explicit, empty generic sequence.
  bioseq.seq = Bio::Sequence::Generic.new('')
  bioseq
end
120
+ alias to_seq to_biosequence
121
+
98
122
  undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen
99
123
 
100
124
  end #class FastaNumericFormat
@@ -0,0 +1,29 @@
1
+ #
2
+ # = bio/db/fasta/qual_to_biosequence.rb - Bio::FastaNumericFormat to Bio::Sequence adapter module
3
+ #
4
+ # Copyright:: Copyright (C) 2010
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+
9
+ require 'bio/sequence'
10
+ require 'bio/sequence/adapter'
11
+ require 'bio/db/fasta/fasta_to_biosequence'
12
+
13
# Internal use only. Normal users should not use this module.
#
# Adapter module used to present a Bio::FastaNumericFormat object as a
# Bio::Sequence. It is internally used in
# Bio::FastaNumericFormat#to_biosequence.
#
module Bio::Sequence::Adapter::FastaNumericFormat

  extend(Bio::Sequence::Adapter)

  # Mixes in the Bio::Sequence::Adapter::FastaFormat adapter module.
  include(Bio::Sequence::Adapter::FastaFormat)

  private

  # NOTE(review): presumably maps Bio::Sequence#quality_scores to the
  # source object's #data -- confirm against the definition of
  # def_biosequence_adapter in Bio::Sequence::Adapter.
  def_biosequence_adapter(:quality_scores, :data)

end #module Bio::Sequence::Adapter::FastaNumericFormat
29
+
@@ -640,6 +640,21 @@ class Fastq
640
640
  Bio::Sequence.adapter(self, Bio::Sequence::Adapter::Fastq)
641
641
  end
642
642
 
643
# Masks low quality sequence regions.
# For each sequence position, if the quality score is smaller than
# the threshold, the sequence in the position is replaced with
# <em>mask_char</em>.
#
# The work is delegated to mask_with_quality_score on the object
# returned by to_biosequence.
#
# Note: This method does not take quality_score_type into account.
# ---
# *Arguments*:
# * (required) <em>threshold</em> : (Numeric) threshold
# * (optional) <em>mask_char</em> : (String) character used for masking
# *Returns*:: Bio::Sequence object
def mask(threshold, mask_char = 'n')
  bioseq = to_biosequence
  bioseq.mask_with_quality_score(threshold, mask_char)
end
657
+
643
658
  end #class Fastq
644
659
 
645
660
  end #module Bio
@@ -293,8 +293,8 @@ class GO
293
293
 
294
294
  # Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
295
295
  def to_str
296
- return [@db, @db_object_id, @db_object_symbol, @quialifier, @goid,
297
- @qualifier.join("|"), @evidence, @with.join("|"), @aspect,
296
+ return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
297
+ @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
298
298
  @db_object_name, @db_object_synonym.join("|"), @db_object_type,
299
299
  @taxon, @date, @assigned_by].join("\t")
300
300
  end
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # = bio/db/kegg/common.rb - Common methods for KEGG database classes
3
3
  #
4
- # Copyright:: Copyright (C) 2003-2007 Toshiaki Katayama <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001-2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
6
6
  # Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
7
7
  # License:: The Ruby License
@@ -23,6 +23,69 @@ class KEGG
23
23
  # Namespace for methods commonly used in the Bio::KEGG::* classes.
24
24
  module Common
25
25
 
26
# Mix-in module that provides the references method.
module References
  # REFERENCE -- Returns contents of the REFERENCE records as an Array of
  # Bio::Reference objects.
  #
  # The parsed Array is stored in @data['REFERENCE'] and reused on
  # later calls.
  def references
    return @data['REFERENCE'] if @data['REFERENCE']

    refs = []
    toptag2array(get('REFERENCE')).each do |record|
      attrs = {}
      subtag2array(record).each do |field|
        case tag_get(field)
        when /REFERENCE/
          # A leading "PMID:<digits>" becomes the pubmed ID; whatever
          # text remains is kept as a comment.
          comment = tag_cut(field).chomp
          if (pmid = /^\s*PMID\:(\d+)\s*/.match(comment))
            attrs['pubmed'] = pmid[1]
            comment = pmid.post_match
          end
          unless comment.nil? || comment.empty?
            attrs['comments'] ||= []
            attrs['comments'].push(comment)
          end
        when /AUTHORS/
          # Names are separated by ", "; the last chunk may hold two
          # names joined with "and".
          names = truncate(tag_cut(field)).split(/\, /)
          names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
          attrs['authors'] = names.flatten.map { |name| name.sub(',', ', ') }
        when /TITLE/
          attrs['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          case journal
          # KEGG style
          when /(.*) (\d*(?:\([^\)]+\))?)\:(\d+\-\d+) \((\d+)\)$/
            found = Regexp.last_match
            attrs['journal'] = found[1]
            attrs['volume'] = found[2]
            attrs['pages'] = found[3]
            attrs['year'] = found[4]
          # old KEGG style
          when /(.*) (\d+):(\d+\-\d+) \((\d+)\) \[UI:(\d+)\]$/
            found = Regexp.last_match
            attrs['journal'] = found[1]
            attrs['volume'] = found[2]
            attrs['pages'] = found[3]
            attrs['year'] = found[4]
            attrs['medline'] = found[5]
          # Only journal name and year are available
          when /(.*) \((\d+)\)$/
            found = Regexp.last_match
            attrs['journal'] = found[1]
            attrs['year'] = found[2]
          else
            attrs['journal'] = journal
          end
        end
      end
      refs.push(Reference.new(attrs))
    end
    @data['REFERENCE'] = refs #.extend(Bio::References::BackwardCompatibility)
    @data['REFERENCE']
  end
end #module References
88
+
26
89
  # The module providing dblinks_as_hash methods.
27
90
  #
28
91
  # Bio::KEGG::* internal use only.
@@ -54,7 +117,8 @@ class KEGG
54
117
  unless defined? @pathways_as_hash then
55
118
  hash = {}
56
119
  pathways_as_strings.each do |line|
57
- sign, entry_id, name = line.split(/\s+/, 3)
120
+ line = line.sub(/\APATH\:\s+/, '')
121
+ entry_id, name = line.split(/\s+/, 2)
58
122
  hash[entry_id] = name
59
123
  end
60
124
  @pathways_as_hash = hash
@@ -72,9 +136,9 @@ class KEGG
72
136
  def orthologs_as_hash
73
137
  unless defined? @orthologs_as_hash
74
138
  kos = {}
75
- orthologs_as_strings.each do |ko|
76
- entry = ko.scan(/K[0-9]{5}/)[0]
77
- sign, entry_id, definition = ko.split(/\s+/, 3)
139
+ orthologs_as_strings.each do |line|
140
+ ko = line.sub(/\AKO\:\s+/, '')
141
+ entry_id, definition = ko.split(/\s+/, 2)
78
142
  kos[entry_id] = definition
79
143
  end
80
144
  @orthologs_as_hash = kos
@@ -106,6 +170,46 @@ class KEGG
106
170
  end
107
171
  end #module GenesAsHash
108
172
 
173
# This module provides modules_as_hash method.
#
# Bio::KEGG::* internal use only.
module ModulesAsHash
  # Returns MODULE field as a Hash.
  # Each key of the hash is KEGG MODULE ID,
  # and each value is the name of the Pathway Module.
  #
  # The Hash is built from modules_as_strings on the first call and
  # cached in @modules_as_hash; later calls return the same object.
  # ---
  # *Returns*:: Hash
  def modules_as_hash
    # The guard must test the same variable that is assigned below,
    # otherwise the cache is never hit.
    unless defined? @modules_as_hash then
      hash = {}
      modules_as_strings.each do |line|
        # "ID  name ..." -> ID and the rest of the line
        entry_id, name = line.split(/\s+/, 2)
        hash[entry_id] = name
      end
      @modules_as_hash = hash
    end
    @modules_as_hash
  end
end #module ModulesAsHash
194
+
195
# This module provides strings_as_hash private method.
#
# Bio::KEGG::* internal use only.
module StringsAsHash
  # (Private) Creates a hash from lines.
  # Each line consists of two components, an ID and a description,
  # separated by whitespace. The ID becomes the key and the rest of
  # the line becomes the value; a line without a description gets nil.
  # IDs must be unique, because a repeated ID overwrites the earlier
  # entry.
  def strings_as_hash(lines)
    table = {}
    lines.each do |line|
      key, description = line.split(/\s+/, 2)
      table[key] = description
    end
    table
  end
  private :strings_as_hash
end #module StringsAsHash
212
+
109
213
  end #module Common
110
214
  end #class KEGG
111
215
  end #module Bio