bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: common.rb,v 1.12 2007/04/05 23:35:40 trevor Exp $
8
+ # $Id: common.rb,v 1.12.2.5 2008/05/07 12:22:10 ngoto Exp $
9
9
  #
10
10
  # == Description
11
11
  #
@@ -73,6 +73,7 @@
73
73
 
74
74
  require 'bio/db'
75
75
  require 'bio/reference'
76
+ require 'bio/compat/references'
76
77
 
77
78
  module Bio
78
79
  class EMBLDB
@@ -270,33 +271,48 @@ module Common
270
271
  def references
271
272
  unless @data['references']
272
273
  ary = self.ref.map {|ent|
273
- hash = Hash.new('')
274
+ hash = Hash.new
274
275
  ent.each {|key, value|
275
276
  case key
277
+ when 'RN'
278
+ if /\[(\d+)\]/ =~ value.to_s
279
+ hash['embl_gb_record_number'] = $1.to_i
280
+ end
281
+ when 'RC'
282
+ unless value.to_s.strip.empty?
283
+ hash['comments'] ||= []
284
+ hash['comments'].push value
285
+ end
286
+ when 'RP'
287
+ hash['sequence_position'] = value
276
288
  when 'RA'
277
- hash['authors'] = value.split(/, /)
289
+ a = value.split(/\, /)
290
+ a.each do |x|
291
+ x.sub!(/( [^ ]+)\z/, ",\\1")
292
+ end
293
+ hash['authors'] = a
278
294
  when 'RT'
279
295
  hash['title'] = value
280
296
  when 'RL'
281
- if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
282
- hash['journal'] = $1
297
+ if /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/ =~ value.to_s
298
+ hash['journal'] = $1.rstrip
283
299
  hash['volume'] = $2
284
- hash['issue'] = $3
285
- hash['pages'] = $4
286
- hash['year'] = $5
300
+ hash['issue'] = $4
301
+ hash['pages'] = $6
302
+ hash['year'] = $7
287
303
  else
288
304
  hash['journal'] = value
289
305
  end
290
- when 'RX' # PUBMED, MEDLINE
291
- value.split('.').each {|item|
292
- tag, xref = item.split(/; /).map {|i| i.strip }
306
+ when 'RX' # PUBMED, DOI, (AGRICOLA)
307
+ value.split(/\. /).each {|item|
308
+ tag, xref = item.split(/\; /).map {|i| i.strip.sub(/\.\z/, '') }
293
309
  hash[ tag.downcase ] = xref
294
310
  }
295
311
  end
296
312
  }
297
313
  Reference.new(hash)
298
314
  }
299
- @data['references'] = References.new(ary)
315
+ @data['references'] = ary.extend(Bio::References::BackwardCompatibility)
300
316
  end
301
317
  @data['references']
302
318
  end
@@ -2,10 +2,12 @@
2
2
  # = bio/db/embl/embl.rb - EMBL database class
3
3
  #
4
4
  #
5
- # Copyright:: Copyright (C) 2001-2007 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright:: Copyright (C) 2001-2007
6
+ # Mitsuteru C. Nakao <n@bioruby.org>
7
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
8
  # License:: The Ruby License
7
9
  #
8
- # $Id: embl.rb,v 1.29 2007/04/05 23:35:40 trevor Exp $
10
+ # $Id: embl.rb,v 1.29.2.7 2008/06/17 16:04:36 ngoto Exp $
9
11
  #
10
12
  # == Description
11
13
  #
@@ -29,8 +31,13 @@
29
31
  # http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
30
32
  #
31
33
 
34
+ require 'date'
32
35
  require 'bio/db'
33
36
  require 'bio/db/embl/common'
37
+ require 'bio/compat/features'
38
+ require 'bio/compat/references'
39
+ require 'bio/sequence'
40
+ require 'bio/sequence/dblink'
34
41
 
35
42
  module Bio
36
43
  class EMBL < EMBLDB
@@ -120,6 +127,14 @@ class EMBL < EMBLDB
120
127
  end
121
128
  alias molecule_type molecule
122
129
 
130
+ def data_class
131
+ id_line('DATA_CLASS')
132
+ end
133
+
134
+ def topology
135
+ id_line('TOPOLOGY')
136
+ end
137
+
123
138
  # returns DIVISION in the ID line.
124
139
  # * Bio::EMBL#division -> String
125
140
  def division
@@ -221,8 +236,8 @@ class EMBL < EMBLDB
221
236
  # RN RC RP RX RA RT RL
222
237
  #
223
238
  # Bio::EMBLDB#ref
224
-
225
-
239
+
240
+
226
241
  ##
227
242
  # DR Line; databases cross-reference (>=0)
228
243
  # "DR database_identifier; primary_identifier; secondary_identifier."
@@ -246,7 +261,6 @@ class EMBL < EMBLDB
246
261
  # FT Line; feature table data (>=0)
247
262
  def ft
248
263
  unless @data['FT']
249
- @data['FT'] = Array.new
250
264
  ary = Array.new
251
265
  in_quote = false
252
266
  @orig['FT'].each_line do |line|
@@ -276,7 +290,7 @@ class EMBL < EMBLDB
276
290
  parse_qualifiers(subary)
277
291
  end
278
292
 
279
- @data['FT'] = Features.new(ary)
293
+ @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
280
294
  end
281
295
  if block_given?
282
296
  @data['FT'].each do |feature|
@@ -311,9 +325,9 @@ class EMBL < EMBLDB
311
325
  #
312
326
  # CC Line; comments of notes (>=0)
313
327
  def cc
314
- get('CC')
328
+ get('CC').to_s.gsub(/^CC /, '')
315
329
  end
316
-
330
+ alias comment cc
317
331
 
318
332
  ##
319
333
  # XX Line; spacer line (many)
@@ -355,13 +369,96 @@ class EMBL < EMBLDB
355
369
  # @orig[''] as sequence
356
370
  # bb Line; (blanks) sequence data (>=1)
357
371
  def seq
358
- Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
372
+ Bio::Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
359
373
  end
360
374
  alias naseq seq
361
375
  alias ntseq seq
362
376
 
377
+ #--
363
378
  # // Line; termination line (end; 1/entry)
379
+ #++
380
+
381
+ # modified date. Returns Date object, String or nil.
382
+ def date_modified
383
+ parse_date(self.dt['updated'])
384
+ end
385
+
386
+ # created date. Returns Date object, String or nil.
387
+ def date_created
388
+ parse_date(self.dt['created'])
389
+ end
390
+
391
+ # release number when last updated
392
+ def release_modified
393
+ parse_release_version(self.dt['updated'])[0]
394
+ end
395
+
396
+ # release number when created
397
+ def release_created
398
+ parse_release_version(self.dt['created'])[0]
399
+ end
364
400
 
401
+ # entry version number numbered by EMBL
402
+ def entry_version
403
+ parse_release_version(self.dt['updated'])[1]
404
+ end
405
+
406
+ # parse date string. Returns Date object, or the argument unchanged if it cannot be parsed.
407
+ def parse_date(str)
408
+ begin
409
+ Date.parse(str)
410
+ rescue ArgumentError, TypeError, NoMethodError, NameError
411
+ str
412
+ end
413
+ end
414
+ private :parse_date
415
+
416
+ # extracts release and version numbers from DT line
417
+ def parse_release_version(str)
418
+ return [ nil, nil ] unless str
419
+ a = str.split(/[\(\,\)]/)
420
+ dstr = a.shift
421
+ rel = nil
422
+ ver = nil
423
+ a.each do |x|
424
+ case x
425
+ when /Rel\.\s*(.+)/
426
+ rel = $1.strip
427
+ when /Version\s*(.+)/
428
+ ver = $1.strip
429
+ end
430
+ end
431
+ [ rel, ver ]
432
+ end
433
+ private :parse_release_version
434
+
435
+ # database references (DR).
436
+ # Returns an array of Bio::Sequence::DBLink objects.
437
+ def dblinks
438
+ get('DR').split(/\n/).collect { |x|
439
+ Bio::Sequence::DBLink.parse_embl_DR_line(x)
440
+ }
441
+ end
442
+
443
+ # species
444
+ def species
445
+ self.fetch('OS')
446
+ end
447
+
448
+ # taxonomy classfication
449
+ alias classification oc
450
+
451
+ # features
452
+ alias features ft
453
+
454
+
455
+ # converts the entry to Bio::Sequence object
456
+ # ---
457
+ # *Arguments*::
458
+ # *Returns*:: Bio::Sequence object
459
+ def to_biosequence
460
+ Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL)
461
+ end
365
462
 
366
463
  ### private methods
367
464
 
@@ -400,3 +497,4 @@ class EMBL < EMBLDB
400
497
  end # class EMBL
401
498
 
402
499
  end # module Bio
500
+
@@ -0,0 +1,85 @@
1
+ #
2
+ # = bio/db/embl/embl_to_biosequence.rb - Bio::EMBL to Bio::Sequence adapter module
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>,
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'bio/sequence'
12
+ require 'bio/sequence/adapter'
13
+
14
+ # Internal use only. Normal users should not use this module.
15
+ #
16
+ # Bio::EMBL to Bio::Sequence adapter module.
17
+ # It is internally used in Bio::EMBL#to_biosequence.
18
+ #
19
+ module Bio::Sequence::Adapter::EMBL
20
+
21
+ extend Bio::Sequence::Adapter
22
+
23
+ private
24
+
25
+ def_biosequence_adapter :seq
26
+
27
+ def_biosequence_adapter :id_namespace do |orig|
28
+ 'EMBL'
29
+ end
30
+
31
+ def_biosequence_adapter :entry_id
32
+
33
+ def_biosequence_adapter :primary_accession do |orig|
34
+ orig.accessions[0]
35
+ end
36
+
37
+ def_biosequence_adapter :secondary_accessions do |orig|
38
+ orig.accessions[1..-1] || []
39
+ end
40
+
41
+ def_biosequence_adapter :molecule_type
42
+
43
+ def_biosequence_adapter :data_class
44
+
45
+ def_biosequence_adapter :definition, :description
46
+
47
+ def_biosequence_adapter :topology
48
+
49
+ def_biosequence_adapter :date_created
50
+
51
+ def_biosequence_adapter :date_modified
52
+
53
+ def_biosequence_adapter :release_created
54
+
55
+ def_biosequence_adapter :release_modified
56
+
57
+ def_biosequence_adapter :entry_version
58
+
59
+ def_biosequence_adapter :division
60
+
61
+ def_biosequence_adapter :sequence_version, :version
62
+
63
+ def_biosequence_adapter :keywords
64
+
65
+ def_biosequence_adapter :species
66
+
67
+ def_biosequence_adapter :classification
68
+
69
+ #--
70
+ # unsupported yet
71
+ # def_biosequence_adapter :organelle do |orig|
72
+ # orig.fetch('OG')
73
+ # end
74
+ #++
75
+
76
+ def_biosequence_adapter :references
77
+
78
+ def_biosequence_adapter :features
79
+
80
+ def_biosequence_adapter :comments, :cc
81
+
82
+ def_biosequence_adapter :dblinks
83
+
84
+ end #module Bio::Sequence::Adapter::EMBL
85
+
@@ -0,0 +1,190 @@
1
+ #
2
+ # = bio/db/embl/format_embl.rb - EMBL format generator
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Jan Aerts <jandot@bioruby.org>,
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id: format_embl.rb,v 1.1.2.7 2008/06/19 12:45:15 ngoto Exp $
10
+ #
11
+
12
+ require 'bio/sequence/format'
13
+
14
+ module Bio::Sequence::Format::NucFormatter
15
+
16
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
17
+ # Embl format output class for Bio::Sequence.
18
+ class Embl < Bio::Sequence::Format::FormatterBase
19
+
20
+ # helper methods
21
+ include Bio::Sequence::Format::INSDFeatureHelper
22
+
23
+ private
24
+
25
+ # wrapping with EMBL style
26
+ def embl_wrap(prefix, str)
27
+ wrap(str.to_s, 80, prefix)
28
+ end
29
+
30
+ # Wraps the given words (an Array of String) in EMBL style.
31
+ # A word is never split across lines.
32
+ def embl_wrap_words(prefix, array)
33
+ width = 80
34
+ result = []
35
+ str = nil
36
+ array.each do |x|
37
+ if str then
38
+ if str.length + 1 + x.length > width then
39
+ str = nil
40
+ else
41
+ str.concat ' '
42
+ str.concat x
43
+ end
44
+ end
45
+ unless str then
46
+ str = prefix + x
47
+ result.push str
48
+ end
49
+ end
50
+ result.join("\n")
51
+ end
52
+
53
+ # format reference
54
+ # ref:: Bio::Reference object
55
+ # hash:: (optional) a hash for RN (reference number) administration
56
+ def reference_format_embl(ref, hash = nil)
57
+ lines = Array.new
58
+ if ref.embl_gb_record_number or hash then
59
+ refno = ref.embl_gb_record_number.to_i
60
+ hash ||= {}
61
+ if refno <= 0 or hash[refno] then
62
+ refno = hash.keys.sort[-1].to_i + 1
63
+ hash[refno] = true
64
+ end
65
+ lines << embl_wrap("RN ", "[#{refno}]")
66
+ end
67
+ if ref.comments then
68
+ ref.comments.each do |cmnt|
69
+ lines << embl_wrap("RC ", cmnt)
70
+ end
71
+ end
72
+ unless ref.sequence_position.to_s.empty? then
73
+ lines << embl_wrap("RP ", "#{ref.sequence_position}")
74
+ end
75
+ unless ref.doi.to_s.empty? then
76
+ lines << embl_wrap("RX ", "DOI; #{ref.doi}.")
77
+ end
78
+ unless ref.pubmed.to_s.empty? then
79
+ lines << embl_wrap("RX ", "PUBMED; #{ref.pubmed}.")
80
+ end
81
+ unless ref.authors.empty? then
82
+ auth = ref.authors.collect do |x|
83
+ y = x.to_s.strip.split(/\, *([^\,]+)\z/)
84
+ y[1].gsub!(/\. +/, '.') if y[1]
85
+ y.join(' ')
86
+ end
87
+ lastauth = auth.pop
88
+ auth.each { |x| x.concat ',' }
89
+ auth.push(lastauth.to_s + ';')
90
+ lines << embl_wrap_words('RA ', auth)
91
+ end
92
+ lines << embl_wrap('RT ',
93
+ (ref.title.to_s.empty? ? '' :
94
+ "\"#{ref.title}\"") + ';')
95
+ unless ref.journal.to_s.empty? then
96
+ volissue = "#{ref.volume.to_s}"
97
+ volissue = "#{volissue}(#{ref.issue})" unless ref.issue.to_s.empty?
98
+ rl = "#{ref.journal}"
99
+ rl += " #{volissue}" unless volissue.empty?
100
+ rl += ":#{ref.pages}" unless ref.pages.to_s.empty?
101
+ rl += "(#{ref.year})" unless ref.year.to_s.empty?
102
+ rl += '.'
103
+ lines << embl_wrap('RL ', rl)
104
+ end
105
+ lines << "XX"
106
+ return lines.join("\n")
107
+ end
108
+
109
+ def seq_format_embl(seq)
110
+ counter = 0
111
+ result = seq.gsub(/.{1,60}/) do |x|
112
+ counter += x.length
113
+ x = x.gsub(/.{10}/, '\0 ')
114
+ sprintf(" %-66s%9d\n", x, counter)
115
+ end
116
+ result.chomp!
117
+ result
118
+ end
119
+
120
+ def seq_composition(seq)
121
+ { :a => seq.count('aA'),
122
+ :c => seq.count('cC'),
123
+ :g => seq.count('gG'),
124
+ :t => seq.count('tTuU'),
125
+ :other => seq.count('^aAcCgGtTuU')
126
+ }
127
+ end
128
+
129
+ # molecule type
130
+ def mol_type_embl
131
+ if mt = molecule_type then
132
+ mt
133
+ elsif f = (features or []).find { |f| f.feature == 'source' } and
134
+ q = f.qualifiers.find { |q| q.qualifier == 'mol_type' } then
135
+ q.value
136
+ else
137
+ 'NA'
138
+ end
139
+ end
140
+
141
+ # CC line. Comments.
142
+ def comments_format_embl(cmnts)
143
+ return '' if !cmnts or cmnts.empty?
144
+ cmnts = [ cmnts ] unless cmnts.kind_of?(Array)
145
+ a = []
146
+ cmnts.each do |str|
147
+ a.push embl_wrap('CC ', str)
148
+ end
149
+ unless a.empty? then
150
+ a.push "XX "
151
+ a.push '' # dummy to put "\n" at the end of the string
152
+ end
153
+ a.join("\n")
154
+ end
155
+
156
+
157
+ # Erb template of EMBL format for Bio::Sequence
158
+ erb_template <<'__END_OF_TEMPLATE__'
159
+ ID <%= primary_accession || entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= mol_type_embl %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
160
+ XX
161
+ <%= embl_wrap('AC ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
162
+ XX
163
+ DT <%= format_date(date_created || null_date) %> (Rel. <%= release_created || 0 %>, Created)
164
+ DT <%= format_date(date_modified || null_date) %> (Rel. <%= release_modified || 0 %>, Last updated, Version <%= entry_version || 0 %>)
165
+ XX
166
+ <%= embl_wrap('DE ', definition) %>
167
+ XX
168
+ <%= embl_wrap('KW ', (keywords || []).join('; ') + '.') %>
169
+ XX
170
+ OS <%= species %>
171
+ <%= embl_wrap('OC ', (classification || []).join('; ') + '.') %>
172
+ XX
173
+ <% hash = {}; (references || []).each do |ref| %><%= reference_format_embl(ref, hash) %>
174
+ <% end %><% (dblinks || []).each do |r|
175
+ %>DR <%= r.database %>; <%= r.id %><% unless r.secondary_ids.empty? %>; <%= r.secondary_ids[0] %><% end %>.
176
+ <% end %><% if dblinks and !dblinks.empty? then
177
+ %>XX
178
+ <% end %><%= comments_format_embl(comments)
179
+ %>FH Key Location/Qualifiers
180
+ FH
181
+ <%= format_features_embl(features || []) %>XX
182
+ SQ Sequence <%= seq.length %> BP; <% c = seq_composition(seq) %><%= c[:a] %> A; <%= c[:c] %> C; <%= c[:g] %> G; <%= c[:t] %> T; <%= c[:other] %> other;
183
+ <%= seq_format_embl(seq) %>
184
+ //
185
+ __END_OF_TEMPLATE__
186
+
187
+ end #class Embl
188
+
189
+ end #module Bio::Sequence::Format::NucFormatter
190
+