bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: common.rb,v 1.12 2007/04/05 23:35:40 trevor Exp $
8
+ # $Id: common.rb,v 1.12.2.5 2008/05/07 12:22:10 ngoto Exp $
9
9
  #
10
10
  # == Description
11
11
  #
@@ -73,6 +73,7 @@
73
73
 
74
74
  require 'bio/db'
75
75
  require 'bio/reference'
76
+ require 'bio/compat/references'
76
77
 
77
78
  module Bio
78
79
  class EMBLDB
@@ -270,33 +271,48 @@ module Common
270
271
  def references
271
272
  unless @data['references']
272
273
  ary = self.ref.map {|ent|
273
- hash = Hash.new('')
274
+ hash = Hash.new
274
275
  ent.each {|key, value|
275
276
  case key
277
+ when 'RN'
278
+ if /\[(\d+)\]/ =~ value.to_s
279
+ hash['embl_gb_record_number'] = $1.to_i
280
+ end
281
+ when 'RC'
282
+ unless value.to_s.strip.empty?
283
+ hash['comments'] ||= []
284
+ hash['comments'].push value
285
+ end
286
+ when 'RP'
287
+ hash['sequence_position'] = value
276
288
  when 'RA'
277
- hash['authors'] = value.split(/, /)
289
+ a = value.split(/\, /)
290
+ a.each do |x|
291
+ x.sub!(/( [^ ]+)\z/, ",\\1")
292
+ end
293
+ hash['authors'] = a
278
294
  when 'RT'
279
295
  hash['title'] = value
280
296
  when 'RL'
281
- if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
282
- hash['journal'] = $1
297
+ if /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/ =~ value.to_s
298
+ hash['journal'] = $1.rstrip
283
299
  hash['volume'] = $2
284
- hash['issue'] = $3
285
- hash['pages'] = $4
286
- hash['year'] = $5
300
+ hash['issue'] = $4
301
+ hash['pages'] = $6
302
+ hash['year'] = $7
287
303
  else
288
304
  hash['journal'] = value
289
305
  end
290
- when 'RX' # PUBMED, MEDLINE
291
- value.split('.').each {|item|
292
- tag, xref = item.split(/; /).map {|i| i.strip }
306
+ when 'RX' # PUBMED, DOI, (AGRICOLA)
307
+ value.split(/\. /).each {|item|
308
+ tag, xref = item.split(/\; /).map {|i| i.strip.sub(/\.\z/, '') }
293
309
  hash[ tag.downcase ] = xref
294
310
  }
295
311
  end
296
312
  }
297
313
  Reference.new(hash)
298
314
  }
299
- @data['references'] = References.new(ary)
315
+ @data['references'] = ary.extend(Bio::References::BackwardCompatibility)
300
316
  end
301
317
  @data['references']
302
318
  end
@@ -2,10 +2,12 @@
2
2
  # = bio/db/embl/embl.rb - EMBL database class
3
3
  #
4
4
  #
5
- # Copyright:: Copyright (C) 2001-2007 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright:: Copyright (C) 2001-2007
6
+ # Mitsuteru C. Nakao <n@bioruby.org>
7
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
8
  # License:: The Ruby License
7
9
  #
8
- # $Id: embl.rb,v 1.29 2007/04/05 23:35:40 trevor Exp $
10
+ # $Id: embl.rb,v 1.29.2.7 2008/06/17 16:04:36 ngoto Exp $
9
11
  #
10
12
  # == Description
11
13
  #
@@ -29,8 +31,13 @@
29
31
  # http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
30
32
  #
31
33
 
34
+ require 'date'
32
35
  require 'bio/db'
33
36
  require 'bio/db/embl/common'
37
+ require 'bio/compat/features'
38
+ require 'bio/compat/references'
39
+ require 'bio/sequence'
40
+ require 'bio/sequence/dblink'
34
41
 
35
42
  module Bio
36
43
  class EMBL < EMBLDB
@@ -120,6 +127,14 @@ class EMBL < EMBLDB
120
127
  end
121
128
  alias molecule_type molecule
122
129
 
130
+ def data_class
131
+ id_line('DATA_CLASS')
132
+ end
133
+
134
+ def topology
135
+ id_line('TOPOLOGY')
136
+ end
137
+
123
138
  # returns DIVISION in the ID line.
124
139
  # * Bio::EMBL#division -> String
125
140
  def division
@@ -221,8 +236,8 @@ class EMBL < EMBLDB
221
236
  # RN RC RP RX RA RT RL
222
237
  #
223
238
  # Bio::EMBLDB#ref
224
-
225
-
239
+
240
+
226
241
  ##
227
242
  # DR Line; database cross-reference (>=0)
228
243
  # "DR database_identifier; primary_identifier; secondary_identifier."
@@ -246,7 +261,6 @@ class EMBL < EMBLDB
246
261
  # FT Line; feature table data (>=0)
247
262
  def ft
248
263
  unless @data['FT']
249
- @data['FT'] = Array.new
250
264
  ary = Array.new
251
265
  in_quote = false
252
266
  @orig['FT'].each_line do |line|
@@ -276,7 +290,7 @@ class EMBL < EMBLDB
276
290
  parse_qualifiers(subary)
277
291
  end
278
292
 
279
- @data['FT'] = Features.new(ary)
293
+ @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
280
294
  end
281
295
  if block_given?
282
296
  @data['FT'].each do |feature|
@@ -311,9 +325,9 @@ class EMBL < EMBLDB
311
325
  #
312
326
  # CC Line; comments or notes (>=0)
313
327
  def cc
314
- get('CC')
328
+ get('CC').to_s.gsub(/^CC /, '')
315
329
  end
316
-
330
+ alias comment cc
317
331
 
318
332
  ##
319
333
  # XX Line; spacer line (many)
@@ -355,13 +369,96 @@ class EMBL < EMBLDB
355
369
  # @orig[''] as sequence
356
370
  # bb Line; (blanks) sequence data (>=1)
357
371
  def seq
358
- Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
372
+ Bio::Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
359
373
  end
360
374
  alias naseq seq
361
375
  alias ntseq seq
362
376
 
377
+ #--
363
378
  # // Line; termination line (end; 1/entry)
379
+ #++
380
+
381
+ # modified date. Returns Date object, String or nil.
382
+ def date_modified
383
+ parse_date(self.dt['updated'])
384
+ end
385
+
386
+ # created date. Returns Date object, String or nil.
387
+ def date_created
388
+ parse_date(self.dt['created'])
389
+ end
390
+
391
+ # release number when last updated
392
+ def release_modified
393
+ parse_release_version(self.dt['updated'])[0]
394
+ end
395
+
396
+ # release number when created
397
+ def release_created
398
+ parse_release_version(self.dt['created'])[0]
399
+ end
364
400
 
401
+ # entry version number numbered by EMBL
402
+ def entry_version
403
+ parse_release_version(self.dt['updated'])[1]
404
+ end
405
+
406
+ # parse date string. Returns Date object, or the given value unchanged if it cannot be parsed.
407
+ def parse_date(str)
408
+ begin
409
+ Date.parse(str)
410
+ rescue ArgumentError, TypeError, NoMethodError, NameError
411
+ str
412
+ end
413
+ end
414
+ private :parse_date
415
+
416
+ # extracts release and version numbers from DT line
417
+ def parse_release_version(str)
418
+ return [ nil, nil ] unless str
419
+ a = str.split(/[\(\,\)]/)
420
+ dstr = a.shift
421
+ rel = nil
422
+ ver = nil
423
+ a.each do |x|
424
+ case x
425
+ when /Rel\.\s*(.+)/
426
+ rel = $1.strip
427
+ when /Version\s*(.+)/
428
+ ver = $1.strip
429
+ end
430
+ end
431
+ [ rel, ver ]
432
+ end
433
+ private :parse_release_version
434
+
435
+ # database references (DR).
436
+ # Returns an array of Bio::Sequence::DBLink objects.
437
+ def dblinks
438
+ get('DR').split(/\n/).collect { |x|
439
+ Bio::Sequence::DBLink.parse_embl_DR_line(x)
440
+ }
441
+ end
442
+
443
+ # species
444
+ def species
445
+ self.fetch('OS')
446
+ end
447
+
448
+ # taxonomy classification
449
+ alias classification oc
450
+
451
+ # features
452
+ alias features ft
453
+
454
+
455
+ # converts the entry to Bio::Sequence object
456
+ # ---
457
+ # *Arguments*::
458
+ # *Returns*:: Bio::Sequence object
459
+ def to_biosequence
460
+ Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL)
461
+ end
365
462
 
366
463
  ### private methods
367
464
 
@@ -400,3 +497,4 @@ class EMBL < EMBLDB
400
497
  end # class EMBL
401
498
 
402
499
  end # module Bio
500
+
@@ -0,0 +1,85 @@
1
+ #
2
+ # = bio/db/embl/embl_to_biosequence.rb - Bio::EMBL to Bio::Sequence adapter module
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>,
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'bio/sequence'
12
+ require 'bio/sequence/adapter'
13
+
14
+ # Internal use only. Normal users should not use this module.
15
+ #
16
+ # Bio::EMBL to Bio::Sequence adapter module.
17
+ # It is internally used in Bio::EMBL#to_biosequence.
18
+ #
19
+ module Bio::Sequence::Adapter::EMBL
20
+
21
+ extend Bio::Sequence::Adapter
22
+
23
+ private
24
+
25
+ def_biosequence_adapter :seq
26
+
27
+ def_biosequence_adapter :id_namespace do |orig|
28
+ 'EMBL'
29
+ end
30
+
31
+ def_biosequence_adapter :entry_id
32
+
33
+ def_biosequence_adapter :primary_accession do |orig|
34
+ orig.accessions[0]
35
+ end
36
+
37
+ def_biosequence_adapter :secondary_accessions do |orig|
38
+ orig.accessions[1..-1] || []
39
+ end
40
+
41
+ def_biosequence_adapter :molecule_type
42
+
43
+ def_biosequence_adapter :data_class
44
+
45
+ def_biosequence_adapter :definition, :description
46
+
47
+ def_biosequence_adapter :topology
48
+
49
+ def_biosequence_adapter :date_created
50
+
51
+ def_biosequence_adapter :date_modified
52
+
53
+ def_biosequence_adapter :release_created
54
+
55
+ def_biosequence_adapter :release_modified
56
+
57
+ def_biosequence_adapter :entry_version
58
+
59
+ def_biosequence_adapter :division
60
+
61
+ def_biosequence_adapter :sequence_version, :version
62
+
63
+ def_biosequence_adapter :keywords
64
+
65
+ def_biosequence_adapter :species
66
+
67
+ def_biosequence_adapter :classification
68
+
69
+ #--
70
+ # not supported yet
71
+ # def_biosequence_adapter :organelle do |orig|
72
+ # orig.fetch('OG')
73
+ # end
74
+ #++
75
+
76
+ def_biosequence_adapter :references
77
+
78
+ def_biosequence_adapter :features
79
+
80
+ def_biosequence_adapter :comments, :cc
81
+
82
+ def_biosequence_adapter :dblinks
83
+
84
+ end #module Bio::Sequence::Adapter::EMBL
85
+
@@ -0,0 +1,190 @@
1
+ #
2
+ # = bio/db/embl/format_embl.rb - EMBL format generator
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Jan Aerts <jandot@bioruby.org>,
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id: format_embl.rb,v 1.1.2.7 2008/06/19 12:45:15 ngoto Exp $
10
+ #
11
+
12
+ require 'bio/sequence/format'
13
+
14
+ module Bio::Sequence::Format::NucFormatter
15
+
16
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
17
+ # Embl format output class for Bio::Sequence.
18
+ class Embl < Bio::Sequence::Format::FormatterBase
19
+
20
+ # helper methods
21
+ include Bio::Sequence::Format::INSDFeatureHelper
22
+
23
+ private
24
+
25
+ # wrapping with EMBL style
26
+ def embl_wrap(prefix, str)
27
+ wrap(str.to_s, 80, prefix)
28
+ end
29
+
30
+ # Wraps the given words (an Array of String) in EMBL style.
31
+ # Each word is never split across lines.
32
+ def embl_wrap_words(prefix, array)
33
+ width = 80
34
+ result = []
35
+ str = nil
36
+ array.each do |x|
37
+ if str then
38
+ if str.length + 1 + x.length > width then
39
+ str = nil
40
+ else
41
+ str.concat ' '
42
+ str.concat x
43
+ end
44
+ end
45
+ unless str then
46
+ str = prefix + x
47
+ result.push str
48
+ end
49
+ end
50
+ result.join("\n")
51
+ end
52
+
53
+ # format reference
54
+ # ref:: Bio::Reference object
55
+ # hash:: (optional) a hash for RN (reference number) administration
56
+ def reference_format_embl(ref, hash = nil)
57
+ lines = Array.new
58
+ if ref.embl_gb_record_number or hash then
59
+ refno = ref.embl_gb_record_number.to_i
60
+ hash ||= {}
61
+ if refno <= 0 or hash[refno] then
62
+ refno = hash.keys.sort[-1].to_i + 1
63
+ hash[refno] = true
64
+ end
65
+ lines << embl_wrap("RN ", "[#{refno}]")
66
+ end
67
+ if ref.comments then
68
+ ref.comments.each do |cmnt|
69
+ lines << embl_wrap("RC ", cmnt)
70
+ end
71
+ end
72
+ unless ref.sequence_position.to_s.empty? then
73
+ lines << embl_wrap("RP ", "#{ref.sequence_position}")
74
+ end
75
+ unless ref.doi.to_s.empty? then
76
+ lines << embl_wrap("RX ", "DOI; #{ref.doi}.")
77
+ end
78
+ unless ref.pubmed.to_s.empty? then
79
+ lines << embl_wrap("RX ", "PUBMED; #{ref.pubmed}.")
80
+ end
81
+ unless ref.authors.empty? then
82
+ auth = ref.authors.collect do |x|
83
+ y = x.to_s.strip.split(/\, *([^\,]+)\z/)
84
+ y[1].gsub!(/\. +/, '.') if y[1]
85
+ y.join(' ')
86
+ end
87
+ lastauth = auth.pop
88
+ auth.each { |x| x.concat ',' }
89
+ auth.push(lastauth.to_s + ';')
90
+ lines << embl_wrap_words('RA ', auth)
91
+ end
92
+ lines << embl_wrap('RT ',
93
+ (ref.title.to_s.empty? ? '' :
94
+ "\"#{ref.title}\"") + ';')
95
+ unless ref.journal.to_s.empty? then
96
+ volissue = "#{ref.volume.to_s}"
97
+ volissue = "#{volissue}(#{ref.issue})" unless ref.issue.to_s.empty?
98
+ rl = "#{ref.journal}"
99
+ rl += " #{volissue}" unless volissue.empty?
100
+ rl += ":#{ref.pages}" unless ref.pages.to_s.empty?
101
+ rl += "(#{ref.year})" unless ref.year.to_s.empty?
102
+ rl += '.'
103
+ lines << embl_wrap('RL ', rl)
104
+ end
105
+ lines << "XX"
106
+ return lines.join("\n")
107
+ end
108
+
109
+ def seq_format_embl(seq)
110
+ counter = 0
111
+ result = seq.gsub(/.{1,60}/) do |x|
112
+ counter += x.length
113
+ x = x.gsub(/.{10}/, '\0 ')
114
+ sprintf(" %-66s%9d\n", x, counter)
115
+ end
116
+ result.chomp!
117
+ result
118
+ end
119
+
120
+ def seq_composition(seq)
121
+ { :a => seq.count('aA'),
122
+ :c => seq.count('cC'),
123
+ :g => seq.count('gG'),
124
+ :t => seq.count('tTuU'),
125
+ :other => seq.count('^aAcCgGtTuU')
126
+ }
127
+ end
128
+
129
+ # molecule type
130
+ def mol_type_embl
131
+ if mt = molecule_type then
132
+ mt
133
+ elsif f = (features or []).find { |f| f.feature == 'source' } and
134
+ q = f.qualifiers.find { |q| q.qualifier == 'mol_type' } then
135
+ q.value
136
+ else
137
+ 'NA'
138
+ end
139
+ end
140
+
141
+ # CC line. Comments.
142
+ def comments_format_embl(cmnts)
143
+ return '' if !cmnts or cmnts.empty?
144
+ cmnts = [ cmnts ] unless cmnts.kind_of?(Array)
145
+ a = []
146
+ cmnts.each do |str|
147
+ a.push embl_wrap('CC ', str)
148
+ end
149
+ unless a.empty? then
150
+ a.push "XX "
151
+ a.push '' # dummy to put "\n" at the end of the string
152
+ end
153
+ a.join("\n")
154
+ end
155
+
156
+
157
+ # Erb template of EMBL format for Bio::Sequence
158
+ erb_template <<'__END_OF_TEMPLATE__'
159
+ ID <%= primary_accession || entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= mol_type_embl %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
160
+ XX
161
+ <%= embl_wrap('AC ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
162
+ XX
163
+ DT <%= format_date(date_created || null_date) %> (Rel. <%= release_created || 0 %>, Created)
164
+ DT <%= format_date(date_modified || null_date) %> (Rel. <%= release_modified || 0 %>, Last updated, Version <%= entry_version || 0 %>)
165
+ XX
166
+ <%= embl_wrap('DE ', definition) %>
167
+ XX
168
+ <%= embl_wrap('KW ', (keywords || []).join('; ') + '.') %>
169
+ XX
170
+ OS <%= species %>
171
+ <%= embl_wrap('OC ', (classification || []).join('; ') + '.') %>
172
+ XX
173
+ <% hash = {}; (references || []).each do |ref| %><%= reference_format_embl(ref, hash) %>
174
+ <% end %><% (dblinks || []).each do |r|
175
+ %>DR <%= r.database %>; <%= r.id %><% unless r.secondary_ids.empty? %>; <%= r.secondary_ids[0] %><% end %>.
176
+ <% end %><% if dblinks and !dblinks.empty? then
177
+ %>XX
178
+ <% end %><%= comments_format_embl(comments)
179
+ %>FH Key Location/Qualifiers
180
+ FH
181
+ <%= format_features_embl(features || []) %>XX
182
+ SQ Sequence <%= seq.length %> BP; <% c = seq_composition(seq) %><%= c[:a] %> A; <%= c[:c] %> C; <%= c[:g] %> G; <%= c[:t] %> T; <%= c[:other] %> other;
183
+ <%= seq_format_embl(seq) %>
184
+ //
185
+ __END_OF_TEMPLATE__
186
+
187
+ end #class Embl
188
+
189
+ end #module Bio::Sequence::Format::NucFormatter
190
+