bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,63 @@
1
+ #
2
+ # = bio/db/fasta/fasta_to_biosequence.rb - Bio::FastaFormat to Bio::Sequence adapter module
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>,
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'bio/sequence'
12
+ require 'bio/sequence/adapter'
13
+
14
+ # Internal use only. Normal users should not use this module.
15
+ #
16
+ # Bio::FastaFormat to Bio::Sequence adapter module.
17
+ # It is internally used in Bio::FastaFormat#to_biosequence.
18
+ #
19
+ module Bio::Sequence::Adapter::FastaFormat
20
+
21
+ extend Bio::Sequence::Adapter
22
+
23
+ private
24
+
25
+ def_biosequence_adapter :seq
26
+
27
+ # primary accession
28
+ def_biosequence_adapter :primary_accession do |orig|
29
+ orig.identifiers.accessions.first or orig.identifiers.entry_id
30
+ end
31
+
32
+ # secondary accessions
33
+ def_biosequence_adapter :secondary_accessions do |orig|
34
+ orig.identifiers.accessions[1..-1]
35
+ end
36
+
37
+ # entry_id
38
+ def_biosequence_adapter :entry_id do |orig|
39
+ orig.identifiers.locus or orig.identifiers.accessions.first or
40
+ orig.identifiers.entry_id
41
+ end
42
+
43
+ # NCBI GI is stored on other_seqids
44
+ def_biosequence_adapter :other_seqids do |orig|
45
+ other = []
46
+ if orig.identifiers.gi then
47
+ other.push Bio::Sequence::DBLink.new('GI', orig.identifiers.gi)
48
+ end
49
+ other.empty? ? nil : other
50
+ end
51
+
52
+ # definition
53
+ def_biosequence_adapter :definition do |orig|
54
+ if orig.identifiers.accessions.empty? and
55
+ !(orig.identifiers.gi) then
56
+ orig.definition
57
+ else
58
+ orig.identifiers.description
59
+ end
60
+ end
61
+
62
+ end #module Bio::Sequence::Adapter::FastaFormat
63
+
@@ -0,0 +1,97 @@
1
+ #
2
+ # = bio/db/fasta/format_fasta.rb - Fasta format generator
3
+ #
4
+ # Copyright:: Copyright (C) 2006-2008
5
+ # Toshiaki Katayama <k@bioruby.org>,
6
+ # Naohisa Goto <ng@bioruby.org>,
7
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
8
+ # License:: The Ruby License
9
+ #
10
+ # $Id: format_fasta.rb,v 1.1.2.1 2008/03/04 11:26:59 ngoto Exp $
11
+ #
12
+
13
+ require 'bio/sequence/format'
14
+
15
+ module Bio::Sequence::Format::Formatter
16
+
17
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
18
+ # Simple Fasta format output class for Bio::Sequence.
19
+ class Fasta < Bio::Sequence::Format::FormatterBase
20
+
21
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
22
+ #
23
+ # Creates a new Fasta format generator object from the sequence.
24
+ #
25
+ # ---
26
+ # *Arguments*:
27
+ # * _sequence_: Bio::Sequence object
28
+ # * (optional) :header => _header_: String (default nil)
29
+ # * (optional) :width => _width_: Fixnum (default 70)
30
+ def initialize; end if false # dummy for RDoc
31
+
32
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
33
+ #
34
+ # Output the FASTA format string of the sequence.
35
+ #
36
+ # Currently, this method is used in Bio::Sequence#output like so,
37
+ #
38
+ # s = Bio::Sequence.new('atgc')
39
+ # puts s.output(:fasta) #=> "> \natgc\n"
40
+ # ---
41
+ # *Returns*:: String object
42
+ def output
43
+ header = @options[:header]
44
+ width = @options.has_key?(:width) ? @options[:width] : 70
45
+ seq = @sequence.seq
46
+ entry_id = @sequence.entry_id ||
47
+ "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
48
+ definition = @sequence.definition
49
+ header ||= "#{entry_id} #{definition}"
50
+
51
+ ">#{header}\n" +
52
+ if width
53
+ seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
54
+ else
55
+ seq.to_s + "\n"
56
+ end
57
+ end
58
+ end #class Fasta
59
+
60
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
61
+ # NCBI-Style Fasta format output class for Bio::Sequence.
62
+ # (like "ncbi" format in EMBOSS)
63
+ #
64
+ # Note that this class is under construction.
65
+ class Fasta_ncbi < Bio::Sequence::Format::FormatterBase
66
+
67
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
68
+ #
69
+ # Output the FASTA format string of the sequence.
70
+ #
71
+ # Currently, this method is used in Bio::Sequence#output like so,
72
+ #
73
+ # s = Bio::Sequence.new('atgc')
74
+ # puts s.output(:ncbi) #=> ">lcl| \natgc\n"
75
+ # ---
76
+ # *Returns*:: String object
77
+ def output
78
+ width = 70
79
+ seq = @sequence.seq
80
+ #gi = @sequence.gi_number
81
+ dbname = 'lcl'
82
+ if @sequence.primary_accession.to_s.empty? then
83
+ idstr = @sequence.entry_id
84
+ else
85
+ idstr = "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
86
+ end
87
+
88
+ definition = @sequence.definition
89
+ header = "#{dbname}|#{idstr} #{definition}"
90
+
91
+ ">#{header}\n" + seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
92
+ end
93
+ end #class Fasta_ncbi
94
+
95
+ end #module Bio::Sequence::Format::Formatter
96
+
97
+
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: common.rb,v 1.11 2007/04/05 23:35:40 trevor Exp $
7
+ # $Id: common.rb,v 1.11.2.5 2008/06/17 15:53:21 ngoto Exp $
8
8
  #
9
9
 
10
10
  require 'bio/db'
@@ -44,7 +44,7 @@ module Common
44
44
 
45
45
  # ACCESSION -- Returns contents of the ACCESSION record as an Array.
46
46
  def accessions
47
- accession.split(/\s+/)
47
+ field_fetch('ACCESSION').strip.split(/\s+/)
48
48
  end
49
49
 
50
50
 
@@ -137,9 +137,20 @@ module Common
137
137
  unless @data['REFERENCE']
138
138
  ary = []
139
139
  toptag2array(get('REFERENCE')).each do |ref|
140
- hash = Hash.new('')
140
+ hash = Hash.new
141
141
  subtag2array(ref).each do |field|
142
142
  case tag_get(field)
143
+ when /REFERENCE/
144
+ if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field) then
145
+ hash['embl_gb_record_number'] = $1.to_i
146
+ if $3 and $3 != 'sites' then
147
+ seqpos = $3
148
+ seqpos.sub!(/\A\s*bases\s+/, '')
149
+ seqpos.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
150
+ seqpos.gsub!(/\s*\;\s*/, ', ')
151
+ hash['sequence_position'] = seqpos
152
+ end
153
+ end
143
154
  when /AUTHORS/
144
155
  authors = truncate(tag_cut(field))
145
156
  authors = authors.split(/, /)
@@ -163,11 +174,14 @@ module Common
163
174
  hash['medline'] = truncate(tag_cut(field))
164
175
  when /PUBMED/
165
176
  hash['pubmed'] = truncate(tag_cut(field))
177
+ when /REMARK/
178
+ hash['comments'] ||= []
179
+ hash['comments'].push truncate(tag_cut(field))
166
180
  end
167
181
  end
168
182
  ary.push(Reference.new(hash))
169
183
  end
170
- @data['REFERENCE'] = References.new(ary)
184
+ @data['REFERENCE'] = ary.extend(Bio::References::BackwardCompatibility)
171
185
  end
172
186
  if block_given?
173
187
  @data['REFERENCE'].each do |r|
@@ -181,12 +195,15 @@ module Common
181
195
 
182
196
  # COMMENT -- Returns contents of the COMMENT record as a String.
183
197
  def comment
184
- field_fetch('COMMENT')
198
+ str = get('COMMENT').to_s.sub(/\ACOMMENT /, '')
199
+ str.gsub!(/^ {12}/, '')
200
+ str.chomp!
201
+ str
185
202
  end
186
203
 
187
204
 
188
- # FEATURES -- Returns contents of the FEATURES record as a Bio::Features
189
- # object.
205
+ # FEATURES -- Returns contents of the FEATURES record as an array of
206
+ # Bio::Feature objects.
190
207
  def features
191
208
  unless @data['FEATURES']
192
209
  ary = []
@@ -228,7 +245,7 @@ module Common
228
245
  parse_qualifiers(subary)
229
246
  end
230
247
 
231
- @data['FEATURES'] = Features.new(ary)
248
+ @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
232
249
  end
233
250
  if block_given?
234
251
  @data['FEATURES'].each do |f|
@@ -0,0 +1,187 @@
1
+ #
2
+ # = bio/db/genbank/format_genbank.rb - GenBank format generator
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id: format_genbank.rb,v 1.1.2.5 2008/06/17 15:59:24 ngoto Exp $
8
+ #
9
+
10
+ require 'bio/sequence/format'
11
+
12
+ module Bio::Sequence::Format::NucFormatter
13
+
14
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
15
+ # GenBank format output class for Bio::Sequence.
16
+ class Genbank < Bio::Sequence::Format::FormatterBase
17
+
18
+ # helper methods
19
+ include Bio::Sequence::Format::INSDFeatureHelper
20
+
21
+ private
22
+
23
+ # string wrapper for GenBank format
24
+ def genbank_wrap(str)
25
+ wrap(str.to_s, 67).gsub(/\n/, "\n" + " " * 12)
26
+ end
27
+
28
+ # string wrap with adding a dot at the end of the string
29
+ def genbank_wrap_dot(str)
30
+ str = str.to_s
31
+ str = str + '.' unless /\.\z/ =~ str
32
+ genbank_wrap(str)
33
+ end
34
+
35
+ # The given words (an Array of String) are wrapped in GenBank style.
36
+ # A word is never split across lines.
37
+ def genbank_wrap_words(array)
38
+ width = 67
39
+ result = []
40
+ str = nil
41
+ array.each do |x|
42
+ if str then
43
+ if str.length + 1 + x.length > width then
44
+ str = nil
45
+ else
46
+ str.concat ' '
47
+ str.concat x
48
+ end
49
+ end
50
+ unless str then
51
+ str = "#{x}"
52
+ result.push str
53
+ end
54
+ end
55
+ result.join("\n" + " " * 12)
56
+ end
57
+
58
+ # formats references
59
+ def reference_format_genbank(ref, num)
60
+ pos = ref.sequence_position.to_s.gsub(/\s/, '')
61
+ pos.gsub!(/(\d+)\-(\d+)/, "\\1 to \\2")
62
+ pos.gsub!(/\s*\,\s*/, '; ')
63
+ if pos.empty?
64
+ pos = ''
65
+ else
66
+ pos = " (bases #{pos})"
67
+ end
68
+ volissue = "#{ref.volume.to_s}"
69
+ volissue += " (#{ref.issue})" unless ref.issue.to_s.empty?
70
+ journal = "#{ref.journal.to_s}"
71
+ journal += " #{volissue}" unless volissue.empty?
72
+ journal += ", #{ref.pages}" unless ref.pages.to_s.empty?
73
+ journal += " (#{ref.year})" unless ref.year.to_s.empty?
74
+
75
+ alist = ref.authors.collect do |x|
76
+ y = x.to_s.strip.split(/\, *([^\,]+)\z/)
77
+ y[1].gsub!(/\. +/, '.') if y[1]
78
+ y.join(',')
79
+ end
80
+ lastauthor = alist.pop
81
+ last2author = alist.pop
82
+ alist.each { |x| x.concat ',' }
83
+ alist.push last2author if last2author
84
+ alist.push "and" unless alist.empty?
85
+ alist.push lastauthor.to_s
86
+ result = <<__END_OF_REFERENCE__
87
+ REFERENCE #{ genbank_wrap(sprintf('%-2d%s', num, pos))}
88
+ AUTHORS #{ genbank_wrap_words(alist) }
89
+ TITLE #{ genbank_wrap(ref.title.to_s) }
90
+ JOURNAL #{ genbank_wrap(journal) }
91
+ __END_OF_REFERENCE__
92
+ unless ref.pubmed.to_s.empty? then
93
+ result.concat " PUBMED #{ genbank_wrap(ref.pubmed) }\n"
94
+ end
95
+ if ref.comments and !(ref.comments.empty?) then
96
+ ref.comments.each do |c|
97
+ result.concat " REMARK #{ genbank_wrap(c) }\n"
98
+ end
99
+ end
100
+ result
101
+ end
102
+
103
+ # formats comments lines as GenBank
104
+ def comments_format_genbank(cmnts)
105
+ return '' if !cmnts or cmnts.empty?
106
+ cmnts = [ cmnts ] unless cmnts.kind_of?(Array)
107
+ a = []
108
+ cmnts.each do |str|
109
+ a.push "COMMENT #{ genbank_wrap(str) }\n"
110
+ end
111
+ a.join('')
112
+ end
113
+
114
+ # formats sequence lines as GenBank
115
+ def seq_format_genbank(str)
116
+ i = 1
117
+ result = str.gsub(/.{1,60}/) do |s|
118
+ s = s.gsub(/.{1,10}/, ' \0')
119
+ y = sprintf("%9d%s\n", i, s)
120
+ i += 60
121
+ y
122
+ end
123
+ result
124
+ end
125
+
126
+ # formats date
127
+ def date_format_genbank
128
+ date_modified || date_created || null_date
129
+ end
130
+
131
+ # molecule type
132
+ def mol_type_genbank
133
+ if /(DNA|(t|r|m|u|sn|sno)?RNA)/i =~ molecule_type.to_s then
134
+ $1.sub(/[DR]NA/) { |x| x.upcase }
135
+ else
136
+ 'NA'
137
+ end
138
+ end
139
+
140
+ # NCBI GI number
141
+ def ncbi_gi_number
142
+ ids = other_seqids
143
+ if ids and r = ids.find { |x| x.database == 'GI' } then
144
+ r.id
145
+ else
146
+ nil
147
+ end
148
+ end
149
+
150
+ # strandedness
151
+ def strandedness_genbank
152
+ return nil unless strandedness
153
+ case strandedness
154
+ when 'single'; 'ss-';
155
+ when 'double'; 'ds-';
156
+ when 'mixed'; 'ms-';
157
+ else; nil
158
+ end
159
+ end
160
+
161
+ # Erb template of GenBank format for Bio::Sequence
162
+ erb_template <<'__END_OF_TEMPLATE__'
163
+ LOCUS <%= sprintf("%-16s", entry_id) %> <%= sprintf("%11d", length) %> bp <%= sprintf("%3s", strandedness_genbank) %><%= sprintf("%-6s", mol_type_genbank) %> <%= sprintf("%-8s", topology) %><%= sprintf("%4s", division) %> <%= date_format_genbank %>
164
+ DEFINITION <%= genbank_wrap_dot(definition.to_s) %>
165
+ ACCESSION <%= genbank_wrap(([ primary_accession ] + (secondary_accessions or [])).join(" ")) %>
166
+ VERSION <%= primary_accession %>.<%= sequence_version %><% if gi = ncbi_gi_number then %> GI:<%= gi %><% end %>
167
+ KEYWORDS <%= genbank_wrap_dot((keywords or []).join('; ')) %>
168
+ SOURCE <%= genbank_wrap(species) %>
169
+ ORGANISM <%= genbank_wrap(species) %>
170
+ <%= genbank_wrap_dot((classification or []).join('; ')) %>
171
+ <%
172
+ n = 0
173
+ (references or []).each do |ref|
174
+ n += 1
175
+ %><%= reference_format_genbank(ref, n) %><%
176
+ end
177
+ %><%= comments_format_genbank(comments)
178
+ %>FEATURES Location/Qualifiers
179
+ <%= format_features_genbank(features || [])
180
+ %>ORIGIN
181
+ <%= seq_format_genbank(seq)
182
+ %>//
183
+ __END_OF_TEMPLATE__
184
+
185
+ end #class Genbank
186
+ end #module Bio::Sequence::Format::NucFormatter
187
+
@@ -4,11 +4,14 @@
4
4
  # Copyright:: Copyright (C) 2000-2005 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: genbank.rb,v 0.40 2007/04/05 23:35:40 trevor Exp $
7
+ # $Id: genbank.rb,v 0.40.2.4 2008/06/17 15:56:18 ngoto Exp $
8
8
  #
9
9
 
10
+ require 'date'
10
11
  require 'bio/db'
11
12
  require 'bio/db/genbank/common'
13
+ require 'bio/sequence'
14
+ require 'bio/sequence/dblink'
12
15
 
13
16
  module Bio
14
17
 
@@ -121,10 +124,42 @@ class GenBank < NCBIDB
121
124
  alias naseq seq
122
125
  alias nalen length
123
126
 
127
+ # (obsolete???) length of the sequence
124
128
  def seq_len
125
129
  seq.length
126
130
  end
127
131
 
132
+ # modified date. Returns Date object, String or nil.
133
+ def date_modified
134
+ begin
135
+ Date.parse(self.date)
136
+ rescue ArgumentError, TypeError, NoMethodError, NameError
137
+ self.date
138
+ end
139
+ end
140
+
141
+ # Taxonomy classification. Returns an array of strings.
142
+ def classification
143
+ self.taxonomy.to_s.sub(/\.\z/, '').split(/\s*\;\s*/)
144
+ end
145
+
146
+ # Strandedness. Returns one of 'single', 'double', 'mixed', or nil.
147
+ def strandedness
148
+ case self.strand.to_s.downcase
149
+ when 'ss-'; 'single'
150
+ when 'ds-'; 'double'
151
+ when 'ms-'; 'mixed'
152
+ else nil; end
153
+ end
154
+
155
+ # converts Bio::GenBank to Bio::Sequence
156
+ # ---
157
+ # *Arguments*:
158
+ # *Returns*:: Bio::Sequence object
159
+ def to_biosequence
160
+ Bio::Sequence.adapter(self, Bio::Sequence::Adapter::GenBank)
161
+ end
162
+
128
163
  end # GenBank
129
164
  end # Bio
130
165