bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -9,7 +9,7 @@
9
9
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
10
10
  # License:: The Ruby License
11
11
  #
12
- # $Id: sequence.rb,v 0.58 2007/04/05 23:35:39 trevor Exp $
12
+ # $Id: sequence.rb,v 0.58.2.12 2008/06/17 15:25:22 ngoto Exp $
13
13
  #
14
14
 
15
15
  require 'bio/sequence/compat'
@@ -70,6 +70,9 @@ class Sequence
70
70
  autoload :AA, 'bio/sequence/aa'
71
71
  autoload :Generic, 'bio/sequence/generic'
72
72
  autoload :Format, 'bio/sequence/format'
73
+ autoload :Adapter, 'bio/sequence/adapter'
74
+
75
+ include Format
73
76
 
74
77
  # Create a new Bio::Sequence object
75
78
  #
@@ -95,71 +98,137 @@ class Sequence
95
98
  # Pass any unknown method calls to the wrapped sequence object. see
96
99
  # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
97
100
  def method_missing(sym, *args, &block) #:nodoc:
98
- @seq.send(sym, *args, &block)
101
+ begin
102
+ seq.__send__(sym, *args, &block)
103
+ rescue NoMethodError => evar
104
+ lineno = __LINE__ - 2
105
+ file = __FILE__
106
+ bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
107
+ "#{file}:#{lineno}:in \`method_missing\'"
108
+ ]
109
+ if bt_here == evar.backtrace[0, 2] then
110
+ bt = evar.backtrace[2..-1]
111
+ evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
112
+ evar.set_backtrace(bt)
113
+ end
114
+ #p lineno
115
+ #p file
116
+ #p bt_here
117
+ #p evar.backtrace
118
+ raise(evar)
119
+ end
99
120
  end
100
121
 
101
- # The sequence identifier. For example, for a sequence
102
- # of Genbank origin, this is the accession number.
122
+ # The sequence identifier (String). For example, for a sequence
123
+ # of Genbank origin, this is the locus name.
124
+ # For a sequence of EMBL origin, this is the primary accession number.
103
125
  attr_accessor :entry_id
104
126
 
105
- # A String with a description of the sequence
127
+ # A String with a description of the sequence (String)
106
128
  attr_accessor :definition
107
129
 
108
- # An Array of Bio::Feature objects
130
+ # Features (An Array of Bio::Feature objects)
109
131
  attr_accessor :features
110
132
 
111
- # An Array of Bio::Reference objects
133
+ # References (An Array of Bio::Reference objects)
112
134
  attr_accessor :references
113
135
 
114
- # A comment String
136
+ # Comments (String or an Array of String)
115
137
  attr_accessor :comments
116
138
 
117
- # Date from sequence source. Often date of deposition.
118
- attr_accessor :date
119
-
120
- # An Array of Strings
139
+ # Keywords (An Array of String)
121
140
  attr_accessor :keywords
122
141
 
123
- # An Array of Strings; links to other database entries.
142
+ # Links to other database entries.
143
+ # (An Array of Bio::Sequence::DBLink objects)
124
144
  attr_accessor :dblinks
125
-
126
- # A taxonomy String
127
- attr_accessor :taxonomy
128
-
145
+
129
146
  # Bio::Sequence::NA/AA
130
147
  attr_accessor :moltype
131
148
 
132
149
  # The sequence object, usually Bio::Sequence::NA/AA,
133
150
  # but could be a simple String
134
151
  attr_accessor :seq
152
+
153
+ #---
154
+ # Attributes below have been added during BioHackathon2008
155
+ #+++
135
156
 
136
- # Using Bio::Sequence::Format, return a String with the Bio::Sequence
137
- # object formatted in the given style.
138
- #
139
- # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
140
- #
141
- # s = Bio::Sequence.new('atgc')
142
- # puts s.output(:fasta) #=> "> \natgc\n"
143
- #
144
- # The style argument is given as a Ruby
145
- # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
146
- # ---
147
- # *Arguments*:
148
- # * (required) _style_: :fasta, :genbank, *or* :embl
149
- # *Returns*:: String object
150
- def output(style)
151
- extend Bio::Sequence::Format
152
- case style
153
- when :fasta
154
- format_fasta
155
- when :gff
156
- format_gff
157
- when :genbank
158
- format_genbank
159
- when :embl
160
- format_embl
161
- end
162
- end
157
+ # Version number of the sequence (String or Integer).
158
+ # Unlike <tt>entry_version</tt>, <tt>sequence_version</tt> will be changed
159
+ # when the submitter of the sequence updates the entry.
160
+ # Normally, the same entry taken from different databases (EMBL, GenBank,
161
+ # and DDBJ) may have the same sequence_version.
162
+ attr_accessor :sequence_version
163
+
164
+ # Topology (String). "circular", "linear", or nil.
165
+ attr_accessor :topology
166
+
167
+ # Strandedness (String). "single" (single-stranded),
168
+ # "double" (double-stranded), "mixed" (mixed-stranded), or nil.
169
+ attr_accessor :strandedness
170
+
171
+ # molecular type (String). "DNA" or "RNA" for nucleotide sequence.
172
+ attr_accessor :molecule_type
173
+
174
+ # Data Class defined by EMBL (String)
175
+ # See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_1
176
+ attr_accessor :data_class
177
+
178
+ # Taxonomic Division defined by EMBL/GenBank/DDBJ (String)
179
+ # See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
180
+ attr_accessor :division
181
+
182
+ # Primary accession number (String)
183
+ attr_accessor :primary_accession
184
+
185
+ # Secondary accession numbers (Array of String)
186
+ attr_accessor :secondary_accessions
187
+
188
+ # Created date of the sequence entry (Date, DateTime, Time, or String)
189
+ attr_accessor :date_created
190
+
191
+ # Last modified date of the sequence entry (Date, DateTime, Time, or String)
192
+ attr_accessor :date_modified
193
+
194
+ # Release information when created (String)
195
+ attr_accessor :release_created
196
+
197
+ # Release information when last-modified (String)
198
+ attr_accessor :release_modified
199
+
200
+ # Version of the entry (String or Integer).
201
+ # Unlike <tt>sequence_version</tt>, <tt>entry_version</tt> is a database
202
+ # maintainer's internal version number.
203
+ # The version number will be changed when the database maintainer
204
+ # modifies the entry.
205
+ # The same entry in EMBL, GenBank, and DDBJ may have different
206
+ # entry_version.
207
+ attr_accessor :entry_version
208
+
209
+ # Organism species (String). For example, "Escherichia coli".
210
+ attr_accessor :species
211
+
212
+ # Organism classification, taxonomic classification of the source organism.
213
+ # (Array of String)
214
+ attr_accessor :classification
215
+ alias taxonomy classification
216
+
217
+ # (not well supported) Organelle information (String).
218
+ attr_accessor :organelle
219
+
220
+ # Namespace of the sequence IDs described in entry_id, primary_accession,
221
+ # and secondary_accessions methods (String).
222
+ # For example, 'EMBL', 'GenBank', 'DDBJ', 'RefSeq'.
223
+ attr_accessor :id_namespace
224
+
225
+ # Sequence identifiers which are not described in entry_id,
226
+ # primary_accession, and secondary_accessions methods
227
+ # (Array of Bio::Sequence::DBLink objects).
228
+ # For example, NCBI GI number can be stored.
229
+ # Note that only identifiers of the entry itself should be stored.
230
+ # For database cross references, <tt>dblinks</tt> should be used.
231
+ attr_accessor :other_seqids
163
232
 
164
233
  # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
165
234
  # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
@@ -174,9 +243,9 @@ class Sequence
174
243
  def auto
175
244
  @moltype = guess
176
245
  if @moltype == NA
177
- @seq = NA.new(@seq)
246
+ @seq = NA.new(seq)
178
247
  else
179
- @seq = AA.new(@seq)
248
+ @seq = AA.new(seq)
180
249
  end
181
250
  end
182
251
 
@@ -236,7 +305,7 @@ class Sequence
236
305
  # * (optional) _index_: Fixnum (default 0)
237
306
  # *Returns*:: Bio::Sequence::NA/AA
238
307
  def guess(threshold = 0.9, length = 10000, index = 0)
239
- str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
308
+ str = seq.to_s[index,length].to_s.extend Bio::Sequence::Common
240
309
  cmp = str.composition
241
310
 
242
311
  bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
@@ -309,7 +378,7 @@ class Sequence
309
378
  # ---
310
379
  # *Returns*:: Bio::Sequence::NA
311
380
  def na
312
- @seq = NA.new(@seq)
381
+ @seq = NA.new(seq)
313
382
  @moltype = NA
314
383
  end
315
384
 
@@ -330,146 +399,58 @@ class Sequence
330
399
  # ---
331
400
  # *Returns*:: Bio::Sequence::AA
332
401
  def aa
333
- @seq = AA.new(@seq)
402
+ @seq = AA.new(seq)
334
403
  @moltype = AA
335
404
  end
336
-
337
- end # Sequence
338
405
 
406
+ # Create a new Bio::Sequence object from a formatted string
407
+ # (GenBank, EMBL, fasta format, etc.)
408
+ #
409
+ # s = Bio::Sequence.input(str)
410
+ # ---
411
+ # *Arguments*:
412
+ # * (required) _str_: string
413
+ # * (optional) _format_: format specification (class or nil)
414
+ # *Returns*:: Bio::Sequence object
415
+ def self.input(str, format = nil)
416
+ if format then
417
+ klass = format
418
+ else
419
+ klass = Bio::FlatFile::AutoDetect.default.autodetect(str)
420
+ end
421
+ obj = klass.new(str)
422
+ obj.to_biosequence
423
+ end
339
424
 
340
- end # Bio
425
+ # alias of Bio::Sequence.input
426
+ def self.read(str, format = nil)
427
+ input(str, format)
428
+ end
341
429
 
430
+ # accession numbers of the sequence
431
+ #
432
+ # *Returns*:: Array of String
433
+ def accessions
434
+ [ primary_accession, secondary_accessions ].flatten.compact
435
+ end
436
+
437
+ # Normally, users should not call this method directly.
438
+ # Use Bio::*#to_biosequence (e.g. Bio::GenBank#to_biosequence).
439
+ #
440
+ # Creates a new Bio::Sequence object from database data with an
441
+ # adapter module.
442
+ def self.adapter(source_data, adapter_module)
443
+ biosequence = self.new(nil)
444
+ biosequence.instance_eval {
445
+ remove_instance_variable(:@seq)
446
+ @source_data = source_data
447
+ }
448
+ biosequence.extend(adapter_module)
449
+ biosequence
450
+ end
451
+
452
+ end # Sequence
342
453
 
343
- if __FILE__ == $0
344
-
345
- puts "== Test Bio::Sequence::NA.new"
346
- p Bio::Sequence::NA.new('')
347
- p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
348
- p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')
349
-
350
- puts "\n== Test Bio::Sequence::AA.new"
351
- p Bio::Sequence::AA.new('')
352
- p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
353
-
354
- puts "\n== Test Bio::Sequence#to_s"
355
- p na.to_s
356
- p aa.to_s
357
-
358
- puts "\n== Test Bio::Sequence#subseq(2,6)"
359
- p na
360
- p na.subseq(2,6)
361
-
362
- puts "\n== Test Bio::Sequence#[2,6]"
363
- p na
364
- p na[2,6]
365
-
366
- puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
367
- puts na.to_fasta('hoge', 8)
368
-
369
- puts "\n== Test Bio::Sequence#window_search(15)"
370
- p na
371
- na.window_search(15) {|x| p x}
372
-
373
- puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
374
- p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})
375
-
376
- puts "\n== Test Bio::Sequence#composition"
377
- p na
378
- p na.composition
379
- p rna
380
- p rna.composition
381
-
382
- puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
383
- p na
384
- p na.splicing("complement(join(1..5,16..20))")
385
- p rna
386
- p rna.splicing("complement(join(1..5,16..20))")
387
-
388
- puts "\n== Test Bio::Sequence::NA#complement"
389
- p na.complement
390
- p rna.complement
391
- p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
392
- p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement
393
-
394
- puts "\n== Test Bio::Sequence::NA#translate"
395
- p na
396
- p na.translate
397
- p rna
398
- p rna.translate
399
-
400
- puts "\n== Test Bio::Sequence::NA#gc_percent"
401
- p na.gc_percent
402
- p rna.gc_percent
403
-
404
- puts "\n== Test Bio::Sequence::NA#illegal_bases"
405
- p na.illegal_bases
406
- p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
407
- p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases
408
-
409
- puts "\n== Test Bio::Sequence::NA#molecular_weight"
410
- p na
411
- p na.molecular_weight
412
- p rna
413
- p rna.molecular_weight
414
-
415
- puts "\n== Test Bio::Sequence::NA#to_re"
416
- p Bio::Sequence::NA.new('atgcrymkdhvbswn')
417
- p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
418
- p Bio::Sequence::NA.new('augcrymkdhvbswn')
419
- p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re
420
-
421
- puts "\n== Test Bio::Sequence::NA#names"
422
- p na.names
423
-
424
- puts "\n== Test Bio::Sequence::NA#pikachu"
425
- p na.pikachu
426
-
427
- puts "\n== Test Bio::Sequence::NA#randomize"
428
- print "Orig : "; p na
429
- print "Rand : "; p na.randomize
430
- print "Rand : "; p na.randomize
431
- print "Rand : "; p na.randomize.randomize
432
- print "Block : "; na.randomize do |x| print x end; puts
433
-
434
- print "Orig : "; p rna
435
- print "Rand : "; p rna.randomize
436
- print "Rand : "; p rna.randomize
437
- print "Rand : "; p rna.randomize.randomize
438
- print "Block : "; rna.randomize do |x| print x end; puts
439
-
440
- puts "\n== Test Bio::Sequence::NA.randomize(counts)"
441
- print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
442
- print "Rand : "; p Bio::Sequence::NA.randomize(counts)
443
- print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
444
- print "Rand : "; p Bio::Sequence::NA.randomize(counts)
445
- print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts
446
-
447
- puts "\n== Test Bio::Sequence::AA#codes"
448
- p aa
449
- p aa.codes
450
-
451
- puts "\n== Test Bio::Sequence::AA#names"
452
- p aa
453
- p aa.names
454
-
455
- puts "\n== Test Bio::Sequence::AA#molecular_weight"
456
- p aa.subseq(1,20)
457
- p aa.subseq(1,20).molecular_weight
458
-
459
- puts "\n== Test Bio::Sequence::AA#randomize"
460
- aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
461
- s = Bio::Sequence::AA.new(aaseq)
462
- print "Orig : "; p s
463
- print "Rand : "; p s.randomize
464
- print "Rand : "; p s.randomize
465
- print "Rand : "; p s.randomize.randomize
466
- print "Block : "; s.randomize {|x| print x}; puts
467
-
468
- puts "\n== Test Bio::Sequence::AA.randomize(counts)"
469
- print "Count : "; p counts = s.composition
470
- print "Rand : "; puts Bio::Sequence::AA.randomize(counts)
471
- print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts
472
-
473
- end
474
454
 
455
+ end # Bio
475
456
 
@@ -0,0 +1,108 @@
1
+ #
2
+ # = bio/sequence/adapter.rb - Bio::Sequence adapter helper module
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>,
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'bio/sequence'
12
+
13
+ # Internal use only. Normal users should not use this module.
14
+ #
15
+ # Helper methods for defining adapters used when converting data classes to
16
+ # Bio::Sequence class, with pseudo lazy evaluation and pseudo memoization.
17
+ #
18
+ # This module is used by using "extend", not "include".
19
+ #
20
+ module Bio::Sequence::Adapter
21
+
22
+ autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
23
+ autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
24
+ autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
25
+ autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
26
+
27
+ private
28
+
29
+ # Defines a reader attribute method with pseudo lazy evaluation/memoization.
30
+ #
31
+ # It defines a method <i>name</i> like attr_reader, but at the first time
32
+ # when the method <i>name</i> is called, it acts as follows:
33
+ # When instance variable @<i>name</i> is not defined,
34
+ # calls <tt>__get__<i>name</i>(@source_data)</tt> and stores the returned
35
+ # value to @<i>name</i>, and changes its behavior to the same as
36
+ # <tt>attr_reader </tt><i>:name</i>.
37
+ # When instance variable @name is already defined,
38
+ # its behavior is changed to the same as
39
+ # <tt>attr_reader </tt><i>:name</i>.
40
+ # When the object is frozen, storing to the instance variable and
41
+ # changing methods behavior do not occur, and the value of
42
+ # <tt>__get__<i>name</i>(@source_data)</tt> is returned.
43
+ #
44
+ # Note that it assumes that the source data object is stored in
45
+ # @source_data instance variable.
46
+ def attr_reader_lazy(name)
47
+ #$stderr.puts "attr_reader_lazy :#{name}"
48
+ varname = "@#{name}".intern
49
+ methodname = "__get__#{name}".intern
50
+
51
+ # module to reset method's behavior to normal attr_reader
52
+ reset = "Attr_#{name}".intern
53
+ const_set(reset, Module.new { attr_reader name })
54
+ reset_module_name = "#{self}::#{reset}"
55
+
56
+ # define attr method
57
+ module_eval <<__END_OF_DEF__
58
+ def #{name}
59
+ unless defined? #{varname} then
60
+ #$stderr.puts "LAZY #{name}: calling #{methodname}"
61
+ val = #{methodname}(@source_data)
62
+ #{varname} = val unless frozen?
63
+ else
64
+ val = #{varname}
65
+ end
66
+ unless frozen? then
67
+ #$stderr.puts "LAZY #{name}: finalize: attr_reader :#{name}"
68
+ self.extend(#{reset_module_name})
69
+ end
70
+ val
71
+ end
72
+ __END_OF_DEF__
73
+ end
74
+
75
+ # Defines a Bio::Sequence to Bio::* adapter method with
76
+ # pseudo lazy evaluation and pseudo memoization.
77
+ #
78
+ # Without block, defines a private method <tt>__get__<i>name</i>(orig)</tt>
79
+ # which calls <i>source_method</i> for @source_data.
80
+ #
81
+ # def_biosequence_adapter(name, source_method) is the same as:
82
+ # def __get__name(orig); orig.source_method; end
83
+ # attr_reader_lazy name
84
+ #
85
+ # If block is given, <tt>__get__<i>name</i>(orig)</tt> is defined
86
+ # with the block. The @source_data is given as an argument of the block,
87
+ # i.e. the block must get an argument.
88
+ #
89
+ def def_biosequence_adapter(name, source_method = name, &block)
90
+ methodname = "__get__#{name}".intern
91
+
92
+ if block then
93
+ define_method(methodname, block)
94
+ else
95
+ module_eval <<__END_OF_DEF__
96
+ def #{methodname}(orig)
97
+ orig.#{source_method}
98
+ end
99
+ __END_OF_DEF__
100
+ end
101
+ private methodname
102
+ attr_reader_lazy name
103
+ true
104
+ end
105
+
106
+ end #module Bio::Sequence::Adapter
107
+
108
+