bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -9,7 +9,7 @@
9
9
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
10
10
  # License:: The Ruby License
11
11
  #
12
- # $Id: sequence.rb,v 0.58 2007/04/05 23:35:39 trevor Exp $
12
+ # $Id: sequence.rb,v 0.58.2.12 2008/06/17 15:25:22 ngoto Exp $
13
13
  #
14
14
 
15
15
  require 'bio/sequence/compat'
@@ -70,6 +70,9 @@ class Sequence
70
70
  autoload :AA, 'bio/sequence/aa'
71
71
  autoload :Generic, 'bio/sequence/generic'
72
72
  autoload :Format, 'bio/sequence/format'
73
+ autoload :Adapter, 'bio/sequence/adapter'
74
+
75
+ include Format
73
76
 
74
77
  # Create a new Bio::Sequence object
75
78
  #
@@ -95,71 +98,137 @@ class Sequence
95
98
  # Pass any unknown method calls to the wrapped sequence object. see
96
99
  # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
97
100
  def method_missing(sym, *args, &block) #:nodoc:
98
- @seq.send(sym, *args, &block)
101
+ begin
102
+ seq.__send__(sym, *args, &block)
103
+ rescue NoMethodError => evar
104
+ lineno = __LINE__ - 2
105
+ file = __FILE__
106
+ bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
107
+ "#{file}:#{lineno}:in \`method_missing\'"
108
+ ]
109
+ if bt_here == evar.backtrace[0, 2] then
110
+ bt = evar.backtrace[2..-1]
111
+ evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
112
+ evar.set_backtrace(bt)
113
+ end
114
+ #p lineno
115
+ #p file
116
+ #p bt_here
117
+ #p evar.backtrace
118
+ raise(evar)
119
+ end
99
120
  end
100
121
 
101
- # The sequence identifier. For example, for a sequence
102
- # of Genbank origin, this is the accession number.
122
+ # The sequence identifier (String). For example, for a sequence
123
+ # of Genbank origin, this is the locus name.
124
+ # For a sequence of EMBL origin, this is the primary accession number.
103
125
  attr_accessor :entry_id
104
126
 
105
- # A String with a description of the sequence
127
+ # A description of the sequence (String)
106
128
  attr_accessor :definition
107
129
 
108
- # An Array of Bio::Feature objects
130
+ # Features (An Array of Bio::Feature objects)
109
131
  attr_accessor :features
110
132
 
111
- # An Array of Bio::Reference objects
133
+ # References (An Array of Bio::Reference objects)
112
134
  attr_accessor :references
113
135
 
114
- # A comment String
136
+ # Comments (String or an Array of String)
115
137
  attr_accessor :comments
116
138
 
117
- # Date from sequence source. Often date of deposition.
118
- attr_accessor :date
119
-
120
- # An Array of Strings
139
+ # Keywords (An Array of String)
121
140
  attr_accessor :keywords
122
141
 
123
- # An Array of Strings; links to other database entries.
142
+ # Links to other database entries.
143
+ # (An Array of Bio::Sequence::DBLink objects)
124
144
  attr_accessor :dblinks
125
-
126
- # A taxonomy String
127
- attr_accessor :taxonomy
128
-
145
+
129
146
  # Bio::Sequence::NA/AA
130
147
  attr_accessor :moltype
131
148
 
132
149
  # The sequence object, usually Bio::Sequence::NA/AA,
133
150
  # but could be a simple String
134
151
  attr_accessor :seq
152
+
153
+ #---
154
+ # Attributes below have been added during BioHackathon2008
155
+ #+++
135
156
 
136
- # Using Bio::Sequence::Format, return a String with the Bio::Sequence
137
- # object formatted in the given style.
138
- #
139
- # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
140
- #
141
- # s = Bio::Sequence.new('atgc')
142
- # puts s.output(:fasta) #=> "> \natgc\n"
143
- #
144
- # The style argument is given as a Ruby
145
- # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
146
- # ---
147
- # *Arguments*:
148
- # * (required) _style_: :fasta, :genbank, *or* :embl
149
- # *Returns*:: String object
150
- def output(style)
151
- extend Bio::Sequence::Format
152
- case style
153
- when :fasta
154
- format_fasta
155
- when :gff
156
- format_gff
157
- when :genbank
158
- format_genbank
159
- when :embl
160
- format_embl
161
- end
162
- end
157
+ # Version number of the sequence (String or Integer).
158
+ # Unlike <tt>entry_version</tt>, <tt>sequence_version</tt> will be changed
159
+ # when the submitter of the sequence updates the entry.
160
+ # Normally, the same entry taken from different databases (EMBL, GenBank,
161
+ # and DDBJ) may have the same sequence_version.
162
+ attr_accessor :sequence_version
163
+
164
+ # Topology (String). "circular", "linear", or nil.
165
+ attr_accessor :topology
166
+
167
+ # Strandedness (String). "single" (single-stranded),
168
+ # "double" (double-stranded), "mixed" (mixed-stranded), or nil.
169
+ attr_accessor :strandedness
170
+
171
+ # molecular type (String). "DNA" or "RNA" for nucleotide sequence.
172
+ attr_accessor :molecule_type
173
+
174
+ # Data Class defined by EMBL (String)
175
+ # See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_1
176
+ attr_accessor :data_class
177
+
178
+ # Taxonomic Division defined by EMBL/GenBank/DDBJ (String)
179
+ # See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
180
+ attr_accessor :division
181
+
182
+ # Primary accession number (String)
183
+ attr_accessor :primary_accession
184
+
185
+ # Secondary accession numbers (Array of String)
186
+ attr_accessor :secondary_accessions
187
+
188
+ # Created date of the sequence entry (Date, DateTime, Time, or String)
189
+ attr_accessor :date_created
190
+
191
+ # Last modified date of the sequence entry (Date, DateTime, Time, or String)
192
+ attr_accessor :date_modified
193
+
194
+ # Release information when created (String)
195
+ attr_accessor :release_created
196
+
197
+ # Release information when last-modified (String)
198
+ attr_accessor :release_modified
199
+
200
+ # Version of the entry (String or Integer).
201
+ # Unlike <tt>sequence_version</tt>, <tt>entry_version</tt> is a database
202
+ # maintainer's internal version number.
203
+ # The version number will be changed when the database maintainer
204
+ # modifies the entry.
205
+ # The same entry in EMBL, GenBank, and DDBJ may have different
206
+ # entry_version.
207
+ attr_accessor :entry_version
208
+
209
+ # Organism species (String). For example, "Escherichia coli".
210
+ attr_accessor :species
211
+
212
+ # Organism classification, taxonomic classification of the source organism.
213
+ # (Array of String)
214
+ attr_accessor :classification
215
+ alias taxonomy classification
216
+
217
+ # (not well supported) Organelle information (String).
218
+ attr_accessor :organelle
219
+
220
+ # Namespace of the sequence IDs described in entry_id, primary_accession,
221
+ # and secondary_accessions methods (String).
222
+ # For example, 'EMBL', 'GenBank', 'DDBJ', 'RefSeq'.
223
+ attr_accessor :id_namespace
224
+
225
+ # Sequence identifiers which are not described in entry_id,
226
+ # primary_accession, and secondary_accessions methods
227
+ # (Array of Bio::Sequence::DBLink objects).
228
+ # For example, NCBI GI number can be stored.
229
+ # Note that only identifiers of the entry itself should be stored.
230
+ # For database cross references, <tt>dblinks</tt> should be used.
231
+ attr_accessor :other_seqids
163
232
 
164
233
  # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
165
234
  # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
@@ -174,9 +243,9 @@ class Sequence
174
243
  def auto
175
244
  @moltype = guess
176
245
  if @moltype == NA
177
- @seq = NA.new(@seq)
246
+ @seq = NA.new(seq)
178
247
  else
179
- @seq = AA.new(@seq)
248
+ @seq = AA.new(seq)
180
249
  end
181
250
  end
182
251
 
@@ -236,7 +305,7 @@ class Sequence
236
305
  # * (optional) _index_: Fixnum (default 0)
237
306
  # *Returns*:: Bio::Sequence::NA/AA
238
307
  def guess(threshold = 0.9, length = 10000, index = 0)
239
- str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
308
+ str = seq.to_s[index,length].to_s.extend Bio::Sequence::Common
240
309
  cmp = str.composition
241
310
 
242
311
  bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
@@ -309,7 +378,7 @@ class Sequence
309
378
  # ---
310
379
  # *Returns*:: Bio::Sequence::NA
311
380
  def na
312
- @seq = NA.new(@seq)
381
+ @seq = NA.new(seq)
313
382
  @moltype = NA
314
383
  end
315
384
 
@@ -330,146 +399,58 @@ class Sequence
330
399
  # ---
331
400
  # *Returns*:: Bio::Sequence::AA
332
401
  def aa
333
- @seq = AA.new(@seq)
402
+ @seq = AA.new(seq)
334
403
  @moltype = AA
335
404
  end
336
-
337
- end # Sequence
338
405
 
406
+ # Create a new Bio::Sequence object from a formatted string
407
+ # (GenBank, EMBL, fasta format, etc.)
408
+ #
409
+ # s = Bio::Sequence.input(str)
410
+ # ---
411
+ # *Arguments*:
412
+ # * (required) _str_: string
413
+ # * (optional) _format_: format specification (class or nil)
414
+ # *Returns*:: Bio::Sequence object
415
+ def self.input(str, format = nil)
416
+ if format then
417
+ klass = format
418
+ else
419
+ klass = Bio::FlatFile::AutoDetect.default.autodetect(str)
420
+ end
421
+ obj = klass.new(str)
422
+ obj.to_biosequence
423
+ end
339
424
 
340
- end # Bio
425
+ # alias of Bio::Sequence.input
426
+ def self.read(str, format = nil)
427
+ input(str, format)
428
+ end
341
429
 
430
+ # accession numbers of the sequence
431
+ #
432
+ # *Returns*:: Array of String
433
+ def accessions
434
+ [ primary_accession, secondary_accessions ].flatten.compact
435
+ end
436
+
437
+ # Normally, users should not call this method directly.
438
+ # Use Bio::*#to_biosequence (e.g. Bio::GenBank#to_biosequence).
439
+ #
440
+ # Creates a new Bio::Sequence object from database data with an
441
+ # adapter module.
442
+ def self.adapter(source_data, adapter_module)
443
+ biosequence = self.new(nil)
444
+ biosequence.instance_eval {
445
+ remove_instance_variable(:@seq)
446
+ @source_data = source_data
447
+ }
448
+ biosequence.extend(adapter_module)
449
+ biosequence
450
+ end
451
+
452
+ end # Sequence
342
453
 
343
- if __FILE__ == $0
344
-
345
- puts "== Test Bio::Sequence::NA.new"
346
- p Bio::Sequence::NA.new('')
347
- p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
348
- p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')
349
-
350
- puts "\n== Test Bio::Sequence::AA.new"
351
- p Bio::Sequence::AA.new('')
352
- p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
353
-
354
- puts "\n== Test Bio::Sequence#to_s"
355
- p na.to_s
356
- p aa.to_s
357
-
358
- puts "\n== Test Bio::Sequence#subseq(2,6)"
359
- p na
360
- p na.subseq(2,6)
361
-
362
- puts "\n== Test Bio::Sequence#[2,6]"
363
- p na
364
- p na[2,6]
365
-
366
- puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
367
- puts na.to_fasta('hoge', 8)
368
-
369
- puts "\n== Test Bio::Sequence#window_search(15)"
370
- p na
371
- na.window_search(15) {|x| p x}
372
-
373
- puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
374
- p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})
375
-
376
- puts "\n== Test Bio::Sequence#composition"
377
- p na
378
- p na.composition
379
- p rna
380
- p rna.composition
381
-
382
- puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
383
- p na
384
- p na.splicing("complement(join(1..5,16..20))")
385
- p rna
386
- p rna.splicing("complement(join(1..5,16..20))")
387
-
388
- puts "\n== Test Bio::Sequence::NA#complement"
389
- p na.complement
390
- p rna.complement
391
- p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
392
- p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement
393
-
394
- puts "\n== Test Bio::Sequence::NA#translate"
395
- p na
396
- p na.translate
397
- p rna
398
- p rna.translate
399
-
400
- puts "\n== Test Bio::Sequence::NA#gc_percent"
401
- p na.gc_percent
402
- p rna.gc_percent
403
-
404
- puts "\n== Test Bio::Sequence::NA#illegal_bases"
405
- p na.illegal_bases
406
- p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
407
- p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases
408
-
409
- puts "\n== Test Bio::Sequence::NA#molecular_weight"
410
- p na
411
- p na.molecular_weight
412
- p rna
413
- p rna.molecular_weight
414
-
415
- puts "\n== Test Bio::Sequence::NA#to_re"
416
- p Bio::Sequence::NA.new('atgcrymkdhvbswn')
417
- p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
418
- p Bio::Sequence::NA.new('augcrymkdhvbswn')
419
- p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re
420
-
421
- puts "\n== Test Bio::Sequence::NA#names"
422
- p na.names
423
-
424
- puts "\n== Test Bio::Sequence::NA#pikachu"
425
- p na.pikachu
426
-
427
- puts "\n== Test Bio::Sequence::NA#randomize"
428
- print "Orig : "; p na
429
- print "Rand : "; p na.randomize
430
- print "Rand : "; p na.randomize
431
- print "Rand : "; p na.randomize.randomize
432
- print "Block : "; na.randomize do |x| print x end; puts
433
-
434
- print "Orig : "; p rna
435
- print "Rand : "; p rna.randomize
436
- print "Rand : "; p rna.randomize
437
- print "Rand : "; p rna.randomize.randomize
438
- print "Block : "; rna.randomize do |x| print x end; puts
439
-
440
- puts "\n== Test Bio::Sequence::NA.randomize(counts)"
441
- print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
442
- print "Rand : "; p Bio::Sequence::NA.randomize(counts)
443
- print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
444
- print "Rand : "; p Bio::Sequence::NA.randomize(counts)
445
- print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts
446
-
447
- puts "\n== Test Bio::Sequence::AA#codes"
448
- p aa
449
- p aa.codes
450
-
451
- puts "\n== Test Bio::Sequence::AA#names"
452
- p aa
453
- p aa.names
454
-
455
- puts "\n== Test Bio::Sequence::AA#molecular_weight"
456
- p aa.subseq(1,20)
457
- p aa.subseq(1,20).molecular_weight
458
-
459
- puts "\n== Test Bio::Sequence::AA#randomize"
460
- aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
461
- s = Bio::Sequence::AA.new(aaseq)
462
- print "Orig : "; p s
463
- print "Rand : "; p s.randomize
464
- print "Rand : "; p s.randomize
465
- print "Rand : "; p s.randomize.randomize
466
- print "Block : "; s.randomize {|x| print x}; puts
467
-
468
- puts "\n== Test Bio::Sequence::AA.randomize(counts)"
469
- print "Count : "; p counts = s.composition
470
- print "Rand : "; puts Bio::Sequence::AA.randomize(counts)
471
- print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts
472
-
473
- end
474
454
 
455
+ end # Bio
475
456
 
@@ -0,0 +1,108 @@
1
+ #
2
+ # = bio/sequence/adapter.rb - Bio::Sequence adapter helper module
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>,
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'bio/sequence'
12
+
13
+ # Internal use only. Normal users should not use this module.
14
+ #
15
+ # Helper methods for defining adapters used when converting data classes to
16
+ # Bio::Sequence class, with pseudo lazy evaluation and pseudo memoization.
17
+ #
18
+ # This module is used by using "extend", not "include".
19
+ #
20
+ module Bio::Sequence::Adapter
21
+
22
+ autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
23
+ autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
24
+ autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
25
+ autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
26
+
27
+ private
28
+
29
+ # Defines a reader attribute method with pseudo lazy evaluation/memoization.
30
+ #
31
+ # It defines a method <i>name</i> like attr_reader, but at the first time
32
+ # when the method <i>name</i> is called, it acts as follows:
33
+ # When instance variable @<i>name</i> is not defined,
34
+ # calls <tt>__get__<i>name</i>(@source_data)</tt> and stores the returned
35
+ # value to @<i>name</i>, and changes its behavior to the same as
36
+ # <tt>attr_reader </tt><i>:name</i>.
37
+ # When instance variable @name is already defined,
38
+ # its behavior is changed to the same as
39
+ # <tt>attr_reader </tt><i>:name</i>.
40
+ # When the object is frozen, storing to the instance variable and
41
+ # changing methods behavior do not occur, and the value of
42
+ # <tt>__get__<i>name</i>(@source_data)</tt> is returned.
43
+ #
44
+ # Note that it assumes that the source data object is stored in
45
+ # @source_data instance variable.
46
+ def attr_reader_lazy(name)
47
+ #$stderr.puts "attr_reader_lazy :#{name}"
48
+ varname = "@#{name}".intern
49
+ methodname = "__get__#{name}".intern
50
+
51
+ # module to reset method's behavior to normal attr_reader
52
+ reset = "Attr_#{name}".intern
53
+ const_set(reset, Module.new { attr_reader name })
54
+ reset_module_name = "#{self}::#{reset}"
55
+
56
+ # define attr method
57
+ module_eval <<__END_OF_DEF__
58
+ def #{name}
59
+ unless defined? #{varname} then
60
+ #$stderr.puts "LAZY #{name}: calling #{methodname}"
61
+ val = #{methodname}(@source_data)
62
+ #{varname} = val unless frozen?
63
+ else
64
+ val = #{varname}
65
+ end
66
+ unless frozen? then
67
+ #$stderr.puts "LAZY #{name}: finalize: attr_reader :#{name}"
68
+ self.extend(#{reset_module_name})
69
+ end
70
+ val
71
+ end
72
+ __END_OF_DEF__
73
+ end
74
+
75
+ # Defines a Bio::Sequence to Bio::* adapter method with
76
+ # pseudo lazy evaluation and pseudo memoization.
77
+ #
78
+ # Without block, defines a private method <tt>__get__<i>name</i>(orig)</tt>
79
+ # which calls <i>source_method</i> for @source_data.
80
+ #
81
+ # def_biosequence_adapter(name, source_method) is the same as:
82
+ # def __get__name(orig); orig.source_method; end
83
+ # attr_reader_lazy name
84
+ #
85
+ # If block is given, <tt>__get__<i>name</i>(orig)</tt> is defined
86
+ # with the block. The @source_data is given as an argument of the block,
87
+ # i.e. the block must get an argument.
88
+ #
89
+ def def_biosequence_adapter(name, source_method = name, &block)
90
+ methodname = "__get__#{name}".intern
91
+
92
+ if block then
93
+ define_method(methodname, block)
94
+ else
95
+ module_eval <<__END_OF_DEF__
96
+ def #{methodname}(orig)
97
+ orig.#{source_method}
98
+ end
99
+ __END_OF_DEF__
100
+ end
101
+ private methodname
102
+ attr_reader_lazy name
103
+ true
104
+ end
105
+
106
+ end #module Bio::Sequence::Adapter
107
+
108
+