bio 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/sequence.rb
CHANGED
@@ -9,7 +9,7 @@
|
|
9
9
|
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
10
10
|
# License:: The Ruby License
|
11
11
|
#
|
12
|
-
# $Id: sequence.rb,v 0.58
|
12
|
+
# $Id: sequence.rb,v 0.58.2.12 2008/06/17 15:25:22 ngoto Exp $
|
13
13
|
#
|
14
14
|
|
15
15
|
require 'bio/sequence/compat'
|
@@ -70,6 +70,9 @@ class Sequence
|
|
70
70
|
autoload :AA, 'bio/sequence/aa'
|
71
71
|
autoload :Generic, 'bio/sequence/generic'
|
72
72
|
autoload :Format, 'bio/sequence/format'
|
73
|
+
autoload :Adapter, 'bio/sequence/adapter'
|
74
|
+
|
75
|
+
include Format
|
73
76
|
|
74
77
|
# Create a new Bio::Sequence object
|
75
78
|
#
|
@@ -95,71 +98,137 @@ class Sequence
|
|
95
98
|
# Pass any unknown method calls to the wrapped sequence object. see
|
96
99
|
# http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
|
97
100
|
def method_missing(sym, *args, &block) #:nodoc:
|
98
|
-
|
101
|
+
begin
|
102
|
+
seq.__send__(sym, *args, &block)
|
103
|
+
rescue NoMethodError => evar
|
104
|
+
lineno = __LINE__ - 2
|
105
|
+
file = __FILE__
|
106
|
+
bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
|
107
|
+
"#{file}:#{lineno}:in \`method_missing\'"
|
108
|
+
]
|
109
|
+
if bt_here == evar.backtrace[0, 2] then
|
110
|
+
bt = evar.backtrace[2..-1]
|
111
|
+
evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
|
112
|
+
evar.set_backtrace(bt)
|
113
|
+
end
|
114
|
+
#p lineno
|
115
|
+
#p file
|
116
|
+
#p bt_here
|
117
|
+
#p evar.backtrace
|
118
|
+
raise(evar)
|
119
|
+
end
|
99
120
|
end
|
100
121
|
|
101
|
-
# The sequence identifier. For example, for a sequence
|
102
|
-
# of Genbank origin, this is the
|
122
|
+
# The sequence identifier (String). For example, for a sequence
|
123
|
+
# of Genbank origin, this is the locus name.
|
124
|
+
# For a sequence of EMBL origin, this is the primary accession number.
|
103
125
|
attr_accessor :entry_id
|
104
126
|
|
105
|
-
# A String with a description of the sequence
|
127
|
+
# A String with a description of the sequence (String)
|
106
128
|
attr_accessor :definition
|
107
129
|
|
108
|
-
# An Array of Bio::Feature objects
|
130
|
+
# Features (An Array of Bio::Feature objects)
|
109
131
|
attr_accessor :features
|
110
132
|
|
111
|
-
# An Array of Bio::Reference objects
|
133
|
+
# References (An Array of Bio::Reference objects)
|
112
134
|
attr_accessor :references
|
113
135
|
|
114
|
-
#
|
136
|
+
# Comments (String or an Array of String)
|
115
137
|
attr_accessor :comments
|
116
138
|
|
117
|
-
#
|
118
|
-
attr_accessor :date
|
119
|
-
|
120
|
-
# An Array of Strings
|
139
|
+
# Keywords (An Array of String)
|
121
140
|
attr_accessor :keywords
|
122
141
|
|
123
|
-
#
|
142
|
+
# Links to other database entries.
|
143
|
+
# (An Array of Bio::Sequence::DBLink objects)
|
124
144
|
attr_accessor :dblinks
|
125
|
-
|
126
|
-
# A taxonomy String
|
127
|
-
attr_accessor :taxonomy
|
128
|
-
|
145
|
+
|
129
146
|
# Bio::Sequence::NA/AA
|
130
147
|
attr_accessor :moltype
|
131
148
|
|
132
149
|
# The sequence object, usually Bio::Sequence::NA/AA,
|
133
150
|
# but could be a simple String
|
134
151
|
attr_accessor :seq
|
152
|
+
|
153
|
+
#---
|
154
|
+
# Attributes below have been added during BioHackathon2008
|
155
|
+
#+++
|
135
156
|
|
136
|
-
#
|
137
|
-
#
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
|
142
|
-
|
143
|
-
#
|
144
|
-
|
145
|
-
|
146
|
-
#
|
147
|
-
#
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
157
|
+
# Version number of the sequence (String or Integer).
|
158
|
+
# Unlike <tt>entry_version</tt>, <tt>sequence_version</tt> will be changed
|
159
|
+
# when the submitter of the sequence updates the entry.
|
160
|
+
# Normally, the same entry taken from different databases (EMBL, GenBank,
|
161
|
+
# and DDBJ) may have the same sequence_version.
|
162
|
+
attr_accessor :sequence_version
|
163
|
+
|
164
|
+
# Topology (String). "circular", "linear", or nil.
|
165
|
+
attr_accessor :topology
|
166
|
+
|
167
|
+
# Strandedness (String). "single" (single-stranded),
|
168
|
+
# "double" (double-stranded), "mixed" (mixed-stranded), or nil.
|
169
|
+
attr_accessor :strandedness
|
170
|
+
|
171
|
+
# molecular type (String). "DNA" or "RNA" for nucleotide sequence.
|
172
|
+
attr_accessor :molecule_type
|
173
|
+
|
174
|
+
# Data Class defined by EMBL (String)
|
175
|
+
# See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_1
|
176
|
+
attr_accessor :data_class
|
177
|
+
|
178
|
+
# Taxonomic Division defined by EMBL/GenBank/DDBJ (String)
|
179
|
+
# See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
|
180
|
+
attr_accessor :division
|
181
|
+
|
182
|
+
# Primary accession number (String)
|
183
|
+
attr_accessor :primary_accession
|
184
|
+
|
185
|
+
# Secondary accession numbers (Array of String)
|
186
|
+
attr_accessor :secondary_accessions
|
187
|
+
|
188
|
+
# Created date of the sequence entry (Date, DateTime, Time, or String)
|
189
|
+
attr_accessor :date_created
|
190
|
+
|
191
|
+
# Last modified date of the sequence entry (Date, DateTime, Time, or String)
|
192
|
+
attr_accessor :date_modified
|
193
|
+
|
194
|
+
# Release information when created (String)
|
195
|
+
attr_accessor :release_created
|
196
|
+
|
197
|
+
# Release information when last-modified (String)
|
198
|
+
attr_accessor :release_modified
|
199
|
+
|
200
|
+
# Version of the entry (String or Integer).
|
201
|
+
# Unlike <tt>sequence_version</tt>, <tt>entry_version</tt> is a database
|
202
|
+
# maintainer's internal version number.
|
203
|
+
# The version number will be changed when the database maintainer
|
204
|
+
# modifies the entry.
|
205
|
+
# The same entry in EMBL, GenBank, and DDBJ may have different
|
206
|
+
# entry_version.
|
207
|
+
attr_accessor :entry_version
|
208
|
+
|
209
|
+
# Organism species (String). For example, "Escherichia coli".
|
210
|
+
attr_accessor :species
|
211
|
+
|
212
|
+
# Organism classification, taxonomic classification of the source organism.
|
213
|
+
# (Array of String)
|
214
|
+
attr_accessor :classification
|
215
|
+
alias taxonomy classification
|
216
|
+
|
217
|
+
# (not well supported) Organelle information (String).
|
218
|
+
attr_accessor :organelle
|
219
|
+
|
220
|
+
# Namespace of the sequence IDs described in entry_id, primary_accession,
|
221
|
+
# and secondary_accessions methods (String).
|
222
|
+
# For example, 'EMBL', 'GenBank', 'DDBJ', 'RefSeq'.
|
223
|
+
attr_accessor :id_namespace
|
224
|
+
|
225
|
+
# Sequence identifiers which are not described in entry_id,
|
226
|
+
# primary_accession, and secondary_accessions methods
|
227
|
+
# (Array of Bio::Sequence::DBLink objects).
|
228
|
+
# For example, NCBI GI number can be stored.
|
229
|
+
# Note that only identifiers of the entry itself should be stored.
|
230
|
+
# For database cross references, <tt>dblinks</tt> should be used.
|
231
|
+
attr_accessor :other_seqids
|
163
232
|
|
164
233
|
# Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
|
165
234
|
# new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
|
@@ -174,9 +243,9 @@ class Sequence
|
|
174
243
|
def auto
|
175
244
|
@moltype = guess
|
176
245
|
if @moltype == NA
|
177
|
-
@seq = NA.new(
|
246
|
+
@seq = NA.new(seq)
|
178
247
|
else
|
179
|
-
@seq = AA.new(
|
248
|
+
@seq = AA.new(seq)
|
180
249
|
end
|
181
250
|
end
|
182
251
|
|
@@ -236,7 +305,7 @@ class Sequence
|
|
236
305
|
# * (optional) _index_: Fixnum (default 1)
|
237
306
|
# *Returns*:: Bio::Sequence::NA/AA
|
238
307
|
def guess(threshold = 0.9, length = 10000, index = 0)
|
239
|
-
str =
|
308
|
+
str = seq.to_s[index,length].to_s.extend Bio::Sequence::Common
|
240
309
|
cmp = str.composition
|
241
310
|
|
242
311
|
bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
|
@@ -309,7 +378,7 @@ class Sequence
|
|
309
378
|
# ---
|
310
379
|
# *Returns*:: Bio::Sequence::NA
|
311
380
|
def na
|
312
|
-
@seq = NA.new(
|
381
|
+
@seq = NA.new(seq)
|
313
382
|
@moltype = NA
|
314
383
|
end
|
315
384
|
|
@@ -330,146 +399,58 @@ class Sequence
|
|
330
399
|
# ---
|
331
400
|
# *Returns*:: Bio::Sequence::AA
|
332
401
|
def aa
|
333
|
-
@seq = AA.new(
|
402
|
+
@seq = AA.new(seq)
|
334
403
|
@moltype = AA
|
335
404
|
end
|
336
|
-
|
337
|
-
end # Sequence
|
338
405
|
|
406
|
+
# Create a new Bio::Sequence object from a formatted string
|
407
|
+
# (GenBank, EMBL, fasta format, etc.)
|
408
|
+
#
|
409
|
+
# s = Bio::Sequence.input(str)
|
410
|
+
# ---
|
411
|
+
# *Arguments*:
|
412
|
+
# * (required) _str_: string
|
413
|
+
# * (optional) _format_: format specification (class or nil)
|
414
|
+
# *Returns*:: Bio::Sequence object
|
415
|
+
def self.input(str, format = nil)
|
416
|
+
if format then
|
417
|
+
klass = format
|
418
|
+
else
|
419
|
+
klass = Bio::FlatFile::AutoDetect.default.autodetect(str)
|
420
|
+
end
|
421
|
+
obj = klass.new(str)
|
422
|
+
obj.to_biosequence
|
423
|
+
end
|
339
424
|
|
340
|
-
|
425
|
+
# alias of Bio::Sequence.input
|
426
|
+
def self.read(str, format = nil)
|
427
|
+
input(str, format)
|
428
|
+
end
|
341
429
|
|
430
|
+
# accession numbers of the sequence
|
431
|
+
#
|
432
|
+
# *Returns*:: Array of String
|
433
|
+
def accessions
|
434
|
+
[ primary_accession, secondary_accessions ].flatten.compact
|
435
|
+
end
|
436
|
+
|
437
|
+
# Normally, users should not call this method directly.
|
438
|
+
# Use Bio::*#to_biosequence (e.g. Bio::GenBank#to_biosequence).
|
439
|
+
#
|
440
|
+
# Creates a new Bio::Sequence object from database data with an
|
441
|
+
# adapter module.
|
442
|
+
def self.adapter(source_data, adapter_module)
|
443
|
+
biosequence = self.new(nil)
|
444
|
+
biosequence.instance_eval {
|
445
|
+
remove_instance_variable(:@seq)
|
446
|
+
@source_data = source_data
|
447
|
+
}
|
448
|
+
biosequence.extend(adapter_module)
|
449
|
+
biosequence
|
450
|
+
end
|
451
|
+
|
452
|
+
end # Sequence
|
342
453
|
|
343
|
-
if __FILE__ == $0
|
344
|
-
|
345
|
-
puts "== Test Bio::Sequence::NA.new"
|
346
|
-
p Bio::Sequence::NA.new('')
|
347
|
-
p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
|
348
|
-
p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')
|
349
|
-
|
350
|
-
puts "\n== Test Bio::Sequence::AA.new"
|
351
|
-
p Bio::Sequence::AA.new('')
|
352
|
-
p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
|
353
|
-
|
354
|
-
puts "\n== Test Bio::Sequence#to_s"
|
355
|
-
p na.to_s
|
356
|
-
p aa.to_s
|
357
|
-
|
358
|
-
puts "\n== Test Bio::Sequence#subseq(2,6)"
|
359
|
-
p na
|
360
|
-
p na.subseq(2,6)
|
361
|
-
|
362
|
-
puts "\n== Test Bio::Sequence#[2,6]"
|
363
|
-
p na
|
364
|
-
p na[2,6]
|
365
|
-
|
366
|
-
puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
|
367
|
-
puts na.to_fasta('hoge', 8)
|
368
|
-
|
369
|
-
puts "\n== Test Bio::Sequence#window_search(15)"
|
370
|
-
p na
|
371
|
-
na.window_search(15) {|x| p x}
|
372
|
-
|
373
|
-
puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
|
374
|
-
p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})
|
375
|
-
|
376
|
-
puts "\n== Test Bio::Sequence#composition"
|
377
|
-
p na
|
378
|
-
p na.composition
|
379
|
-
p rna
|
380
|
-
p rna.composition
|
381
|
-
|
382
|
-
puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
|
383
|
-
p na
|
384
|
-
p na.splicing("complement(join(1..5,16..20))")
|
385
|
-
p rna
|
386
|
-
p rna.splicing("complement(join(1..5,16..20))")
|
387
|
-
|
388
|
-
puts "\n== Test Bio::Sequence::NA#complement"
|
389
|
-
p na.complement
|
390
|
-
p rna.complement
|
391
|
-
p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
|
392
|
-
p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement
|
393
|
-
|
394
|
-
puts "\n== Test Bio::Sequence::NA#translate"
|
395
|
-
p na
|
396
|
-
p na.translate
|
397
|
-
p rna
|
398
|
-
p rna.translate
|
399
|
-
|
400
|
-
puts "\n== Test Bio::Sequence::NA#gc_percent"
|
401
|
-
p na.gc_percent
|
402
|
-
p rna.gc_percent
|
403
|
-
|
404
|
-
puts "\n== Test Bio::Sequence::NA#illegal_bases"
|
405
|
-
p na.illegal_bases
|
406
|
-
p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
|
407
|
-
p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases
|
408
|
-
|
409
|
-
puts "\n== Test Bio::Sequence::NA#molecular_weight"
|
410
|
-
p na
|
411
|
-
p na.molecular_weight
|
412
|
-
p rna
|
413
|
-
p rna.molecular_weight
|
414
|
-
|
415
|
-
puts "\n== Test Bio::Sequence::NA#to_re"
|
416
|
-
p Bio::Sequence::NA.new('atgcrymkdhvbswn')
|
417
|
-
p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
|
418
|
-
p Bio::Sequence::NA.new('augcrymkdhvbswn')
|
419
|
-
p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re
|
420
|
-
|
421
|
-
puts "\n== Test Bio::Sequence::NA#names"
|
422
|
-
p na.names
|
423
|
-
|
424
|
-
puts "\n== Test Bio::Sequence::NA#pikachu"
|
425
|
-
p na.pikachu
|
426
|
-
|
427
|
-
puts "\n== Test Bio::Sequence::NA#randomize"
|
428
|
-
print "Orig : "; p na
|
429
|
-
print "Rand : "; p na.randomize
|
430
|
-
print "Rand : "; p na.randomize
|
431
|
-
print "Rand : "; p na.randomize.randomize
|
432
|
-
print "Block : "; na.randomize do |x| print x end; puts
|
433
|
-
|
434
|
-
print "Orig : "; p rna
|
435
|
-
print "Rand : "; p rna.randomize
|
436
|
-
print "Rand : "; p rna.randomize
|
437
|
-
print "Rand : "; p rna.randomize.randomize
|
438
|
-
print "Block : "; rna.randomize do |x| print x end; puts
|
439
|
-
|
440
|
-
puts "\n== Test Bio::Sequence::NA.randomize(counts)"
|
441
|
-
print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
|
442
|
-
print "Rand : "; p Bio::Sequence::NA.randomize(counts)
|
443
|
-
print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
|
444
|
-
print "Rand : "; p Bio::Sequence::NA.randomize(counts)
|
445
|
-
print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts
|
446
|
-
|
447
|
-
puts "\n== Test Bio::Sequence::AA#codes"
|
448
|
-
p aa
|
449
|
-
p aa.codes
|
450
|
-
|
451
|
-
puts "\n== Test Bio::Sequence::AA#names"
|
452
|
-
p aa
|
453
|
-
p aa.names
|
454
|
-
|
455
|
-
puts "\n== Test Bio::Sequence::AA#molecular_weight"
|
456
|
-
p aa.subseq(1,20)
|
457
|
-
p aa.subseq(1,20).molecular_weight
|
458
|
-
|
459
|
-
puts "\n== Test Bio::Sequence::AA#randomize"
|
460
|
-
aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
|
461
|
-
s = Bio::Sequence::AA.new(aaseq)
|
462
|
-
print "Orig : "; p s
|
463
|
-
print "Rand : "; p s.randomize
|
464
|
-
print "Rand : "; p s.randomize
|
465
|
-
print "Rand : "; p s.randomize.randomize
|
466
|
-
print "Block : "; s.randomize {|x| print x}; puts
|
467
|
-
|
468
|
-
puts "\n== Test Bio::Sequence::AA.randomize(counts)"
|
469
|
-
print "Count : "; p counts = s.composition
|
470
|
-
print "Rand : "; puts Bio::Sequence::AA.randomize(counts)
|
471
|
-
print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts
|
472
|
-
|
473
|
-
end
|
474
454
|
|
455
|
+
end # Bio
|
475
456
|
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#
|
2
|
+
# = bio/sequence/adapter.rb - Bio::Sequence adapter helper module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Naohisa Goto <ng@bioruby.org>,
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'bio/sequence'
|
12
|
+
|
13
|
+
# Internal use only. Normal users should not use this module.
|
14
|
+
#
|
15
|
+
# Helper methods for defining adapters used when converting data classes to
|
16
|
+
# Bio::Sequence class, with pseudo lazy evaluation and pseudo memoization.
|
17
|
+
#
|
18
|
+
# This module is used by using "extend", not "include".
|
19
|
+
#
|
20
|
+
module Bio::Sequence::Adapter
|
21
|
+
|
22
|
+
autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
|
23
|
+
autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
|
24
|
+
autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
|
25
|
+
autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
# Defines a reader attribute method with pseudo lazy evaluation/memoization.
|
30
|
+
#
|
31
|
+
# It defines a method <i>name</i> like attr_reader, but at the first time
|
32
|
+
# when the method <i>name</i> is called, it acts as follows:
|
33
|
+
# When instance variable @<i>name</i> is not defined,
|
34
|
+
# calls <tt>__get__<i>name</i>(@source_data)</tt> and stores the returned
|
35
|
+
# value to @<i>name</i>, and changes its behavior to the same as
|
36
|
+
# <tt>attr_reader </tt><i>:name</i>.
|
37
|
+
# When instance variable @name is already defined,
|
38
|
+
# its behavior is changed to the same as
|
39
|
+
# <tt>attr_reader </tt><i>:name</i>.
|
40
|
+
# When the object is frozen, storing to the instance variable and
|
41
|
+
# changing methods behavior do not occur, and the value of
|
42
|
+
# <tt>__get__<i>name</i>(@source_data)</tt> is returned.
|
43
|
+
#
|
44
|
+
# Note that it assumes that the source data object is stored in
|
45
|
+
# @source_data instance variable.
|
46
|
+
def attr_reader_lazy(name)
|
47
|
+
#$stderr.puts "attr_reader_lazy :#{name}"
|
48
|
+
varname = "@#{name}".intern
|
49
|
+
methodname = "__get__#{name}".intern
|
50
|
+
|
51
|
+
# module to reset method's behavior to normal attr_reader
|
52
|
+
reset = "Attr_#{name}".intern
|
53
|
+
const_set(reset, Module.new { attr_reader name })
|
54
|
+
reset_module_name = "#{self}::#{reset}"
|
55
|
+
|
56
|
+
# define attr method
|
57
|
+
module_eval <<__END_OF_DEF__
|
58
|
+
def #{name}
|
59
|
+
unless defined? #{varname} then
|
60
|
+
#$stderr.puts "LAZY #{name}: calling #{methodname}"
|
61
|
+
val = #{methodname}(@source_data)
|
62
|
+
#{varname} = val unless frozen?
|
63
|
+
else
|
64
|
+
val = #{varname}
|
65
|
+
end
|
66
|
+
unless frozen? then
|
67
|
+
#$stderr.puts "LAZY #{name}: finalize: attr_reader :#{name}"
|
68
|
+
self.extend(#{reset_module_name})
|
69
|
+
end
|
70
|
+
val
|
71
|
+
end
|
72
|
+
__END_OF_DEF__
|
73
|
+
end
|
74
|
+
|
75
|
+
# Defines a Bio::Sequence to Bio::* adapter method with
|
76
|
+
# pseudo lazy evaluation and pseudo memoization.
|
77
|
+
#
|
78
|
+
# Without block, defines a private method <tt>__get__<i>name</i>(orig)</tt>
|
79
|
+
# which calls <i>source_method</i> for @source_data.
|
80
|
+
#
|
81
|
+
# def_biosequence_adapter(name, source_method) is the same as:
|
82
|
+
# def __get__name(orig); orig.source_method; end
|
83
|
+
# attr_reader_lazy name
|
84
|
+
#
|
85
|
+
# If block is given, <tt>__get__<i>name</i>(orig)</tt> is defined
|
86
|
+
# with the block. The @source_data is given as an argument of the block,
|
87
|
+
# i.e. the block must get an argument.
|
88
|
+
#
|
89
|
+
def def_biosequence_adapter(name, source_method = name, &block)
|
90
|
+
methodname = "__get__#{name}".intern
|
91
|
+
|
92
|
+
if block then
|
93
|
+
define_method(methodname, block)
|
94
|
+
else
|
95
|
+
module_eval <<__END_OF_DEF__
|
96
|
+
def #{methodname}(orig)
|
97
|
+
orig.#{source_method}
|
98
|
+
end
|
99
|
+
__END_OF_DEF__
|
100
|
+
end
|
101
|
+
private methodname
|
102
|
+
attr_reader_lazy name
|
103
|
+
true
|
104
|
+
end
|
105
|
+
|
106
|
+
end #module Bio::Sequence::Adapter
|
107
|
+
|
108
|
+
|