bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/sequence.rb
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
10
10
|
# License:: The Ruby License
|
|
11
11
|
#
|
|
12
|
-
# $Id: sequence.rb,v 0.58
|
|
12
|
+
# $Id: sequence.rb,v 0.58.2.12 2008/06/17 15:25:22 ngoto Exp $
|
|
13
13
|
#
|
|
14
14
|
|
|
15
15
|
require 'bio/sequence/compat'
|
|
@@ -70,6 +70,9 @@ class Sequence
|
|
|
70
70
|
autoload :AA, 'bio/sequence/aa'
|
|
71
71
|
autoload :Generic, 'bio/sequence/generic'
|
|
72
72
|
autoload :Format, 'bio/sequence/format'
|
|
73
|
+
autoload :Adapter, 'bio/sequence/adapter'
|
|
74
|
+
|
|
75
|
+
include Format
|
|
73
76
|
|
|
74
77
|
# Create a new Bio::Sequence object
|
|
75
78
|
#
|
|
@@ -95,71 +98,137 @@ class Sequence
|
|
|
95
98
|
# Pass any unknown method calls to the wrapped sequence object. see
|
|
96
99
|
# http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
|
|
97
100
|
def method_missing(sym, *args, &block) #:nodoc:
|
|
98
|
-
|
|
101
|
+
begin
|
|
102
|
+
seq.__send__(sym, *args, &block)
|
|
103
|
+
rescue NoMethodError => evar
|
|
104
|
+
lineno = __LINE__ - 2
|
|
105
|
+
file = __FILE__
|
|
106
|
+
bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
|
|
107
|
+
"#{file}:#{lineno}:in \`method_missing\'"
|
|
108
|
+
]
|
|
109
|
+
if bt_here == evar.backtrace[0, 2] then
|
|
110
|
+
bt = evar.backtrace[2..-1]
|
|
111
|
+
evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
|
|
112
|
+
evar.set_backtrace(bt)
|
|
113
|
+
end
|
|
114
|
+
#p lineno
|
|
115
|
+
#p file
|
|
116
|
+
#p bt_here
|
|
117
|
+
#p evar.backtrace
|
|
118
|
+
raise(evar)
|
|
119
|
+
end
|
|
99
120
|
end
|
|
100
121
|
|
|
101
|
-
# The sequence identifier. For example, for a sequence
|
|
102
|
-
# of Genbank origin, this is the
|
|
122
|
+
# The sequence identifier (String). For example, for a sequence
|
|
123
|
+
# of Genbank origin, this is the locus name.
|
|
124
|
+
# For a sequence of EMBL origin, this is the primary accession number.
|
|
103
125
|
attr_accessor :entry_id
|
|
104
126
|
|
|
105
|
-
# A String with a description of the sequence
|
|
127
|
+
# A String with a description of the sequence (String)
|
|
106
128
|
attr_accessor :definition
|
|
107
129
|
|
|
108
|
-
# An Array of Bio::Feature objects
|
|
130
|
+
# Features (An Array of Bio::Feature objects)
|
|
109
131
|
attr_accessor :features
|
|
110
132
|
|
|
111
|
-
# An Array of Bio::Reference objects
|
|
133
|
+
# References (An Array of Bio::Reference objects)
|
|
112
134
|
attr_accessor :references
|
|
113
135
|
|
|
114
|
-
#
|
|
136
|
+
# Comments (String or an Array of String)
|
|
115
137
|
attr_accessor :comments
|
|
116
138
|
|
|
117
|
-
#
|
|
118
|
-
attr_accessor :date
|
|
119
|
-
|
|
120
|
-
# An Array of Strings
|
|
139
|
+
# Keywords (An Array of String)
|
|
121
140
|
attr_accessor :keywords
|
|
122
141
|
|
|
123
|
-
#
|
|
142
|
+
# Links to other database entries.
|
|
143
|
+
# (An Array of Bio::Sequence::DBLink objects)
|
|
124
144
|
attr_accessor :dblinks
|
|
125
|
-
|
|
126
|
-
# A taxonomy String
|
|
127
|
-
attr_accessor :taxonomy
|
|
128
|
-
|
|
145
|
+
|
|
129
146
|
# Bio::Sequence::NA/AA
|
|
130
147
|
attr_accessor :moltype
|
|
131
148
|
|
|
132
149
|
# The sequence object, usually Bio::Sequence::NA/AA,
|
|
133
150
|
# but could be a simple String
|
|
134
151
|
attr_accessor :seq
|
|
152
|
+
|
|
153
|
+
#---
|
|
154
|
+
# Attributes below have been added during BioHackathon2008
|
|
155
|
+
#+++
|
|
135
156
|
|
|
136
|
-
#
|
|
137
|
-
#
|
|
138
|
-
#
|
|
139
|
-
#
|
|
140
|
-
#
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
#
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
#
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
157
|
+
# Version number of the sequence (String or Integer).
|
|
158
|
+
# Unlike <tt>entry_version</tt>, <tt>sequence_version</tt> will be changed
|
|
159
|
+
# when the submitter of the sequence updates the entry.
|
|
160
|
+
# Normally, the same entry taken from different databases (EMBL, GenBank,
|
|
161
|
+
# and DDBJ) may have the same sequence_version.
|
|
162
|
+
attr_accessor :sequence_version
|
|
163
|
+
|
|
164
|
+
# Topology (String). "circular", "linear", or nil.
|
|
165
|
+
attr_accessor :topology
|
|
166
|
+
|
|
167
|
+
# Strandedness (String). "single" (single-stranded),
|
|
168
|
+
# "double" (double-stranded), "mixed" (mixed-stranded), or nil.
|
|
169
|
+
attr_accessor :strandedness
|
|
170
|
+
|
|
171
|
+
# molecular type (String). "DNA" or "RNA" for nucleotide sequence.
|
|
172
|
+
attr_accessor :molecule_type
|
|
173
|
+
|
|
174
|
+
# Data Class defined by EMBL (String)
|
|
175
|
+
# See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_1
|
|
176
|
+
attr_accessor :data_class
|
|
177
|
+
|
|
178
|
+
# Taxonomic Division defined by EMBL/GenBank/DDBJ (String)
|
|
179
|
+
# See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
|
|
180
|
+
attr_accessor :division
|
|
181
|
+
|
|
182
|
+
# Primary accession number (String)
|
|
183
|
+
attr_accessor :primary_accession
|
|
184
|
+
|
|
185
|
+
# Secondary accession numbers (Array of String)
|
|
186
|
+
attr_accessor :secondary_accessions
|
|
187
|
+
|
|
188
|
+
# Created date of the sequence entry (Date, DateTime, Time, or String)
|
|
189
|
+
attr_accessor :date_created
|
|
190
|
+
|
|
191
|
+
# Last modified date of the sequence entry (Date, DateTime, Time, or String)
|
|
192
|
+
attr_accessor :date_modified
|
|
193
|
+
|
|
194
|
+
# Release information when created (String)
|
|
195
|
+
attr_accessor :release_created
|
|
196
|
+
|
|
197
|
+
# Release information when last-modified (String)
|
|
198
|
+
attr_accessor :release_modified
|
|
199
|
+
|
|
200
|
+
# Version of the entry (String or Integer).
|
|
201
|
+
# Unlike <tt>sequence_version</tt>, <tt>entry_version</tt> is a database
|
|
202
|
+
# maintainer's internal version number.
|
|
203
|
+
# The version number will be changed when the database maintainer
|
|
204
|
+
# modifies the entry.
|
|
205
|
+
# The same entry in EMBL, GenBank, and DDBJ may have different
|
|
206
|
+
# entry_version.
|
|
207
|
+
attr_accessor :entry_version
|
|
208
|
+
|
|
209
|
+
# Organism species (String). For example, "Escherichia coli".
|
|
210
|
+
attr_accessor :species
|
|
211
|
+
|
|
212
|
+
# Organism classification, taxonomic classification of the source organism.
|
|
213
|
+
# (Array of String)
|
|
214
|
+
attr_accessor :classification
|
|
215
|
+
alias taxonomy classification
|
|
216
|
+
|
|
217
|
+
# (not well supported) Organelle information (String).
|
|
218
|
+
attr_accessor :organelle
|
|
219
|
+
|
|
220
|
+
# Namespace of the sequence IDs described in entry_id, primary_accession,
|
|
221
|
+
# and secondary_accessions methods (String).
|
|
222
|
+
# For example, 'EMBL', 'GenBank', 'DDBJ', 'RefSeq'.
|
|
223
|
+
attr_accessor :id_namespace
|
|
224
|
+
|
|
225
|
+
# Sequence identifiers which are not described in entry_id,
|
|
226
|
+
# primary_accession, and secondary_accessions methods
|
|
227
|
+
# (Array of Bio::Sequence::DBLink objects).
|
|
228
|
+
# For example, NCBI GI number can be stored.
|
|
229
|
+
# Note that only identifiers of the entry itself should be stored.
|
|
230
|
+
# For database cross references, <tt>dblinks</tt> should be used.
|
|
231
|
+
attr_accessor :other_seqids
|
|
163
232
|
|
|
164
233
|
# Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
|
|
165
234
|
# new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
|
|
@@ -174,9 +243,9 @@ class Sequence
|
|
|
174
243
|
def auto
|
|
175
244
|
@moltype = guess
|
|
176
245
|
if @moltype == NA
|
|
177
|
-
@seq = NA.new(
|
|
246
|
+
@seq = NA.new(seq)
|
|
178
247
|
else
|
|
179
|
-
@seq = AA.new(
|
|
248
|
+
@seq = AA.new(seq)
|
|
180
249
|
end
|
|
181
250
|
end
|
|
182
251
|
|
|
@@ -236,7 +305,7 @@ class Sequence
|
|
|
236
305
|
# * (optional) _index_: Fixnum (default 1)
|
|
237
306
|
# *Returns*:: Bio::Sequence::NA/AA
|
|
238
307
|
def guess(threshold = 0.9, length = 10000, index = 0)
|
|
239
|
-
str =
|
|
308
|
+
str = seq.to_s[index,length].to_s.extend Bio::Sequence::Common
|
|
240
309
|
cmp = str.composition
|
|
241
310
|
|
|
242
311
|
bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
|
|
@@ -309,7 +378,7 @@ class Sequence
|
|
|
309
378
|
# ---
|
|
310
379
|
# *Returns*:: Bio::Sequence::NA
|
|
311
380
|
def na
|
|
312
|
-
@seq = NA.new(
|
|
381
|
+
@seq = NA.new(seq)
|
|
313
382
|
@moltype = NA
|
|
314
383
|
end
|
|
315
384
|
|
|
@@ -330,146 +399,58 @@ class Sequence
|
|
|
330
399
|
# ---
|
|
331
400
|
# *Returns*:: Bio::Sequence::AA
|
|
332
401
|
def aa
|
|
333
|
-
@seq = AA.new(
|
|
402
|
+
@seq = AA.new(seq)
|
|
334
403
|
@moltype = AA
|
|
335
404
|
end
|
|
336
|
-
|
|
337
|
-
end # Sequence
|
|
338
405
|
|
|
406
|
+
# Create a new Bio::Sequence object from a formatted string
|
|
407
|
+
# (GenBank, EMBL, fasta format, etc.)
|
|
408
|
+
#
|
|
409
|
+
# s = Bio::Sequence.input(str)
|
|
410
|
+
# ---
|
|
411
|
+
# *Arguments*:
|
|
412
|
+
# * (required) _str_: string
|
|
413
|
+
# * (optional) _format_: format specification (class or nil)
|
|
414
|
+
# *Returns*:: Bio::Sequence object
|
|
415
|
+
def self.input(str, format = nil)
|
|
416
|
+
if format then
|
|
417
|
+
klass = format
|
|
418
|
+
else
|
|
419
|
+
klass = Bio::FlatFile::AutoDetect.default.autodetect(str)
|
|
420
|
+
end
|
|
421
|
+
obj = klass.new(str)
|
|
422
|
+
obj.to_biosequence
|
|
423
|
+
end
|
|
339
424
|
|
|
340
|
-
|
|
425
|
+
# alias of Bio::Sequence.input
|
|
426
|
+
def self.read(str, format = nil)
|
|
427
|
+
input(str, format)
|
|
428
|
+
end
|
|
341
429
|
|
|
430
|
+
# accession numbers of the sequence
|
|
431
|
+
#
|
|
432
|
+
# *Returns*:: Array of String
|
|
433
|
+
def accessions
|
|
434
|
+
[ primary_accession, secondary_accessions ].flatten.compact
|
|
435
|
+
end
|
|
436
|
+
|
|
437
|
+
# Normally, users should not call this method directly.
|
|
438
|
+
# Use Bio::*#to_biosequence (e.g. Bio::GenBank#to_biosequence).
|
|
439
|
+
#
|
|
440
|
+
# Creates a new Bio::Sequence object from database data with an
|
|
441
|
+
# adapter module.
|
|
442
|
+
def self.adapter(source_data, adapter_module)
|
|
443
|
+
biosequence = self.new(nil)
|
|
444
|
+
biosequence.instance_eval {
|
|
445
|
+
remove_instance_variable(:@seq)
|
|
446
|
+
@source_data = source_data
|
|
447
|
+
}
|
|
448
|
+
biosequence.extend(adapter_module)
|
|
449
|
+
biosequence
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
end # Sequence
|
|
342
453
|
|
|
343
|
-
if __FILE__ == $0
|
|
344
|
-
|
|
345
|
-
puts "== Test Bio::Sequence::NA.new"
|
|
346
|
-
p Bio::Sequence::NA.new('')
|
|
347
|
-
p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
|
|
348
|
-
p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')
|
|
349
|
-
|
|
350
|
-
puts "\n== Test Bio::Sequence::AA.new"
|
|
351
|
-
p Bio::Sequence::AA.new('')
|
|
352
|
-
p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
|
|
353
|
-
|
|
354
|
-
puts "\n== Test Bio::Sequence#to_s"
|
|
355
|
-
p na.to_s
|
|
356
|
-
p aa.to_s
|
|
357
|
-
|
|
358
|
-
puts "\n== Test Bio::Sequence#subseq(2,6)"
|
|
359
|
-
p na
|
|
360
|
-
p na.subseq(2,6)
|
|
361
|
-
|
|
362
|
-
puts "\n== Test Bio::Sequence#[2,6]"
|
|
363
|
-
p na
|
|
364
|
-
p na[2,6]
|
|
365
|
-
|
|
366
|
-
puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
|
|
367
|
-
puts na.to_fasta('hoge', 8)
|
|
368
|
-
|
|
369
|
-
puts "\n== Test Bio::Sequence#window_search(15)"
|
|
370
|
-
p na
|
|
371
|
-
na.window_search(15) {|x| p x}
|
|
372
|
-
|
|
373
|
-
puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
|
|
374
|
-
p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})
|
|
375
|
-
|
|
376
|
-
puts "\n== Test Bio::Sequence#composition"
|
|
377
|
-
p na
|
|
378
|
-
p na.composition
|
|
379
|
-
p rna
|
|
380
|
-
p rna.composition
|
|
381
|
-
|
|
382
|
-
puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
|
|
383
|
-
p na
|
|
384
|
-
p na.splicing("complement(join(1..5,16..20))")
|
|
385
|
-
p rna
|
|
386
|
-
p rna.splicing("complement(join(1..5,16..20))")
|
|
387
|
-
|
|
388
|
-
puts "\n== Test Bio::Sequence::NA#complement"
|
|
389
|
-
p na.complement
|
|
390
|
-
p rna.complement
|
|
391
|
-
p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
|
|
392
|
-
p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement
|
|
393
|
-
|
|
394
|
-
puts "\n== Test Bio::Sequence::NA#translate"
|
|
395
|
-
p na
|
|
396
|
-
p na.translate
|
|
397
|
-
p rna
|
|
398
|
-
p rna.translate
|
|
399
|
-
|
|
400
|
-
puts "\n== Test Bio::Sequence::NA#gc_percent"
|
|
401
|
-
p na.gc_percent
|
|
402
|
-
p rna.gc_percent
|
|
403
|
-
|
|
404
|
-
puts "\n== Test Bio::Sequence::NA#illegal_bases"
|
|
405
|
-
p na.illegal_bases
|
|
406
|
-
p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
|
|
407
|
-
p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases
|
|
408
|
-
|
|
409
|
-
puts "\n== Test Bio::Sequence::NA#molecular_weight"
|
|
410
|
-
p na
|
|
411
|
-
p na.molecular_weight
|
|
412
|
-
p rna
|
|
413
|
-
p rna.molecular_weight
|
|
414
|
-
|
|
415
|
-
puts "\n== Test Bio::Sequence::NA#to_re"
|
|
416
|
-
p Bio::Sequence::NA.new('atgcrymkdhvbswn')
|
|
417
|
-
p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
|
|
418
|
-
p Bio::Sequence::NA.new('augcrymkdhvbswn')
|
|
419
|
-
p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re
|
|
420
|
-
|
|
421
|
-
puts "\n== Test Bio::Sequence::NA#names"
|
|
422
|
-
p na.names
|
|
423
|
-
|
|
424
|
-
puts "\n== Test Bio::Sequence::NA#pikachu"
|
|
425
|
-
p na.pikachu
|
|
426
|
-
|
|
427
|
-
puts "\n== Test Bio::Sequence::NA#randomize"
|
|
428
|
-
print "Orig : "; p na
|
|
429
|
-
print "Rand : "; p na.randomize
|
|
430
|
-
print "Rand : "; p na.randomize
|
|
431
|
-
print "Rand : "; p na.randomize.randomize
|
|
432
|
-
print "Block : "; na.randomize do |x| print x end; puts
|
|
433
|
-
|
|
434
|
-
print "Orig : "; p rna
|
|
435
|
-
print "Rand : "; p rna.randomize
|
|
436
|
-
print "Rand : "; p rna.randomize
|
|
437
|
-
print "Rand : "; p rna.randomize.randomize
|
|
438
|
-
print "Block : "; rna.randomize do |x| print x end; puts
|
|
439
|
-
|
|
440
|
-
puts "\n== Test Bio::Sequence::NA.randomize(counts)"
|
|
441
|
-
print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
|
|
442
|
-
print "Rand : "; p Bio::Sequence::NA.randomize(counts)
|
|
443
|
-
print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
|
|
444
|
-
print "Rand : "; p Bio::Sequence::NA.randomize(counts)
|
|
445
|
-
print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts
|
|
446
|
-
|
|
447
|
-
puts "\n== Test Bio::Sequence::AA#codes"
|
|
448
|
-
p aa
|
|
449
|
-
p aa.codes
|
|
450
|
-
|
|
451
|
-
puts "\n== Test Bio::Sequence::AA#names"
|
|
452
|
-
p aa
|
|
453
|
-
p aa.names
|
|
454
|
-
|
|
455
|
-
puts "\n== Test Bio::Sequence::AA#molecular_weight"
|
|
456
|
-
p aa.subseq(1,20)
|
|
457
|
-
p aa.subseq(1,20).molecular_weight
|
|
458
|
-
|
|
459
|
-
puts "\n== Test Bio::Sequence::AA#randomize"
|
|
460
|
-
aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
|
|
461
|
-
s = Bio::Sequence::AA.new(aaseq)
|
|
462
|
-
print "Orig : "; p s
|
|
463
|
-
print "Rand : "; p s.randomize
|
|
464
|
-
print "Rand : "; p s.randomize
|
|
465
|
-
print "Rand : "; p s.randomize.randomize
|
|
466
|
-
print "Block : "; s.randomize {|x| print x}; puts
|
|
467
|
-
|
|
468
|
-
puts "\n== Test Bio::Sequence::AA.randomize(counts)"
|
|
469
|
-
print "Count : "; p counts = s.composition
|
|
470
|
-
print "Rand : "; puts Bio::Sequence::AA.randomize(counts)
|
|
471
|
-
print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts
|
|
472
|
-
|
|
473
|
-
end
|
|
474
454
|
|
|
455
|
+
end # Bio
|
|
475
456
|
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/adapter.rb - Bio::Sequence adapter helper module
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>,
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# $Id:$
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require 'bio/sequence'
|
|
12
|
+
|
|
13
|
+
# Internal use only. Normal users should not use this module.
|
|
14
|
+
#
|
|
15
|
+
# Helper methods for defining adapters used when converting data classes to
|
|
16
|
+
# Bio::Sequence class, with pseudo lazy evaluation and pseudo memoization.
|
|
17
|
+
#
|
|
18
|
+
# This module is used by using "extend", not "include".
|
|
19
|
+
#
|
|
20
|
+
module Bio::Sequence::Adapter
|
|
21
|
+
|
|
22
|
+
autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
|
|
23
|
+
autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
|
|
24
|
+
autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
|
|
25
|
+
autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
# Defines a reader attribute method with pseudo lazy evaluation/memoization.
|
|
30
|
+
#
|
|
31
|
+
# It defines a method <i>name</i> like attr_reader, but at the first time
|
|
32
|
+
# when the method <i>name</i> is called, it acts as follows:
|
|
33
|
+
# When instance variable @<i>name</i> is not defined,
|
|
34
|
+
# calls <tt>__get__<i>name</i>(@source_data)</tt> and stores the returned
|
|
35
|
+
# value to @<i>name</i>, and changes its behavior to the same as
|
|
36
|
+
# <tt>attr_reader </tt><i>:name</i>.
|
|
37
|
+
# When instance variable @name is already defined,
|
|
38
|
+
# its behavior is changed to the same as
|
|
39
|
+
# <tt>attr_reader </tt><i>:name</i>.
|
|
40
|
+
# When the object is frozen, storing to the instance variable and
|
|
41
|
+
# changing methods behavior do not occur, and the value of
|
|
42
|
+
# <tt>__get__<i>name</i>(@source_data)</tt> is returned.
|
|
43
|
+
#
|
|
44
|
+
# Note that it assumes that the source data object is stored in
|
|
45
|
+
# @source_data instance variable.
|
|
46
|
+
def attr_reader_lazy(name)
|
|
47
|
+
#$stderr.puts "attr_reader_lazy :#{name}"
|
|
48
|
+
varname = "@#{name}".intern
|
|
49
|
+
methodname = "__get__#{name}".intern
|
|
50
|
+
|
|
51
|
+
# module to reset method's behavior to normal attr_reader
|
|
52
|
+
reset = "Attr_#{name}".intern
|
|
53
|
+
const_set(reset, Module.new { attr_reader name })
|
|
54
|
+
reset_module_name = "#{self}::#{reset}"
|
|
55
|
+
|
|
56
|
+
# define attr method
|
|
57
|
+
module_eval <<__END_OF_DEF__
|
|
58
|
+
def #{name}
|
|
59
|
+
unless defined? #{varname} then
|
|
60
|
+
#$stderr.puts "LAZY #{name}: calling #{methodname}"
|
|
61
|
+
val = #{methodname}(@source_data)
|
|
62
|
+
#{varname} = val unless frozen?
|
|
63
|
+
else
|
|
64
|
+
val = #{varname}
|
|
65
|
+
end
|
|
66
|
+
unless frozen? then
|
|
67
|
+
#$stderr.puts "LAZY #{name}: finalize: attr_reader :#{name}"
|
|
68
|
+
self.extend(#{reset_module_name})
|
|
69
|
+
end
|
|
70
|
+
val
|
|
71
|
+
end
|
|
72
|
+
__END_OF_DEF__
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Defines a Bio::Sequence to Bio::* adapter method with
|
|
76
|
+
# pseudo lazy evaluation and pseudo memoization.
|
|
77
|
+
#
|
|
78
|
+
# Without block, defines a private method <tt>__get__<i>name</i>(orig)</tt>
|
|
79
|
+
# which calls <i>source_method</i> for @source_data.
|
|
80
|
+
#
|
|
81
|
+
# def_biosequence_adapter(name, source_method) is the same as:
|
|
82
|
+
# def __get__name(orig); orig.source_method; end
|
|
83
|
+
# attr_reader_lazy name
|
|
84
|
+
#
|
|
85
|
+
# If block is given, <tt>__get__<i>name</i>(orig)</tt> is defined
|
|
86
|
+
# with the block. The @source_data is given as an argument of the block,
|
|
87
|
+
# i.e. the block must get an argument.
|
|
88
|
+
#
|
|
89
|
+
def def_biosequence_adapter(name, source_method = name, &block)
|
|
90
|
+
methodname = "__get__#{name}".intern
|
|
91
|
+
|
|
92
|
+
if block then
|
|
93
|
+
define_method(methodname, block)
|
|
94
|
+
else
|
|
95
|
+
module_eval <<__END_OF_DEF__
|
|
96
|
+
def #{methodname}(orig)
|
|
97
|
+
orig.#{source_method}
|
|
98
|
+
end
|
|
99
|
+
__END_OF_DEF__
|
|
100
|
+
end
|
|
101
|
+
private methodname
|
|
102
|
+
attr_reader_lazy name
|
|
103
|
+
true
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
end #module Bio::Sequence::Adapter
|
|
107
|
+
|
|
108
|
+
|