bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/io/flatfile/autodetection.rb - file format auto-detection
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001-2006 Naohisa Goto <ng@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# $Id:$
|
|
9
|
+
#
|
|
10
|
+
#
|
|
11
|
+
# See documents for Bio::FlatFile::AutoDetect and Bio::FlatFile.
|
|
12
|
+
#
|
|
13
|
+
|
|
14
|
+
require 'tsort'
|
|
15
|
+
require 'bio/io/flatfile'
|
|
16
|
+
|
|
17
|
+
module Bio
|
|
18
|
+
|
|
19
|
+
class FlatFile
|
|
20
|
+
|
|
21
|
+
# AutoDetect automatically determines database class of given data.
|
|
22
|
+
class AutoDetect
|
|
23
|
+
|
|
24
|
+
include TSort
|
|
25
|
+
|
|
26
|
+
# Array to store autodetection rules.
|
|
27
|
+
# This is defined only for inspect.
|
|
28
|
+
class RulesArray < Array
  # Returns a compact, bracketed listing of the stored rules: the
  # +name+ of every element is inspected and the results are joined
  # with single spaces.
  def inspect
    names = map { |rule| rule.name.inspect }
    '[' + names.join(' ') + ']'
  end
end # class RulesArray
|
|
34
|
+
|
|
35
|
+
# Template of a single rule of autodetection
|
|
36
|
+
class RuleTemplate
  # Shortcut for +new+: <tt>RuleTemplate[*arg]</tt> creates a new element.
  # A block, if given, is forwarded to +new+ so that subclasses whose
  # constructor takes a block can also be built with the bracket form.
  def self.[](*arg, &block)
    new(*arg, &block)
  end

  # Creates a new element with empty priority lists, no database
  # classes and no name.
  def initialize
    @higher_priority_elements = RulesArray.new
    @lower_priority_elements = RulesArray.new
    @dbclasses = nil
    @name = nil
  end

  # Declares that self is prior to the _elem_.
  # Records self in _elem_'s higher priority list and _elem_ in
  # self's lower priority list.
  # Returns nil (and records nothing) when _elem_ is self itself,
  # otherwise returns true.
  def is_prior_to(elem)
    return nil if self == elem
    elem.higher_priority_elements << self
    self.lower_priority_elements << elem
    true
  end

  # higher priority elements
  attr_reader :higher_priority_elements
  # lower priority elements
  attr_reader :lower_priority_elements

  # database classes
  attr_reader :dbclasses

  # unique name of the element
  attr_accessor :name

  # If given text (and/or meta information) is known, returns
  # the database class.
  # Otherwise, returns nil or false.
  #
  # _text_ will be a String.
  # _meta_ will be a Hash.
  # _meta_ may contain following keys.
  #   :path => pathname, filename or uri.
  #
  # This template implementation knows nothing and always returns nil.
  def guess(text, meta)
    nil
  end

  private
  # Gets constant from constant name given as a string.
  # The name is resolved segment by segment starting from Object.
  # Empty segments are skipped, so an absolute name with a leading
  # "::" (e.g. "::Bio::GenBank") is accepted as well.
  # Raises NameError if a segment does not name a constant.
  def str2const(str)
    str.split(/\:\:/).inject(Object) do |scope, segment|
      segment.empty? ? scope : scope.const_get(segment)
    end
  end

  # Gets database class from given object.
  # Current implementation is:
  # if _obj_ is kind of String, regarded as a constant.
  # Otherwise, returns _obj_ as is.
  def get_dbclass(obj)
    obj.kind_of?(String) ? str2const(obj) : obj
  end
end #class RuleTemplate
|
|
98
|
+
|
|
99
|
+
# RuleDebug is a class for debugging autodetect classes/methods
|
|
100
|
+
class RuleDebug < RuleTemplate
  # Builds a debugging rule labelled with _name_.
  def initialize(name)
    super()
    @name = name
  end

  # Dumps the rule name, the text and the meta information
  # (the latter two in inspected form) to $stderr, one per line.
  # Never recognizes anything: always returns nil.
  def guess(text, meta)
    $stderr.puts(@name, text.inspect, meta.inspect)
    nil
  end
end # class RuleDebug
|
|
115
|
+
|
|
116
|
+
# Special element that is always top or bottom priority.
|
|
117
|
+
class RuleSpecial < RuleTemplate
  # Stores only the name.  RuleTemplate#initialize is not invoked,
  # so no priority lists are kept by this object; the two readers
  # below answer with a new empty array on every call instead.
  def initialize(name)
    @name = name
  end

  # Renaming is forbidden: always raises RuntimeError.
  def name=(_value)
    raise 'cannot modify name'
  end

  # Always an empty array; anything appended to it is discarded.
  def higher_priority_elements
    Array.new
  end

  # Always an empty array; anything appended to it is discarded.
  def lower_priority_elements
    Array.new
  end
end # class RuleSpecial
|
|
136
|
+
|
|
137
|
+
# Special element that is always top priority.
|
|
138
|
+
TopRule = RuleSpecial.new('top')
|
|
139
|
+
# Special element that is always bottom priority.
|
|
140
|
+
BottomRule = RuleSpecial.new('bottom')
|
|
141
|
+
|
|
142
|
+
# An autodetection rule that uses a regular expression
|
|
143
|
+
class RuleRegexp < RuleTemplate
  # Builds a rule named after _dbclass_ (its +to_s+) that fires
  # when _re_ matches the text.
  # _dbclass_ may be a class or a String naming one; a String is
  # resolved to the constant only when the class is first needed.
  def initialize(dbclass, re)
    super()
    @name = dbclass.to_s
    @re = re
    @dbclass_lazy = dbclass
    @dbclass = nil
  end

  # Resolves the database class on first use and memoizes it.
  def dbclass
    @dbclass ||= get_dbclass(@dbclass_lazy)
  end
  private :dbclass

  # Returns the database class wrapped in a one-element array.
  def dbclasses
    [ dbclass ]
  end

  # Returns the database class when the regexp matches _text_,
  # nil otherwise.  _meta_ is not consulted.
  def guess(text, meta)
    return nil unless @re =~ text
    dbclass
  end
end # class RuleRegexp
|
|
174
|
+
|
|
175
|
+
# An autodetection rule that uses several regular expressions.
|
|
176
|
+
# If given string matches one of the regular expressions,
|
|
177
|
+
# returns the database class.
|
|
178
|
+
class RuleRegexp2 < RuleRegexp
  # Builds a rule for _dbclass_ from any number of regexps.
  # The single-regexp slot of the parent class is left nil.
  def initialize(dbclass, *regexps)
    super(dbclass, nil)
    @regexps = regexps
  end

  # Tries the regexps in the order given and returns the database
  # class as soon as one of them matches _text_; returns nil when
  # none matches.  _meta_ is not consulted.
  def guess(text, meta)
    matched = @regexps.any? { |pattern| pattern =~ text }
    matched ? dbclass : nil
  end
end # class RuleRegexp2
|
|
195
|
+
|
|
196
|
+
# An autodetection rule that passes data to the proc object.
|
|
197
|
+
class RuleProc < RuleTemplate
  # Creates a new instance.
  # _dbclasses_ are the database classes (or Strings naming them)
  # the block may return; they are used for the rule name, which is
  # their +to_s+ joined with "|", and for #dbclasses.
  # The block performs the actual detection; see #guess.
  def initialize(*dbclasses, &proc)
    super()
    @proc = proc
    @dbclasses = nil
    @dbclasses_lazy = dbclasses
    @name = dbclasses.collect { |x| x.to_s }.join('|')
  end

  # database classes (lazy evaluation)
  # Strings given to the constructor are resolved to constants
  # on the first call and the result is cached.
  def dbclasses
    @dbclasses ||= @dbclasses_lazy.collect { |x| get_dbclass(x) }
  end

  # If given text (and/or meta information) is known, returns
  # the database class.
  # Otherwise, returns nil or false.
  #
  # The answer is whatever the block given to the constructor returns.
  # A block declaring exactly two parameters receives _text_ and
  # _meta_; any other block receives _text_ only, so blocks written
  # as <tt>|text|</tt> behave exactly as before.
  #
  # Refer RuleTemplate#guess for _meta_.
  def guess(text, meta)
    if @proc.arity == 2
      @proc.call(text, meta)
    else
      @proc.call(text)
    end
  end
end #class RuleProc
|
|
224
|
+
|
|
225
|
+
# Creates a new AutoDetect object
|
|
226
|
+
def initialize
  # rule name => rule object
  @rules = {}
  # cached priority-ordered rule list; nil means "not computed yet"
  @elements = nil
  # the two special rules are registered in every instance
  add(TopRule)
  add(BottomRule)
end
|
|
234
|
+
|
|
235
|
+
# Adds a new element.
|
|
236
|
+
# Returns _elem_.
|
|
237
|
+
def add(elem)
  # Names must be unique: a second element with an already
  # registered name is rejected with a RuntimeError.
  key = elem.name
  raise 'element name conflicts' if @rules[key]
  # the cached ordering is no longer valid
  @elements = nil
  @rules[key] = elem
  elem
end
|
|
243
|
+
|
|
244
|
+
# (required by TSort.)
|
|
245
|
+
# For all elements, yields each element.
|
|
246
|
+
def tsort_each_node(&block)
  # TSort hook: every registered rule is handed to the block.
  @rules.each_value(&block)
end
|
|
249
|
+
|
|
250
|
+
# (required by TSort.)
|
|
251
|
+
# For a given element, yields each child
|
|
252
|
+
# (= lower priority elements) of the element.
|
|
253
|
+
def tsort_each_child(elem)
  # TSort hook: yields the rules that must come after _elem_.
  if elem == TopRule
    # Everything follows TopRule, except TopRule itself and rules
    # that were explicitly declared prior to it.
    @rules.each_value do |rule|
      next if rule == TopRule
      next if rule.lower_priority_elements.include?(TopRule)
      yield rule
    end
  elsif elem == BottomRule
    # Only rules explicitly placed below BottomRule follow it.
    @rules.each_value do |rule|
      yield rule if rule.higher_priority_elements.include?(BottomRule)
    end
  else
    # An ordinary rule: its declared lower priority rules ...
    elem.lower_priority_elements.each do |rule|
      yield rule unless rule == BottomRule
    end
    # ... plus BottomRule, unless this rule itself sits below it.
    yield BottomRule unless elem.higher_priority_elements.include?(BottomRule)
  end
end
|
|
272
|
+
|
|
273
|
+
# Returns current elements as an array
|
|
274
|
+
# whose order fulfills all elements' priorities.
|
|
275
|
+
def elements
  # The ordering is computed once and cached in @elements.
  # The result of +tsort+ is reversed before it is stored.
  @elements ||= tsort.reverse
end
|
|
283
|
+
|
|
284
|
+
# rebuilds the object and clears internal cache.
|
|
285
|
+
def rehash
  # Re-index the rule table, then drop the cached ordering so that
  # it is recomputed on the next call to +elements+.
  # Returns nil.
  @rules.rehash
  @elements = nil
end
|
|
289
|
+
|
|
290
|
+
# visualizes the object (mainly for debug)
|
|
291
|
+
def inspect
  # "<ClassName "name1" "name2" ...>", names in priority order.
  names = elements.map { |rule| rule.name.inspect }.join(' ')
  "<#{self.class} #{names}>"
end
|
|
296
|
+
|
|
297
|
+
# Iterates over each element.
|
|
298
|
+
def each_rule(&block) #:yields: elem
  # Rules are visited in the order returned by +elements+.
  elements.each(&block)
end
|
|
301
|
+
|
|
302
|
+
# Autodetect from the text.
|
|
303
|
+
# Returns a database class if succeeded.
|
|
304
|
+
# Returns nil if failed.
|
|
305
|
+
def autodetect(text, meta = {})
  # Ask the rules one by one, in priority order, and stop at the
  # first one that gives a true answer.  When no rule succeeds the
  # answer of the last rule asked (nil or false) is returned, or
  # nil when there are no rules at all.
  answer = nil
  elements.each do |rule|
    answer = rule.guess(text, meta)
    return answer if answer
  end
  answer
end
|
|
314
|
+
|
|
315
|
+
# autodetect from the FlatFile object.
|
|
316
|
+
# Returns a database class if succeeded.
|
|
317
|
+
# Returns nil if failed.
|
|
318
|
+
def autodetect_flatfile(ff, lines = 31)
  meta = {}
  # the underlying stream is taken from the @stream instance variable of _ff_
  stream = ff.instance_eval { @stream }
  # streams without a +path+ method simply have no path
  path = begin
    stream.path
  rescue NameError
    nil
  end
  if path
    meta[:path] = path
    # call autodetect once with meta and without any read action
    found = autodetect(stream.prefetch_buffer, meta)
    return found if found
  end
  # Read up to _lines_ lines; after each non-blank line, retry the
  # detection on everything prefetched so far.
  1.upto(lines) do
    line = stream.prefetch_gets
    break unless line
    next if line.strip.empty?
    found = autodetect(stream.prefetch_buffer, meta)
    return found if found
  end
  nil
end
|
|
343
|
+
|
|
344
|
+
# default autodetect object for class method
|
|
345
|
+
@default = nil
|
|
346
|
+
|
|
347
|
+
# returns the default autodetect object
|
|
348
|
+
def self.default
  # built by +make_default+ on first use, then reused
  @default ||= make_default
end
|
|
354
|
+
|
|
355
|
+
# sets the default autodetect object.
|
|
356
|
+
def self.default=(ad)
  # replaces the object handed out by +default+
  @default = ad
end
|
|
359
|
+
|
|
360
|
+
# make a new autodetect object
|
|
361
|
+
def self.[](*arg)
  # Start from a fresh object and register the given rules in the
  # order given; the populated object is the result.
  arg.inject(new) do |detector, rule|
    detector.add(rule)
    detector
  end
end
|
|
366
|
+
|
|
367
|
+
# make a default of default autodetect object
|
|
368
|
+
def self.make_default
  # Builds the stock detector: one rule per known format, then the
  # relative priorities between rules that could otherwise clash.
  # Database classes are given as Strings, so they are resolved to
  # constants only when needed.
  #
  # NOTE(review): the patterns are kept exactly as found here; if this
  # text passed through a renderer that collapses runs of spaces,
  # compare them against the upstream file before relying on them.

  # --- NCBI / EMBL / UniProt style flat files ---
  genbank = RuleRegexp.new('Bio::GenBank',
                           /^LOCUS .+ bp .*[a-z]*[DR]?NA/)
  genpept = RuleRegexp.new('Bio::GenPept',
                           /^LOCUS .+ aa .+/)
  medline = RuleRegexp.new('Bio::MEDLINE',
                           /^PMID\- [0-9]+$/)
  embl = RuleRegexp.new('Bio::EMBL',
                        /^ID .+\; .*(DNA|RNA|XXX)\;/)
  sptr = RuleRegexp2.new('Bio::SPTR',
                         /^ID .+\; *PRT\;/,
                         /^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./)
  prosite = RuleRegexp.new('Bio::PROSITE',
                           /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/)
  transfac = RuleRegexp.new('Bio::TRANSFAC',
                            /^AC [-A-Za-z0-9_\.]+$/)

  # AAindex: an "H" line is required; "M" / "I" lines tell which kind.
  aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
    next nil unless /^H [-A-Z0-9_\.]+$/ =~ text
    if text =~ /^M [rc]/
      Bio::AAindex2
    elsif text =~ /^I A\/L/
      Bio::AAindex1
    else
      false # fail to determine
    end
  end

  # --- KEGG and related ---
  litdb = RuleRegexp.new('Bio::LITDB',
                         /^CODE [0-9]+$/)
  brite = RuleRegexp.new('Bio::KEGG::BRITE',
                         /^Entry [A-Z0-9]+/)
  orthology = RuleRegexp.new('Bio::KEGG::ORTHOLOGY',
                             /^ENTRY .+ KO\s*/)
  drug = RuleRegexp.new('Bio::KEGG::DRUG',
                        /^ENTRY .+ Drug\s*/)
  glycan = RuleRegexp.new('Bio::KEGG::GLYCAN',
                          /^ENTRY .+ Glycan\s*/)
  enzyme = RuleRegexp2.new('Bio::KEGG::ENZYME',
                           /^ENTRY EC [0-9\.]+$/,
                           /^ENTRY .+ Enzyme\s*/)
  compound = RuleRegexp2.new('Bio::KEGG::COMPOUND',
                             /^ENTRY C[A-Za-z0-9\._]+$/,
                             /^ENTRY .+ Compound\s*/)
  reaction = RuleRegexp2.new('Bio::KEGG::REACTION',
                             /^ENTRY R[A-Za-z0-9\._]+$/,
                             /^ENTRY .+ Reaction\s*/)
  genes = RuleRegexp.new('Bio::KEGG::GENES',
                         /^ENTRY .+ (CDS|gene|.*RNA|Contig) /)
  genome = RuleRegexp.new('Bio::KEGG::GENOME',
                          /^ENTRY [a-z]+$/)

  # FANTOM MaXML: the DOCTYPE tells clusters from sequences.
  fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
                        'Bio::FANTOM::MaXML::Sequence') do |text|
    next nil unless /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
    case $1
    when 'clusters'
      Bio::FANTOM::MaXML::Cluster
    when 'sequences'
      Bio::FANTOM::MaXML::Sequence
    else
      nil # unknown
    end
  end

  # --- PDB ---
  pdb = RuleRegexp.new('Bio::PDB',
                       /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/)
  het = RuleRegexp.new('Bio::PDB::ChemicalComponent',
                       /^RESIDUE +.+ +\d+\s*$/)

  # --- alignments ---
  clustal = RuleRegexp2.new('Bio::ClustalW::Report',
                            /^CLUSTAL .*\(.*\).*sequence +alignment/,
                            /^CLUSTAL FORMAT for T-COFFEE/)
  gcg_msf = RuleRegexp.new('Bio::GCG::Msf',
                           /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/)
  gcg_seq = RuleRegexp.new('Bio::GCG::Seq',
                           /^!!(N|A)A_SEQUENCE .+/)

  # --- BLAST family ---
  blastxml = RuleRegexp.new('Bio::Blast::Report',
                            /\<\!DOCTYPE BlastOutput PUBLIC /)
  wublast = RuleRegexp.new('Bio::Blast::WU::Report',
                           /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/)
  wutblast = RuleRegexp.new('Bio::Blast::WU::Report_TBlast',
                            /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/)
  blast = RuleRegexp.new('Bio::Blast::Default::Report',
                         /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/)
  tblast = RuleRegexp.new('Bio::Blast::Default::Report_TBlast',
                          /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/)
  rpsblast = RuleRegexp.new('Bio::Blast::RPSBlast::Report',
                            /^RPS\-BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/)

  # --- other program reports ---
  blat = RuleRegexp.new('Bio::Blat::Report',
                        /^psLayout version \d+/)
  spidey = RuleRegexp.new('Bio::Spidey::Report',
                          /^\-\-SPIDEY version .+\-\-$/)
  hmmer = RuleRegexp.new('Bio::HMMER::Report',
                         /^HMMER +\d+\./)
  sim4 = RuleRegexp.new('Bio::Sim4::Report',
                        /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/)

  # FASTA-like data: NBRF, ordinary FASTA or numeric FASTA.
  fastaformat = RuleProc.new('Bio::FastaFormat',
                             'Bio::NBRF',
                             'Bio::FastaNumericFormat') do |text|
    next nil unless /^>.+$/ =~ text
    case text
    when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
      Bio::NBRF
    when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
      Bio::FastaFormat
    when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
      Bio::FastaNumericFormat
    else
      false
    end
  end

  # Register the rules; the order of registration is kept as is.
  detector = self[
    genbank, genpept, medline, embl, sptr, prosite, transfac,
    aaindex,
    litdb, brite, orthology, drug, glycan, enzyme, compound,
    reaction, genes, genome,
    fantom,
    pdb, het,
    clustal, gcg_msf, gcg_seq,
    blastxml, wublast, wutblast, blast, tblast, rpsblast,
    blat, spidey, hmmer, sim4,
    fastaformat
  ]

  # dependencies
  # NCBI
  genbank.is_prior_to(genpept)
  # EMBL/UniProt
  embl.is_prior_to(sptr)
  sptr.is_prior_to(prosite)
  prosite.is_prior_to(transfac)
  # KEGG (aaindex and litdb take part in no priority relation)
  brite.is_prior_to(orthology)
  orthology.is_prior_to(drug)
  drug.is_prior_to(glycan)
  glycan.is_prior_to(enzyme)
  enzyme.is_prior_to(compound)
  compound.is_prior_to(reaction)
  reaction.is_prior_to(genes)
  genes.is_prior_to(genome)
  # PDB
  pdb.is_prior_to(het)
  # BLAST
  wublast.is_prior_to(wutblast)
  wutblast.is_prior_to(blast)
  blast.is_prior_to(tblast)
  # FastaFormat is tried after BottomRule
  BottomRule.is_prior_to(fastaformat)

  # For debugging, a RuleDebug element can be added here and placed
  # with is_prior_to (e.g. prior to TopRule, or below BottomRule and
  # fastaformat) to trace what the detector is given.

  detector.rehash
  detector
end
|
|
541
|
+
|
|
542
|
+
end #class AutoDetect
|
|
543
|
+
end #class FlatFile
|
|
544
|
+
end #module Bio
|
|
545
|
+
|