bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
# Toshiaki Katayama <k@bioruby.org>
|
|
8
8
|
# License:: The Ruby License
|
|
9
9
|
#
|
|
10
|
-
# $Id
|
|
10
|
+
# $Id:$
|
|
11
11
|
#
|
|
12
12
|
# == Description
|
|
13
13
|
#
|
|
@@ -115,26 +115,35 @@ class Blast
|
|
|
115
115
|
end
|
|
116
116
|
end
|
|
117
117
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
118
|
+
# set parameter of the key as val
|
|
119
|
+
def xml_set_parameter(key, val)
|
|
120
|
+
#labels = {
|
|
121
|
+
# 'matrix' => 'Parameters_matrix',
|
|
122
|
+
# 'expect' => 'Parameters_expect',
|
|
123
|
+
# 'include' => 'Parameters_include',
|
|
124
|
+
# 'sc-match' => 'Parameters_sc-match',
|
|
125
|
+
# 'sc-mismatch' => 'Parameters_sc-mismatch',
|
|
126
|
+
# 'gap-open' => 'Parameters_gap-open',
|
|
127
|
+
# 'gap-extend' => 'Parameters_gap-extend',
|
|
128
|
+
# 'filter' => 'Parameters_filter',
|
|
129
|
+
# 'pattern' => 'Parameters_pattern',
|
|
130
|
+
# 'entrez-query' => 'Parameters_entrez-query',
|
|
131
|
+
#}
|
|
132
|
+
k = key.sub(/\AParameters\_/, '')
|
|
133
|
+
@parameters[k] =
|
|
132
134
|
case k
|
|
133
|
-
when '
|
|
134
|
-
|
|
135
|
+
when 'expect', 'include'
|
|
136
|
+
val.to_f
|
|
137
|
+
when /\Agap\-/, /\Asc\-/
|
|
138
|
+
val.to_i
|
|
135
139
|
else
|
|
136
|
-
|
|
140
|
+
val
|
|
137
141
|
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def xmlparser_parse_parameters(hash)
|
|
145
|
+
hash.each do |k, v|
|
|
146
|
+
xml_set_parameter(k, v)
|
|
138
147
|
end
|
|
139
148
|
end
|
|
140
149
|
|
|
@@ -144,6 +153,14 @@ class Blast
|
|
|
144
153
|
@iterations.last.num = hash[tag].to_i
|
|
145
154
|
when 'Iteration_message'
|
|
146
155
|
@iterations.last.message = hash[tag].to_s
|
|
156
|
+
|
|
157
|
+
# for new BLAST XML format
|
|
158
|
+
when 'Iteration_query-ID'
|
|
159
|
+
@iterations.last.query_id = hash[tag].to_s
|
|
160
|
+
when 'Iteration_query-def'
|
|
161
|
+
@iterations.last.query_def = hash[tag].to_s
|
|
162
|
+
when 'Iteration_query-len'
|
|
163
|
+
@iterations.last.query_len = hash[tag].to_i
|
|
147
164
|
end
|
|
148
165
|
end
|
|
149
166
|
|
data/lib/bio/appl/blat/report.rb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/appl/blat/report.rb - BLAT result parser
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2004
|
|
4
|
+
# Copyright:: Copyright (C) 2004, 2006, 2008 Naohisa Goto <ng@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
# BLAT result parser (psl / pslx format).
|
|
10
10
|
#
|
|
@@ -46,15 +46,18 @@ module Bio
|
|
|
46
46
|
# In Bio::Blat::Report, it it nil (1 entry 1 file).
|
|
47
47
|
DELIMITER = RS = nil # 1 file 1 entry
|
|
48
48
|
|
|
49
|
+
# Splitter for Bio::FlatFile
|
|
50
|
+
FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented
|
|
51
|
+
|
|
49
52
|
# Creates a new Bio::Blat::Report object from BLAT result text (String).
|
|
50
53
|
# You can use Bio::FlatFile to read a file.
|
|
51
54
|
# Currently, results created with options -out=psl (default) or
|
|
52
55
|
# -out=pslx are supported.
|
|
53
|
-
def initialize(text)
|
|
56
|
+
def initialize(text = '')
|
|
54
57
|
flag = false
|
|
55
58
|
head = []
|
|
56
59
|
@hits = []
|
|
57
|
-
text.
|
|
60
|
+
text.each_line do |line|
|
|
58
61
|
if flag then
|
|
59
62
|
@hits << Hit.new(line)
|
|
60
63
|
else
|
|
@@ -71,7 +74,45 @@ module Bio
|
|
|
71
74
|
end
|
|
72
75
|
end
|
|
73
76
|
end
|
|
74
|
-
@columns = parse_header(head)
|
|
77
|
+
@columns = parse_header(head) unless head.empty?
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Adds a header line if the header data is not yet given and
|
|
81
|
+
# the given line is suitable for header.
|
|
82
|
+
# Returns self if adding header line is succeeded.
|
|
83
|
+
# Otherwise, returns false (the line is not added).
|
|
84
|
+
def add_header_line(line)
|
|
85
|
+
return false if defined? @columns
|
|
86
|
+
line = line.chomp
|
|
87
|
+
case line
|
|
88
|
+
when /^\d/
|
|
89
|
+
@columns = (defined? @header_lines) ? parse_header(@header_lines) : []
|
|
90
|
+
return false
|
|
91
|
+
when /\A\-+\s*\z/
|
|
92
|
+
@columns = (defined? @header_lines) ? parse_header(@header_lines) : []
|
|
93
|
+
return self
|
|
94
|
+
else
|
|
95
|
+
@header_lines ||= []
|
|
96
|
+
@header_lines.push line
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Adds a line to the entry if the given line is regarded as
|
|
101
|
+
# a part of the current entry.
|
|
102
|
+
# If the current entry (self) is empty, or the line has the same
|
|
103
|
+
# query name, the line is added and returns self.
|
|
104
|
+
# Otherwise, returns false (the line is not added).
|
|
105
|
+
def add_line(line)
|
|
106
|
+
if /\A\s*\z/ =~ line then
|
|
107
|
+
return @hits.empty? ? self : false
|
|
108
|
+
end
|
|
109
|
+
hit = Hit.new(line.chomp)
|
|
110
|
+
if @hits.empty? or @hits.first.query.name == hit.query.name then
|
|
111
|
+
@hits.push hit
|
|
112
|
+
return self
|
|
113
|
+
else
|
|
114
|
+
return false
|
|
115
|
+
end
|
|
75
116
|
end
|
|
76
117
|
|
|
77
118
|
# hits of the result.
|
data/lib/bio/appl/emboss.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id: emboss.rb,v 1.
|
|
8
|
+
# $Id: emboss.rb,v 1.9 2008/01/10 03:51:06 ngoto Exp $
|
|
9
9
|
#
|
|
10
10
|
|
|
11
11
|
module Bio
|
|
@@ -35,22 +35,34 @@ module Bio
|
|
|
35
35
|
#
|
|
36
36
|
# # Suppose that you could get the sequence for XLRHODOP by running
|
|
37
37
|
# # the EMBOSS command +seqret embl:xlrhodop+ on the command line.
|
|
38
|
-
# # Then you can get the output of that command in a
|
|
39
|
-
# # by
|
|
40
|
-
# xlrhodop = Bio::EMBOSS.
|
|
41
|
-
# puts xlrhodop
|
|
38
|
+
# # Then you can get the output of that command in a String object
|
|
39
|
+
# # by using Bio::EMBOSS.run method.
|
|
40
|
+
# xlrhodop = Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
|
|
41
|
+
# puts xlrhodop
|
|
42
42
|
#
|
|
43
43
|
# # Or all in one go:
|
|
44
|
-
# puts Bio::EMBOSS.
|
|
44
|
+
# puts Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
|
|
45
45
|
#
|
|
46
46
|
# # Similarly:
|
|
47
|
-
# puts Bio::EMBOSS.
|
|
48
|
-
#
|
|
49
|
-
# puts Bio::EMBOSS.
|
|
47
|
+
# puts Bio::EMBOSS.run('transeq', '-sbegin', '110','-send', '1171',
|
|
48
|
+
# 'embl:xlrhodop')
|
|
49
|
+
# puts Bio::EMBOSS.run('showfeat', 'embl:xlrhodop')
|
|
50
|
+
# puts Bio::EMBOSS.run('seqret', 'embl:xlrhodop', '-osformat', 'acedb')
|
|
50
51
|
#
|
|
51
52
|
# # A shortcut exists for this two-step process for +seqret+ and +entret+.
|
|
52
53
|
# puts Bio::EMBOSS.seqret('embl:xlrhodop')
|
|
53
54
|
# puts Bio::EMBOSS.entret('embl:xlrhodop')
|
|
55
|
+
#
|
|
56
|
+
# # You can use %w() syntax.
|
|
57
|
+
# puts Bio::EMBOSS.run(*%w( transeq -sbegin 110 -send 1171 embl:xlrhodop ))
|
|
58
|
+
#
|
|
59
|
+
# # You can also use Shellwords.shellwords.
|
|
60
|
+
# require 'shellwords'
|
|
61
|
+
# str = 'transeq -sbegin 110 -send 1171 embl:xlrhodop'
|
|
62
|
+
# cmd = Shellwords.shellwords(str)
|
|
63
|
+
# puts Bio::EMBOSS.run(*cmd)
|
|
64
|
+
#
|
|
65
|
+
|
|
54
66
|
#
|
|
55
67
|
# == Pre-requisites
|
|
56
68
|
#
|
|
@@ -76,8 +88,8 @@ class EMBOSS
|
|
|
76
88
|
# puts object.exec
|
|
77
89
|
# ---
|
|
78
90
|
# *Arguments*:
|
|
79
|
-
# * (required)
|
|
80
|
-
# *Returns*::
|
|
91
|
+
# * (required) _arg_: argument given to the emboss seqret command
|
|
92
|
+
# *Returns*:: String
|
|
81
93
|
def self.seqret(arg)
|
|
82
94
|
str = self.retrieve('seqret', arg)
|
|
83
95
|
end
|
|
@@ -92,12 +104,16 @@ class EMBOSS
|
|
|
92
104
|
# puts object.exec
|
|
93
105
|
# ---
|
|
94
106
|
# *Arguments*:
|
|
95
|
-
# * (required)
|
|
96
|
-
# *Returns*::
|
|
107
|
+
# * (required) _arg_: argument given to the emboss entret command
|
|
108
|
+
# *Returns*:: String
|
|
97
109
|
def self.entret(arg)
|
|
98
110
|
str = self.retrieve('entret', arg)
|
|
99
111
|
end
|
|
100
112
|
|
|
113
|
+
# WARNING: Bio::EMBOSS.new will be changed in the future because
|
|
114
|
+
# Bio::EMBOSS.new(cmd_line) is inconvenient and potential security hole.
|
|
115
|
+
# Using Bio::EMBOSS.run(program, options...) is strongly recommended.
|
|
116
|
+
#
|
|
101
117
|
# Initializes a new Bio::EMBOSS object. This provides a holder that can
|
|
102
118
|
# subsequently be executed (see Bio::EMBOSS.exec). The object does _not_
|
|
103
119
|
# hold any actual data when initialized.
|
|
@@ -114,6 +130,7 @@ class EMBOSS
|
|
|
114
130
|
# * (required) _command_: emboss command
|
|
115
131
|
# *Returns*:: Bio::EMBOSS object
|
|
116
132
|
def initialize(cmd_line)
|
|
133
|
+
warn 'Bio::EMBOSS.new(cmd_line) is inconvenient and potential security hole. Using Bio::EMBOSS.run(program, options...) is strongly recommended.'
|
|
117
134
|
@cmd_line = cmd_line + ' -stdout -auto'
|
|
118
135
|
end
|
|
119
136
|
|
|
@@ -142,6 +159,38 @@ class EMBOSS
|
|
|
142
159
|
# Result of the executed command
|
|
143
160
|
attr_reader :result
|
|
144
161
|
|
|
162
|
+
# Runs an emboss program and get the result as string.
|
|
163
|
+
# Note that "-auto -stdout" are automatically added to the options.
|
|
164
|
+
#
|
|
165
|
+
# Example 1:
|
|
166
|
+
#
|
|
167
|
+
# result = Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
|
|
168
|
+
#
|
|
169
|
+
# Example 2:
|
|
170
|
+
#
|
|
171
|
+
# result = Bio::EMBOSS.run('water',
|
|
172
|
+
# '-asequence', 'swissprot:slpi_human',
|
|
173
|
+
# '-bsequence', 'swissprot:slpi_mouse')
|
|
174
|
+
#
|
|
175
|
+
# Example 3:
|
|
176
|
+
# options = %w( -asequence swissprot:slpi_human
|
|
177
|
+
# -bsequence swissprot:slpi_mouse )
|
|
178
|
+
# result = Bio::EMBOSS.run('needle', *options)
|
|
179
|
+
#
|
|
180
|
+
# For an overview of commands that can be used with this method, see the
|
|
181
|
+
# emboss website.
|
|
182
|
+
# ---
|
|
183
|
+
# *Arguments*:
|
|
184
|
+
# * (required) _program_: command name, or filename of an emboss program
|
|
185
|
+
# * _options_: options given to the emboss program
|
|
186
|
+
# *Returns*:: String
|
|
187
|
+
def self.run(program, *options)
|
|
188
|
+
cmd = [ program, *options ]
|
|
189
|
+
cmd.push '-auto'
|
|
190
|
+
cmd.push '-stdout'
|
|
191
|
+
return Bio::Command.query_command(cmd)
|
|
192
|
+
end
|
|
193
|
+
|
|
145
194
|
private
|
|
146
195
|
|
|
147
196
|
def self.retrieve(cmd, arg)
|
data/lib/bio/appl/fasta.rb
CHANGED
|
@@ -4,11 +4,11 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2001, 2002 Toshiaki Katayama <k@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'net/http'
|
|
11
|
-
require '
|
|
11
|
+
require 'uri'
|
|
12
12
|
require 'bio/command'
|
|
13
13
|
require 'shellwords'
|
|
14
14
|
|
|
@@ -154,16 +154,14 @@ class Fasta
|
|
|
154
154
|
'style' => 'raw',
|
|
155
155
|
'prog' => @program,
|
|
156
156
|
'dbname' => @db,
|
|
157
|
-
'sequence' =>
|
|
158
|
-
'other_param' =>
|
|
157
|
+
'sequence' => query,
|
|
158
|
+
'other_param' => Bio::Command.make_command_line_unix(@options),
|
|
159
159
|
'ktup_value' => @ktup,
|
|
160
160
|
'matrix' => @matrix,
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
form.each do |k, v|
|
|
166
|
-
data.push("#{k}=#{v}") if v
|
|
163
|
+
form.keys.each do |k|
|
|
164
|
+
form.delete(k) unless form[k]
|
|
167
165
|
end
|
|
168
166
|
|
|
169
167
|
report = nil
|
|
@@ -172,7 +170,7 @@ class Fasta
|
|
|
172
170
|
http = Bio::Command.new_http(host)
|
|
173
171
|
http.open_timeout = 3000
|
|
174
172
|
http.read_timeout = 6000
|
|
175
|
-
result
|
|
173
|
+
result = Bio::Command.http_post_form(http, path, form)
|
|
176
174
|
# workaround 2006.8.1 - fixed for new batch queuing system
|
|
177
175
|
case result.code
|
|
178
176
|
when "302"
|
|
@@ -191,9 +189,9 @@ class Fasta
|
|
|
191
189
|
end
|
|
192
190
|
@output = result.body.to_s
|
|
193
191
|
# workaround 2005.08.12
|
|
194
|
-
re = %r{<A HREF="http://#{host}(/tmp/[^"]+)">Show all result</A>} # "
|
|
192
|
+
re = %r{<A HREF="http://#{host}(/tmp/[^"]+)">Show all result</A>}i # "
|
|
195
193
|
if path = @output[re, 1]
|
|
196
|
-
result
|
|
194
|
+
result = http.get(path)
|
|
197
195
|
@output = result.body
|
|
198
196
|
txt = @output.to_s.split(/\<pre\>/)[1]
|
|
199
197
|
raise 'cannot understand response' unless txt
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
# == Description
|
|
11
11
|
#
|
|
@@ -75,7 +75,7 @@ class Genscan
|
|
|
75
75
|
@isochore = nil
|
|
76
76
|
@matrix = nil
|
|
77
77
|
|
|
78
|
-
report.
|
|
78
|
+
report.each_line("\n") do |line|
|
|
79
79
|
case line
|
|
80
80
|
when /^GENSCAN/
|
|
81
81
|
parse_headline(line)
|
|
@@ -94,7 +94,7 @@ class Genscan
|
|
|
94
94
|
|
|
95
95
|
# genes/exons
|
|
96
96
|
genes_region = report[i...j]
|
|
97
|
-
genes_region.
|
|
97
|
+
genes_region.each_line("\n") do |line|
|
|
98
98
|
if /Init|Intr|Term|PlyA|Prom|Sngl/ =~ line
|
|
99
99
|
gn, en = line.strip.split(" +")[0].split(/\./).map {|i| i.to_i }
|
|
100
100
|
add_exon(gn, en, line)
|
data/lib/bio/appl/hmmer.rb
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
# Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
|
|
8
8
|
# License:: The Ruby License
|
|
9
9
|
#
|
|
10
|
-
# $Id
|
|
10
|
+
# $Id:$
|
|
11
11
|
#
|
|
12
12
|
# == Description
|
|
13
13
|
#
|
|
@@ -61,7 +61,7 @@ class HMMER
|
|
|
61
61
|
#
|
|
62
62
|
def self.reports(multiple_report_text)
|
|
63
63
|
ary = []
|
|
64
|
-
multiple_report_text.
|
|
64
|
+
multiple_report_text.each_line("\n//\n") do |report|
|
|
65
65
|
if block_given?
|
|
66
66
|
yield Report.new(report)
|
|
67
67
|
else
|
|
@@ -267,7 +267,7 @@ class HMMER
|
|
|
267
267
|
program['license'] = program_data.split(/\n/)
|
|
268
268
|
|
|
269
269
|
parameter = {}
|
|
270
|
-
parameter_data.
|
|
270
|
+
parameter_data.each_line do |x|
|
|
271
271
|
if /^(.+?):\s+(.*?)\s*$/ =~ x
|
|
272
272
|
parameter[$1] = $2
|
|
273
273
|
end
|
|
@@ -281,7 +281,7 @@ class HMMER
|
|
|
281
281
|
# Bio::HMMER::Report#parse_query_info
|
|
282
282
|
def parse_query_info(data)
|
|
283
283
|
hash = {}
|
|
284
|
-
data.
|
|
284
|
+
data.each_line do |x|
|
|
285
285
|
if /^(.+?):\s+(.*?)\s*$/ =~ x
|
|
286
286
|
hash[$1] = $2
|
|
287
287
|
elsif /\s+\[(.+)\]/ =~ x
|
|
@@ -298,7 +298,7 @@ class HMMER
|
|
|
298
298
|
data.sub!(/.+?---\n/m, '').chop!
|
|
299
299
|
hits = []
|
|
300
300
|
return hits if data == "\t[no hits above thresholds]\n"
|
|
301
|
-
data.
|
|
301
|
+
data.each_line do |l|
|
|
302
302
|
hits.push(Hit.new(l))
|
|
303
303
|
end
|
|
304
304
|
hits
|
|
@@ -311,7 +311,7 @@ class HMMER
|
|
|
311
311
|
data.sub!(/.+?---\n/m, '').chop!
|
|
312
312
|
hsps=[]
|
|
313
313
|
return hsps if data == "\t[no hits above thresholds]\n"
|
|
314
|
-
data.
|
|
314
|
+
data.each_line do |l|
|
|
315
315
|
hsps.push(Hsp.new(l, is_hmmsearch))
|
|
316
316
|
end
|
|
317
317
|
return hsps
|
|
@@ -326,19 +326,19 @@ class HMMER
|
|
|
326
326
|
|
|
327
327
|
statistical_detail = {}
|
|
328
328
|
data.sub!(/(.+?)\n\n/m, '')
|
|
329
|
-
$1.
|
|
329
|
+
$1.each_line do |l|
|
|
330
330
|
statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
|
|
331
331
|
end
|
|
332
332
|
|
|
333
333
|
total_seq_searched = nil
|
|
334
334
|
data.sub!(/(.+?)\n\n/m, '')
|
|
335
|
-
$1.
|
|
335
|
+
$1.each_line do |l|
|
|
336
336
|
total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
|
|
337
337
|
end
|
|
338
338
|
|
|
339
339
|
whole_seq_top_hits = {}
|
|
340
340
|
data.sub!(/(.+?)\n\n/m, '')
|
|
341
|
-
$1.
|
|
341
|
+
$1.each_line do |l|
|
|
342
342
|
if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
|
|
343
343
|
whole_seq_top_hits[$1] = $2.to_i
|
|
344
344
|
elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
|
|
@@ -347,7 +347,7 @@ class HMMER
|
|
|
347
347
|
end
|
|
348
348
|
|
|
349
349
|
domain_top_hits = {}
|
|
350
|
-
data.
|
|
350
|
+
data.each_line do |l|
|
|
351
351
|
if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
|
|
352
352
|
domain_top_hits[$1] = $2.to_i
|
|
353
353
|
elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
|