bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/appl/blast/rexml.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2002, 2003 Toshiaki Katayama <k@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
# == Note
|
|
10
10
|
#
|
|
@@ -37,9 +37,7 @@ module Bio
|
|
|
37
37
|
case name
|
|
38
38
|
when 'BlastOutput_param'
|
|
39
39
|
e.elements["Parameters"].each_element_with_text do |p|
|
|
40
|
-
|
|
41
|
-
v = p.text =~ /\D/ ? p.text : p.text.to_i
|
|
42
|
-
@parameters[k] = v
|
|
40
|
+
xml_set_parameter(p.name, p.text)
|
|
43
41
|
end
|
|
44
42
|
else
|
|
45
43
|
hash[name] = text if text.strip.size > 0
|
|
@@ -72,8 +70,17 @@ module Bio
|
|
|
72
70
|
v = s.text =~ /\D/ ? s.text.to_f : s.text.to_i
|
|
73
71
|
iteration.statistics[k] = v
|
|
74
72
|
end
|
|
73
|
+
|
|
74
|
+
# for new BLAST XML format
|
|
75
|
+
when 'Iteration_query-ID'
|
|
76
|
+
iteration.query_id = i.text
|
|
77
|
+
when 'Iteration_query-def'
|
|
78
|
+
iteration.query_def = i.text
|
|
79
|
+
when 'Iteration_query-len'
|
|
80
|
+
iteration.query_len = i.text.to_i
|
|
75
81
|
end
|
|
76
|
-
end
|
|
82
|
+
end #case i.name
|
|
83
|
+
|
|
77
84
|
return iteration
|
|
78
85
|
end
|
|
79
86
|
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/blast/rpsblast.rb - NCBI RPS Blast default output parser
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
# $Id:$
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# NCBI RPS Blast (Reversed Position Specific Blast) default
|
|
12
|
+
# (-m 0 option) output parser class, Bio::Blast::RPSBlast::Report
|
|
13
|
+
# and related classes/modules.
|
|
14
|
+
#
|
|
15
|
+
# == References
|
|
16
|
+
#
|
|
17
|
+
# * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
|
|
18
|
+
# Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
|
|
19
|
+
# "Gapped BLAST and PSI-BLAST: a new generation of protein database search
|
|
20
|
+
# programs", Nucleic Acids Res. 25:3389-3402.
|
|
21
|
+
# * ftp://ftp.ncbi.nih.gov/blast/documents/rpsblast.html
|
|
22
|
+
# * http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml
|
|
23
|
+
#
|
|
24
|
+
|
|
25
|
+
require 'bio/appl/blast/format0'
|
|
26
|
+
require 'bio/io/flatfile'
|
|
27
|
+
|
|
28
|
+
module Bio
|
|
29
|
+
class Blast
|
|
30
|
+
|
|
31
|
+
# NCBI RPS Blast (Reversed Position Specific Blast) namespace.
|
|
32
|
+
# Currently, this module exists only to provide a separate namespace.
|
|
33
|
+
# To parse RPSBlast results, see Bio::Blast::RPSBlast::Report documents.
|
|
34
|
+
module RPSBlast
|
|
35
|
+
|
|
36
|
+
# Flatfile splitter for RPS-BLAST reports.
|
|
37
|
+
# It is internally used when reading RPS-BLAST report.
|
|
38
|
+
# Normally, users do not need to use it directly.
|
|
39
|
+
#
|
|
40
|
+
# Note for Windows: RPS-BLAST results generated in Microsoft Windows
|
|
41
|
+
# may not be parsed correctly due to the line feed code problem.
|
|
42
|
+
# As a workaround, convert line feed codes from Windows (DOS) to UNIX.
|
|
43
|
+
#
|
|
44
|
+
class RPSBlastSplitter < Bio::FlatFile::Splitter::Template
|
|
45
|
+
|
|
46
|
+
# Separator used to distinguish start of each report
|
|
47
|
+
ReportHead = /\A\n*(RPS\-BLAST|Query\=)/
|
|
48
|
+
|
|
49
|
+
# Delimiter used for IO#gets
|
|
50
|
+
Delimiter = "\n\n"
|
|
51
|
+
|
|
52
|
+
# creates a new splitter object
|
|
53
|
+
def initialize(klass, bstream)
|
|
54
|
+
super(klass, bstream)
|
|
55
|
+
@entry_head = nil
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Skips leader of the entry.
|
|
59
|
+
# In this class, only skips space characters.
|
|
60
|
+
def skip_leader
|
|
61
|
+
stream.skip_spaces
|
|
62
|
+
return nil
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Rewinds the stream
|
|
66
|
+
def rewind
|
|
67
|
+
@entry_head = nil
|
|
68
|
+
super
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# gets an entry
|
|
72
|
+
def get_entry
|
|
73
|
+
p0 = stream_pos()
|
|
74
|
+
pieces = []
|
|
75
|
+
flag_head = false # reached to start of header
|
|
76
|
+
flag_body = false # reached to start of body (Query=...)
|
|
77
|
+
while x = stream.gets(Delimiter)
|
|
78
|
+
if ReportHead =~ x then
|
|
79
|
+
case $1
|
|
80
|
+
when 'RPS-BLAST'
|
|
81
|
+
if pieces.empty? then
|
|
82
|
+
@entry_head = nil
|
|
83
|
+
flag_head = true
|
|
84
|
+
else
|
|
85
|
+
stream.ungets(x)
|
|
86
|
+
break
|
|
87
|
+
end
|
|
88
|
+
when 'Query='
|
|
89
|
+
if flag_body then
|
|
90
|
+
stream.ungets(x)
|
|
91
|
+
break
|
|
92
|
+
else
|
|
93
|
+
@entry_head = pieces.join('') if flag_head
|
|
94
|
+
flag_body = true
|
|
95
|
+
end
|
|
96
|
+
else
|
|
97
|
+
raise 'Bug: should not reach here'
|
|
98
|
+
end
|
|
99
|
+
end #if ReportHead...
|
|
100
|
+
pieces.push x
|
|
101
|
+
end #while
|
|
102
|
+
p1 = stream_pos()
|
|
103
|
+
|
|
104
|
+
self.entry_start_pos = p0
|
|
105
|
+
self.entry =
|
|
106
|
+
if pieces.empty? then
|
|
107
|
+
nil
|
|
108
|
+
elsif !flag_head and @entry_head then
|
|
109
|
+
@entry_head + pieces.join('')
|
|
110
|
+
else
|
|
111
|
+
pieces.join('')
|
|
112
|
+
end
|
|
113
|
+
self.entry_ended_pos = p1
|
|
114
|
+
return self.entry
|
|
115
|
+
end
|
|
116
|
+
end #class RPSBlastSplitter
|
|
117
|
+
|
|
118
|
+
# NCBI RPS Blast (Reversed Position Specific Blast)
|
|
119
|
+
# default output parser.
|
|
120
|
+
#
|
|
121
|
+
# It supports default (-m 0 option) output of the "rpsblast" command.
|
|
122
|
+
#
|
|
123
|
+
# Because this class inherits Bio::Blast::Default::Report,
|
|
124
|
+
# almost all methods are equal to those of Bio::Blast::Default::Report.
|
|
125
|
+
# Only DELIMITER (and RS) and few methods are different.
|
|
126
|
+
#
|
|
127
|
+
# By using Bio::FlatFile, (for example, Bio::FlatFile.open),
|
|
128
|
+
# rpsblast result generated from multiple query sequences is
|
|
129
|
+
# automatically split into multiple
|
|
130
|
+
# Bio::Blast::RPSBlast::Report objects corresponding to
|
|
131
|
+
# query sequences.
|
|
132
|
+
#
|
|
133
|
+
# Note for multi-fasta results WITH using Bio::FlatFile:
|
|
134
|
+
# Each split result is concatenated with the header of the
|
|
135
|
+
# result which describes RPS-BLAST version and database
|
|
136
|
+
# information, if possible.
|
|
137
|
+
#
|
|
138
|
+
# Note for multi-fasta results WITHOUT using Bio::FlatFile:
|
|
139
|
+
# When parsing an output of rpsblast command running with
|
|
140
|
+
# multi-fasta sequences WITHOUT using Bio::FlatFile,
|
|
141
|
+
# each query's result is stored as an "iteration" of PSI-Blast.
|
|
142
|
+
# This behavior may be changed in the future.
|
|
143
|
+
#
|
|
144
|
+
# Note for nucleotide results: This class is not tested with
|
|
145
|
+
# nucleotide query and/or nucleotide databases.
|
|
146
|
+
#
|
|
147
|
+
class Report < Bio::Blast::Default::Report
|
|
148
|
+
# Delimiter of each entry for RPS-BLAST.
|
|
149
|
+
DELIMITER = RS = "\nRPS-BLAST"
|
|
150
|
+
|
|
151
|
+
# (Integer) excess read size included in DELIMITER.
|
|
152
|
+
DELIMITER_OVERRUN = 9 # "RPS-BLAST"
|
|
153
|
+
|
|
154
|
+
# splitter for Bio::FlatFile support
|
|
155
|
+
FLATFILE_SPLITTER = RPSBlastSplitter
|
|
156
|
+
|
|
157
|
+
# Creates a new Report object from a string.
|
|
158
|
+
#
|
|
159
|
+
# Using Bio::FlatFile.open (or some other methods)
|
|
160
|
+
# is recommended instead of using this method directly.
|
|
161
|
+
# Refer to the Bio::Blast::RPSBlast::Report documentation for more information.
|
|
162
|
+
#
|
|
163
|
+
# Note for multi-fasta results WITHOUT using Bio::FlatFile:
|
|
164
|
+
# When parsing an output of rpsblast command running with
|
|
165
|
+
# multi-fasta sequences WITHOUT using Bio::FlatFile,
|
|
166
|
+
# each query's result is stored as an "iteration" of PSI-Blast.
|
|
167
|
+
# This behavior may be changed in the future.
|
|
168
|
+
#
|
|
169
|
+
# Note for nucleotide results: This class is not tested with
|
|
170
|
+
# nucleotide query and/or nucleotide databases.
|
|
171
|
+
#
|
|
172
|
+
def initialize(str)
|
|
173
|
+
str = str.sub(/\A\s+/, '')
|
|
174
|
+
# remove trailing entries for sure
|
|
175
|
+
str.sub!(/\n(RPS\-BLAST.*)/m, "\n")
|
|
176
|
+
@entry_overrun = $1
|
|
177
|
+
@entry = str
|
|
178
|
+
data = str.split(/(?:^[ \t]*\n)+/)
|
|
179
|
+
|
|
180
|
+
if data[0] and /\AQuery\=/ !~ data[0] then
|
|
181
|
+
format0_split_headers(data)
|
|
182
|
+
end
|
|
183
|
+
@iterations = format0_split_search(data)
|
|
184
|
+
format0_split_stat_params(data)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# Returns definition of the query.
|
|
188
|
+
# For a result of multi-fasta input, the first query's definition
|
|
189
|
+
# is returned (The same as <tt>iterations.first.query_def</tt>).
|
|
190
|
+
def query_def
|
|
191
|
+
iterations.first.query_def
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Returns length of the query.
|
|
195
|
+
# For a result of multi-fasta input, the first query's length
|
|
196
|
+
# is returned (The same as <tt>iterations.first.query_len</tt>).
|
|
197
|
+
def query_len
|
|
198
|
+
iterations.first.query_len
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
private
|
|
202
|
+
|
|
203
|
+
# Splits headers into the first line, reference, query line and
|
|
204
|
+
# database line.
|
|
205
|
+
def format0_split_headers(data)
|
|
206
|
+
@f0header = data.shift
|
|
207
|
+
@f0references = []
|
|
208
|
+
while data[0] and /\ADatabase\:/ !~ data[0]
|
|
209
|
+
@f0references.push data.shift
|
|
210
|
+
end
|
|
211
|
+
@f0database = data.shift
|
|
212
|
+
# In special case, a void line is inserted after database name.
|
|
213
|
+
if /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/ =~ data[0] then
|
|
214
|
+
@f0database.concat "\n"
|
|
215
|
+
@f0database.concat data.shift
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
# Splits the search results.
|
|
220
|
+
def format0_split_search(data)
|
|
221
|
+
iterations = []
|
|
222
|
+
dummystr = 'Searching..................................................done'
|
|
223
|
+
if r = data[0] and /^Searching/ =~ r then
|
|
224
|
+
dummystr = data.shift
|
|
225
|
+
end
|
|
226
|
+
while r = data[0] and /^Query\=/ =~ r
|
|
227
|
+
iterations << Iteration.new(data, dummystr)
|
|
228
|
+
end
|
|
229
|
+
iterations
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
# Iteration class for RPS-Blast.
|
|
233
|
+
# Though RPS-Blast does not iterate like PSI-BLAST,
|
|
234
|
+
# it aims to store a result of single query sequence.
|
|
235
|
+
#
|
|
236
|
+
# Normally, the instance of the class is generated
|
|
237
|
+
# by Bio::Blast::RPSBlast::Report object.
|
|
238
|
+
#
|
|
239
|
+
class Iteration < Bio::Blast::Default::Report::Iteration
|
|
240
|
+
# Creates a new Iteration object.
|
|
241
|
+
# It is designed to be called only internally from
|
|
242
|
+
# the Bio::Blast::RPSBlast::Report class.
|
|
243
|
+
# Users shall not use the method directly.
|
|
244
|
+
def initialize(data, dummystr)
|
|
245
|
+
if /\AQuery\=/ =~ data[0] then
|
|
246
|
+
sc = StringScanner.new(data.shift)
|
|
247
|
+
sc.skip(/\s*/)
|
|
248
|
+
if sc.skip_until(/Query\= */) then
|
|
249
|
+
q = []
|
|
250
|
+
begin
|
|
251
|
+
q << sc.scan(/.*/)
|
|
252
|
+
sc.skip(/\s*^ ?/)
|
|
253
|
+
end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
|
|
254
|
+
@query_len = sc[1].delete(',').to_i if r
|
|
255
|
+
@query_def = q.join(' ')
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
data.unshift(dummystr)
|
|
259
|
+
|
|
260
|
+
super(data)
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
# definition of the query
|
|
264
|
+
attr_reader :query_def
|
|
265
|
+
|
|
266
|
+
# length of the query sequence
|
|
267
|
+
attr_reader :query_len
|
|
268
|
+
|
|
269
|
+
end #class Iteration
|
|
270
|
+
|
|
271
|
+
end #class Report
|
|
272
|
+
|
|
273
|
+
end #module RPSBlast
|
|
274
|
+
|
|
275
|
+
end #module Blast
|
|
276
|
+
end #module Bio
|
|
277
|
+
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/appl/blast/wublast.rb - WU-BLAST default output parser
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2003
|
|
4
|
+
# Copyright:: Copyright (C) 2003, 2008 Naohisa GOTO <ng@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
# == Description
|
|
10
10
|
#
|
|
@@ -66,7 +66,94 @@ module Bio
|
|
|
66
66
|
@notice
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
+
# (WU-BLAST) Returns record number of the query.
|
|
70
|
+
# It may only be available for reports with multiple queries.
|
|
71
|
+
# Returns an Integer or nil.
|
|
72
|
+
def query_record_number
|
|
73
|
+
format0_parse_query
|
|
74
|
+
@query_record_number
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# (WU-BLAST) Returns exit code for the execution.
|
|
78
|
+
# Returns an Integer or nil.
|
|
79
|
+
def exit_code
|
|
80
|
+
if defined? @exit_code then
|
|
81
|
+
@exit_code
|
|
82
|
+
else
|
|
83
|
+
nil
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# (WU-BLAST) Returns the message bundled with the exit code output.
|
|
88
|
+
# The message will be shown when WU-BLAST ignores a fatal error
|
|
89
|
+
# due to the command line option "-nonnegok", "-novalidctxok",
|
|
90
|
+
# or "-shortqueryok".
|
|
91
|
+
#
|
|
92
|
+
# Returns a String or nil.
|
|
93
|
+
def exit_code_message
|
|
94
|
+
if defined? @exit_code_message then
|
|
95
|
+
@exit_code_message
|
|
96
|
+
else
|
|
97
|
+
nil
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# (WU-BLAST) Returns "NOTE:" information.
|
|
102
|
+
# Returns nil or an array containing String.
|
|
103
|
+
def notes
|
|
104
|
+
if defined? @notes then
|
|
105
|
+
@notes
|
|
106
|
+
else
|
|
107
|
+
nil
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# (WU-BLAST) Returns fatal error information.
|
|
112
|
+
# Returns nil or an array containing String.
|
|
113
|
+
def fatal_errors
|
|
114
|
+
if defined? @fatal_errors then
|
|
115
|
+
@fatal_errors
|
|
116
|
+
else
|
|
117
|
+
nil
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Returns the name (filename or title) of the database.
|
|
122
|
+
def db
|
|
123
|
+
unless defined?(@db)
|
|
124
|
+
if /Database *\: *(.*)/m =~ @f0database then
|
|
125
|
+
a = $1.split(/^/)
|
|
126
|
+
if a.size > 1 and /\ASearching\..+ done\s*\z/ =~ a[-1] then
|
|
127
|
+
a.pop
|
|
128
|
+
end
|
|
129
|
+
if a.size > 1 and /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\.?\s*\z/ =~ a[-1] then
|
|
130
|
+
a.pop
|
|
131
|
+
end
|
|
132
|
+
@db = a.collect { |x| x.sub(/\s+\z/, '') }.join(' ')
|
|
133
|
+
end
|
|
134
|
+
end #unless
|
|
135
|
+
@db
|
|
136
|
+
end
|
|
137
|
+
|
|
69
138
|
private
|
|
139
|
+
# Parses the query lines (begins with "Query = ").
|
|
140
|
+
def format0_parse_query
|
|
141
|
+
unless defined?(@query_def)
|
|
142
|
+
sc = StringScanner.new(@f0query)
|
|
143
|
+
sc.skip(/\s*/)
|
|
144
|
+
if sc.skip_until(/Query\= */) then
|
|
145
|
+
q = []
|
|
146
|
+
begin
|
|
147
|
+
q << sc.scan(/.*/)
|
|
148
|
+
sc.skip(/\s*^ ?/)
|
|
149
|
+
end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *(\; *record *([\,\d]+) *)?\)\s*\z/)
|
|
150
|
+
@query_len = sc[1].delete(',').to_i if r
|
|
151
|
+
@query_record_number = sc[3].delete(',').to_i if r and sc[2]
|
|
152
|
+
@query_def = q.join(' ')
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
70
157
|
# Splits headers.
|
|
71
158
|
def format0_split_headers(data)
|
|
72
159
|
@f0header = data.shift
|
|
@@ -86,6 +173,19 @@ module Bio
|
|
|
86
173
|
end
|
|
87
174
|
end
|
|
88
175
|
@f0query = data.shift
|
|
176
|
+
@f0warnings ||= []
|
|
177
|
+
while r = data.first
|
|
178
|
+
case r
|
|
179
|
+
when /^WARNING\: /
|
|
180
|
+
@f0warnings << data.shift
|
|
181
|
+
when /^NOTE\: /
|
|
182
|
+
@notes ||= []
|
|
183
|
+
@notes << data.shift
|
|
184
|
+
else
|
|
185
|
+
break #from the above "while"
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
return if r = data.first and /\A(Parameters\:|EXIT CODE *\d+)/ =~ r
|
|
89
189
|
if r = data.first and !(/^Database\: / =~ r)
|
|
90
190
|
@f0translate_info = data.shift
|
|
91
191
|
end
|
|
@@ -94,23 +194,42 @@ module Bio
|
|
|
94
194
|
|
|
95
195
|
# Splits search data.
|
|
96
196
|
def format0_split_search(data)
|
|
197
|
+
@f0warnings ||= []
|
|
198
|
+
while r = data.first and r =~ /^WARNING\: /
|
|
199
|
+
@f0warnings << data.shift
|
|
200
|
+
end
|
|
97
201
|
[ Iteration.new(data) ]
|
|
98
202
|
end
|
|
99
203
|
|
|
100
204
|
# Splits statistics parameters.
|
|
101
205
|
def format0_split_stat_params(data)
|
|
102
|
-
@f0warnings
|
|
103
|
-
|
|
206
|
+
@f0warnings ||= []
|
|
207
|
+
while r = data.first and r =~ /^WARNING\: /
|
|
104
208
|
@f0warnings << data.shift
|
|
105
209
|
end
|
|
106
210
|
@f0wu_params = []
|
|
107
211
|
@f0wu_stats = []
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
212
|
+
ary = @f0wu_params
|
|
213
|
+
while r = data.shift
|
|
214
|
+
case r
|
|
215
|
+
when /\AStatistics\:/
|
|
216
|
+
ary = @f0wu_stats
|
|
217
|
+
when /\AEXIT CODE *(\d+)\s*(.*)$/
|
|
218
|
+
@exit_code = $1.to_i
|
|
219
|
+
if $2 and !$2.empty? then
|
|
220
|
+
@exit_code_message = r.sub(/\AEXIT CODE *(\d+)\s*/, '')
|
|
221
|
+
end
|
|
222
|
+
r = nil
|
|
223
|
+
when /\AFATAL\: /
|
|
224
|
+
@fatal_errors ||= []
|
|
225
|
+
@fatal_errors.push r
|
|
226
|
+
r = nil
|
|
227
|
+
when /\AWARNING\: /
|
|
228
|
+
@f0warnings ||= []
|
|
229
|
+
@f0warnings << r
|
|
230
|
+
r = nil
|
|
231
|
+
end
|
|
232
|
+
ary << r if r
|
|
114
233
|
end
|
|
115
234
|
@f0dbstat = F0dbstat.new(@f0wu_stats)
|
|
116
235
|
itr = @iterations[0]
|
|
@@ -205,8 +324,10 @@ module Bio
|
|
|
205
324
|
@num = 1
|
|
206
325
|
@f0message = []
|
|
207
326
|
@f0warnings = []
|
|
208
|
-
return unless r = data.
|
|
209
|
-
|
|
327
|
+
return unless r = data.first
|
|
328
|
+
return if /\AParameters\:$/ =~ r
|
|
329
|
+
return if /\AEXIT CODE *\d+/ =~ r
|
|
330
|
+
@f0hitlist << data.shift
|
|
210
331
|
return unless r = data.shift
|
|
211
332
|
unless /\*{3} +NONE +\*{3}/ =~ r then
|
|
212
333
|
@f0hitlist << r
|