bio-lazyblastxml 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Runtime dependency: the library requires "libxml" for its streaming XML reader.
gem "libxml-ruby", ">= 2.0.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.0"
  gem "rcov", ">= 0"
  gem "bio", ">= 1.4.1"
end
@@ -0,0 +1,24 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ bio (1.4.1.5000)
5
+ git (1.2.5)
6
+ jeweler (1.6.0)
7
+ bundler (~> 1.0.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ libxml-ruby (2.0.5)
11
+ rake (0.8.7)
12
+ rcov (0.9.9)
13
+ shoulda (2.11.3)
14
+
15
+ PLATFORMS
16
+ ruby
17
+
18
+ DEPENDENCIES
19
+ bio (>= 1.4.1)
20
+ bundler (~> 1.0.0)
21
+ jeweler (~> 1.6.0)
22
+ libxml-ruby (>= 2.0.5)
23
+ rcov
24
+ shoulda
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 robsyme
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = bio-lazyblastxml
2
+
3
+ A plugin that allows you to parse large BLAST XML output files lazily, reading only what you need.
4
+
5
+ == Contributing to bio-lazyblastxml
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2011 robsyme. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,53 @@
1
# encoding: utf-8

# Rake tasks for the bio-lazyblastxml gem: packaging (jeweler), tests,
# coverage (rcov) and RDoc generation.

require 'rubygems'
require 'bundler'

# Activate the bundled gems first. If any are missing, report the problem and
# exit with the status code Bundler attached to the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "bio-lazyblastxml"
  spec.homepage    = "http://github.com/robsyme/bioruby-lazyblastxml"
  spec.license     = "MIT"
  spec.summary     = "A plugin that allows you to parse large blast XML output files lazily, reading only what you need. "
  spec.description = "This is very scrappy at the moment, and will need to be seriously cleaned up. It does what I need it to do for now. I'll fix it up in the coming weeks. Promise :)"
  spec.email       = "rob.syme@gmail.com"
  spec.authors     = ["robsyme"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# The test task and the coverage task pick up the same set of test files.
test_glob = 'test/**/test_*.rb'

require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = test_glob
  t.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |t|
  t.libs.push('test')
  t.pattern = test_glob
  t.verbose = true
  t.rcov_opts.push('--exclude "gems/*"')
end

task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  # Use the contents of the VERSION file in the title when the file exists.
  version =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  doc.rdoc_dir = 'rdoc'
  doc.title    = "bio-lazyblastxml #{version}"
  doc.rdoc_files.include('README*', 'lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
@@ -0,0 +1,67 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{bio-lazyblastxml}
8
+ s.version = "0.2.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["robsyme"]
12
+ s.date = %q{2011-05-30}
13
+ s.description = %q{This is very scrappy at the moment, and will need to be seriously cleaned up. It does what I need it to do for now. I'll fix it up in the coming weeks. Promise :)}
14
+ s.email = %q{rob.syme@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "Gemfile",
22
+ "Gemfile.lock",
23
+ "LICENSE.txt",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "bio-lazyblastxml.gemspec",
28
+ "lib/bio-lazyblastxml.rb",
29
+ "lib/bio/appl/blast/lazyblastxml.rb",
30
+ "test/data/test.blastout.xml",
31
+ "test/helper.rb",
32
+ "test/test_bio-lazyblastxml.rb"
33
+ ]
34
+ s.homepage = %q{http://github.com/robsyme/bioruby-lazyblastxml}
35
+ s.licenses = ["MIT"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = %q{1.5.0}
38
+ s.summary = %q{A plugin that allows you to parse large blast XML output files lazily, reading only what you need.}
39
+
40
+ if s.respond_to? :specification_version then
41
+ s.specification_version = 3
42
+
43
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
+ s.add_runtime_dependency(%q<libxml-ruby>, [">= 2.0.5"])
45
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
46
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.0"])
48
+ s.add_development_dependency(%q<rcov>, [">= 0"])
49
+ s.add_development_dependency(%q<bio>, [">= 1.4.1"])
50
+ else
51
+ s.add_dependency(%q<libxml-ruby>, [">= 2.0.5"])
52
+ s.add_dependency(%q<shoulda>, [">= 0"])
53
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
54
+ s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
55
+ s.add_dependency(%q<rcov>, [">= 0"])
56
+ s.add_dependency(%q<bio>, [">= 1.4.1"])
57
+ end
58
+ else
59
+ s.add_dependency(%q<libxml-ruby>, [">= 2.0.5"])
60
+ s.add_dependency(%q<shoulda>, [">= 0"])
61
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
+ s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
63
+ s.add_dependency(%q<rcov>, [">= 0"])
64
+ s.add_dependency(%q<bio>, [">= 1.4.1"])
65
+ end
66
+ end
67
+
@@ -0,0 +1 @@
1
+ require 'bio/appl/blast/lazyblastxml'
@@ -0,0 +1,184 @@
1
+ require "libxml"
2
+
3
# Lazy filtering/mapping helpers added to the core Enumerator class.
#
# Each helper wraps the receiver in a new Enumerator, so no element is pulled
# from the receiver until the returned enumerator is itself iterated. This is
# what lets the BLAST report classes filter a stream of XML nodes without
# reading the whole file.
#
# NOTE: this reopens a core class. All three helpers require a block and raise
# ArgumentError immediately when called without one (previously the failure
# surfaced later, as a NoMethodError on nil, when the result was iterated).
class Enumerator
  # Returns an Enumerator yielding only the elements for which the block
  # returns a truthy value.
  #
  # @raise [ArgumentError] if no block is given
  def lazy_select(&block)
    raise ArgumentError, "tried to call lazy_select without a block" unless block

    Enumerator.new do |yielder|
      each { |value| yielder.yield(value) if block.call(value) }
    end
  end

  # Returns an Enumerator yielding only the elements for which the block
  # returns a falsy value.
  #
  # @raise [ArgumentError] if no block is given
  def lazy_reject(&block)
    raise ArgumentError, "tried to call lazy_reject without a block" unless block

    Enumerator.new do |yielder|
      each { |value| yielder.yield(value) unless block.call(value) }
    end
  end

  # Returns an Enumerator yielding the block's result for every element.
  #
  # @raise [ArgumentError] if no block is given
  def lazy_map(&block)
    raise ArgumentError, "tried to call lazy_map without a block" unless block

    Enumerator.new do |yielder|
      each { |value| yielder.yield(block.call(value)) }
    end
  end
end
28
+
29
module Bio
  class LazyBlast
    # Mixin with helpers for picking nodes by name out of the host object's
    # @nodes enumerator. The host must assign @nodes before calling these.
    module LazyNodeSelector
      # Returns a lazy enumerator over the entries of @nodes whose #name is
      # one of +names+.
      def find_nodes_named(*names)
        @nodes.lazy_select do |node|
          names.include?(node.name)
        end
      end

      # Returns the first upcoming node whose name is one of +names+.
      # Uses Enumerator#next on a freshly built enumerator.
      def next_node_named(*names)
        matching = find_nodes_named(*names)
        matching.next
      end

      # Returns the inner XML (via #read_inner_xml) of the first upcoming
      # node whose name is one of +names+.
      def next_value_named(*names)
        node = next_node_named(*names)
        node.read_inner_xml
      end
    end
  end
end
46
+
47
module Bio
  class LazyBlast
    # Streaming view of a BLAST XML report.
    #
    # The report, its iterations, hits and HSPs all share one
    # LibXML::XML::Reader and advance it as they are enumerated, so the file
    # is consumed in document order and each level can only be walked once.
    # Iterate the nested levels inside the enclosing block:
    #
    #   report.each_iteration do |iteration|
    #     iteration.each_hit do |hit|
    #       hit.each_hsp { |hsp| ... }
    #     end
    #   end
    class Report
      include Enumerable
      include Bio::LazyBlast::LazyNodeSelector

      # A single <Iteration> element (the results for one query).
      class Iteration
        include Enumerable
        include Bio::LazyBlast::LazyNodeSelector
        # NOTE(review): +statistics+ and +message+ are declared but never
        # populated by the code in this file.
        attr_reader :statistics
        attr_accessor :num
        attr_accessor :message
        attr_accessor :query_id
        attr_accessor :query_def
        attr_accessor :query_len

        # Binds this iteration to the shared reader and builds the element
        # stream that #each draws hits from. The stream ends at the closing
        # </Iteration_hits> tag, at a node whose value is "No hits found",
        # or at the end of the document.
        def setup_hits(xml_reader)
          @reader = xml_reader
          @nodes = Enumerator.new do |yielder|
            while @reader.read &&
                  !(@reader.name == "Iteration_hits" && @reader.node_type == LibXML::XML::Reader::TYPE_END_ELEMENT) &&
                  !(@reader.value == "No hits found")
              yielder << @reader if @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
            end
          end
        end

        # Yields a Hit for every <Hit> element of this iteration.
        # Returns an Enumerator when no block is given.
        def each
          return enum_for(:each) unless block_given?

          find_nodes_named("Hit").each do |_reader|
            hit = Hit.new
            # Fields are read in document order; each call advances the
            # shared reader to the named element.
            hit.num = next_value_named("Hit_num").to_i
            hit.hit_id = next_value_named("Hit_id")
            hit.definition = next_value_named("Hit_def")
            hit.accession = next_value_named("Hit_accession")
            # Converted to Integer, consistent with Iteration#query_len.
            hit.len = next_value_named("Hit_len").to_i
            hit.setup_hsps(@reader)

            yield hit
          end
        end

        alias :each_hit :each

        # A single <Hit> element (one database sequence matched by the query).
        class Hit
          include Enumerable
          include Bio::LazyBlast::LazyNodeSelector
          attr_accessor :num
          attr_accessor :hit_id
          attr_accessor :len
          attr_accessor :definition
          attr_accessor :accession

          # Binds this hit to the shared reader and builds the element stream
          # that #each draws HSPs from. The stream ends at the closing
          # </Hit_hsps> tag or at the end of the document.
          def setup_hsps(xml_reader)
            @reader = xml_reader
            @nodes = Enumerator.new do |yielder|
              while @reader.read &&
                    !(@reader.name == "Hit_hsps" && @reader.node_type == LibXML::XML::Reader::TYPE_END_ELEMENT)
                yielder << @reader if @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
              end
            end
          end

          # Yields an Hsp for every <Hsp> element of this hit.
          # Returns an Enumerator when no block is given.
          def each
            return enum_for(:each) unless block_given?

            find_nodes_named("Hsp").each do |_reader|
              hsp = Hsp.new
              # The fields below must be requested in the order they appear
              # in the XML, because every lookup moves the reader forward.
              hsp.num = next_value_named("Hsp_num").to_i
              hsp.bit_score = next_value_named("Hsp_bit-score").to_f
              hsp.evalue = next_value_named("Hsp_evalue").to_f
              hsp.query_from = next_value_named("Hsp_query-from").to_i
              hsp.query_to = next_value_named("Hsp_query-to").to_i
              hsp.hit_from = next_value_named("Hsp_hit-from").to_i
              hsp.hit_to = next_value_named("Hsp_hit-to").to_i
              hsp.query_frame = next_value_named("Hsp_query-frame").to_i
              hsp.hit_frame = next_value_named("Hsp_hit-frame").to_i
              # identity and positive are distinct counts; read each from its
              # own element instead of storing Hsp_positive as the identity.
              hsp.identity = next_value_named("Hsp_identity").to_i
              hsp.positive = next_value_named("Hsp_positive").to_i
              hsp.align_len = next_value_named("Hsp_align-len").to_i
              # Alignment strings are kept as text; converting them with
              # #to_i would discard the sequences.
              hsp.qseq = next_value_named("Hsp_qseq")
              hsp.hseq = next_value_named("Hsp_hseq")
              hsp.midline = next_value_named("Hsp_midline")
              yield hsp
            end
          end

          alias :each_hsp :each

          # Plain value holder for one high-scoring segment pair.
          # NOTE(review): +gaps+, +density+, +percent_identity+ and
          # +mismatch_count+ are declared but not populated by Hit#each.
          class Hsp
            attr_accessor :num
            attr_accessor :bit_score
            attr_accessor :evalue
            attr_accessor :query_from
            attr_accessor :query_to
            attr_accessor :hit_from
            attr_accessor :hit_to
            attr_accessor :query_frame
            attr_accessor :hit_frame
            attr_accessor :identity
            attr_accessor :positive
            attr_accessor :gaps
            attr_accessor :align_len
            attr_accessor :density
            attr_accessor :qseq
            attr_accessor :hseq
            attr_accessor :midline
            attr_accessor :percent_identity
            attr_accessor :mismatch_count
          end

        end

      end

      attr_reader :reader

      # Opens +filename+ with a streaming XML reader. Nothing is read until
      # the report is enumerated.
      def initialize(filename)
        @reader = LibXML::XML::Reader.file(filename)
        @nodes = Enumerator.new do |yielder|
          while @reader.read
            yielder << @reader if @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
          end
        end
      end

      # Yields an Iteration for every <Iteration> element in the report.
      # Returns an Enumerator when no block is given.
      def each
        return enum_for(:each) unless block_given?

        find_nodes_named("Iteration").each do |_reader|
          iteration = Iteration.new
          iteration.num = next_value_named("Iteration_iter-num").to_i
          iteration.query_id = next_value_named("Iteration_query-ID")
          iteration.query_def = next_value_named("Iteration_query-def")
          iteration.query_len = next_value_named("Iteration_query-len").to_i
          iteration.setup_hits(@reader)

          yield iteration
        end
      end

      alias :each_iteration :each

    end
  end
end
@@ -0,0 +1,167 @@
1
+ <?xml version="1.0"?>
2
+ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
3
+ <BlastOutput>
4
+ <BlastOutput_program>blastp</BlastOutput_program>
5
+ <BlastOutput_version>blastp 2.2.21 [Jun-14-2009]</BlastOutput_version>
6
+ <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
7
+ <BlastOutput_db>db.fasta</BlastOutput_db>
8
+ <BlastOutput_query-ID>lcl|1_0</BlastOutput_query-ID>
9
+ <BlastOutput_query-def>test</BlastOutput_query-def>
10
+ <BlastOutput_query-len>16</BlastOutput_query-len>
11
+ <BlastOutput_param>
12
+ <Parameters>
13
+ <Parameters_matrix>BLOSUM62</Parameters_matrix>
14
+ <Parameters_expect>10</Parameters_expect>
15
+ <Parameters_gap-open>11</Parameters_gap-open>
16
+ <Parameters_gap-extend>1</Parameters_gap-extend>
17
+ <Parameters_filter>F</Parameters_filter>
18
+ </Parameters>
19
+ </BlastOutput_param>
20
+ <BlastOutput_iterations>
21
+ <Iteration>
22
+ <Iteration_iter-num>1</Iteration_iter-num>
23
+ <Iteration_query-ID>lcl|1_0</Iteration_query-ID>
24
+ <Iteration_query-def>test</Iteration_query-def>
25
+ <Iteration_query-len>16</Iteration_query-len>
26
+ <Iteration_stat>
27
+ <Statistics>
28
+ <Statistics_db-num>50</Statistics_db-num>
29
+ <Statistics_db-len>7350</Statistics_db-len>
30
+ <Statistics_hsp-len>0</Statistics_hsp-len>
31
+ <Statistics_eff-space>0</Statistics_eff-space>
32
+ <Statistics_kappa>0.041</Statistics_kappa>
33
+ <Statistics_lambda>0.267</Statistics_lambda>
34
+ <Statistics_entropy>0.14</Statistics_entropy>
35
+ </Statistics>
36
+ </Iteration_stat>
37
+ <Iteration_message>No hits found</Iteration_message>
38
+ </Iteration>
39
+ <Iteration>
40
+ <Iteration_iter-num>2</Iteration_iter-num>
41
+ <Iteration_query-ID>lcl|2_0</Iteration_query-ID>
42
+ <Iteration_query-def>SNOT_00028.1|EAT91523.1</Iteration_query-def>
43
+ <Iteration_query-len>321</Iteration_query-len>
44
+ <Iteration_hits>
45
+ <Hit>
46
+ <Hit_num>1</Hit_num>
47
+ <Hit_id>gnl|BL_ORD_ID|4</Hit_id>
48
+ <Hit_def>SNOT_00028.1|EAT91523.1</Hit_def>
49
+ <Hit_accession>4</Hit_accession>
50
+ <Hit_len>321</Hit_len>
51
+ <Hit_hsps>
52
+ <Hsp>
53
+ <Hsp_num>1</Hsp_num>
54
+ <Hsp_bit-score>677.937</Hsp_bit-score>
55
+ <Hsp_score>1748</Hsp_score>
56
+ <Hsp_evalue>0</Hsp_evalue>
57
+ <Hsp_query-from>1</Hsp_query-from>
58
+ <Hsp_query-to>321</Hsp_query-to>
59
+ <Hsp_hit-from>1</Hsp_hit-from>
60
+ <Hsp_hit-to>321</Hsp_hit-to>
61
+ <Hsp_query-frame>1</Hsp_query-frame>
62
+ <Hsp_hit-frame>1</Hsp_hit-frame>
63
+ <Hsp_identity>321</Hsp_identity>
64
+ <Hsp_positive>321</Hsp_positive>
65
+ <Hsp_align-len>321</Hsp_align-len>
66
+ <Hsp_qseq>MPHAKRRAISPTLAPYERIRPEETLNGTPKLKRHILIIGAVPFIAHHFQRDLASSFRFQITEKNYDVGDTWLENTYPGCTCGVPSDIYQYSFAPSKDWSTLFDSSGEIQNYLSRMVKRFSLHTHIEFDMVVEKCIWDEKRRKWLVNTRCEGIVAQGREADVVTIAAGIFNHYQNPEIPALETFKGIMMHTADWNHIVDLAGKKIGIIGAGASSFESYYNTLFPVFYRDSSTQHRKRQEMAAWMSGRIKDEDMRQKLVPKYELGCRRISPRESFLDAIQQDNVECVFEPIVSCKPKGLQTQAGAKQLDVIVAATGFNTSFRP</Hsp_qseq>
67
+ <Hsp_hseq>MPHAKRRAISPTLAPYERIRPEETLNGTPKLKRHILIIGAVPFIAHHFQRDLASSFRFQITEKNYDVGDTWLENTYPGCTCGVPSDIYQYSFAPSKDWSTLFDSSGEIQNYLSRMVKRFSLHTHIEFDMVVEKCIWDEKRRKWLVNTRCEGIVAQGREADVVTIAAGIFNHYQNPEIPALETFKGIMMHTADWNHIVDLAGKKIGIIGAGASSFESYYNTLFPVFYRDSSTQHRKRQEMAAWMSGRIKDEDMRQKLVPKYELGCRRISPRESFLDAIQQDNVECVFEPIVSCKPKGLQTQAGAKQLDVIVAATGFNTSFRP</Hsp_hseq>
68
+ <Hsp_midline>MPHAKRRAISPTLAPYERIRPEETLNGTPKLKRHILIIGAVPFIAHHFQRDLASSFRFQITEKNYDVGDTWLENTYPGCTCGVPSDIYQYSFAPSKDWSTLFDSSGEIQNYLSRMVKRFSLHTHIEFDMVVEKCIWDEKRRKWLVNTRCEGIVAQGREADVVTIAAGIFNHYQNPEIPALETFKGIMMHTADWNHIVDLAGKKIGIIGAGASSFESYYNTLFPVFYRDSSTQHRKRQEMAAWMSGRIKDEDMRQKLVPKYELGCRRISPRESFLDAIQQDNVECVFEPIVSCKPKGLQTQAGAKQLDVIVAATGFNTSFRP</Hsp_midline>
69
+ </Hsp>
70
+ </Hit_hsps>
71
+ </Hit>
72
+ <Hit>
73
+ <Hit_num>2</Hit_num>
74
+ <Hit_id>gnl|BL_ORD_ID|0</Hit_id>
75
+ <Hit_def>SNOT_00009.1|EAT91504.1</Hit_def>
76
+ <Hit_accession>0</Hit_accession>
77
+ <Hit_len>74</Hit_len>
78
+ <Hit_hsps>
79
+ <Hsp>
80
+ <Hsp_num>1</Hsp_num>
81
+ <Hsp_bit-score>18.4754</Hsp_bit-score>
82
+ <Hsp_score>36</Hsp_score>
83
+ <Hsp_evalue>4.05209</Hsp_evalue>
84
+ <Hsp_query-from>136</Hsp_query-from>
85
+ <Hsp_query-to>150</Hsp_query-to>
86
+ <Hsp_hit-from>41</Hsp_hit-from>
87
+ <Hsp_hit-to>55</Hsp_hit-to>
88
+ <Hsp_query-frame>1</Hsp_query-frame>
89
+ <Hsp_hit-frame>1</Hsp_hit-frame>
90
+ <Hsp_identity>5</Hsp_identity>
91
+ <Hsp_positive>7</Hsp_positive>
92
+ <Hsp_align-len>15</Hsp_align-len>
93
+ <Hsp_qseq>WDEKRRKWLVNTRCE</Hsp_qseq>
94
+ <Hsp_hseq>WDDAAGSWEFTTACQ</Hsp_hseq>
95
+ <Hsp_midline>WD+ W T C+</Hsp_midline>
96
+ </Hsp>
97
+ </Hit_hsps>
98
+ </Hit>
99
+ <Hit>
100
+ <Hit_num>3</Hit_num>
101
+ <Hit_id>gnl|BL_ORD_ID|48</Hit_id>
102
+ <Hit_def>SNOT_00532.1|EAT92027.1</Hit_def>
103
+ <Hit_accession>48</Hit_accession>
104
+ <Hit_len>110</Hit_len>
105
+ <Hit_hsps>
106
+ <Hsp>
107
+ <Hsp_num>1</Hsp_num>
108
+ <Hsp_bit-score>18.0902</Hsp_bit-score>
109
+ <Hsp_score>35</Hsp_score>
110
+ <Hsp_evalue>5.33653</Hsp_evalue>
111
+ <Hsp_query-from>232</Hsp_query-from>
112
+ <Hsp_query-to>244</Hsp_query-to>
113
+ <Hsp_hit-from>24</Hsp_hit-from>
114
+ <Hsp_hit-to>36</Hsp_hit-to>
115
+ <Hsp_query-frame>1</Hsp_query-frame>
116
+ <Hsp_hit-frame>1</Hsp_hit-frame>
117
+ <Hsp_identity>5</Hsp_identity>
118
+ <Hsp_positive>9</Hsp_positive>
119
+ <Hsp_align-len>13</Hsp_align-len>
120
+ <Hsp_qseq>QHRKRQEMAAWMS</Hsp_qseq>
121
+ <Hsp_hseq>RHRREQDQDRWMT</Hsp_hseq>
122
+ <Hsp_midline>+HR+ Q+ WM+</Hsp_midline>
123
+ </Hsp>
124
+ </Hit_hsps>
125
+ </Hit>
126
+ <Hit>
127
+ <Hit_num>4</Hit_num>
128
+ <Hit_id>gnl|BL_ORD_ID|35</Hit_id>
129
+ <Hit_def>SNOT_00418.1|EAT91913.1</Hit_def>
130
+ <Hit_accession>35</Hit_accession>
131
+ <Hit_len>228</Hit_len>
132
+ <Hit_hsps>
133
+ <Hsp>
134
+ <Hsp_num>1</Hsp_num>
135
+ <Hsp_bit-score>17.705</Hsp_bit-score>
136
+ <Hsp_score>34</Hsp_score>
137
+ <Hsp_evalue>6.35851</Hsp_evalue>
138
+ <Hsp_query-from>200</Hsp_query-from>
139
+ <Hsp_query-to>211</Hsp_query-to>
140
+ <Hsp_hit-from>200</Hsp_hit-from>
141
+ <Hsp_hit-to>211</Hsp_hit-to>
142
+ <Hsp_query-frame>1</Hsp_query-frame>
143
+ <Hsp_hit-frame>1</Hsp_hit-frame>
144
+ <Hsp_identity>7</Hsp_identity>
145
+ <Hsp_positive>9</Hsp_positive>
146
+ <Hsp_align-len>12</Hsp_align-len>
147
+ <Hsp_qseq>AGKKIGIIGAGA</Hsp_qseq>
148
+ <Hsp_hseq>ARDEIGLFGAGA</Hsp_hseq>
149
+ <Hsp_midline>A +IG+ GAGA</Hsp_midline>
150
+ </Hsp>
151
+ </Hit_hsps>
152
+ </Hit>
153
+ </Iteration_hits>
154
+ <Iteration_stat>
155
+ <Statistics>
156
+ <Statistics_db-num>50</Statistics_db-num>
157
+ <Statistics_db-len>7350</Statistics_db-len>
158
+ <Statistics_hsp-len>0</Statistics_hsp-len>
159
+ <Statistics_eff-space>0</Statistics_eff-space>
160
+ <Statistics_kappa>0.041</Statistics_kappa>
161
+ <Statistics_lambda>0.267</Statistics_lambda>
162
+ <Statistics_entropy>0.14</Statistics_entropy>
163
+ </Statistics>
164
+ </Iteration_stat>
165
+ </Iteration>
166
+ </BlastOutput_iterations>
167
+ </BlastOutput>
@@ -0,0 +1,19 @@
1
# Shared bootstrap for the test suite: activates the bundled gems, loads
# minitest, puts lib/ and test/ on the load path and requires the library.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report the missing gems and exit with the status Bundler attached.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'minitest/unit'

test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'bio-lazyblastxml'

# Reopened with an empty body; a place for helpers shared by all test cases.
class MiniTest::Unit::TestCase
end

MiniTest::Unit.autorun
@@ -0,0 +1,67 @@
1
+ require "pp"
2
+ require 'helper'
3
+
4
# Exercises Bio::LazyBlast::Report against the bundled two-iteration fixture.
class TestReport < MiniTest::Unit::TestCase
  def setup
    # Resolve the fixture relative to this file so the tests do not depend on
    # the directory they are launched from.
    @blast_filename = File.join(File.dirname(__FILE__), 'data', 'test.blastout.xml')
    @report = Bio::LazyBlast::Report.new(@blast_filename)
  end

  def test_report_creation
    assert_kind_of Bio::LazyBlast::Report, @report, "Creating a new Report should return a report object"
  end

  def test_report_enumeration
    assert_equal 2, @report.count, "Calling #count on the report object should return the number of iterations"
  end

  def test_report_iteration
    assert @report.all?{|iteration| iteration.is_a? Bio::LazyBlast::Report::Iteration}, "All of the objects yielded by the report object should be a type of Bio::LazyBlast::Report::Iteration"
  end

  def test_report_instance_values
    first_iteration = @report.first
    assert_equal 1, first_iteration.num
    assert_equal 'test', first_iteration.query_def
    assert_equal 'lcl|1_0', first_iteration.query_id
    assert_equal 16, first_iteration.query_len
  end

end
31
+
32
# Exercises iteration/hit/HSP traversal against the bundled fixture.
class TestIteration < MiniTest::Unit::TestCase
  def setup
    # Resolve the fixture relative to this file so the tests do not depend on
    # the directory they are launched from.
    @blast_filename = File.join(File.dirname(__FILE__), 'data', 'test.blastout.xml')
    @report = Bio::LazyBlast::Report.new(@blast_filename)
  end

  def test_iteration_creation
    # Take the iteration from the report here rather than in #setup: the
    # report is read forward-only, so consuming an iteration in #setup would
    # hide it from the other tests. (The previous assertion checked an
    # instance variable that was never assigned.)
    assert_kind_of Bio::LazyBlast::Report::Iteration, @report.first
  end

  def test_example_usage
    outstring = ''
    @report.each_iteration do |iteration|
      outstring << "Query: %s\n" % iteration.query_def
      iteration.each_hit do |hit|
        outstring << " | hit: %s\n" % hit.definition
        hit.each_hsp do |hsp|
          outstring << " | hsp: %s\n" % hsp.evalue
        end
      end
    end

    # Built line by line so the leading whitespace of every expected line is
    # explicit and matches the format strings above.
    expected_lines = [
      "Query: test",
      "Query: SNOT_00028.1|EAT91523.1",
      " | hit: SNOT_00028.1|EAT91523.1",
      " | hsp: 0.0",
      " | hit: SNOT_00009.1|EAT91504.1",
      " | hsp: 4.05209",
      " | hit: SNOT_00532.1|EAT92027.1",
      " | hsp: 5.33653",
      " | hit: SNOT_00418.1|EAT91913.1",
      " | hsp: 6.35851"
    ]
    final_string = expected_lines.map { |line| line + "\n" }.join

    assert_equal final_string, outstring
  end
end
metadata ADDED
@@ -0,0 +1,137 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-lazyblastxml
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.2.0
6
+ platform: ruby
7
+ authors:
8
+ - robsyme
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-05-30 00:00:00 +08:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: libxml-ruby
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.0.5
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: shoulda
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: bundler
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.0.0
46
+ type: :development
47
+ prerelease: false
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: jeweler
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ version: 1.6.0
57
+ type: :development
58
+ prerelease: false
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: rcov
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: bio
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 1.4.1
79
+ type: :development
80
+ prerelease: false
81
+ version_requirements: *id006
82
+ description: This is very scrappy at the moment, and will need to be seriously cleaned up. It does what I need it to do for now. I'll fix it up in the coming weeks. Promise :)
83
+ email: rob.syme@gmail.com
84
+ executables: []
85
+
86
+ extensions: []
87
+
88
+ extra_rdoc_files:
89
+ - LICENSE.txt
90
+ - README.rdoc
91
+ files:
92
+ - .document
93
+ - Gemfile
94
+ - Gemfile.lock
95
+ - LICENSE.txt
96
+ - README.rdoc
97
+ - Rakefile
98
+ - VERSION
99
+ - bio-lazyblastxml.gemspec
100
+ - lib/bio-lazyblastxml.rb
101
+ - lib/bio/appl/blast/lazyblastxml.rb
102
+ - test/data/test.blastout.xml
103
+ - test/helper.rb
104
+ - test/test_bio-lazyblastxml.rb
105
+ has_rdoc: true
106
+ homepage: http://github.com/robsyme/bioruby-lazyblastxml
107
+ licenses:
108
+ - MIT
109
+ post_install_message:
110
+ rdoc_options: []
111
+
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ none: false
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ hash: 3109622305771744063
120
+ segments:
121
+ - 0
122
+ version: "0"
123
+ required_rubygems_version: !ruby/object:Gem::Requirement
124
+ none: false
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: "0"
129
+ requirements: []
130
+
131
+ rubyforge_project:
132
+ rubygems_version: 1.5.0
133
+ signing_key:
134
+ specification_version: 3
135
+ summary: A plugin that allows you to parse large blast XML output files lazily, reading only what you need.
136
+ test_files: []
137
+