mzml 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +4 -4
- data/VERSION +1 -1
- data/bin/mzML2mgf.rb +53 -0
- data/lib/mzml.rb +28 -11
- data/mzml.gemspec +12 -9
- data/spec/mzml_spec.rb +33 -0
- data/spec/small.compressed.mzML +2107 -9043
- data/spec/small.mgf +102 -349
- data/spec/small.mzML +2121 -9046
- data/spec/spec_helper.rb +1 -1
- metadata +36 -20
data/Rakefile
CHANGED
@@ -10,10 +10,10 @@ begin
|
|
10
10
|
gem.email = "angel@delagoya.com"
|
11
11
|
gem.homepage = "http://github.com/delagoya/mzml"
|
12
12
|
gem.authors = ["Angel Pizarro"]
|
13
|
-
gem.add_development_dependency "rspec", "
|
13
|
+
gem.add_development_dependency "rspec", "1.3.0"
|
14
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
-
gem.add_dependency "nokogiri", "
|
16
|
-
|
15
|
+
gem.add_dependency "nokogiri", "1.4.1"
|
16
|
+
|
17
17
|
end
|
18
18
|
Jeweler::GemcutterTasks.new
|
19
19
|
rescue LoadError
|
@@ -39,6 +39,6 @@ task :default => :spec
|
|
39
39
|
require 'yard'
|
40
40
|
YARD::Rake::YardocTask.new do |yardoc|
|
41
41
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
42
|
-
yardoc.options = ["--title", "mzml #{version}", "-r", "README.rdoc"]
|
42
|
+
yardoc.options = ["--title", "mzml #{version}", "-r", "README.rdoc"]
|
43
43
|
yardoc.files = ['README*','lib/**/*.rb']
|
44
44
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/bin/mzML2mgf.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/opt/local/bin/ruby
|
2
|
+
|
3
|
+
################################
|
4
|
+
####
|
5
|
+
##
|
6
|
+
# David Austin - UPENN
|
7
|
+
# converts mzML to MGF format
|
8
|
+
# set up to replicate msconvert but much slower
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'mzml'
|
13
|
+
|
14
|
+
|
15
|
+
#first load nokogiri document
|
16
|
+
|
17
|
+
mzml = MzML::Doc.new(ARGV[0])
|
18
|
+
|
19
|
+
#now loop through each spectrum.. sort first to be the same as msconvert
|
20
|
+
|
21
|
+
sorted_keys = mzml.parse_index_list[:spectrum].keys.sort{ |x,y| x.split('=')[3].to_i <=> y.split('=')[3].to_i }
|
22
|
+
|
23
|
+
sorted_keys.each do |k|
|
24
|
+
|
25
|
+
s = mzml.spectrum(k)
|
26
|
+
unless s.precursor_list.nil? || s.precursor_list.empty?
|
27
|
+
|
28
|
+
|
29
|
+
#now we print!
|
30
|
+
|
31
|
+
puts "BEGIN IONS"
|
32
|
+
puts "TITLE=#{s.id}"
|
33
|
+
puts "RTINSECONDS=#{s.retention_time}"
|
34
|
+
puts "PEPMASS=#{s.precursor_mass} #{s.precursor_intensity}"
|
35
|
+
|
36
|
+
0.upto(s.mz.length-1) do |i|
|
37
|
+
|
38
|
+
puts "#{sprintf('%5.7f', s.mz[i])} #{sprintf('%4.9f', s.intensity[i])}"
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
puts "END IONS"
|
44
|
+
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
|
data/lib/mzml.rb
CHANGED
@@ -147,13 +147,15 @@ module MzML
|
|
147
147
|
attr_accessor :id, :default_array_length, :spot_id, :type,\
|
148
148
|
:charge, :precursor, :base_peak_mz, :base_peak_intensity, :ms_level, \
|
149
149
|
:high_mz, :low_mz, :title, :tic, :polarity, :representation, :mz_node, :intensity_node, \
|
150
|
-
:mz, :intensity, :precursor_list, :scan_list, :retention_time
|
150
|
+
:mz, :intensity, :precursor_list, :scan_list, :retention_time, :precursor_mass, :precursor_intensity
|
151
|
+
|
151
152
|
attr_reader :node, :params
|
152
153
|
|
153
154
|
# mz & intensity arrays will be done by proper methods maybe.
|
154
155
|
def initialize(spectrum_node)
|
155
156
|
@node = spectrum_node
|
156
157
|
@params = {}
|
158
|
+
@precursor_list = []
|
157
159
|
parse_element()
|
158
160
|
end
|
159
161
|
|
@@ -161,11 +163,11 @@ module MzML
|
|
161
163
|
# This method pulls out all of the annotation from the XML node
|
162
164
|
def parse_element
|
163
165
|
# id
|
164
|
-
@id = @node[:id]
|
165
|
-
@default_array_length = @node[:defaultArrayLength]
|
166
|
-
@spot_id = @node[:spotID]
|
166
|
+
@id = @node.xpath("spectrum")[0][:id]
|
167
|
+
@default_array_length = @node.xpath("spectrum")[0][:defaultArrayLength]
|
168
|
+
@spot_id = @node.xpath("spectrum")[0][:spotID]
|
167
169
|
# now reaching into params
|
168
|
-
@params = @node.xpath("cvParam").inject({}) do |memo,prm|
|
170
|
+
@params = @node.xpath("spectrum/cvParam").inject({}) do |memo,prm|
|
169
171
|
memo[prm[:name]] = prm[:value]
|
170
172
|
memo
|
171
173
|
end
|
@@ -178,13 +180,14 @@ module MzML
|
|
178
180
|
# polarity
|
179
181
|
# representation
|
180
182
|
# precursor list
|
181
|
-
if (@node.xpath("precursorList")[0])
|
183
|
+
if (! @node.xpath("spectrum/precursorList")[0].nil?)
|
182
184
|
parse_precursor_list()
|
185
|
+
get_parent_info()
|
183
186
|
else
|
184
|
-
@precursor_list =
|
187
|
+
@precursor_list = []
|
185
188
|
end
|
186
189
|
# scan list
|
187
|
-
if (@node.xpath("scanList")[0])
|
190
|
+
if (@node.xpath("spectrum/scanList")[0])
|
188
191
|
@scan_list = parse_scan_list()
|
189
192
|
else
|
190
193
|
@scan_list = nil
|
@@ -194,14 +197,27 @@ module MzML
|
|
194
197
|
end
|
195
198
|
|
196
199
|
def parse_precursor_list
|
197
|
-
@
|
200
|
+
@node.css("precursorList > precursor").each do |p|
|
198
201
|
[p[:spectrumRef], p]
|
202
|
+
@precursor_list << p
|
199
203
|
end
|
200
204
|
end
|
201
205
|
|
206
|
+
def get_parent_info
|
207
|
+
|
208
|
+
unless @precursor_list.empty?
|
209
|
+
|
210
|
+
@precursor_mass = @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000744']")[0][:value] unless @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000744']")[0].nil?
|
211
|
+
@precursor_intensity = @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000042']")[0][:value] unless @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000042']")[0].nil?
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
|
216
|
+
end
|
217
|
+
|
202
218
|
def parse_scan_list
|
203
|
-
@scan_list = @node.xpath("scanList/scan")
|
204
|
-
@retention_time = @node.xpath("scanList/scan/cvParam[@
|
219
|
+
@scan_list = @node.xpath("spectrum/scanList/scan")
|
220
|
+
@retention_time = @node.xpath("spectrum/scanList/scan/cvParam[@accession='MS:1000016']")[0][:value] unless @node.xpath("spectrum/scanList/scan/cvParam[@accession='MS:1000016']")[0].nil?
|
205
221
|
end
|
206
222
|
|
207
223
|
def parse_binary_data
|
@@ -214,6 +230,7 @@ module MzML
|
|
214
230
|
# 64-bit floats? default is 32-bit
|
215
231
|
dtype = @mz_node.xpath("cvParam[@accession='MS:1000523']")[0] ? "E*" : "e*"
|
216
232
|
@mz = data.unpack(dtype)
|
233
|
+
|
217
234
|
@intensity_node = @node.xpath("spectrum/binaryDataArrayList/binaryDataArray/cvParam[@accession='MS:1000515']").first.parent
|
218
235
|
data = Base64.decode64(@intensity_node.xpath("binary").text)
|
219
236
|
if @intensity_node.xpath("cvParam[@accession='MS:1000574']")[0]
|
data/mzml.gemspec
CHANGED
@@ -5,13 +5,15 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mzml}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Angel Pizarro"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-04-23}
|
13
|
+
s.default_executable = %q{mzML2mgf.rb}
|
13
14
|
s.description = %q{A non-validating mzML parser. MzML is a standard data format for representing mass spectrometry data.}
|
14
15
|
s.email = %q{angel@delagoya.com}
|
16
|
+
s.executables = ["mzML2mgf.rb"]
|
15
17
|
s.extra_rdoc_files = [
|
16
18
|
"LICENSE",
|
17
19
|
"README.rdoc"
|
@@ -24,6 +26,7 @@ Gem::Specification.new do |s|
|
|
24
26
|
"README.rdoc",
|
25
27
|
"Rakefile",
|
26
28
|
"VERSION",
|
29
|
+
"bin/mzML2mgf.rb",
|
27
30
|
"lib/mzml.rb",
|
28
31
|
"mzml.gemspec",
|
29
32
|
"spec/mzml_spec.rb",
|
@@ -36,7 +39,7 @@ Gem::Specification.new do |s|
|
|
36
39
|
s.homepage = %q{http://github.com/delagoya/mzml}
|
37
40
|
s.rdoc_options = ["--charset=UTF-8"]
|
38
41
|
s.require_paths = ["lib"]
|
39
|
-
s.rubygems_version = %q{1.3.
|
42
|
+
s.rubygems_version = %q{1.3.6}
|
40
43
|
s.summary = %q{A non-validating mzML parser}
|
41
44
|
s.test_files = [
|
42
45
|
"spec/mzml_spec.rb",
|
@@ -48,15 +51,15 @@ Gem::Specification.new do |s|
|
|
48
51
|
s.specification_version = 3
|
49
52
|
|
50
53
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
51
|
-
s.add_development_dependency(%q<rspec>, ["
|
52
|
-
s.add_runtime_dependency(%q<nokogiri>, ["
|
54
|
+
s.add_development_dependency(%q<rspec>, ["= 1.3.0"])
|
55
|
+
s.add_runtime_dependency(%q<nokogiri>, ["= 1.4.1"])
|
53
56
|
else
|
54
|
-
s.add_dependency(%q<rspec>, ["
|
55
|
-
s.add_dependency(%q<nokogiri>, ["
|
57
|
+
s.add_dependency(%q<rspec>, ["= 1.3.0"])
|
58
|
+
s.add_dependency(%q<nokogiri>, ["= 1.4.1"])
|
56
59
|
end
|
57
60
|
else
|
58
|
-
s.add_dependency(%q<rspec>, ["
|
59
|
-
s.add_dependency(%q<nokogiri>, ["
|
61
|
+
s.add_dependency(%q<rspec>, ["= 1.3.0"])
|
62
|
+
s.add_dependency(%q<nokogiri>, ["= 1.4.1"])
|
60
63
|
end
|
61
64
|
end
|
62
65
|
|
data/spec/mzml_spec.rb
CHANGED
@@ -26,6 +26,7 @@ describe MzML do
|
|
26
26
|
s = mz.spectrum(mz.index[:spectrum].keys.first)
|
27
27
|
s.mz.should_not be_nil
|
28
28
|
end
|
29
|
+
|
29
30
|
it "should unmarshall the a 32 byte intensity array" do
|
30
31
|
mz = MzML::Doc.new(@file)
|
31
32
|
s = mz.spectrum(mz.index[:spectrum].keys.first)
|
@@ -42,7 +43,39 @@ describe MzML do
|
|
42
43
|
i.join(", ").should be == mgf.intensity.join(", ")
|
43
44
|
m.join(", ").should be == mgf.mz.join(", ")
|
44
45
|
end
|
46
|
+
|
47
|
+
it "should get a spectrum's id, default array length, retention time" do
|
48
|
+
mz = MzML::Doc.new(@file)
|
49
|
+
s = mz.spectrum(mz.index[:spectrum].keys.first)
|
50
|
+
s.id.should_not be_nil
|
51
|
+
s.default_array_length.should_not be_nil
|
52
|
+
s.retention_time.should_not be_nil
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should get a spectrum's precursor information if it has a precursor" do
|
57
|
+
mz = MzML::Doc.new(@file)
|
58
|
+
found_at_least_one_precursor = false
|
59
|
+
mz.index[:spectrum].keys.each do |k|
|
60
|
+
s = mz.spectrum(k)
|
61
|
+
if ! s.precursor_list.empty?
|
62
|
+
s.precursor_mass.should_not be_nil
|
63
|
+
s.precursor_intensity.should_not be_nil
|
64
|
+
found_at_least_one_precursor = true
|
65
|
+
break
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
found_at_least_one_precursor.should == true
|
71
|
+
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
|
45
77
|
end
|
78
|
+
|
46
79
|
|
47
80
|
context "Given a valid mzML file that uses compression" do
|
48
81
|
it "should unmarshall and uncompress the 64 byte mz array" do
|