mzml 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +4 -4
- data/VERSION +1 -1
- data/bin/mzML2mgf.rb +53 -0
- data/lib/mzml.rb +28 -11
- data/mzml.gemspec +12 -9
- data/spec/mzml_spec.rb +33 -0
- data/spec/small.compressed.mzML +2107 -9043
- data/spec/small.mgf +102 -349
- data/spec/small.mzML +2121 -9046
- data/spec/spec_helper.rb +1 -1
- metadata +36 -20
data/Rakefile
CHANGED
@@ -10,10 +10,10 @@ begin
|
|
10
10
|
gem.email = "angel@delagoya.com"
|
11
11
|
gem.homepage = "http://github.com/delagoya/mzml"
|
12
12
|
gem.authors = ["Angel Pizarro"]
|
13
|
-
gem.add_development_dependency "rspec", "
|
13
|
+
gem.add_development_dependency "rspec", "1.3.0"
|
14
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
-
gem.add_dependency "nokogiri", "
|
16
|
-
|
15
|
+
gem.add_dependency "nokogiri", "1.4.1"
|
16
|
+
|
17
17
|
end
|
18
18
|
Jeweler::GemcutterTasks.new
|
19
19
|
rescue LoadError
|
@@ -39,6 +39,6 @@ task :default => :spec
|
|
39
39
|
require 'yard'
|
40
40
|
YARD::Rake::YardocTask.new do |yardoc|
|
41
41
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
42
|
-
yardoc.options = ["--title", "mzml #{version}", "-r", "README.rdoc"]
|
42
|
+
yardoc.options = ["--title", "mzml #{version}", "-r", "README.rdoc"]
|
43
43
|
yardoc.files = ['README*','lib/**/*.rb']
|
44
44
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/bin/mzML2mgf.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/opt/local/bin/ruby
|
2
|
+
|
3
|
+
################################
|
4
|
+
####
|
5
|
+
##
|
6
|
+
# David Austin - UPENN
|
7
|
+
# converts mzML to MGF format
|
8
|
+
# set up to replicate msconvert, but much slower
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'mzml'
|
13
|
+
|
14
|
+
|
15
|
+
#first load nokogiri document
|
16
|
+
|
17
|
+
mzml = MzML::Doc.new(ARGV[0])
|
18
|
+
|
19
|
+
#now loop through each spectrum.. sort first to be the same as msconvert
|
20
|
+
|
21
|
+
sorted_keys = mzml.parse_index_list[:spectrum].keys.sort{ |x,y| x.split('=')[3].to_i <=> y.split('=')[3].to_i }
|
22
|
+
|
23
|
+
sorted_keys.each do |k|
|
24
|
+
|
25
|
+
s = mzml.spectrum(k)
|
26
|
+
unless s.precursor_list.nil? || s.precursor_list.empty?
|
27
|
+
|
28
|
+
|
29
|
+
#now we print!
|
30
|
+
|
31
|
+
puts "BEGIN IONS"
|
32
|
+
puts "TITLE=#{s.id}"
|
33
|
+
puts "RTINSECONDS=#{s.retention_time}"
|
34
|
+
puts "PEPMASS=#{s.precursor_mass} #{s.precursor_intensity}"
|
35
|
+
|
36
|
+
0.upto(s.mz.length-1) do |i|
|
37
|
+
|
38
|
+
puts "#{sprintf('%5.7f', s.mz[i])} #{sprintf('%4.9f', s.intensity[i])}"
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
puts "END IONS"
|
44
|
+
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
|
data/lib/mzml.rb
CHANGED
@@ -147,13 +147,15 @@ module MzML
|
|
147
147
|
attr_accessor :id, :default_array_length, :spot_id, :type,\
|
148
148
|
:charge, :precursor, :base_peak_mz, :base_peak_intensity, :ms_level, \
|
149
149
|
:high_mz, :low_mz, :title, :tic, :polarity, :representation, :mz_node, :intensity_node, \
|
150
|
-
:mz, :intensity, :precursor_list, :scan_list, :retention_time
|
150
|
+
:mz, :intensity, :precursor_list, :scan_list, :retention_time, :precursor_mass, :precursor_intensity
|
151
|
+
|
151
152
|
attr_reader :node, :params
|
152
153
|
|
153
154
|
# mz & intensity arrays will be done by proper methods maybe.
|
154
155
|
def initialize(spectrum_node)
|
155
156
|
@node = spectrum_node
|
156
157
|
@params = {}
|
158
|
+
@precursor_list = []
|
157
159
|
parse_element()
|
158
160
|
end
|
159
161
|
|
@@ -161,11 +163,11 @@ module MzML
|
|
161
163
|
# This method pulls out all of the annotation from the XML node
|
162
164
|
def parse_element
|
163
165
|
# id
|
164
|
-
@id = @node[:id]
|
165
|
-
@default_array_length = @node[:defaultArrayLength]
|
166
|
-
@spot_id = @node[:spotID]
|
166
|
+
@id = @node.xpath("spectrum")[0][:id]
|
167
|
+
@default_array_length = @node.xpath("spectrum")[0][:defaultArrayLength]
|
168
|
+
@spot_id = @node.xpath("spectrum")[0][:spotID]
|
167
169
|
# now reaching into params
|
168
|
-
@params = @node.xpath("cvParam").inject({}) do |memo,prm|
|
170
|
+
@params = @node.xpath("spectrum/cvParam").inject({}) do |memo,prm|
|
169
171
|
memo[prm[:name]] = prm[:value]
|
170
172
|
memo
|
171
173
|
end
|
@@ -178,13 +180,14 @@ module MzML
|
|
178
180
|
# polarity
|
179
181
|
# representation
|
180
182
|
# precursor list
|
181
|
-
if (@node.xpath("precursorList")[0])
|
183
|
+
if (! @node.xpath("spectrum/precursorList")[0].nil?)
|
182
184
|
parse_precursor_list()
|
185
|
+
get_parent_info()
|
183
186
|
else
|
184
|
-
@precursor_list =
|
187
|
+
@precursor_list = []
|
185
188
|
end
|
186
189
|
# scan list
|
187
|
-
if (@node.xpath("scanList")[0])
|
190
|
+
if (@node.xpath("spectrum/scanList")[0])
|
188
191
|
@scan_list = parse_scan_list()
|
189
192
|
else
|
190
193
|
@scan_list = nil
|
@@ -194,14 +197,27 @@ module MzML
|
|
194
197
|
end
|
195
198
|
|
196
199
|
def parse_precursor_list
|
197
|
-
@
|
200
|
+
@node.css("precursorList > precursor").each do |p|
|
198
201
|
[p[:spectrumRef], p]
|
202
|
+
@precursor_list << p
|
199
203
|
end
|
200
204
|
end
|
201
205
|
|
206
|
+
def get_parent_info
|
207
|
+
|
208
|
+
unless @precursor_list.empty?
|
209
|
+
|
210
|
+
@precursor_mass = @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000744']")[0][:value] unless @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000744']")[0].nil?
|
211
|
+
@precursor_intensity = @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000042']")[0][:value] unless @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000042']")[0].nil?
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
|
216
|
+
end
|
217
|
+
|
202
218
|
def parse_scan_list
|
203
|
-
@scan_list = @node.xpath("scanList/scan")
|
204
|
-
@retention_time = @node.xpath("scanList/scan/cvParam[@
|
219
|
+
@scan_list = @node.xpath("spectrum/scanList/scan")
|
220
|
+
@retention_time = @node.xpath("spectrum/scanList/scan/cvParam[@accession='MS:1000016']")[0][:value] unless @node.xpath("spectrum/scanList/scan/cvParam[@accession='MS:1000016']")[0].nil?
|
205
221
|
end
|
206
222
|
|
207
223
|
def parse_binary_data
|
@@ -214,6 +230,7 @@ module MzML
|
|
214
230
|
# 64-bit floats? default is 32-bit
|
215
231
|
dtype = @mz_node.xpath("cvParam[@accession='MS:1000523']")[0] ? "E*" : "e*"
|
216
232
|
@mz = data.unpack(dtype)
|
233
|
+
|
217
234
|
@intensity_node = @node.xpath("spectrum/binaryDataArrayList/binaryDataArray/cvParam[@accession='MS:1000515']").first.parent
|
218
235
|
data = Base64.decode64(@intensity_node.xpath("binary").text)
|
219
236
|
if @intensity_node.xpath("cvParam[@accession='MS:1000574']")[0]
|
data/mzml.gemspec
CHANGED
@@ -5,13 +5,15 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mzml}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Angel Pizarro"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-04-23}
|
13
|
+
s.default_executable = %q{mzML2mgf.rb}
|
13
14
|
s.description = %q{A non-validating mzML parser. MzML is a standard data format for representing mass spectrometry data.}
|
14
15
|
s.email = %q{angel@delagoya.com}
|
16
|
+
s.executables = ["mzML2mgf.rb"]
|
15
17
|
s.extra_rdoc_files = [
|
16
18
|
"LICENSE",
|
17
19
|
"README.rdoc"
|
@@ -24,6 +26,7 @@ Gem::Specification.new do |s|
|
|
24
26
|
"README.rdoc",
|
25
27
|
"Rakefile",
|
26
28
|
"VERSION",
|
29
|
+
"bin/mzML2mgf.rb",
|
27
30
|
"lib/mzml.rb",
|
28
31
|
"mzml.gemspec",
|
29
32
|
"spec/mzml_spec.rb",
|
@@ -36,7 +39,7 @@ Gem::Specification.new do |s|
|
|
36
39
|
s.homepage = %q{http://github.com/delagoya/mzml}
|
37
40
|
s.rdoc_options = ["--charset=UTF-8"]
|
38
41
|
s.require_paths = ["lib"]
|
39
|
-
s.rubygems_version = %q{1.3.
|
42
|
+
s.rubygems_version = %q{1.3.6}
|
40
43
|
s.summary = %q{A non-validating mzML parser}
|
41
44
|
s.test_files = [
|
42
45
|
"spec/mzml_spec.rb",
|
@@ -48,15 +51,15 @@ Gem::Specification.new do |s|
|
|
48
51
|
s.specification_version = 3
|
49
52
|
|
50
53
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
51
|
-
s.add_development_dependency(%q<rspec>, ["
|
52
|
-
s.add_runtime_dependency(%q<nokogiri>, ["
|
54
|
+
s.add_development_dependency(%q<rspec>, ["= 1.3.0"])
|
55
|
+
s.add_runtime_dependency(%q<nokogiri>, ["= 1.4.1"])
|
53
56
|
else
|
54
|
-
s.add_dependency(%q<rspec>, ["
|
55
|
-
s.add_dependency(%q<nokogiri>, ["
|
57
|
+
s.add_dependency(%q<rspec>, ["= 1.3.0"])
|
58
|
+
s.add_dependency(%q<nokogiri>, ["= 1.4.1"])
|
56
59
|
end
|
57
60
|
else
|
58
|
-
s.add_dependency(%q<rspec>, ["
|
59
|
-
s.add_dependency(%q<nokogiri>, ["
|
61
|
+
s.add_dependency(%q<rspec>, ["= 1.3.0"])
|
62
|
+
s.add_dependency(%q<nokogiri>, ["= 1.4.1"])
|
60
63
|
end
|
61
64
|
end
|
62
65
|
|
data/spec/mzml_spec.rb
CHANGED
@@ -26,6 +26,7 @@ describe MzML do
|
|
26
26
|
s = mz.spectrum(mz.index[:spectrum].keys.first)
|
27
27
|
s.mz.should_not be_nil
|
28
28
|
end
|
29
|
+
|
29
30
|
it "should unmarshall the a 32 byte intensity array" do
|
30
31
|
mz = MzML::Doc.new(@file)
|
31
32
|
s = mz.spectrum(mz.index[:spectrum].keys.first)
|
@@ -42,7 +43,39 @@ describe MzML do
|
|
42
43
|
i.join(", ").should be == mgf.intensity.join(", ")
|
43
44
|
m.join(", ").should be == mgf.mz.join(", ")
|
44
45
|
end
|
46
|
+
|
47
|
+
it "should get a spectrum's id, default array length, retention time" do
|
48
|
+
mz = MzML::Doc.new(@file)
|
49
|
+
s = mz.spectrum(mz.index[:spectrum].keys.first)
|
50
|
+
s.id.should_not be_nil
|
51
|
+
s.default_array_length.should_not be_nil
|
52
|
+
s.retention_time.should_not be_nil
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should get a spectrum's precursor information if it has a precursor" do
|
57
|
+
mz = MzML::Doc.new(@file)
|
58
|
+
found_at_least_one_precursor = false
|
59
|
+
mz.index[:spectrum].keys.each do |k|
|
60
|
+
s = mz.spectrum(k)
|
61
|
+
if ! s.precursor_list.empty?
|
62
|
+
s.precursor_mass.should_not be_nil
|
63
|
+
s.precursor_intensity.should_not be_nil
|
64
|
+
found_at_least_one_precursor = true
|
65
|
+
break
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
found_at_least_one_precursor.should == true
|
71
|
+
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
|
45
77
|
end
|
78
|
+
|
46
79
|
|
47
80
|
context "Given a valid mzML file that uses compression" do
|
48
81
|
it "should unmarshall and uncompress the 64 byte mz array" do
|