mzml 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -10,10 +10,10 @@ begin
10
10
  gem.email = "angel@delagoya.com"
11
11
  gem.homepage = "http://github.com/delagoya/mzml"
12
12
  gem.authors = ["Angel Pizarro"]
13
- gem.add_development_dependency "rspec", ">= 1.2.9"
13
+ gem.add_development_dependency "rspec", "1.3.0"
14
14
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- gem.add_dependency "nokogiri", ">= 1.3.3"
16
-
15
+ gem.add_dependency "nokogiri", "1.4.1"
16
+
17
17
  end
18
18
  Jeweler::GemcutterTasks.new
19
19
  rescue LoadError
@@ -39,6 +39,6 @@ task :default => :spec
39
39
  require 'yard'
40
40
  YARD::Rake::YardocTask.new do |yardoc|
41
41
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
42
- yardoc.options = ["--title", "mzml #{version}", "-r", "README.rdoc"]
42
+ yardoc.options = ["--title", "mzml #{version}", "-r", "README.rdoc"]
43
43
  yardoc.files = ['README*','lib/**/*.rb']
44
44
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/bin/mzML2mgf.rb ADDED
@@ -0,0 +1,53 @@
1
+ #!/opt/local/bin/ruby
2
+
3
+ ################################
4
+ ####
5
+ ##
6
+ # David Austin - UPENN
7
+ # converts mzML to MGF format
8
+ # set up to replicate msconvert but muuchh slower
9
+ #
10
+
11
+ require 'rubygems'
12
+ require 'mzml'
13
+
14
+
15
+ #first load nokogiri document
16
+
17
+ mzml = MzML::Doc.new(ARGV[0])
18
+
19
+ #now loop through each spectrum.. sort first to be the same as msconvert
20
+
21
+ sorted_keys = mzml.parse_index_list[:spectrum].keys.sort{ |x,y| x.split('=')[3].to_i <=> y.split('=')[3].to_i }
22
+
23
+ sorted_keys.each do |k|
24
+
25
+ s = mzml.spectrum(k)
26
+ unless s.precursor_list.nil? || s.precursor_list.empty?
27
+
28
+
29
+ #now we print!
30
+
31
+ puts "BEGIN IONS"
32
+ puts "TITLE=#{s.id}"
33
+ puts "RTINSECONDS=#{s.retention_time}"
34
+ puts "PEPMASS=#{s.precursor_mass} #{s.precursor_intensity}"
35
+
36
+ 0.upto(s.mz.length-1) do |i|
37
+
38
+ puts "#{sprintf('%5.7f', s.mz[i])} #{sprintf('%4.9f', s.intensity[i])}"
39
+
40
+ end
41
+
42
+
43
+ puts "END IONS"
44
+
45
+
46
+ end
47
+
48
+
49
+
50
+ end
51
+
52
+
53
+
data/lib/mzml.rb CHANGED
@@ -147,13 +147,15 @@ module MzML
147
147
  attr_accessor :id, :default_array_length, :spot_id, :type,\
148
148
  :charge, :precursor, :base_peak_mz, :base_peak_intensity, :ms_level, \
149
149
  :high_mz, :low_mz, :title, :tic, :polarity, :representation, :mz_node, :intensity_node, \
150
- :mz, :intensity, :precursor_list, :scan_list, :retention_time
150
+ :mz, :intensity, :precursor_list, :scan_list, :retention_time, :precursor_mass, :precursor_intensity
151
+
151
152
  attr_reader :node, :params
152
153
 
153
154
  # mz & intensity arrays will be done by proper methods maybe.
154
155
  def initialize(spectrum_node)
155
156
  @node = spectrum_node
156
157
  @params = {}
158
+ @precursor_list = []
157
159
  parse_element()
158
160
  end
159
161
 
@@ -161,11 +163,11 @@ module MzML
161
163
  # This method pulls out all of the annotation from the XML node
162
164
  def parse_element
163
165
  # id
164
- @id = @node[:id]
165
- @default_array_length = @node[:defaultArrayLength]
166
- @spot_id = @node[:spotID]
166
+ @id = @node.xpath("spectrum")[0][:id]
167
+ @default_array_length = @node.xpath("spectrum")[0][:defaultArrayLength]
168
+ @spot_id = @node.xpath("spectrum")[0][:spotID]
167
169
  # now reaching into params
168
- @params = @node.xpath("cvParam").inject({}) do |memo,prm|
170
+ @params = @node.xpath("spectrum/cvParam").inject({}) do |memo,prm|
169
171
  memo[prm[:name]] = prm[:value]
170
172
  memo
171
173
  end
@@ -178,13 +180,14 @@ module MzML
178
180
  # polarity
179
181
  # representation
180
182
  # precursor list
181
- if (@node.xpath("precursorList")[0])
183
+ if (! @node.xpath("spectrum/precursorList")[0].nil?)
182
184
  parse_precursor_list()
185
+ get_parent_info()
183
186
  else
184
- @precursor_list = nil
187
+ @precursor_list = []
185
188
  end
186
189
  # scan list
187
- if (@node.xpath("scanList")[0])
190
+ if (@node.xpath("spectrum/scanList")[0])
188
191
  @scan_list = parse_scan_list()
189
192
  else
190
193
  @scan_list = nil
@@ -194,14 +197,27 @@ module MzML
194
197
  end
195
198
 
196
199
  def parse_precursor_list
197
- @precursor_list = @node.css("precursorList > precursor").each do |p|
200
+ @node.css("precursorList > precursor").each do |p|
198
201
  [p[:spectrumRef], p]
202
+ @precursor_list << p
199
203
  end
200
204
  end
201
205
 
206
+ def get_parent_info
207
+
208
+ unless @precursor_list.empty?
209
+
210
+ @precursor_mass = @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000744']")[0][:value] unless @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000744']")[0].nil?
211
+ @precursor_intensity = @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000042']")[0][:value] unless @precursor_list[0].xpath("selectedIonList/selectedIon/cvParam[@accession='MS:1000042']")[0].nil?
212
+
213
+ end
214
+
215
+
216
+ end
217
+
202
218
  def parse_scan_list
203
- @scan_list = @node.xpath("scanList/scan")
204
- @retention_time = @node.xpath("scanList/scan/cvParam[@accesion='MS:1000016']")[0]
219
+ @scan_list = @node.xpath("spectrum/scanList/scan")
220
+ @retention_time = @node.xpath("spectrum/scanList/scan/cvParam[@accession='MS:1000016']")[0][:value] unless @node.xpath("spectrum/scanList/scan/cvParam[@accession='MS:1000016']")[0].nil?
205
221
  end
206
222
 
207
223
  def parse_binary_data
@@ -214,6 +230,7 @@ module MzML
214
230
  # 64-bit floats? default is 32-bit
215
231
  dtype = @mz_node.xpath("cvParam[@accession='MS:1000523']")[0] ? "E*" : "e*"
216
232
  @mz = data.unpack(dtype)
233
+
217
234
  @intensity_node = @node.xpath("spectrum/binaryDataArrayList/binaryDataArray/cvParam[@accession='MS:1000515']").first.parent
218
235
  data = Base64.decode64(@intensity_node.xpath("binary").text)
219
236
  if @intensity_node.xpath("cvParam[@accession='MS:1000574']")[0]
data/mzml.gemspec CHANGED
@@ -5,13 +5,15 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{mzml}
8
- s.version = "0.1.0"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Angel Pizarro"]
12
- s.date = %q{2009-12-06}
12
+ s.date = %q{2010-04-23}
13
+ s.default_executable = %q{mzML2mgf.rb}
13
14
  s.description = %q{A non-validating mzML parser. MzML is a standard data format for representing mass spectrometry data.}
14
15
  s.email = %q{angel@delagoya.com}
16
+ s.executables = ["mzML2mgf.rb"]
15
17
  s.extra_rdoc_files = [
16
18
  "LICENSE",
17
19
  "README.rdoc"
@@ -24,6 +26,7 @@ Gem::Specification.new do |s|
24
26
  "README.rdoc",
25
27
  "Rakefile",
26
28
  "VERSION",
29
+ "bin/mzML2mgf.rb",
27
30
  "lib/mzml.rb",
28
31
  "mzml.gemspec",
29
32
  "spec/mzml_spec.rb",
@@ -36,7 +39,7 @@ Gem::Specification.new do |s|
36
39
  s.homepage = %q{http://github.com/delagoya/mzml}
37
40
  s.rdoc_options = ["--charset=UTF-8"]
38
41
  s.require_paths = ["lib"]
39
- s.rubygems_version = %q{1.3.5}
42
+ s.rubygems_version = %q{1.3.6}
40
43
  s.summary = %q{A non-validating mzML parser}
41
44
  s.test_files = [
42
45
  "spec/mzml_spec.rb",
@@ -48,15 +51,15 @@ Gem::Specification.new do |s|
48
51
  s.specification_version = 3
49
52
 
50
53
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
51
- s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
52
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.3"])
54
+ s.add_development_dependency(%q<rspec>, ["= 1.3.0"])
55
+ s.add_runtime_dependency(%q<nokogiri>, ["= 1.4.1"])
53
56
  else
54
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
55
- s.add_dependency(%q<nokogiri>, [">= 1.3.3"])
57
+ s.add_dependency(%q<rspec>, ["= 1.3.0"])
58
+ s.add_dependency(%q<nokogiri>, ["= 1.4.1"])
56
59
  end
57
60
  else
58
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
59
- s.add_dependency(%q<nokogiri>, [">= 1.3.3"])
61
+ s.add_dependency(%q<rspec>, ["= 1.3.0"])
62
+ s.add_dependency(%q<nokogiri>, ["= 1.4.1"])
60
63
  end
61
64
  end
62
65
 
data/spec/mzml_spec.rb CHANGED
@@ -26,6 +26,7 @@ describe MzML do
26
26
  s = mz.spectrum(mz.index[:spectrum].keys.first)
27
27
  s.mz.should_not be_nil
28
28
  end
29
+
29
30
  it "should unmarshall the a 32 byte intensity array" do
30
31
  mz = MzML::Doc.new(@file)
31
32
  s = mz.spectrum(mz.index[:spectrum].keys.first)
@@ -42,7 +43,39 @@ describe MzML do
42
43
  i.join(", ").should be == mgf.intensity.join(", ")
43
44
  m.join(", ").should be == mgf.mz.join(", ")
44
45
  end
46
+
47
+ it "should get a spectrum's id, default array length, retention time" do
48
+ mz = MzML::Doc.new(@file)
49
+ s = mz.spectrum(mz.index[:spectrum].keys.first)
50
+ s.id.should_not be_nil
51
+ s.default_array_length.should_not be_nil
52
+ s.retention_time.should_not be_nil
53
+
54
+ end
55
+
56
+ it "should get a spectrum's precursor information if it has a precursor" do
57
+ mz = MzML::Doc.new(@file)
58
+ found_at_least_one_precursor = false
59
+ mz.index[:spectrum].keys.each do |k|
60
+ s = mz.spectrum(k)
61
+ if ! s.precursor_list.empty?
62
+ s.precursor_mass.should_not be_nil
63
+ s.precursor_intensity.should_not be_nil
64
+ found_at_least_one_precursor = true
65
+ break
66
+ end
67
+
68
+ end
69
+
70
+ found_at_least_one_precursor.should == true
71
+
72
+
73
+ end
74
+
75
+
76
+
45
77
  end
78
+
46
79
 
47
80
  context "Given a valid mzML file that uses compression" do
48
81
  it "should unmarshall and uncompress the 64 byte mz array" do