ms-msrun 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/.gitignore +3 -0
  2. data/.gitmodules +3 -0
  3. data/History +18 -0
  4. data/{README → README.rdoc} +0 -0
  5. data/Rakefile +93 -107
  6. data/VERSION +1 -0
  7. data/lib/lmat.rb +141 -39
  8. data/lib/ms/msrun/nokogiri.rb +1 -0
  9. data/lib/ms/msrun/search_dev_notes.txt +47 -0
  10. data/spec/lmat_spec.rb +87 -11
  11. data/spec/metadata/opd1/000.v1.mzXML.yml +3 -0
  12. data/spec/metadata/opd1/000.v2.1.mzXML.yml +3 -0
  13. data/spec/metadata/opd1/020.mzData.xml.yml +3 -0
  14. data/spec/metadata/opd1/020.v2.0.readw.mzXML.yml +3 -0
  15. data/spec/ms/msrun/hpricot.rb +38 -0
  16. data/spec/ms/msrun/index_spec.rb +12 -13
  17. data/spec/ms/msrun/search_spec.rb +5 -4
  18. data/spec/ms/msrun/sha1_spec.rb +3 -6
  19. data/spec/ms/msrun/test_parsing_xml_frags/parse_test.rb +25 -0
  20. data/spec/ms/msrun/test_parsing_xml_frags/test1.xml +5 -0
  21. data/spec/ms/msrun/test_parsing_xml_frags/test2.xml +6 -0
  22. data/spec/ms/msrun/test_parsing_xml_frags/test3.xml +4 -0
  23. data/spec/ms/msrun/test_parsing_xml_frags/test4.xml +11 -0
  24. data/spec/ms/msrun/test_parsing_xml_frags/test_failures.rb +47 -0
  25. data/spec/ms/msrun_bm.rb +22 -0
  26. data/spec/ms/msrun_spec.rb +90 -109
  27. data/spec/ms/scan_spec.rb +5 -6
  28. data/spec/ms/spectrum/compare_spec.rb +31 -28
  29. data/spec/ms/spectrum/filter_spec.rb +15 -13
  30. data/spec/spec_helper.rb +21 -0
  31. data/spec/testfiles/lmat/tmp1.lmat +0 -0
  32. data/spec/testfiles/lmat/tmp1.lmata +44 -0
  33. data/spec/testfiles/lmat/tmp2.lmata +11 -0
  34. data/spec/testfiles/opd1/000.v1.mzXML +418 -0
  35. data/spec/testfiles/opd1/000.v1.mzXML.key.yml +51 -0
  36. data/spec/testfiles/opd1/000.v2.1.mzXML +382 -0
  37. data/spec/testfiles/opd1/000.v2.1.mzXML.key.yml +51 -0
  38. data/spec/testfiles/opd1/020.mzData.xml +683 -0
  39. data/spec/testfiles/opd1/020.mzData.xml.key.yml +43 -0
  40. data/spec/testfiles/opd1/020.v2.0.readw.mzXML +382 -0
  41. data/spec/testfiles/opd1/020.v2.0.readw.mzXML.key.yml +46 -0
  42. metadata +85 -34
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ rdoc
2
+ *.swp
3
+ pkg
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "submodules/ms-testdata"]
2
+ path = submodules/ms-testdata
3
+ url = git@github.com:bahuvrihi/ms-testdata.git
data/History ADDED
@@ -0,0 +1,18 @@
1
+ == 0.1.1 / 2009-11-19
2
+ * using unified mspire template
3
+ * using spec-more (bacon) rather than minitest-spec
4
+
5
+ == 0.1.0
6
+ * switched to nokogiri as the xml parser
7
+ * using mzxml indices to read scans (lazy reading of scans)
8
+ * semi-lazy reading of spectrum (lazy string)
9
+ * the string is not converted into a spectrum until an mz value is asked for
10
+ * lots of interface changes
11
+
12
+ == 0.0.1
13
+ * initial release as gem
14
+ * some differences from mspire:
15
+ * all spectra are read in lazily
16
+ * cleaned up code a lot
17
+ * uses Struct instead of Arrayclass to minimize dependencies
18
+ * bumps task of providing indices for peak data onto AXML
File without changes
data/Rakefile CHANGED
@@ -1,128 +1,114 @@
1
- require 'rake'
2
1
  require 'rubygems'
3
- require 'rake/rdoctask'
4
- require 'rake/gempackagetask'
2
+ require 'rake'
3
+ require 'jeweler'
5
4
  require 'rake/testtask'
6
- require 'rake/clean'
7
- require 'fileutils'
8
-
9
- ###############################################
10
- # GLOBAL
11
- ###############################################
5
+ require 'rcov/rcovtask'
12
6
 
13
- FL = FileList
14
7
  NAME = "ms-msrun"
15
- FU = FileUtils
16
-
17
- readme = "README"
18
-
19
- rdoc_dir = 'rdoc'
20
- rdoc_extra_includes = [readme, "LICENSE"]
21
- rdoc_options = ['--main', readme, '--title', NAME, '--line-numbers', '--inline-source']
22
-
23
- lib_files = FL["lib/**/*.rb"]
24
- dist_files = lib_files + FL[readme, "LICENSE", "Rakefile", "{specs}/**/*"]
25
- changelog = 'CHANGELOG'
26
-
27
- ###############################################
28
- # DOC
29
- ###############################################
30
- Rake::RDocTask.new do |rd|
31
- rd.rdoc_dir = rdoc_dir
32
- rd.main = readme
33
- rd.rdoc_files.include( rdoc_extra_includes )
34
- rd.rdoc_files.include( lib_files.uniq )
35
- rd.options.push( *rdoc_options )
8
+ WEBSITE_BASE = "website"
9
+ WEBSITE_OUTPUT = WEBSITE_BASE + "/output"
10
+
11
+ gemspec = Gem::Specification.new do |s|
12
+ s.name = NAME
13
+ s.authors = ["John T. Prince"]
14
+ s.email = "jtprince@gmail.com"
15
+ s.homepage = "http://jtprince.github.com/" + NAME + "/"
16
+ s.summary = "an mspire library for working with LC/MS runs (mzxml, mzData, mzML)"
17
+ s.description = 'A library for working with LC/MS runs. Part of mspire. Has parsers for mzXML v1, 2, and 3, mzData (currently broken) and mzML (planned). Can convert to commonly desired search output (such as mgf). Fast random access of scans, and fast reading of the entire file.'
18
+ s.rubyforge_project = 'mspire'
19
+ s.add_dependency 'ms-core'
20
+ s.add_dependency 'nokogiri'
21
+ s.add_dependency 'narray'
22
+ s.add_development_dependency("spec-more")
36
23
  end
37
24
 
25
+ Jeweler::Tasks.new(gemspec)
38
26
 
39
- desc "Publish RDoc to RubyForge"
40
- task :publish_rdoc => [:rdoc] do
41
- require 'yaml'
42
-
43
- config = YAML.load(File.read(File.expand_path("~/.rubyforge/user-config.yml")))
44
- host = "#{config["username"]}@rubyforge.org"
45
-
46
- rsync_args = "-v -c -r"
47
- remote_dir = "/var/www/gforge-projects/mspire/projects/#{NAME}"
48
- local_dir = "rdoc"
49
-
50
- sh %{rsync #{rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
27
+ Rake::TestTask.new(:spec) do |spec|
28
+ ENV['TEST'] = ENV['SPEC'] if ENV['SPEC']
29
+ spec.libs << 'lib' << 'spec'
30
+ spec.pattern = 'spec/**/*_spec.rb'
31
+ spec.verbose = true
51
32
  end
52
33
 
53
- #desc "create and upload docs to server"
54
- #task :upload_docs => [:rdoc] do
55
- # sh "scp -r #{rdoc_dir}/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/projects/ms-msrun/"
56
- #end
57
-
58
- ###############################################
59
- # TESTS
60
- ###############################################
61
-
62
- desc 'Default: Run specs.'
63
- task :default => :spec
64
-
65
- desc 'Run specs.'
66
- Rake::TestTask.new(:spec) do |t|
67
- #t.verbose = true
68
- #t.warning = true
69
- ENV['TEST'] = ENV['SPEC'] if ENV['SPEC']
70
- t.libs = ['lib']
71
- t.test_files = Dir.glob( File.join('spec', ENV['pattern'] || '**/*_spec.rb') )
72
- #t.options = "-v"
34
+ Rcov::RcovTask.new do |spec|
35
+ spec.libs << 'spec'
36
+ spec.pattern = 'spec/**/*_spec.rb'
37
+ spec.verbose = true
73
38
  end
74
39
 
75
- ###############################################
76
- # PACKAGE / INSTALL / UNINSTALL
77
- ###############################################
78
-
79
- tm = Time.now
80
- gemspec = Gem::Specification.new do |t|
81
- description = 'A library for working with LC/MS runs. Part of mspire. Has parsers for mzXML v1, 2, and 3, mzData and mzML. Can convert to commonly desired search output (such as mgf)'
82
- summary = "A library for working with LC/MS runs"
83
- t.platform = Gem::Platform::RUBY
84
- t.name = NAME
85
- t.version = IO.readlines(changelog).grep(/##.*version/).pop.split(/\s+/).last.chomp
86
- t.homepage = 'http://mspire.rubyforge.org/projects/ms-msrun'
87
- t.rubyforge_project = 'mspire'
88
- t.summary = summary
89
- t.date = "#{tm.year}-#{tm.month}-#{tm.day}"
90
- t.email = "jtprince@gmail.com"
91
- t.description = description
92
- t.has_rdoc = true
93
- t.authors = ["John Prince"]
94
- t.files = dist_files
95
- t.add_dependency 'ms-core'
96
- t.add_dependency 'nokogiri'
97
- t.add_dependency 'runarray'
98
- t.rdoc_options = rdoc_options
99
- t.extra_rdoc_files = rdoc_extra_includes
100
- t.executables = FL["bin/*"].map {|file| File.basename(file) }
101
- t.test_files = FL["spec/**/*_spec.rb"]
40
+ def rdoc_redirect(base_rdoc_output_dir, package_website_page, version)
41
+ content = %Q{
42
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
43
+ <html><head><title>mspire: } + NAME + %Q{rdoc</title>
44
+ <meta http-equiv="REFRESH" content="0;url=#{package_website_page}/rdoc/#{version}/">
45
+ </head> </html>
46
+ }
47
+ FileUtils.mkpath(base_rdoc_output_dir)
48
+ File.open(base_rdoc_output_dir + "/index.html", 'w') {|out| out.print content }
102
49
  end
103
50
 
104
- desc "Create packages."
105
- Rake::GemPackageTask.new(gemspec) do |pkg|
106
- #pkg.need_zip = true
107
- #pkg.need_tar = true
51
+ require 'rake/rdoctask'
52
+ Rake::RDocTask.new do |rdoc|
53
+ base_rdoc_output_dir = WEBSITE_OUTPUT + '/rdoc'
54
+ version = File.read('VERSION')
55
+ rdoc.rdoc_dir = 'rdoc'
56
+ rdoc.title = NAME + ' ' + version
57
+ rdoc.rdoc_files.include('README*')
58
+ rdoc.rdoc_files.include('lib/**/*.rb')
108
59
  end
109
60
 
110
- task :remove_pkg do
111
- FileUtils.rm_rf "pkg"
61
+ task :create_redirect do
62
+ base_rdoc_output_dir = WEBSITE_OUTPUT + '/rdoc'
63
+ rdoc_redirect(base_rdoc_output_dir, gemspec.homepage,version)
112
64
  end
113
65
 
114
- task :install => [:reinstall]
115
-
116
- desc "uninstalls the package, packages a fresh one, and installs"
117
- task :reinstall => [:remove_pkg, :clean, :package] do
118
- reply = `#{$gemcmd} list -l #{NAME}`
119
- if reply.include?(NAME + " (")
120
- %x( #{$gemcmd} uninstall -a -x #{NAME} )
66
+ namespace :website do
67
+ desc "checkout and configure the gh-pages submodule (assumes you have it)"
68
+ task :submodule_update do
69
+ if File.exist?(WEBSITE_OUTPUT + "/.git")
70
+ puts "!! not doing anything, #{WEBSITE_OUTPUT + "/.git"} already exists !!"
71
+ else
72
+
73
+ puts "(not sure why this won't work programmatically)"
74
+ puts "################################################"
75
+ puts "[Execute these commands]"
76
+ puts "################################################"
77
+ puts "git submodule init"
78
+ puts "git submodule update"
79
+ puts "pushd #{WEBSITE_OUTPUT}"
80
+ puts "git co --track -b gh-pages origin/gh-pages ;"
81
+ puts "popd"
82
+ puts "################################################"
83
+
84
+ # not sure why this won't work!
85
+ #%x{git submodule init}
86
+ #%x{git submodule update}
87
+ #Dir.chdir(WEBSITE_OUTPUT) do
88
+ # %x{git co --track -b gh-pages origin/gh-pages ;}
89
+ #end
90
+ end
121
91
  end
122
- FileUtils.cd("pkg") do
123
- cmd = "#{$gemcmd} install #{NAME}*.gem"
124
- puts "EXECUTING: #{cmd}"
125
- system cmd
92
+
93
+ desc "setup your initial gh-pages"
94
+ task :init_ghpages do
95
+ puts "################################################"
96
+ puts "[Execute these commands]"
97
+ puts "################################################"
98
+ puts "git symbolic-ref HEAD refs/heads/gh-pages"
99
+ puts "rm .git/index"
100
+ puts "git clean -fdx"
101
+ puts 'echo "Hello" > index.html'
102
+ puts "git add ."
103
+ puts 'git commit -a -m "my first gh-page"'
104
+ puts "git push origin gh-pages"
126
105
  end
106
+
127
107
  end
128
108
 
109
+ task :default => :spec
110
+
111
+ task :build => :gemspec
112
+
113
+ # credit: Rakefile modeled after Jeweler's
114
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
data/lib/lmat.rb CHANGED
@@ -1,13 +1,22 @@
1
1
 
2
- require 'runarray'
3
- include Runarray
2
+ require 'gsl'
3
+ require 'narray'
4
+ #include Runarray
5
+
6
+ include Math
7
+ include GSL
4
8
 
5
9
  ## Labeled matrix
6
10
 
7
11
  class Lmat
12
+
13
+ NUM_BYTE_SIZE = 4
14
+
15
+ # an narray object numerically labelling the m-axis
8
16
  attr_accessor :mvec
17
+ # an narray object numerically labelling the n-axis
9
18
  attr_accessor :nvec
10
- # an array of narray objects
19
+ # an mvec.size X nvec.size narray
11
20
  attr_accessor :mat
12
21
 
13
22
  ## Takes an array of narray objects
@@ -17,28 +26,66 @@ class Lmat
17
26
  @nvec = nvec
18
27
  end
19
28
 
20
- def max
21
- max = mat[0][0]
22
- mat.each do |row|
23
- row.each do |v|
24
- max = v if v > max
25
- end
29
+ class << self
30
+ def [](*args)
31
+ mat = NArray[*args]
32
+ (nlen, mlen) = mat.shape
33
+ obj = new(mat)
34
+ obj.mvec = NArray[0...mlen]
35
+ obj.nvec = NArray[0...nlen]
36
+ obj
26
37
  end
27
- max
38
+ end
39
+
40
+ def [](*args)
41
+ @mat[*args]
42
+ end
43
+
44
+ def []=(*args)
45
+ @mat.send('[]=', *args)
46
+ end
47
+
48
+ def slice=(*args)
49
+ @mat.send(:slice, *args)
50
+ end
51
+
52
+ def slice(*args)
53
+ @mat.slice(*args)
54
+ end
55
+
56
+ def inspect
57
+ # TODO: needs work (see ruport pivoted table output)
58
+ ["nvec=#{@nvec.inspect}", "mvec=#{@mvec.inspect}", "mat=#{@mat.inspect}"].join("\n")
59
+
60
+ start = ' ' << nvec.to_a.join(", ") << "\n"
61
+ start << (" " + ("-" * (start.size - 4))) << "\n"
62
+ mvec[].indgen!.each do |i|
63
+ start << "#{mvec[i]} | " << @mat[true, i].to_a.join(" ") << "\n"
64
+ end
65
+ start
66
+ end
67
+
68
+ def max
69
+ @mat.max
70
+ end
71
+
72
+ def dup
73
+ a = Lmat.new
74
+ a.mvec = self.mvec[]
75
+ a.nvec = self.nvec[]
76
+ a.mat = self.mat[]
77
+ a
28
78
  end
29
79
 
30
80
  # returns self
31
81
  def from_lmat(file)
32
- string = IO.read(file)
33
- mdim = string.unpack("i")
34
- @mvec = NArray.new(string.unpack("f#{mdim}"))
35
- ndim = string.unpack("i")
36
- @nvec = NArray.new(string.unpack("f#{ndim}"))
37
- rows = []
38
- mdim.times do
39
- rows << string.unpack("f#{ndim}")
82
+ File.open(file) do |io|
83
+ (@mvec, @nvec) = [true, true].map do |iv|
84
+ _len = io.read(4).unpack('I').first
85
+ NArray.to_na( io.read(_len*NUM_BYTE_SIZE), 'sfloat' )
86
+ end
87
+ @mat = NArray.to_na(io.read, 'sfloat', @nvec.size, @mvec.size)
40
88
  end
41
- @mat = rows
42
89
  self
43
90
  end
44
91
 
@@ -48,17 +95,16 @@ class Lmat
48
95
  File.open(file) do |io|
49
96
  num_m = io.readline.to_i
50
97
  mline = io.readline.chomp
51
- @mvec = NArray.new( mline.split(' ').map {|v| v.to_f } )
98
+ @mvec = NArray.to_na( mline.split(' ').map {|v| v.to_f } )
52
99
  raise RuntimeError, "bad m vec size" if mvec.size != num_m
53
100
  num_n = io.readline.to_i
54
101
  nline = io.readline.chomp
55
- @nvec = NArray.new( nline.split(' ').map {|v| v.to_f } )
102
+ @nvec = NArray.to_na( nline.split(' ').map {|v| v.to_f } )
56
103
  raise RuntimeError, "bad n vec size" if nvec.size != num_n
57
- @mat = NArray.new(num_m)
104
+ @mat = NArray.float(num_n, num_m)
58
105
  num_m.times do |m|
59
- line = io.readline
60
- line.chomp!
61
- @mat[m] = NArray.new(line.split(' ').map {|v| v.to_f })
106
+ line = io.readline.chomp!
107
+ @mat[true, m] = line.split(' ').map {|v| v.to_f }
62
108
  end
63
109
  end
64
110
  self
@@ -103,7 +149,7 @@ class Lmat
103
149
  num_scans = msrun.scan_count
104
150
  printf "Reading #{num_scans} spectra [.=100]" if $VERBOSE
105
151
  spectrum_cnt = 0
106
- msrun.each do |scan|
152
+ msrun.each(:ms_level => 1) do |scan|
107
153
  spectrum = scan.spectrum
108
154
  times << scan.time
109
155
  #(mz,inten) = spectrum_to_mz_and_inten(spectrum, VecD)
@@ -130,36 +176,92 @@ class Lmat
130
176
  # outputs vec lengths if set to true
131
177
  def to_s(with_vec_lengths=false)
132
178
  arr = []
133
- if with_vec_lengths; arr.push(@mvec.size) end
134
- arr.push(@mvec.join(" "))
135
- if with_vec_lengths; arr.push(@nvec.size) end
136
- arr.push(@nvec.join(" "), @mat.map {|v| v.join(" " ) }.join("\n")).join("\n")
179
+ arr.push(@mvec.size) if with_vec_lengths
180
+ arr.push(@mvec.to_a.join(" "))
181
+ arr.push(@nvec.size) if with_vec_lengths
182
+ arr.push(@nvec.to_a.join(" "))
183
+ (0...@mvec.size).each do |m_index|
184
+ arr.push(@mat[true, m_index].to_a.join(" "))
185
+ end
186
+ arr.join("\n")
137
187
  end
138
188
 
139
189
  def ==(other)
140
190
  other != nil && self.class == other.class && @nvec == other.nvec && @mvec == other.mvec && @mat == other.mat
141
191
  end
142
192
 
143
- def write(file=nil)
193
+ # returns a fresh lmat object
194
+ def warp_cols(new_m_values, deep_copy=false)
195
+ new_guy = self.dup
196
+ new_guy.warp_cols!(new_m_values, deep_copy)
197
+ new_guy
198
+ end
199
+
200
+ # warps the data in self based on interpolation of the cols. Evaluates the
201
+ # new_m_values for each column and returns self (modified in place) with the m
202
+ # values set to new_m_values. nvec will be the same as before the call.
203
+ def warp_cols!(new_m_values, deep_copy=false)
204
+ nvec[].indgen.each do |n|
205
+ self[n,true] = Spline.alloc(Interp::AKIMA, mvec, self[n, true]).eval(new_m_values)
206
+ end
207
+ self.nvec = deep_copy ? self.nvec[] : self.nvec
208
+ self.mvec = deep_copy ? new_m_values[] : new_m_values
209
+ self
210
+ end
211
+
212
+ def write(file=nil, int_format_string='i')
144
213
  handle = $>
145
214
  if file; handle = File.open(file, "wb") end
146
215
  bin_string = ""
147
- bin_string << [@mvec.size].pack("i")
148
- bin_string << @mvec.pack("f*")
149
- bin_string << [@nvec.size].pack("i")
150
- bin_string << @nvec.pack("f*")
151
- bin_string << @mat.flatten.pack("f*")
216
+ bin_string << [@mvec.size].pack(int_format_string)
217
+ bin_string << @mvec.to_s
218
+ bin_string << [@nvec.size].pack(int_format_string)
219
+ bin_string << @nvec.to_s
220
+ bin_string << @mat.to_s
152
221
  handle.print bin_string
153
222
  if file; handle.close end
154
223
  end
155
224
 
156
225
  def print(file=nil)
157
226
  handle = $>
158
- if file; handle = File.new(file, "w") end
227
+ handle = File.new(file, "w") if file
159
228
  handle.print( self.to_s(true) )
160
- #$stdout.print( self.to_s(true) )
161
- if file; handle.close end
229
+ handle.close if file
162
230
  end
231
+ end
232
+
233
+ class Lmat
234
+ module Gnuplot
235
+
236
+ # png output only right now, given no outfile, plot to X11
237
+ def plot(outfile=nil)
238
+ # modified from Hornet's eye
239
+ require 'gnuplot'
240
+ ::Gnuplot.open do |gp|
241
+ ::Gnuplot::SPlot.new(gp) do |plot|
242
+ if outfile
243
+ plot.terminal 'png'
244
+ plot.output outfile
245
+ end
246
+ plot.pm3d
247
+ plot.hidden3d
248
+ plot.palette 'defined ( 0 "black", 51 "blue", 102 "green", ' +
249
+ '153 "yellow", 204 "red", 255 "white" )'
250
+ plot.xlabel 'n'
251
+ plot.ylabel 'm'
252
+ plot.data << ::Gnuplot::DataSet.new( self ) do |ds|
253
+ ds.with = 'pm3d'
254
+ ds.matrix = true
255
+ end
256
+ end
257
+ end
258
+ end
163
259
 
260
+ def to_gsplot
261
+ require 'gnuplot'
262
+ [@mvec.to_a, @nvec.to_a, @mat.to_a].to_gsplot
263
+ end
264
+ end
265
+ include Gnuplot
164
266
  end
165
267