mspire-simulator 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -8,11 +8,14 @@ Description:
8
8
  Dependencies:
9
9
  ruby 1.9*
10
10
  weka 3.6.0 - May need to add to CLASSPATH see: http://weka.wikispaces.com/CLASSPATH+problems
11
- fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
11
+ fftw 3.2.2 - Tested in Linux Mint 12, Ubuntu Oneiric Ocelot, and Ubuntu 12.04
12
+ SQLite3 3.6.16 or newer (Note: install this first if needed)
12
13
  == Examples
13
- The simplest way to run mspire-simulator is to give it a MZML file
14
- with a single centroided elution profile from which, the simulator
14
+
15
+ The simplest way to run mspire-simulator is to give it an mzML file
16
+ with a single centroided elution profile from which the simulator
15
17
  can extract needed parameters including:
18
+
16
19
  - Elution parameters: front, tail, and mu
17
20
  - Overlap range (for merging signals)
18
21
  - Sampling rate
@@ -29,9 +32,9 @@ To see all the available options:
29
32
 
30
33
  $ mspire-simulator --help
31
34
 
32
-
33
35
  === Charge State Calculator
34
- $ ruby lib/ms/isoelectric_calc.rb --ph 2 --distribution DRVYIHPFHL DRVYIHPF RVYIHPF VYIHPF
36
+
37
+ $ ruby lib/ms/isoelectric_calc.rb --ph 2 --distribution DRVYIHPFHL DRVYIHPF RVYIHPF VYIHPF
35
38
 
36
39
  will return:
37
40
 
@@ -46,13 +49,13 @@ VYIHPF @ pH 2.0: +1, 40.341305; +2, 59.658695
46
49
  To see all the available options:
47
50
  $ ruby lib/ms/isoelectric_calc.rb --help
48
51
 
49
-
50
-
51
52
  == TODO
53
+
52
54
  Because of the many options and parameters to specify, we will be moving
53
55
  to a .init file format with a .init file editor.
54
56
 
55
- Other improvments to mspire simulator are also pending.
57
+ Other improvements to mspire-simulator are also pending.
58
+
56
59
  == Copyright
57
60
 
58
61
  See LICENSE.txt for further details.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ Jeweler::Tasks.new do |gem|
13
13
  gem.email = "andrewbnoyce@gmail.com"
14
14
  gem.authors = ["anoyce"]
15
15
 
16
- gem.add_dependency "mspire", "0.8.2"
16
+ gem.add_dependency "mspire", "0.8.5"
17
17
  gem.add_dependency "rubyvis", "= 0.5.2"
18
18
  gem.add_dependency "nokogiri", "= 1.5.2"
19
19
  gem.add_dependency "ffi", "= 1.0.11"
@@ -24,6 +24,7 @@ Jeweler::Tasks.new do |gem|
24
24
  gem.add_dependency "obo", "= 0.1.0"
25
25
  gem.add_dependency "trollop", "= 1.16.2"
26
26
  gem.add_dependency "MS-fragmenter", "= 0.0.2"
27
+ gem.add_dependency "sqlite3", "= 1.3.6"
27
28
 
28
29
  gem.executables = ["mspire-simulator","sim_mail"]
29
30
  gem.files.exclude "elution_curvefit.svg"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.0
data/bin/mspire-simulator CHANGED
@@ -1,7 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
+ #SQLite version = 3.7.13
3
+ #sudo aptitude install sqlite3
4
+ #sudo aptitude install libsqlite3-dev
5
+ #gem install sqlite3
2
6
  $LOAD_PATH << './lib'
3
7
 
4
8
  require 'time'
9
+ require 'sqlite3'
5
10
  require 'progress'
6
11
  require 'nokogiri'
7
12
  require 'mspire/digester'
@@ -18,6 +23,7 @@ require 'ms/isoelectric_calc'
18
23
  require 'ms/sim_digester'
19
24
  require 'ms/sim_trollop'
20
25
  require 'ms/merger'
26
+ require 'ms/sim_modifications'
21
27
 
22
28
 
23
29
 
@@ -46,70 +52,71 @@ module MspireSimulator
46
52
 
47
53
  SampleLoad = 1.0 # Instrument dependent scaling, for an Orbitrap, assumed to be 1 ug
48
54
  # TODO define an option for sample loading, and a scaling function to define the peak intensities
49
-
50
- #------------------------Digest-----------------------------------------------
55
+ database = nil
56
+ if @opts[:memory] == "true"
57
+ database = ":memory:" #can be :memory: stored
58
+ else
59
+ if @opts[:databaseName] == "peptides_[Time.now.sec]"
60
+ database = "peptides_#{Time.now.sec}.sqlite3"
61
+ else
62
+ database = "#{@opts[:databaseName]}.sqlite3"
63
+ end
64
+ end
65
+ #
66
+ db = SQLite3::Database.new(database)
67
+ db.transaction
68
+ db.execute "CREATE TABLE IF NOT EXISTS peptides(Id INTEGER PRIMARY KEY, seq TEXT, mass REAL, charge INTEGER, mono_mz REAL, p_rt REAL, p_rt_index REAL, p_int REAL, abu REAL, sx REAL, rt_a INTEGER, rt_b INTEGER, prot_id INTEGER)"
69
+ db.execute "CREATE TABLE IF NOT EXISTS aac(Id INTEGER PRIMARY KEY, A INTEGER,R INTEGER,N INTEGER,D INTEGER,B INTEGER,C INTEGER,E INTEGER,Q INTEGER,Z INTEGER,G INTEGER,H INTEGER,I INTEGER,L INTEGER,K INTEGER,M INTEGER,F INTEGER,P INTEGER,S INTEGER,T INTEGER,W INTEGER,Y INTEGER,V INTEGER,J INTEGER, place_holder REAL)"
70
+ db.execute "CREATE TABLE IF NOT EXISTS core_spec(pep_id INTEGER PRIMARY KEY,mzs TEXT, ints TEXT)"
71
+ #
72
+
73
+ #------------------------Digest-&-Modifications-------------------------------
51
74
  peptides = []
52
- digester = MS::Sim_Digester.new(@opts[:digestor],@opts[:pH])
75
+ digester = MS::Sim_Digester.new(@opts,db)
53
76
  ARGV.each do |file|
54
- peptides<<digester.digest(file)
77
+ digester.digest(file)
55
78
  end
56
- peptides.flatten!.uniq!
57
79
  #-----------------------------------------------------------------------------
58
80
 
59
81
 
60
82
 
61
83
  #------------------------Create Spectrum--------------------------------------
62
- spectra = MS::Sim_Spectra.new(peptides, @opts, one_d)
63
- data = spectra.data
84
+ spectra = MS::Sim_Spectra.new(@opts, one_d,db)
64
85
 
65
86
  if noise == 'true'
66
- noise = spectra.noiseify
87
+ noise = spectra.noiseify(db)
67
88
  end
68
89
  #-----------------------------------------------------------------------------
69
90
 
70
91
 
71
92
 
72
93
  #------------------------Merge Overlaps---------------------------------------
73
- spectra.spectra = Merger.merge(spectra.spectra,@opts[:overlapRange].to_f)
94
+ Merger.merge(@opts[:overlapRange].to_f,db)
74
95
  #-----------------------------------------------------------------------------
75
96
 
76
97
 
77
98
 
99
+ #-----------------------MZML--------------------------------------------------
100
+ mzml = Mzml_Wrapper.new(db,@opts)
101
+ prog = Progress.new("Writing to mzml file...")
102
+ mzml.to_xml(out_file)
103
+ prog.finish!
104
+ #-----------------------------------------------------------------------------
105
+
106
+
107
+
78
108
  #------------------------Truth Files------------------------------------------
79
109
  if truth != "false"
80
110
  if truth == "xml"
81
- MS::Txml_file_writer.write(spectra.features,spectra.spectra,out_file)
111
+ MS::Txml_file_writer.write(db,out_file,@opts)
82
112
  elsif truth == "csv"
83
- MS::Tcsv_file_writer.write(spectra.spectra,data,noise,spectra.features,out_file)
113
+ MS::Tcsv_file_writer.write(db,out_file,@opts)
114
+ elsif truth == "xml_csv" or truth == "csv_xml"
115
+ MS::Txml_file_writer.write(db,out_file,@opts)
116
+ MS::Tcsv_file_writer.write(db,out_file,@opts)
84
117
  end
85
118
  end
86
119
  #-----------------------------------------------------------------------------
87
-
88
-
89
-
90
- #-----------------------Merge Finish------------------------------------------
91
- spectra.spectra = Merger.compact(spectra.spectra)
92
- #-----------------------------------------------------------------------------
93
-
94
-
95
-
96
- #-----------------------Clean UP----------------------------------------------
97
- spectra.features.each{|fe| fe.delete}
98
- peptides.clear
99
- digester.clean
100
- #-----------------------------------------------------------------------------
101
-
102
-
103
-
104
- #-----------------------MZML--------------------------------------------------
105
- data = spectra.spectra
106
- mzml = Mzml_Wrapper.new(data)
107
- puts "Writing to file..."
108
- mzml.to_xml(out_file)
109
- puts "Done."
110
- #-----------------------------------------------------------------------------
111
-
112
-
113
120
 
114
121
  rescue Exception => e #Clean up if exception
115
122
  puts e.message
@@ -118,18 +125,12 @@ module MspireSimulator
118
125
  if File.exists?(digester.digested_file)
119
126
  File.delete(digester.digested_file)
120
127
  end
121
- digester.clean
122
- end
123
- if spectra != nil
124
- spectra.features.each{|fe| fe.delete}
125
- end
126
- if !peptides.empty?
127
- peptides.each{|pep| pep.delete}
128
128
  end
129
+ system "rm #{database}"
129
130
  puts "Exception - Simulation Failed"
130
-
131
- system "sim_mail #{email} Exception - Simulation Failed" if email != "nil"
132
- else
133
- system "sim_mail #{email} Success! - Simulation Complete" if email != "nil"
131
+ ensure
132
+ system "sim_mail #{email} Simulation Complete" if email != "nil"
133
+ db.commit
134
+ db.close if db
134
135
  end
135
136
  end
data/lib/ms/merger.rb CHANGED
@@ -34,81 +34,42 @@ class Merger
34
34
  return a/b
35
35
  end
36
36
 
37
- def self.merge(spectra,half_range)
38
- new_data = {}
37
+ def self.merge(half_range,db)
38
+ prog = Progress.new("Merging Overlaps:")
39
+ db.execute "CREATE TABLE IF NOT EXISTS merged(merge_id INTEGER PRIMARY KEY, merged_vals TEXT, a_vals TEXT, b_vals TEXT)"
40
+ spectra = db.execute "SELECT * FROM spectra"
41
+ spectra = spectra.group_by{|spec| spec[2]}
39
42
  total = spectra.size
43
+ merge_id = 0
40
44
  k = 0
41
- prog = Progress.new("Merging Overlaps:")
42
- spectra.each do |rt,val|
45
+ spectra.each do |rt,peaks|
43
46
  if k.even?
44
47
  num = (((k/total)*100).to_i)
45
48
  prog.update(num)
46
49
  end
47
- peaks = val.transpose
48
- peaks.sort_by!{|a| a[0]} #mz
49
- peaks = peaks.transpose
50
- mzs = peaks[0]
51
- ints = peaks[1]
50
+ peaks.sort_by!{|a| a[2]} #mz
51
+ peaks_t = peaks.transpose
52
+ pep_ids = peaks_t[1]
53
+ cent_ids = peaks_t[0]
54
+ mzs = peaks_t[3]
55
+ ints = peaks_t[4]
52
56
  mzs.each_with_index do |mz,i|
53
- next if mz.class == Hash
54
57
  o_mz = mz
55
- mz = mz.keys[0][0] if mz.class == Hash
56
58
  range = (mz..mz+half_range)
57
59
  if range.include?(mzs[i+1])
58
60
  metaA_mz = [o_mz, mzs[i+1]]
59
61
  meta_int = [ints[i],ints[i+1]]
60
- sum = meta_int.flatten.inject(:+).to_f
61
- i1 = ints[i]
62
- i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
63
- frac1 = (i1/sum) * 100
64
- frac2 = (ints[i+1]/sum) * 100
65
- metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
66
-
67
- mzs[i] = nil; mzs[i+1] = metaB_mz
68
- ints[i] = nil; ints[i+1] = meta_int
69
- end
70
- end
71
- spec = [mzs.compact,ints.compact]
72
- spec.ms_level = val.ms_level
73
- spec.ms2 = val.ms2
74
- new_data[rt] = spec
75
- k += 1
76
- end
77
- prog.finish!
78
- return new_data
79
- end
80
-
81
- def self.compact(spectra)
82
- @start = Time.now
83
- total = spectra.size
84
- k = 0
85
- num = 0
86
- prog = Progress.new("Merge Finishing:")
87
- step = total/100.0
88
- spectra.each do |rt,val|
89
- if k > step * (num + 1)
90
- num = (((k/total)*100).to_i)
91
- prog.update(num)
92
- end
93
- mzs = val[0]
94
- ints = val[1]
95
- mzs.each_with_index do |m,i|
96
- if m.class == Hash
97
- mzs[i] = m.keys[0][0]
98
- ints[i] = ints[i].flatten.inject(:+)
62
+ sum = ints[i] + ints[i+1]
63
+ new_mz = w_avg(metaA_mz,meta_int)
64
+ db.execute "DELETE FROM spectra WHERE cent_id=#{cent_ids[i]}"
65
+ db.execute "DELETE FROM spectra WHERE cent_id=#{cent_ids[i+1]}"
66
+ db.execute "INSERT INTO spectra VALUES(#{cent_ids[i]},#{pep_ids[i]},#{rt},#{new_mz},#{sum},#{merge_id})"
67
+ db.execute "INSERT INTO merged VALUES(#{merge_id}, '#{cent_ids[i]},#{pep_ids[i]},#{rt},#{new_mz},#{sum}', '#{peaks[i]}', '#{peaks[i+1]}')"
68
+ merge_id += 1
99
69
  end
100
70
  end
101
- spec = [mzs,ints]
102
- spec.ms_level = val.ms_level
103
- spec.ms2 = val.ms2
104
- spectra[rt] = spec
105
71
  k += 1
106
72
  end
107
73
  prog.finish!
108
- return spectra
109
74
  end
110
75
  end
111
-
112
- #test
113
- #data = {1 => [[1.0,1.5,1.7,3.0,4.0,5.0,6.0,7.0,8.0,9.0],[10,9,8,7,6,5,4,3,2,1]], 2 => [[1,2,3,4,5,6,7,8,9],[9,8,7,6,5,4,3,2,1]]}
114
- #p Merger.merge(data,0.5)
@@ -5,35 +5,48 @@ require 'mspire/mzml'
5
5
 
6
6
  class Mzml_Wrapper
7
7
 
8
- def initialize(spectra)
8
+ def initialize(db,opts)
9
+ prog = Progress.new("Converting to mzml:")
9
10
  #spectra is a Hash rt=>[[mzs],[ints]]
10
- ms2_count = 0
11
+ db.execute "CREATE TABLE IF NOT EXISTS ms2(ms2_id INTEGER PRIMARY KEY,cent_id INTEGER,pep_id INTEGER,rt REAL,mzs TEXT,ints TEXT)" if opts[:ms2] == "true"
12
+ sampling_rate = opts[:sampling_rate]
13
+ noise_max = opts[:noiseMaxInt]
11
14
  count = 0.0
12
15
  scan_number = 1
13
16
  specs = []
14
- prog = Progress.new("Converting to mzml:")
15
17
  num = 0
18
+ spectra = db.execute "SELECT * FROM spectra"
16
19
  total = spectra.size
20
+ spectra_g = spectra.group_by{|spec| spec[2]} #rt
17
21
  step = total/100
18
22
  spec_id = nil
19
- t_rt = 0
20
- spectra.sort.map do |rt,data|
23
+ spectra_g.sort.map do |rt,data|
21
24
  if count > step * (num + 1)
22
25
  num = (((count/total)*100).to_i)
23
26
  prog.update(num)
24
27
  end
25
- if t_rt > rt
26
- puts "OUT of ORDER"
27
- end
28
- t_rt = rt
28
+ data_t = data.transpose
29
+ mzs = data_t[3]
30
+ ints = data_t[4]
29
31
 
30
- ms_level = data.ms_level # method added to array class
32
+ #grab top 2 centroids for ms2
33
+ ms2s = []
34
+ if opts[:ms2] == "true"
35
+ top2 = ints.sort[-opts[:ms2s]..-1]
36
+ top2.each do |top|
37
+ if top > noise_max + 1000.0
38
+ cent = data[ints.index(top)]
39
+ ms2s<<cent if cent[1] != nil
40
+ end
41
+ end
42
+ end
43
+
31
44
 
32
45
  spc = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
33
46
  spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
34
47
  spec.data_arrays = [
35
- Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
36
- Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
48
+ Mspire::Mzml::DataArray.new(mzs).describe!('MS:1000514'),
49
+ Mspire::Mzml::DataArray.new(ints).describe!('MS:1000515')
37
50
  ]
38
51
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
39
52
  scan = Mspire::Mzml::Scan.new do |scan|
@@ -43,36 +56,44 @@ class Mzml_Wrapper
43
56
  end
44
57
  end
45
58
  specs<<spc
46
- if ms_level == 2
59
+ if !ms2s.empty?
47
60
  #[rt,[mzs],[ints]]
48
- ms2 = data.ms2
49
- ms2.each do |data|
50
- ms2_count += 1
51
- scan_number += 1
52
- spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
53
- spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
54
- spec.data_arrays = [
55
- Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000514'),
56
- Mspire::Mzml::DataArray.new(data[2]).describe!('MS:1000515')
57
- ]
58
- spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
59
- scan = Mspire::Mzml::Scan.new do |scan|
60
- scan.describe! 'MS:1000016', data[0], 'UO:0000010'
61
+ ms2s.each do |cent|
62
+ pep = db.execute "SELECT seq,charge FROM peptides WHERE Id=#{cent[1]}"
63
+ seq = pep[0][0]
64
+ if seq.size > 1
65
+ charge = pep[0][1]
66
+ ms2_mzs = MS::Fragmenter.new.fragment(seq)
67
+ ms2_ints = Array.new(ms2_mzs.size,500.to_f)
68
+ rt = cent[2] + RThelper.RandomFloat(0.01,sampling_rate - 0.1)
69
+ db.execute "INSERT INTO ms2(cent_id,pep_id,rt,mzs,ints) VALUES(#{cent[0]},#{cent[1]},#{rt},'#{ms2_mzs}','#{ms2_ints}')"
70
+
71
+ scan_number += 1
72
+ spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
73
+ spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
74
+ spec.data_arrays = [
75
+ Mspire::Mzml::DataArray.new(ms2_mzs).describe!('MS:1000514'),
76
+ Mspire::Mzml::DataArray.new(ms2_ints).describe!('MS:1000515')
77
+ ]
78
+ spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
79
+ scan = Mspire::Mzml::Scan.new do |scan|
80
+ scan.describe! 'MS:1000016', rt, 'UO:0000010'
81
+ end
82
+ sl << scan
61
83
  end
62
- sl << scan
84
+ precursor = Mspire::Mzml::Precursor.new( spc.id )
85
+ si = Mspire::Mzml::SelectedIon.new
86
+ # the selected ion m/z:
87
+ si.describe! "MS:1000744", cent[3] #pre_mz
88
+ # the selected ion charge state
89
+ si.describe! "MS:1000041", charge #pre_charge
90
+ # the selected ion intensity
91
+ si.describe! "MS:1000042", cent[4] #pre_int
92
+ precursor.selected_ions = [si]
93
+ spec.precursors = [precursor]
63
94
  end
64
- precursor = Mspire::Mzml::Precursor.new( spc.id )
65
- si = Mspire::Mzml::SelectedIon.new
66
- # the selected ion m/z:
67
- si.describe! "MS:1000744", data.pre_mz
68
- # the selected ion charge state
69
- si.describe! "MS:1000041", data.pre_charge
70
- # the selected ion intensity
71
- si.describe! "MS:1000042", data.pre_int
72
- precursor.selected_ions = [si]
73
- spec.precursors = [precursor]
74
95
  end
75
- specs<<spc2
96
+ specs<<spc2 if seq.size > 1
76
97
  end
77
98
  end
78
99
  count += 1
@@ -92,7 +113,7 @@ class Mzml_Wrapper
92
113
  mzml.instrument_configurations << default_instrument_config
93
114
  software = Mspire::Mzml::Software.new
94
115
  mzml.software_list << software
95
- default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
116
+ default_data_processing = Mspire::Mzml::DataProcessing.new("simulator options=#{opts}")
96
117
  mzml.data_processing_list << default_data_processing
97
118
  mzml.run = Mspire::Mzml::Run.new("simulated_run", default_instrument_config) do |run|
98
119
  spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
@@ -100,7 +121,6 @@ class Mzml_Wrapper
100
121
  end
101
122
  end
102
123
  prog.finish!
103
- puts "ms2 written = #{ms2_count}"
104
124
  return @mzml
105
125
  end
106
126