mspire-simulator 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -8,11 +8,14 @@ Description:
8
8
  Dependencies:
9
9
  ruby 1.9*
10
10
  weka 3.6.0 - May need to be added to CLASSPATH; see: http://weka.wikispaces.com/CLASSPATH+problems
11
- fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
11
+ fftw 3.2.2 - Tested in Linux Mint 12, Ubuntu Oneiric Ocelot, and Ubuntu 12.04
12
+ SQLite3 3.6.16 or newer (Note: install this first if needed)
12
13
  == Examples
13
- The simplest way to run mspire-simulator is to give it a MZML file
14
- with a single centroided elution profile from which, the simulator
14
+
15
+ The simplest way to run mspire-simulator is to give it an mzML file
16
+ with a single centroided elution profile from which the simulator
15
17
  can extract needed parameters including:
18
+
16
19
  - Elution parameters: front, tail, and mu
17
20
  - Overlap range (for merging signals)
18
21
  - Sampling rate
@@ -29,9 +32,9 @@ To see all the available options:
29
32
 
30
33
  $ mspire-simulator --help
31
34
 
32
-
33
35
  === Charge State Calculator
34
- $ ruby lib/ms/isoelectric_calc.rb --ph 2 --distribution DRVYIHPFHL DRVYIHPF RVYIHPF VYIHPF
36
+
37
+ $ ruby lib/ms/isoelectric_calc.rb --ph 2 --distribution DRVYIHPFHL DRVYIHPF RVYIHPF VYIHPF
35
38
 
36
39
  will return:
37
40
 
@@ -46,13 +49,13 @@ VYIHPF @ pH 2.0: +1, 40.341305; +2, 59.658695
46
49
  To see all the available options:
47
50
  $ ruby lib/ms/isoelectric_calc.rb --help
48
51
 
49
-
50
-
51
52
  == TODO
53
+
52
54
  Because of the many options and parameters to specify we will be moving
53
55
  to a .init file format with a .init file editor.
54
56
 
55
- Other improvments to mspire simulator are also pending.
57
+ Other improvements to mspire-simulator are also pending.
58
+
56
59
  == Copyright
57
60
 
58
61
  See LICENSE.txt for further details.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ Jeweler::Tasks.new do |gem|
13
13
  gem.email = "andrewbnoyce@gmail.com"
14
14
  gem.authors = ["anoyce"]
15
15
 
16
- gem.add_dependency "mspire", "0.8.2"
16
+ gem.add_dependency "mspire", "0.8.5"
17
17
  gem.add_dependency "rubyvis", "= 0.5.2"
18
18
  gem.add_dependency "nokogiri", "= 1.5.2"
19
19
  gem.add_dependency "ffi", "= 1.0.11"
@@ -24,6 +24,7 @@ Jeweler::Tasks.new do |gem|
24
24
  gem.add_dependency "obo", "= 0.1.0"
25
25
  gem.add_dependency "trollop", "= 1.16.2"
26
26
  gem.add_dependency "MS-fragmenter", "= 0.0.2"
27
+ gem.add_dependency "sqlite3", "= 1.3.6"
27
28
 
28
29
  gem.executables = ["mspire-simulator","sim_mail"]
29
30
  gem.files.exclude "elution_curvefit.svg"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.0
data/bin/mspire-simulator CHANGED
@@ -1,7 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
+ #SQLite version = 3.7.13
3
+ #sudo aptitude install sqlite3
4
+ #sudo aptitude install libsqlite3-dev
5
+ #gem install sqlite3
2
6
  $LOAD_PATH << './lib'
3
7
 
4
8
  require 'time'
9
+ require 'sqlite3'
5
10
  require 'progress'
6
11
  require 'nokogiri'
7
12
  require 'mspire/digester'
@@ -18,6 +23,7 @@ require 'ms/isoelectric_calc'
18
23
  require 'ms/sim_digester'
19
24
  require 'ms/sim_trollop'
20
25
  require 'ms/merger'
26
+ require 'ms/sim_modifications'
21
27
 
22
28
 
23
29
 
@@ -46,70 +52,71 @@ module MspireSimulator
46
52
 
47
53
  SampleLoad = 1.0 # Instrument dependent scaling, for an Orbitrap, assumed to be 1 ug
48
54
  # TODO define an option for sample loading, and a scaling function to define the peak intensities
49
-
50
- #------------------------Digest-----------------------------------------------
55
+ database = nil
56
+ if @opts[:memory] == "true"
57
+ database = ":memory:" #can be :memory: stored
58
+ else
59
+ if @opts[:databaseName] == "peptides_[Time.now.sec]"
60
+ database = "peptides_#{Time.now.sec}.sqlite3"
61
+ else
62
+ database = "#{@opts[:databaseName]}.sqlite3"
63
+ end
64
+ end
65
+ #
66
+ db = SQLite3::Database.new(database)
67
+ db.transaction
68
+ db.execute "CREATE TABLE IF NOT EXISTS peptides(Id INTEGER PRIMARY KEY, seq TEXT, mass REAL, charge INTEGER, mono_mz REAL, p_rt REAL, p_rt_index REAL, p_int REAL, abu REAL, sx REAL, rt_a INTEGER, rt_b INTEGER, prot_id INTEGER)"
69
+ db.execute "CREATE TABLE IF NOT EXISTS aac(Id INTEGER PRIMARY KEY, A INTEGER,R INTEGER,N INTEGER,D INTEGER,B INTEGER,C INTEGER,E INTEGER,Q INTEGER,Z INTEGER,G INTEGER,H INTEGER,I INTEGER,L INTEGER,K INTEGER,M INTEGER,F INTEGER,P INTEGER,S INTEGER,T INTEGER,W INTEGER,Y INTEGER,V INTEGER,J INTEGER, place_holder REAL)"
70
+ db.execute "CREATE TABLE IF NOT EXISTS core_spec(pep_id INTEGER PRIMARY KEY,mzs TEXT, ints TEXT)"
71
+ #
72
+
73
+ #------------------------Digest-&-Modifications-------------------------------
51
74
  peptides = []
52
- digester = MS::Sim_Digester.new(@opts[:digestor],@opts[:pH])
75
+ digester = MS::Sim_Digester.new(@opts,db)
53
76
  ARGV.each do |file|
54
- peptides<<digester.digest(file)
77
+ digester.digest(file)
55
78
  end
56
- peptides.flatten!.uniq!
57
79
  #-----------------------------------------------------------------------------
58
80
 
59
81
 
60
82
 
61
83
  #------------------------Create Spectrum--------------------------------------
62
- spectra = MS::Sim_Spectra.new(peptides, @opts, one_d)
63
- data = spectra.data
84
+ spectra = MS::Sim_Spectra.new(@opts, one_d,db)
64
85
 
65
86
  if noise == 'true'
66
- noise = spectra.noiseify
87
+ noise = spectra.noiseify(db)
67
88
  end
68
89
  #-----------------------------------------------------------------------------
69
90
 
70
91
 
71
92
 
72
93
  #------------------------Merge Overlaps---------------------------------------
73
- spectra.spectra = Merger.merge(spectra.spectra,@opts[:overlapRange].to_f)
94
+ Merger.merge(@opts[:overlapRange].to_f,db)
74
95
  #-----------------------------------------------------------------------------
75
96
 
76
97
 
77
98
 
99
+ #-----------------------MZML--------------------------------------------------
100
+ mzml = Mzml_Wrapper.new(db,@opts)
101
+ prog = Progress.new("Writing to mzml file...")
102
+ mzml.to_xml(out_file)
103
+ prog.finish!
104
+ #-----------------------------------------------------------------------------
105
+
106
+
107
+
78
108
  #------------------------Truth Files------------------------------------------
79
109
  if truth != "false"
80
110
  if truth == "xml"
81
- MS::Txml_file_writer.write(spectra.features,spectra.spectra,out_file)
111
+ MS::Txml_file_writer.write(db,out_file,@opts)
82
112
  elsif truth == "csv"
83
- MS::Tcsv_file_writer.write(spectra.spectra,data,noise,spectra.features,out_file)
113
+ MS::Tcsv_file_writer.write(db,out_file,@opts)
114
+ elsif truth == "xml_csv" or truth == "csv_xml"
115
+ MS::Txml_file_writer.write(db,out_file,@opts)
116
+ MS::Tcsv_file_writer.write(db,out_file,@opts)
84
117
  end
85
118
  end
86
119
  #-----------------------------------------------------------------------------
87
-
88
-
89
-
90
- #-----------------------Merge Finish------------------------------------------
91
- spectra.spectra = Merger.compact(spectra.spectra)
92
- #-----------------------------------------------------------------------------
93
-
94
-
95
-
96
- #-----------------------Clean UP----------------------------------------------
97
- spectra.features.each{|fe| fe.delete}
98
- peptides.clear
99
- digester.clean
100
- #-----------------------------------------------------------------------------
101
-
102
-
103
-
104
- #-----------------------MZML--------------------------------------------------
105
- data = spectra.spectra
106
- mzml = Mzml_Wrapper.new(data)
107
- puts "Writing to file..."
108
- mzml.to_xml(out_file)
109
- puts "Done."
110
- #-----------------------------------------------------------------------------
111
-
112
-
113
120
 
114
121
  rescue Exception => e #Clean up if exception
115
122
  puts e.message
@@ -118,18 +125,12 @@ module MspireSimulator
118
125
  if File.exists?(digester.digested_file)
119
126
  File.delete(digester.digested_file)
120
127
  end
121
- digester.clean
122
- end
123
- if spectra != nil
124
- spectra.features.each{|fe| fe.delete}
125
- end
126
- if !peptides.empty?
127
- peptides.each{|pep| pep.delete}
128
128
  end
129
+ system "rm #{database}"
129
130
  puts "Exception - Simulation Failed"
130
-
131
- system "sim_mail #{email} Exception - Simulation Failed" if email != "nil"
132
- else
133
- system "sim_mail #{email} Success! - Simulation Complete" if email != "nil"
131
+ ensure
132
+ system "sim_mail #{email} Simulation Complete" if email != "nil"
133
+ db.commit
134
+ db.close if db
134
135
  end
135
136
  end
data/lib/ms/merger.rb CHANGED
@@ -34,81 +34,42 @@ class Merger
34
34
  return a/b
35
35
  end
36
36
 
37
- def self.merge(spectra,half_range)
38
- new_data = {}
37
+ def self.merge(half_range,db)
38
+ prog = Progress.new("Merging Overlaps:")
39
+ db.execute "CREATE TABLE IF NOT EXISTS merged(merge_id INTEGER PRIMARY KEY, merged_vals TEXT, a_vals TEXT, b_vals TEXT)"
40
+ spectra = db.execute "SELECT * FROM spectra"
41
+ spectra = spectra.group_by{|spec| spec[2]}
39
42
  total = spectra.size
43
+ merge_id = 0
40
44
  k = 0
41
- prog = Progress.new("Merging Overlaps:")
42
- spectra.each do |rt,val|
45
+ spectra.each do |rt,peaks|
43
46
  if k.even?
44
47
  num = (((k/total)*100).to_i)
45
48
  prog.update(num)
46
49
  end
47
- peaks = val.transpose
48
- peaks.sort_by!{|a| a[0]} #mz
49
- peaks = peaks.transpose
50
- mzs = peaks[0]
51
- ints = peaks[1]
50
+ peaks.sort_by!{|a| a[2]} #mz
51
+ peaks_t = peaks.transpose
52
+ pep_ids = peaks_t[1]
53
+ cent_ids = peaks_t[0]
54
+ mzs = peaks_t[3]
55
+ ints = peaks_t[4]
52
56
  mzs.each_with_index do |mz,i|
53
- next if mz.class == Hash
54
57
  o_mz = mz
55
- mz = mz.keys[0][0] if mz.class == Hash
56
58
  range = (mz..mz+half_range)
57
59
  if range.include?(mzs[i+1])
58
60
  metaA_mz = [o_mz, mzs[i+1]]
59
61
  meta_int = [ints[i],ints[i+1]]
60
- sum = meta_int.flatten.inject(:+).to_f
61
- i1 = ints[i]
62
- i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
63
- frac1 = (i1/sum) * 100
64
- frac2 = (ints[i+1]/sum) * 100
65
- metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
66
-
67
- mzs[i] = nil; mzs[i+1] = metaB_mz
68
- ints[i] = nil; ints[i+1] = meta_int
69
- end
70
- end
71
- spec = [mzs.compact,ints.compact]
72
- spec.ms_level = val.ms_level
73
- spec.ms2 = val.ms2
74
- new_data[rt] = spec
75
- k += 1
76
- end
77
- prog.finish!
78
- return new_data
79
- end
80
-
81
- def self.compact(spectra)
82
- @start = Time.now
83
- total = spectra.size
84
- k = 0
85
- num = 0
86
- prog = Progress.new("Merge Finishing:")
87
- step = total/100.0
88
- spectra.each do |rt,val|
89
- if k > step * (num + 1)
90
- num = (((k/total)*100).to_i)
91
- prog.update(num)
92
- end
93
- mzs = val[0]
94
- ints = val[1]
95
- mzs.each_with_index do |m,i|
96
- if m.class == Hash
97
- mzs[i] = m.keys[0][0]
98
- ints[i] = ints[i].flatten.inject(:+)
62
+ sum = ints[i] + ints[i+1]
63
+ new_mz = w_avg(metaA_mz,meta_int)
64
+ db.execute "DELETE FROM spectra WHERE cent_id=#{cent_ids[i]}"
65
+ db.execute "DELETE FROM spectra WHERE cent_id=#{cent_ids[i+1]}"
66
+ db.execute "INSERT INTO spectra VALUES(#{cent_ids[i]},#{pep_ids[i]},#{rt},#{new_mz},#{sum},#{merge_id})"
67
+ db.execute "INSERT INTO merged VALUES(#{merge_id}, '#{cent_ids[i]},#{pep_ids[i]},#{rt},#{new_mz},#{sum}', '#{peaks[i]}', '#{peaks[i+1]}')"
68
+ merge_id += 1
99
69
  end
100
70
  end
101
- spec = [mzs,ints]
102
- spec.ms_level = val.ms_level
103
- spec.ms2 = val.ms2
104
- spectra[rt] = spec
105
71
  k += 1
106
72
  end
107
73
  prog.finish!
108
- return spectra
109
74
  end
110
75
  end
111
-
112
- #test
113
- #data = {1 => [[1.0,1.5,1.7,3.0,4.0,5.0,6.0,7.0,8.0,9.0],[10,9,8,7,6,5,4,3,2,1]], 2 => [[1,2,3,4,5,6,7,8,9],[9,8,7,6,5,4,3,2,1]]}
114
- #p Merger.merge(data,0.5)
@@ -5,35 +5,48 @@ require 'mspire/mzml'
5
5
 
6
6
  class Mzml_Wrapper
7
7
 
8
- def initialize(spectra)
8
+ def initialize(db,opts)
9
+ prog = Progress.new("Converting to mzml:")
9
10
  #spectra is a Hash rt=>[[mzs],[ints]]
10
- ms2_count = 0
11
+ db.execute "CREATE TABLE IF NOT EXISTS ms2(ms2_id INTEGER PRIMARY KEY,cent_id INTEGER,pep_id INTEGER,rt REAL,mzs TEXT,ints TEXT)" if opts[:ms2] == "true"
12
+ sampling_rate = opts[:sampling_rate]
13
+ noise_max = opts[:noiseMaxInt]
11
14
  count = 0.0
12
15
  scan_number = 1
13
16
  specs = []
14
- prog = Progress.new("Converting to mzml:")
15
17
  num = 0
18
+ spectra = db.execute "SELECT * FROM spectra"
16
19
  total = spectra.size
20
+ spectra_g = spectra.group_by{|spec| spec[2]} #rt
17
21
  step = total/100
18
22
  spec_id = nil
19
- t_rt = 0
20
- spectra.sort.map do |rt,data|
23
+ spectra_g.sort.map do |rt,data|
21
24
  if count > step * (num + 1)
22
25
  num = (((count/total)*100).to_i)
23
26
  prog.update(num)
24
27
  end
25
- if t_rt > rt
26
- puts "OUT of ORDER"
27
- end
28
- t_rt = rt
28
+ data_t = data.transpose
29
+ mzs = data_t[3]
30
+ ints = data_t[4]
29
31
 
30
- ms_level = data.ms_level # method added to array class
32
+ #grab top 2 centroids for ms2
33
+ ms2s = []
34
+ if opts[:ms2] == "true"
35
+ top2 = ints.sort[-opts[:ms2s]..-1]
36
+ top2.each do |top|
37
+ if top > noise_max + 1000.0
38
+ cent = data[ints.index(top)]
39
+ ms2s<<cent if cent[1] != nil
40
+ end
41
+ end
42
+ end
43
+
31
44
 
32
45
  spc = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
33
46
  spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
34
47
  spec.data_arrays = [
35
- Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
36
- Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
48
+ Mspire::Mzml::DataArray.new(mzs).describe!('MS:1000514'),
49
+ Mspire::Mzml::DataArray.new(ints).describe!('MS:1000515')
37
50
  ]
38
51
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
39
52
  scan = Mspire::Mzml::Scan.new do |scan|
@@ -43,36 +56,44 @@ class Mzml_Wrapper
43
56
  end
44
57
  end
45
58
  specs<<spc
46
- if ms_level == 2
59
+ if !ms2s.empty?
47
60
  #[rt,[mzs],[ints]]
48
- ms2 = data.ms2
49
- ms2.each do |data|
50
- ms2_count += 1
51
- scan_number += 1
52
- spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
53
- spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
54
- spec.data_arrays = [
55
- Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000514'),
56
- Mspire::Mzml::DataArray.new(data[2]).describe!('MS:1000515')
57
- ]
58
- spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
59
- scan = Mspire::Mzml::Scan.new do |scan|
60
- scan.describe! 'MS:1000016', data[0], 'UO:0000010'
61
+ ms2s.each do |cent|
62
+ pep = db.execute "SELECT seq,charge FROM peptides WHERE Id=#{cent[1]}"
63
+ seq = pep[0][0]
64
+ if seq.size > 1
65
+ charge = pep[0][1]
66
+ ms2_mzs = MS::Fragmenter.new.fragment(seq)
67
+ ms2_ints = Array.new(ms2_mzs.size,500.to_f)
68
+ rt = cent[2] + RThelper.RandomFloat(0.01,sampling_rate - 0.1)
69
+ db.execute "INSERT INTO ms2(cent_id,pep_id,rt,mzs,ints) VALUES(#{cent[0]},#{cent[1]},#{rt},'#{ms2_mzs}','#{ms2_ints}')"
70
+
71
+ scan_number += 1
72
+ spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
73
+ spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
74
+ spec.data_arrays = [
75
+ Mspire::Mzml::DataArray.new(ms2_mzs).describe!('MS:1000514'),
76
+ Mspire::Mzml::DataArray.new(ms2_ints).describe!('MS:1000515')
77
+ ]
78
+ spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
79
+ scan = Mspire::Mzml::Scan.new do |scan|
80
+ scan.describe! 'MS:1000016', rt, 'UO:0000010'
81
+ end
82
+ sl << scan
61
83
  end
62
- sl << scan
84
+ precursor = Mspire::Mzml::Precursor.new( spc.id )
85
+ si = Mspire::Mzml::SelectedIon.new
86
+ # the selected ion m/z:
87
+ si.describe! "MS:1000744", cent[3] #pre_mz
88
+ # the selected ion charge state
89
+ si.describe! "MS:1000041", charge #pre_charge
90
+ # the selected ion intensity
91
+ si.describe! "MS:1000042", cent[4] #pre_int
92
+ precursor.selected_ions = [si]
93
+ spec.precursors = [precursor]
63
94
  end
64
- precursor = Mspire::Mzml::Precursor.new( spc.id )
65
- si = Mspire::Mzml::SelectedIon.new
66
- # the selected ion m/z:
67
- si.describe! "MS:1000744", data.pre_mz
68
- # the selected ion charge state
69
- si.describe! "MS:1000041", data.pre_charge
70
- # the selected ion intensity
71
- si.describe! "MS:1000042", data.pre_int
72
- precursor.selected_ions = [si]
73
- spec.precursors = [precursor]
74
95
  end
75
- specs<<spc2
96
+ specs<<spc2 if seq.size > 1
76
97
  end
77
98
  end
78
99
  count += 1
@@ -92,7 +113,7 @@ class Mzml_Wrapper
92
113
  mzml.instrument_configurations << default_instrument_config
93
114
  software = Mspire::Mzml::Software.new
94
115
  mzml.software_list << software
95
- default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
116
+ default_data_processing = Mspire::Mzml::DataProcessing.new("simulator options=#{opts}")
96
117
  mzml.data_processing_list << default_data_processing
97
118
  mzml.run = Mspire::Mzml::Run.new("simulated_run", default_instrument_config) do |run|
98
119
  spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
@@ -100,7 +121,6 @@ class Mzml_Wrapper
100
121
  end
101
122
  end
102
123
  prog.finish!
103
- puts "ms2 written = #{ms2_count}"
104
124
  return @mzml
105
125
  end
106
126