mspire-simulator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ # http://isoelectric.ovh.org/files/practise-isoelectric-point.html#mozTocId496531
3
+ # Taken from Ryan's github repo
4
+
5
# Tolerance, in pH units, at which the bisection search in calc_PI stops.
Precision = 0.001

# pKa lookup keyed by one-letter residue code. Every entry holds three values;
# the third is the side-chain pKr (nil for residues that have none).
# The first two are presumably the alpha-carboxyl and alpha-amino pKa values --
# TODO confirm against the table this was copied from.
ResidueTable = {
  K: [2.18, 8.95, 10.53],
  E: [2.19, 9.67, 4.25],
  D: [1.88, 9.60, 3.65],
  H: [1.82, 9.17, 6.00],
  R: [2.17, 9.04, 12.48],
  Q: [2.17, 9.13, nil],
  N: [2.02, 8.80, nil],
  C: [1.96, 10.28, 8.18],
  T: [2.11, 9.62, nil],
  S: [2.21, 9.15, nil],
  W: [2.38, 9.39, nil],
  Y: [2.20, 9.11, 10.07],
  F: [1.83, 9.13, nil],
  M: [2.28, 9.21, nil],
  I: [2.36, 9.68, nil],
  L: [2.36, 9.60, nil],
  V: [2.32, 9.62, nil],
  P: [1.99, 10.96, nil],
  A: [2.34, 9.69, nil],
  G: [2.34, 9.60, nil],
  # Ambiguity codes (B, Z, X): harder to assign a pI to.
  B: [1.95, 9.20, 3.65],
  Z: [2.18, 9.40, 4.25],
  X: [2.20, 9.40, nil],
  # Only the pKr was found for U; the other two values are Cysteine's.
  U: [1.96, 10.28, 5.20]
}

# Per-peptide record: the upper-cased sequence, the two terminal pKa values,
# one counter per residue class, and a slot for the isoelectric point.
PepCharges = Struct.new(
  :seq, :n_term, :c_term,
  :y_num, :c_num, :k_num, :h_num, :r_num, :d_num, :e_num, :u_num,
  :polar_num, :hydrophobic_num,
  :pi
)
34
# Builds a PepCharges record for a peptide sequence.
#
# The sequence is upper-cased, the terminal pKa values are looked up in
# ResidueTable from its first and last residue, and every residue is tallied
# into the matching counter. Residues without a counter (e.g. B, Z, X) are
# ignored by the tally.
#
# Aborts the process, printing the sequence, when the first or last residue
# cannot be looked up (empty input or a code missing from ResidueTable).
#
# NOTE(review): n_term is read from column 0 of the first residue and c_term
# from column 1 of the last residue -- confirm this matches the intended
# meaning of the ResidueTable columns.
#
# Returns the populated PepCharges struct.
def identify_potential_charges(str)
  sequence = str.upcase
  head = sequence[0]
  tail = sequence[-1]
  puts sequence if head.nil? || tail.nil?

  tally = begin
    PepCharges.new(sequence, ResidueTable[head.to_sym][0], ResidueTable[tail.to_sym][1], *([0] * 11))
  rescue NoMethodError
    abort sequence
  end

  # Residue letter => counter field on the struct.
  counter_for = {
    "Y" => :y_num, "C" => :c_num, "K" => :k_num, "H" => :h_num,
    "R" => :r_num, "D" => :d_num, "E" => :e_num, "U" => :u_num
  }
  %w[S T N Q].each { |residue| counter_for[residue] = :polar_num }
  %w[A V I L M F W G P].each { |residue| counter_for[residue] = :hydrophobic_num }

  sequence.each_char do |residue|
    field = counter_for[residue]
    tally[field] += 1 if field
  end

  tally
end
69
+
70
# Net charge of a peptide at the given pH.
#
# Sums one sigmoid term per ionisable group: negative terms for c_term and the
# D, E, C, Y counts, positive terms for n_term and the H, K, R counts. Side-chain
# pKa values come from column 2 of ResidueTable; u_num is not used.
#
# pep_charges - a PepCharges-like object (see identify_potential_charges).
# pH          - numeric pH.
#
# Returns the net charge as a Float.
def charge_at_pH(pep_charges, pH)
  side_chain = ->(residue) { ResidueTable[residue][2] }
  losing_proton  = ->(count, pka) { -count / (1 + 10**(pka - pH)) }
  holding_proton = ->(count, pka) { count / (1 + 10**(pH - pka)) }

  # Order matters only for bit-identical floating point results.
  terms = [
    losing_proton.call(1, pep_charges.c_term),
    losing_proton.call(pep_charges.d_num, side_chain.call(:D)),
    losing_proton.call(pep_charges.e_num, side_chain.call(:E)),
    losing_proton.call(pep_charges.c_num, side_chain.call(:C)),
    losing_proton.call(pep_charges.y_num, side_chain.call(:Y)),
    holding_proton.call(1, pep_charges.n_term),
    holding_proton.call(pep_charges.h_num, side_chain.call(:H)),
    holding_proton.call(pep_charges.k_num, side_chain.call(:K)),
    holding_proton.call(pep_charges.r_num, side_chain.call(:R))
  ]
  terms.inject(0) { |total, term| total + term }
end
83
+
84
+
85
# Estimates the isoelectric point of a peptide by bisection on pH.
#
# Starts at pH 8 inside the bracket 0.0..14.0. While the charge reported by
# charge_at_pH is negative the estimate moves halfway towards the lower bound,
# otherwise halfway towards the upper bound; the bound on the side just left
# is tightened to the previous estimate. The search ends once the estimate is
# within Precision of either bound.
#
# pep_charges - whatever charge_at_pH accepts as its first argument.
#
# Returns the final pH estimate.
def calc_PI(pep_charges)
  lower = 0.0
  upper = 14.0
  estimate = 8
  net = charge_at_pH(pep_charges, estimate)

  until estimate - lower <= Precision || upper - estimate <= Precision
    previous = estimate
    if net < 0.0
      estimate = previous - ((previous - lower) / 2)
      upper = previous
    else
      estimate = previous + ((upper - previous) / 2)
      lower = previous
    end
    net = charge_at_pH(pep_charges, estimate)
  end

  estimate
end
104
+ #pepcharges =[]
105
+ =begin
106
+ # RUN the ENTRY FILE HERE
107
+ pi = []
108
+ io = File.open(ARGV.shift, 'r')
109
+ io.each_line do |line|
110
+ pi << calc_PI(identify_potential_charges(line[/^([A-Z]+):.*/]))
111
+ end
112
+ =end
113
+ =begin
114
+ pIes = []
115
+ pepcharges.each do |a|
116
+ pIes << [a, calc_PI(a)]
117
+ end
118
+ =end
119
+ #out_pi = pepcharges.map {|a| calc_PI(a)}
120
+
121
+ #require 'yaml'
122
+ #File.open('pi_list.yml', 'w') {|f| YAML.dump( pi, f) }
data/lib/ms/merger.rb ADDED
@@ -0,0 +1,101 @@
1
+ require_relative '../progress'
2
+
3
# Collapses neighbouring peaks of a spectrum whose m/z values lie within a
# tolerance of each other.
#
# Spectra are Hashes of rt => [[mzs], [ints]]. After .merge, a merged peak is
# represented in the m/z list by a one-entry Hash
#   { [weighted_mz, percent_first, percent_second] => [mz_first, mz_second] }
# and in the intensity list by the pair [int_first, int_second]; .compact turns
# those placeholders back into plain numbers.
class Merger
  # Returns the m/z represented by a peak entry: the weighted m/z stored in the
  # key of a merged-peak Hash, or the entry itself for anything else.
  def self.mz_value(arr)
    arr.is_a?(Hash) ? arr.keys[0][0] : arr
  end

  # Returns the total intensity of an entry. Arrays are summed as
  # last + int_value(first), so left-nested pairs are handled recursively.
  def self.int_value(arr)
    arr.is_a?(Array) ? arr.last + int_value(arr.first) : arr
  end

  # Intensity-weighted average of m/z values.
  #
  # values  - Array of peak entries (numbers or merged-peak Hashes), or a
  #           merged-peak Hash, in which case its member m/z values are used.
  # weights - Array of intensities (numbers or nested pairs), parallel to values.
  #
  # Returns a Float. The divisor is converted to Float so that all-integer
  # input is no longer truncated by integer division.
  def self.w_avg(values, weights)
    # The original compared against `hash` (Object#hash), so this never ran.
    values = values.values.flatten if values.is_a?(Hash)
    weighted = values.each_with_index.map do |value, i|
      mz_value(value) * int_value(weights[i])
    end
    weighted.inject(:+) / weights.flatten.inject(:+).to_f
  end

  # Merges every peak with its right-hand neighbour when that neighbour lies
  # within mz..mz+half_range (peaks are sorted by m/z first).
  #
  # spectra    - Hash of rt => [[mzs], [ints]]; not modified.
  # half_range - numeric m/z tolerance.
  #
  # A peak that is already the result of a merge is not merged again.
  # Returns a new Hash of rt => [[mzs], [ints]] containing merged-peak
  # placeholders (see the class comment). Reports progress through Progress.
  def self.merge(spectra, half_range)
    started = Time.now
    new_data = {}
    total = spectra.size
    spectra.each_with_index do |(rt, val), k|
      # Float division: k/total on Integers is always 0.
      Progress.progress("Merging Overlaps:", ((k.to_f / total) * 100).to_i)
      mzs, ints = val.transpose.sort_by { |peak| peak[0] }.transpose
      # An empty spectrum transposes to [], leaving both lists nil.
      mzs ||= []
      ints ||= []
      mzs.each_with_index do |mz, i|
        next if mz.is_a?(Hash) # produced by the previous iteration's merge
        neighbour = mzs[i + 1]
        next if neighbour.nil? || !(mz..mz + half_range).include?(neighbour)

        pair_mzs = [mz, neighbour]
        pair_ints = [ints[i], ints[i + 1]]
        sum = pair_ints.flatten.inject(:+).to_f
        frac1 = (ints[i] / sum) * 100
        frac2 = (ints[i + 1] / sum) * 100
        merged = { [w_avg(pair_mzs, pair_ints), frac1, frac2] => pair_mzs }

        # The merged peak lives in the neighbour's slot; this slot is dropped.
        mzs[i] = nil
        mzs[i + 1] = merged
        ints[i] = nil
        ints[i + 1] = pair_ints
      end
      new_data[rt] = [mzs.compact, ints.compact]
    end
    Progress.progress("Merging Overlaps:", 100, Time.now - started)
    puts ''
    new_data
  end

  # Replaces merged-peak placeholders with plain values: the weighted m/z and
  # the summed intensity.
  #
  # spectra - Hash of rt => [[mzs], [ints]] as returned by .merge; modified in
  #           place.
  #
  # Returns the same Hash. Reports progress through Progress.
  def self.compact(spectra)
    started = Time.now
    total = spectra.size
    spectra.each_with_index do |(rt, (mzs, ints)), k|
      Progress.progress("Merge Finishing:", ((k.to_f / total) * 100).to_i)
      mzs.each_with_index do |mz, i|
        next unless mz.is_a?(Hash)
        mzs[i] = mz.keys[0][0]
        ints[i] = ints[i].flatten.inject(:+)
      end
      spectra[rt] = [mzs, ints]
    end
    Progress.progress("Merge Finishing:", 100, Time.now - started)
    puts ''
    spectra
  end
end
98
+
99
+ #test
100
+ #data = {1 => [[1.0,1.5,1.7,3.0,4.0,5.0,6.0,7.0,8.0,9.0],[10,9,8,7,6,5,4,3,2,1]], 2 => [[1,2,3,4,5,6,7,8,9],[9,8,7,6,5,4,3,2,1]]}
101
+ #p Merger.merge(data,0.5)
@@ -0,0 +1,67 @@
1
+
2
+ require 'nokogiri'
3
+ require 'progress'
4
+ require 'mspire/mzml'
5
+
6
# Wraps simulated spectra in an Mspire::Mzml document.
class Mzml_Wrapper

  # Builds the mzML document.
  #
  # spectra - Hash of rt => [[mzs], [ints]]. Spectra are numbered from 1 in
  #           iteration order ("scan=1", "scan=2", ...).
  #
  # Reports progress through Progress while converting.
  def initialize(spectra)
    @start = Time.now

    specs = spectra.each_with_index.map do |(rt, data), position|
      Progress.progress("Converting to mzml:",(((position.to_f/spectra.size)*100).to_i))

      Mspire::Mzml::Spectrum.new("scan=#{position + 1}") do |spec|
        spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
        mz_array = Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514')
        intensity_array = Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
        spec.data_arrays = [mz_array, intensity_array]
        spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
          # The scan carries the retention time of this spectrum.
          sl << Mspire::Mzml::Scan.new { |sc| sc.describe!('MS:1000016', rt, 'UO:0000010') }
        end
      end
    end

    @mzml = Mspire::Mzml.new do |mzml|
      mzml.id = 'ms1'
      mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
      mzml.file_description = Mspire::Mzml::FileDescription.new do |fd|
        fd.file_content = Mspire::Mzml::FileContent.new
        fd.source_files << Mspire::Mzml::SourceFile.new
      end

      instrument = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
      mzml.instrument_configurations << instrument
      mzml.software_list << Mspire::Mzml::Software.new
      processing = Mspire::Mzml::DataProcessing.new("did_nothing")
      mzml.data_processing_list << processing

      mzml.run = Mspire::Mzml::Run.new("simulated_run", instrument) do |run|
        run.spectrum_list = Mspire::Mzml::SpectrumList.new(processing, specs)
      end
    end

    Progress.progress("Converting to mzml:",100,Time.now-@start)
    puts ''
    @mzml
  end

  # Serialises the document by delegating to the wrapped object's to_xml.
  def to_xml(file)
    @mzml.to_xml(file)
  end

end
67
+
data/lib/ms/noise.rb ADDED
@@ -0,0 +1,51 @@
1
+
2
+ require 'progress'
3
+ require 'ms/rt/rt_helper'
4
+
5
module MS
  # Random noise peaks and random scan drop-outs for simulated runs.
  module Noise
    module_function

    # Generates random noise peaks for every retention time of the run.
    #
    # density - Integer number of noise peaks per retention time.
    # max_mz  - upper bound for the random m/z values (lower bound is 0.0).
    #
    # Intensities are drawn between 50 and 1000. Retention times come from
    # Sim_Spectra.r_times. Reports progress through Progress.
    #
    # Returns a Hash of rt => [[mzs], [ints]].
    def noiseify(density, max_mz)
      @start = Time.now
      @noise = {}
      r_times = Sim_Spectra.r_times

      r_times.each_with_index do |rt, position|
        Progress.progress("Adding noise:", ((position.to_f / r_times.size) * 100).to_i)

        nmzs = []
        nints = []
        # m/z and intensity are drawn alternately, one pair per peak.
        density.times do
          nmzs << RThelper.RandomFloat(0.0, max_mz)
          nints << RThelper.RandomFloat(50, 1000)
        end
        @noise[rt] = [nmzs, nints]
      end

      Progress.progress("Adding noise:", 100, Time.now - @start)
      puts ''

      @noise
    end

    # Removes a random share of the run's retention times.
    #
    # drop_percentage - fraction (e.g. 0.1) of the retention times to drop;
    #                   the count is truncated to an Integer.
    #
    # The index is drawn from the *current* length on every pass. The previous
    # version drew from 0..original_length, so a draw could point past the end
    # of the shrinking array and silently drop nothing.
    #
    # Modifies and returns the array provided by Sim_Spectra.r_times.
    def spec_drops(drop_percentage)
      r_times = Sim_Spectra.r_times
      num_drops = (drop_percentage * r_times.length).to_i
      num_drops.times do
        break if r_times.empty?
        r_times.delete_at(rand(r_times.length))
      end
      r_times
    end

  end
end
@@ -0,0 +1,31 @@
1
+
2
# Small numeric helpers: Gaussian curves and a uniform random float.
module RThelper

  module_function

  # Normal probability density at x for mean mu and standard deviation sd:
  #
  #   1 / sqrt(2*pi*sd^2) * exp(-(x - mu)^2 / (2*sd^2))
  #
  # The exponent's divisor is 2*sd^2. It used to be (2*sd)^2 = 4*sd^2, which
  # widened the curve and made its area sqrt(2) instead of 1.
  #
  # All arguments are converted with to_f. Returns a Float.
  def normalized_gaussian(x, mu, sd)
    x = x.to_f
    mu = mu.to_f
    sd = sd.to_f
    variance = sd**2
    (1 / Math.sqrt(2 * Math::PI * variance)) * Math.exp(-((x - mu)**2) / (2 * variance))
  end

  # Un-normalised bell curve of height h centred on mu:
  #
  #   h * exp(-(x - mu)^2 / sd^2)
  #
  # Note the divisor is sd^2 (no factor 2), so sd here is a width parameter
  # rather than a standard deviation in the strict sense.
  #
  # All arguments are converted with to_f. Returns a Float.
  def gaussian(x, mu, sd, h)
    x = x.to_f
    mu = mu.to_f
    sd = sd.to_f
    h = h.to_f
    h * Math.exp(-((x - mu)**2) / (sd**2))
  end

  # Uniform random Float in [a, b).
  #
  # Kernel#rand with a Float bound >= 1 yields an Integer below the truncated
  # bound, so the draw has a resolution of 1/2147483647 of the interval.
  def RandomFloat(a, b)
    a = a.to_f
    b = b.to_f
    fraction = rand(2147483647.0) / 2147483647.0
    a + fraction * (b - a)
  end
end
31
+
@@ -0,0 +1,81 @@
1
+
2
+ require 'time'
3
+ require 'progress'
4
+ require 'ms/sim_feature'
5
+ require 'ms/rt/weka'
6
+ require 'ms/sim_peptide'
7
+ require 'ms/rt/rt_helper'
8
+
9
module MS
  # Places predicted peptide retention times onto the scan times of the run.
  module Rtgenerator

    module_function

    # Assigns every peptide a retention time and an elution window.
    #
    # peptides - objects exposing p_rt / p_rt= / p_rt_i= / set_rts and whatever
    #            MS::Weka.predict_rts and MS::Weka.predict_ints need.
    # one_d    - truthy selects the wide window (300.0 before, 701 after the
    #            apex); otherwise 100.0 before and 300 after.
    #
    # For each peptide the predicted p_rt is scaled by 10**-2 and treated as a
    # fraction of the longest scan time; p_rt becomes the first scan time at or
    # after that point (or the maximum when the fraction exceeds 1) and p_rt_i
    # its index. set_rts then receives the indices of the first scan times at
    # or after (p_rt - head) and (p_rt + tail).
    #
    # When the window runs past either end of the run the boundary *index*
    # (0 or the last index) is used. Previously the boundary scan *time* was
    # passed in that case, mixing a time value into a pair of indices.
    #
    # Returns the peptide list as returned by MS::Weka.predict_rts.
    def generateRT(peptides, one_d)

      @start = Time.now
      @r_times = Sim_Spectra.r_times

      # Gets retention times (and intensities) from the weka models
      peptides = MS::Weka.predict_rts(peptides)
      MS::Weka.predict_ints(peptides)

      max_rt = @r_times.max
      last_index = @r_times.length - 1
      head_length, tail_length = one_d ? [300.0, 701] : [100.0, 300]

      peptides.each_with_index do |pep, ind|
        Progress.progress("Generating retention times:",(((ind+1)/peptides.size.to_f)*100).to_i)

        # Fit the predicted retention time into the scan times
        p_rt = pep.p_rt * 10**-2
        pep.p_rt = p_rt > 1 ? max_rt : @r_times.find { |time| time >= (p_rt * max_rt) }
        pep.p_rt_i = @r_times.index(pep.p_rt)

        if pep.p_rt.nil?
          puts "\n\n\t#{pep} TIME-> #{p_rt*max_rt} :: Peptide not predicted in time range: try increasing run time\n\n."
          next
        end

        # Elution window as indices into the scan times
        window_start = @r_times.index { |time| time >= (pep.p_rt - head_length) } || 0
        window_end = @r_times.index { |time| time >= (pep.p_rt + tail_length) } || last_index

        pep.set_rts(window_start, window_end)
      end

      Progress.progress("Generating retention times:",100,Time.now-@start)
      puts ""

      return peptides

    end
  end
end
data/lib/ms/rt/weka.rb ADDED
@@ -0,0 +1,150 @@
1
+
2
+ require 'csv'
3
+
4
module MS
  # Feeds peptide features to command-line Weka models and reads back the
  # predictions. Requires `java` with Weka on its classpath and the model files
  # under bin/weka/ relative to the working directory.
  module Weka
    # Column names of the retention-time ARFF file, in order.
    RT_ATTRIBUTES = %w[A R N D B C E Q Z G H I L K M F P S T W Y V J rt].freeze

    # Column names of the intensity ARFF file, in order.
    INT_ATTRIBUTES = (%w[mz charge mass rt] +
                      %w[A R N D B C E Q Z G H I L K M F P S T W Y V] +
                      %w[intensity]).freeze

    # First decimal number on a line (up to three fractional digits).
    PREDICTION_PATTERN = /(\d*\.\d{0,3}){1}/

    #James Dalg
    module_function

    # Predicts a retention time for every peptide.
    #
    # peptides - objects exposing aa_counts and p_rt=.
    #
    # Writes a temporary ARFF file named after the current nanosecond count,
    # runs the model, and stores the n-th prediction found in the output on the
    # n-th peptide. Both temporary files are removed afterwards.
    #
    # Returns the same peptides collection.
    def predict_rts(peptides)
      #mz,charge,intensity,rt,A,R,N,D,B,C,E,Q,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V,J,mass,hydro,pi
      rows = peptides.map { |pep| pep.aa_counts }
      arff = make_rt_arff(Time.now.nsec.to_s, rows)
      system("java weka.classifiers.functions.MultilayerPerceptron -T #{arff} -l bin/weka/M5Rules.model -p 24 > #{arff}.out")
      File.delete(arff) if File.exist?(arff)

      read_predictions("#{arff}.out").each_with_index do |value, i|
        peptides[i].p_rt = value
      end
      peptides
    end

    # Predicts an intensity for every peptide.
    #
    # peptides - objects exposing mono_mz, charge, mass, p_rt, aa_counts and
    #            p_int=.
    #
    # Works like predict_rts, with [mono_mz, charge, mass, p_rt] prepended to
    # the residue counts of each row.
    #
    # Returns the same peptides collection.
    def predict_ints(peptides)
      rows = peptides.map do |pep|
        [pep.mono_mz, pep.charge, pep.mass, pep.p_rt] + pep.aa_counts
      end
      arff = make_int_arff(Time.now.nsec.to_s, rows)
      system("java weka.classifiers.trees.M5P -T #{arff} -l bin/weka/M5P.model -p 27 > #{arff}.out")
      File.delete(arff) if File.exist?(arff)

      read_predictions("#{arff}.out").each_with_index do |value, i|
        peptides[i].p_int = value
      end
      peptides
    end

    #James Dalg
    # Writes the retention-time ARFF file "<sourcefile>.arff".
    #
    # sourcefile - path without extension; no longer modified in place, so
    #              frozen strings are accepted.
    # training   - Array of rows (Arrays), written as CSV after the header.
    #
    # Returns the path of the written file.
    def make_rt_arff(sourcefile, training)
      write_arff("#{sourcefile}.arff", RT_ATTRIBUTES, training)
    end

    #James Dalg
    # Writes the intensity ARFF file "<sourcefile>.arff".
    # Arguments and return value as for make_rt_arff.
    def make_int_arff(sourcefile, training)
      write_arff("#{sourcefile}.arff", INT_ATTRIBUTES, training)
    end

    # Writes an ARFF header declaring every attribute as NUMERIC, followed by
    # the rows as CSV. The CSV file is opened once for all rows.
    # Returns path.
    def write_arff(path, attributes, rows)
      header = ["%", "%", "@RELATION molecularinfo"]
      header.concat(attributes.map { |name| "@ATTRIBUTE #{name} NUMERIC" })
      header.concat(["@DATA", "%", "% "])

      File.open(path, "wb") { |f| f.puts(header) }
      CSV.open(path, "a") do |csv|
        rows.each { |row| csv << row }
      end
      path
    end

    # Collects the first decimal number of every line that contains one, then
    # deletes the file (also when reading fails part-way).
    # Returns an Array of Floats in file order.
    def read_predictions(path)
      values = []
      File.foreach(path) do |line|
        hit = PREDICTION_PATTERN.match(line)
        values << hit[0].to_f if hit
      end
      values
    ensure
      File.delete(path) if File.exist?(path)
    end
  end
end
@@ -0,0 +1,92 @@
1
+
2
module MS
  # Digests the proteins of a FASTA-like file into charged peptides.
  class Sim_Digester
    # Upper bound on the number of distinct peptides kept per digest.
    MAX_PEPTIDES = 50_000

    # Path of the temporary file holding one digested sequence per line.
    attr_accessor :digested_file

    # digestor - key used to look up the enzyme in Mspire::Digester.
    # pH       - pH at which peptide charges are evaluated.
    def initialize(digestor, pH)
      @digestor = digestor
      @pH = pH
      @digested_file = ".#{Time.now.nsec}"
    end

    # Reads protein sequences from file, digests them and writes the distinct
    # peptide sequences to digested_file, one per line.
    #
    # Lines containing ">" and blank lines (including whitespace-only or CRLF
    # ones) separate proteins; all other lines are concatenated.
    #
    # When more than MAX_PEPTIDES distinct peptides result, a random subset of
    # exactly MAX_PEPTIDES is kept. The subset is drawn without replacement;
    # the former draw-with-replacement put duplicates back in after uniq!.
    #
    # Prints and returns the number of peptides written.
    def create_digested_file(file)
      seq = String.new
      File.foreach(file) do |line|
        if line =~ />/ || line.strip.empty?
          seq << ";"
        else
          seq << line.chomp
        end
      end
      proteins = seq.split(/;/).reject(&:empty?)

      enzyme = Mspire::Digester[@digestor]
      digested = []
      proteins.each do |prot|
        enzyme.digest(prot).each { |fragment| digested << fragment }
      end
      digested.uniq!
      digested = digested.sample(MAX_PEPTIDES) if digested.length > MAX_PEPTIDES

      # Block form closes the handle even if a write fails.
      File.open(@digested_file, "w") do |out|
        digested.each { |fragment| out.puts(fragment) }
      end

      num_digested = digested.size
      puts "Number of peptides: #{num_digested}"
      num_digested
    end

    # Digests file and builds MS::Peptide objects.
    #
    # For every peptide sequence the charge at the configured pH is computed
    # with charge_at_pH(identify_potential_charges(seq), pH); one peptide is
    # created for the floor and one for the ceiling of that value, skipping a
    # charge of zero. A whole-number charge now yields a single peptide instead
    # of the same peptide twice.
    #
    # The temporary digested_file is removed on the way out, also on failure.
    # Reports progress through Progress.
    #
    # Returns an Array of MS::Peptide.
    def digest(file)
      start = Time.now
      num_digested = create_digested_file(file)
      peptides = []

      File.foreach(@digested_file).with_index do |line, i|
        peptide_seq = line.chomp
        Progress.progress("Creating peptides '#{file}':",((i/num_digested.to_f)*100.0).to_i)

        charge_ratio = charge_at_pH(identify_potential_charges(peptide_seq), @pH)
        [charge_ratio.floor, charge_ratio.ceil].uniq.each do |charge|
          peptides << MS::Peptide.new(peptide_seq, charge) unless charge == 0
        end
      end

      Progress.progress("Creating peptides '#{file}':",100,Time.now-start)
      puts ''
      peptides
    ensure
      File.delete(@digested_file) if File.exist?(@digested_file)
    end
  end
end