mspire-simulator 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ # http://isoelectric.ovh.org/files/practise-isoelectric-point.html#mozTocId496531
3
+ # Taken from Ryan's github repo
4
+
5
# Convergence tolerance, in pH units, for the bisection search in calc_PI.
Precision = 0.001

# pKa values keyed by one-letter amino-acid code. Each row is
# [pK1, pK2, pKR]; the values follow the usual amino-acid table layout of
# alpha-carboxyl pKa, alpha-amino pKa and side-chain pKa, with nil where the
# side chain is not ionizable. The table and its rows are frozen because they
# are shared lookup data.
ResidueTable = {
  K: [2.18, 8.95, 10.53],
  E: [2.19, 9.67, 4.25],
  D: [1.88, 9.60, 3.65],
  H: [1.82, 9.17, 6.00],
  R: [2.17, 9.04, 12.48],
  Q: [2.17, 9.13, nil],
  N: [2.02, 8.80, nil],
  C: [1.96, 10.28, 8.18],
  T: [2.11, 9.62, nil],
  S: [2.21, 9.15, nil],
  W: [2.38, 9.39, nil],
  Y: [2.20, 9.11, 10.07],
  F: [1.83, 9.13, nil],
  M: [2.28, 9.21, nil],
  I: [2.36, 9.68, nil],
  L: [2.36, 9.60, nil],
  V: [2.32, 9.62, nil],
  P: [1.99, 10.96, nil],
  A: [2.34, 9.69, nil],
  G: [2.34, 9.60, nil],
  # Ambiguity codes (B, Z, X): their pI is harder to calculate, so these rows
  # are approximations.
  B: [1.95, 9.20, 3.65],
  Z: [2.18, 9.40, 4.25],
  X: [2.20, 9.40, nil],
  # Only the side-chain pKa was found for U; the other two values are cysteine's.
  U: [1.96, 10.28, 5.20]
}.each_value(&:freeze).freeze

# Per-peptide record: the sequence, the two terminal pKa values, counts of the
# residues relevant to charge, and a slot for the isoelectric point.
PepCharges = Struct.new(:seq, :n_term, :c_term, :y_num, :c_num, :k_num, :h_num, :r_num, :d_num, :e_num, :u_num, :polar_num, :hydrophobic_num, :pi)
34
# Tallies the residues of a peptide that matter for its charge.
#
# str - peptide sequence in one-letter amino-acid codes (any case).
#
# Returns a PepCharges whose seq is the upper-cased sequence, whose n_term and
# c_term are the terminal pKa values taken from ResidueTable, and whose *_num
# members count the matching residues. pi is left at 0.
#
# Aborts the process, printing the sequence, when the sequence is empty or its
# first or last residue has no ResidueTable entry.
def identify_potential_charges(str)
  string = str.upcase
  first_pks = ResidueTable[string[0].to_sym] unless string.empty?
  last_pks = ResidueTable[string[-1].to_sym] unless string.empty?
  abort string if first_pks.nil? || last_pks.nil?

  # ResidueTable rows are [alpha-carboxyl pKa, alpha-amino pKa, side-chain pKa].
  # The free N-terminus is the amino group of the first residue and the free
  # C-terminus is the carboxyl group of the last residue; charge_at_pH treats
  # c_term as the acidic group and n_term as the basic group.
  out = PepCharges.new(string, first_pks[1], last_pks[0], *Array.new(11, 0))

  string.each_char do |letter|
    case letter
    when "Y" then out.y_num += 1
    when "C" then out.c_num += 1
    when "K" then out.k_num += 1
    when "H" then out.h_num += 1
    when "R" then out.r_num += 1
    when "D" then out.d_num += 1
    when "E" then out.e_num += 1
    when "U" then out.u_num += 1
    when "S", "T", "N", "Q"
      out.polar_num += 1
    when "A", "V", "I", "L", "M", "F", "W", "G", "P"
      out.hydrophobic_num += 1
    end
  end
  out
end
69
+
70
# Net charge of a peptide at the given pH.
#
# pep_charges - record responding to c_term, n_term and the d/e/c/y/h/k/r
#               residue counters (see PepCharges).
# pH          - pH at which to evaluate the charge.
#
# Each acidic group contributes -count / (1 + 10**(pKa - pH)) and each basic
# group +count / (1 + 10**(pH - pKa)); side-chain pKa values are column 2 of
# ResidueTable. Terms are added in a fixed order: C-terminus, D, E, C, Y,
# N-terminus, H, K, R.
# NOTE(review): u_num is counted by identify_potential_charges but does not
# contribute here - confirm whether that is intended.
def charge_at_pH(pep_charges, pH)
  acidic_groups = [
    [pep_charges.d_num, :D],
    [pep_charges.e_num, :E],
    [pep_charges.c_num, :C],
    [pep_charges.y_num, :Y]
  ]
  basic_groups = [
    [pep_charges.h_num, :H],
    [pep_charges.k_num, :K],
    [pep_charges.r_num, :R]
  ]

  net = 0
  net += -1 / (1 + 10**(pep_charges.c_term - pH))
  acidic_groups.each do |count, residue|
    net += -count / (1 + 10**(ResidueTable[residue][2] - pH))
  end
  net += 1 / (1 + 10**(pH - pep_charges.n_term))
  basic_groups.each do |count, residue|
    net += count / (1 + 10**(pH - ResidueTable[residue][2]))
  end
  net
end
83
+
84
+
85
# Estimates the isoelectric point of a peptide by bisection on its net charge.
#
# pep_charges - record accepted by charge_at_pH.
#
# The search starts at pH 8 inside the bracket 0.0..14.0. A negative charge
# moves the estimate halfway towards the lower bound; otherwise it moves
# halfway towards the upper bound. The loop stops once the estimate is within
# Precision of either bound. Returns the final pH estimate.
def calc_PI(pep_charges)
  lower = 0.0
  upper = 14.0
  estimate = 8
  net = charge_at_pH(pep_charges, estimate)

  until estimate - lower <= Precision || upper - estimate <= Precision
    previous = estimate
    if net < 0.0
      # Too basic: the zero crossing lies below the current estimate.
      estimate -= (estimate - lower) / 2
      upper = previous
    else
      # Still non-negative: the zero crossing lies above the current estimate.
      estimate += (upper - estimate) / 2
      lower = previous
    end
    net = charge_at_pH(pep_charges, estimate)
  end
  estimate
end
104
+ #pepcharges =[]
105
+ =begin
106
+ # RUN the ENTRY FILE HERE
107
+ pi = []
108
+ io = File.open(ARGV.shift, 'r')
109
+ io.each_line do |line|
110
+ pi << calc_PI(identify_potential_charges(line[/^([A-Z]+):.*/]))
111
+ end
112
+ =end
113
+ =begin
114
+ pIes = []
115
+ pepcharges.each do |a|
116
+ pIes << [a, calc_PI(a)]
117
+ end
118
+ =end
119
+ #out_pi = pepcharges.map {|a| calc_PI(a)}
120
+
121
+ #require 'yaml'
122
+ #File.open('pi_list.yml', 'w') {|f| YAML.dump( pi, f) }
data/lib/ms/merger.rb ADDED
@@ -0,0 +1,101 @@
1
+ require_relative '../progress'
2
+
3
# Merges peaks that lie within a given m/z distance of each other.
#
# Spectra are passed as a Hash of rt => [[mzs], [ints]]. After Merger.merge a
# merged peak is represented by a Hash
#   { [weighted_mz, percent_first, percent_second] => [mz_a, mz_b] }
# in the m/z list and by the pair [int_a, int_b] in the intensity list;
# Merger.compact flattens those back to plain numbers.
class Merger
  # Returns the m/z of a peak: the weighted m/z stored in the key of a merged
  # peak Hash, or the value itself for a plain peak.
  def self.mz_value(arr)
    arr.is_a?(Hash) ? arr.keys[0][0] : arr
  end

  # Returns the intensity of a peak: a plain number is returned as is, a
  # (possibly nested) pair is summed as last + int_value(first).
  def self.int_value(arr)
    arr.is_a?(Array) ? arr.last + int_value(arr.first) : arr
  end

  # Intensity-weighted average m/z.
  #
  # values  - Array of peaks (numbers or merged-peak Hashes), or a Hash whose
  #           values are flattened into such an Array.
  # weights - intensities, index-aligned with values.
  #
  # Always divides as a Float so integer inputs are not truncated.
  def self.w_avg(values, weights)
    values = values.values.flatten if values.is_a?(Hash)
    weighted_sum = 0.0
    values.each_with_index do |value, i|
      weighted_sum += mz_value(value) * int_value(weights[i])
    end
    weighted_sum / weights.flatten.inject(:+)
  end

  # Merges overlapping peaks in every spectrum.
  #
  # spectra    - Hash of rt => [[mzs], [ints]]; not modified.
  # half_range - a peak is merged with its successor (in m/z order) when the
  #              successor lies within mz..mz + half_range.
  #
  # Returns a new Hash of rt => [mzs, ints] in which merged peaks use the
  # Hash / pair representation described on the class.
  def self.merge(spectra, half_range)
    started = Time.now
    merged = {}
    total = spectra.size
    spectra.each_with_index do |(rt, val), done|
      Progress.progress("Merging Overlaps:", ((done / total.to_f) * 100).to_i)
      merged[rt] = merge_spectrum(val, half_range)
    end
    Progress.progress("Merging Overlaps:", 100, Time.now - started)
    puts ''
    merged
  end

  # Replaces merged-peak Hashes with their weighted m/z and intensity pairs
  # with their sum. Modifies the arrays inside spectra in place and returns
  # spectra.
  def self.compact(spectra)
    started = Time.now
    total = spectra.size
    spectra.each_with_index do |(rt, val), done|
      Progress.progress("Merge Finishing:", ((done / total.to_f) * 100).to_i)
      mzs, ints = val
      mzs.each_with_index do |mz, i|
        next unless mz.is_a?(Hash)
        mzs[i] = mz.keys[0][0]
        ints[i] = ints[i].flatten.inject(:+)
      end
      spectra[rt] = [mzs, ints]
    end
    Progress.progress("Merge Finishing:", 100, Time.now - started)
    puts ''
    spectra
  end

  # Merges the peaks of one spectrum ([[mzs], [ints]]) and returns [mzs, ints].
  def self.merge_spectrum(val, half_range)
    peaks = val.transpose.sort_by { |peak| peak[0] }
    return [[], []] if peaks.empty?

    mzs, ints = peaks.transpose
    mzs.each_with_index do |mz, i|
      # A Hash is the product of the previous iteration's merge; it is not
      # merged any further.
      next if mz.is_a?(Hash)
      neighbour = mzs[i + 1]
      next if neighbour.nil? || !(mz..mz + half_range).cover?(neighbour)

      pair_mz = [mz, neighbour]
      pair_int = [ints[i], ints[i + 1]]
      total = pair_int.flatten.inject(:+).to_f
      frac1 = (ints[i] / total) * 100
      frac2 = (ints[i + 1] / total) * 100

      # The merged peak takes the successor's slot; the current slot is
      # emptied and removed by the compact calls below.
      mzs[i + 1] = { [w_avg(pair_mz, pair_int), frac1, frac2] => pair_mz }
      ints[i + 1] = pair_int
      mzs[i] = nil
      ints[i] = nil
    end
    [mzs.compact, ints.compact]
  end
  private_class_method :merge_spectrum
end
98
+
99
+ #test
100
+ #data = {1 => [[1.0,1.5,1.7,3.0,4.0,5.0,6.0,7.0,8.0,9.0],[10,9,8,7,6,5,4,3,2,1]], 2 => [[1,2,3,4,5,6,7,8,9],[9,8,7,6,5,4,3,2,1]]}
101
+ #p Merger.merge(data,0.5)
@@ -0,0 +1,67 @@
1
+
2
+ require 'nokogiri'
3
+ require 'progress'
4
+ require 'mspire/mzml'
5
+
6
# Wraps simulated spectra in an Mspire::Mzml document.
class Mzml_Wrapper

  # Builds the mzML document.
  #
  # spectra - Hash of rt => [[mzs], [ints]]. One Mspire::Mzml::Spectrum is
  #           created per entry, with ids "scan=1", "scan=2", ... in iteration
  #           order, and progress is reported through Progress.progress.
  #
  # The accession strings passed to describe!/describe_many! are controlled
  # vocabulary terms (presumably PSI-MS / Unit Ontology ids - verify against
  # the CV).
  def initialize(spectra)
    @start = Time.now

    specs = spectra.each_with_index.map do |(rt, data), index|
      Progress.progress("Converting to mzml:", ((index.to_f / spectra.size) * 100).to_i)

      Mspire::Mzml::Spectrum.new("scan=#{index + 1}") do |spec|
        spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
        mz_array = Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514')
        intensity_array = Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
        spec.data_arrays = [mz_array, intensity_array]
        spec.scan_list = Mspire::Mzml::ScanList.new do |scan_list|
          # The scan carries the retention time of this spectrum.
          scan_list << Mspire::Mzml::Scan.new { |scan| scan.describe!('MS:1000016', rt, 'UO:0000010') }
        end
      end
    end

    @mzml = Mspire::Mzml.new do |mzml|
      mzml.id = 'ms1'
      mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
      mzml.file_description = Mspire::Mzml::FileDescription.new do |fd|
        fd.file_content = Mspire::Mzml::FileContent.new
        fd.source_files << Mspire::Mzml::SourceFile.new
      end

      instrument = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
      mzml.instrument_configurations << instrument
      mzml.software_list << Mspire::Mzml::Software.new

      processing = Mspire::Mzml::DataProcessing.new("did_nothing")
      mzml.data_processing_list << processing

      mzml.run = Mspire::Mzml::Run.new("simulated_run", instrument) do |run|
        run.spectrum_list = Mspire::Mzml::SpectrumList.new(processing, specs)
      end
    end

    Progress.progress("Converting to mzml:", 100, Time.now - @start)
    puts ''
  end

  # Serializes the document by delegating to Mspire::Mzml#to_xml with the
  # given file argument; returns whatever that call returns.
  def to_xml(file)
    @mzml.to_xml(file)
  end

end
67
+
data/lib/ms/noise.rb ADDED
@@ -0,0 +1,51 @@
1
+
2
+ require 'progress'
3
+ require 'ms/rt/rt_helper'
4
+
5
module MS
  # Generates random noise peaks and random spectrum drop-outs for the
  # retention times held by Sim_Spectra.
  module Noise
    module_function

    # Creates noise peaks for every retention time in Sim_Spectra.r_times.
    #
    # density - number of noise peaks per retention time.
    # max_mz  - upper bound of the random m/z values (lower bound is 0.0).
    #
    # Intensities are drawn from 50..1000 via RThelper.RandomFloat.
    # Returns a Hash of rt => [[mzs], [ints]].
    def noiseify(density, max_mz)
      started = Time.now
      noise = {}
      r_times = Sim_Spectra.r_times

      r_times.each_with_index do |rt, done|
        Progress.progress("Adding noise:", ((done.to_f / r_times.size) * 100).to_i)

        nmzs = []
        nints = []
        density.times do
          nmzs << RThelper.RandomFloat(0.0, max_mz)
          nints << RThelper.RandomFloat(50, 1000)
        end
        noise[rt] = [nmzs, nints]
      end

      Progress.progress("Adding noise:", 100, Time.now - started)
      puts ''

      noise
    end

    # Removes randomly chosen retention times from Sim_Spectra.r_times.
    #
    # drop_percentage - multiplied by the number of retention times to get the
    #                   number of drops, so it acts as a fraction (0.5 removes
    #                   half of them).
    #
    # The array returned by Sim_Spectra.r_times is modified in place and
    # returned.
    def spec_drops(drop_percentage)
      r_times = Sim_Spectra.r_times
      num_drops = (drop_percentage * r_times.length).to_i
      num_drops.times do
        break if r_times.empty?
        # Draw from the current length so every draw removes exactly one entry.
        r_times.delete_at(rand(r_times.length))
      end
      r_times
    end

  end
end
@@ -0,0 +1,31 @@
1
+
2
# Numeric helpers for peak shapes and random numbers.
module RThelper

  module_function

  # Gaussian probability density with mean mu and standard deviation sd,
  # evaluated at x:
  #   exp(-(x - mu)**2 / (2 * sd**2)) / sqrt(2 * PI * sd**2)
  # The exponent's denominator is 2 * sd**2, which is what makes the curve
  # integrate to 1. All arguments are converted with to_f.
  def normalized_gaussian(x, mu, sd)
    x = x.to_f
    mu = mu.to_f
    sd = sd.to_f
    Math.exp(-((x - mu)**2) / (2 * sd**2)) / Math.sqrt(2 * Math::PI * sd**2)
  end

  # Bell curve of height h centred on mu: h * exp(-(x - mu)**2 / sd**2).
  # Note that the exponent divides by sd**2 with no factor of 2, so sd is a
  # width parameter here rather than a standard deviation in the strict sense.
  def gaussian(x, mu, sd, h)
    x = x.to_f
    mu = mu.to_f
    sd = sd.to_f
    h = h.to_f
    h * Math.exp(-(x - mu)**2 / (sd**2))
  end

  # Returns a random Float r with a <= r < b (or a itself when a == b).
  # Both bounds are converted with to_f.
  def RandomFloat(a, b)
    a = a.to_f
    b = b.to_f
    a + rand * (b - a)
  end
end
31
+
@@ -0,0 +1,81 @@
1
+
2
+ require 'time'
3
+ require 'progress'
4
+ require 'ms/sim_feature'
5
+ require 'ms/rt/weka'
6
+ require 'ms/sim_peptide'
7
+ require 'ms/rt/rt_helper'
8
+
9
module MS
  # Assigns predicted retention times and elution windows to peptides.
  module Rtgenerator

    module_function

    # Predicts retention times and intensities (through MS::Weka) and maps
    # each peptide onto the scan times in Sim_Spectra.r_times.
    #
    # peptides - objects responding to p_rt, p_rt=, p_rt_i= and set_rts(a, b).
    # one_d    - truthy selects the wide elution window (300.0 before and 701
    #            after the peptide's retention time); otherwise 100.0 and 300.
    #
    # For each peptide the predicted p_rt is scaled by 10**-2 and used as a
    # fraction of the largest scan time. p_rt becomes the first scan time at
    # or above that point (or the largest scan time when the fraction exceeds
    # 1) and p_rt_i its index. set_rts then receives the indices of the first
    # scan times at or above p_rt - head and p_rt + tail, falling back to the
    # first and last index when no scan time qualifies.
    #
    # Returns the peptides as returned by MS::Weka.predict_rts.
    def generateRT(peptides, one_d)
      started = Time.now
      r_times = Sim_Spectra.r_times

      # Gets retention times from the weka model
      peptides = MS::Weka.predict_rts(peptides)
      MS::Weka.predict_ints(peptides)

      max_rt = r_times.max
      last_index = r_times.length - 1
      head_length, tail_length = one_d ? [300.0, 701] : [100.0, 300]

      peptides.each_with_index do |pep, ind|
        Progress.progress("Generating retention times:", (((ind + 1) / peptides.size.to_f) * 100).to_i)

        # Fit retention times into scan times
        p_rt = pep.p_rt * 10**-2
        pep.p_rt = p_rt > 1 ? max_rt : r_times.find { |time| time >= (p_rt * max_rt) }
        pep.p_rt_i = r_times.index(pep.p_rt)

        if pep.p_rt.nil?
          puts "\n\n\t#{pep} TIME-> #{p_rt*max_rt} :: Peptide not predicted in time range: try increasing run time\n\n."
          next
        end

        # Give peptide retention times. Both bounds are indices into r_times,
        # so the fallbacks must be indices as well, not scan-time values.
        first = r_times.index { |time| time >= (pep.p_rt - head_length) } || 0
        last = r_times.index { |time| time >= (pep.p_rt + tail_length) } || last_index

        pep.set_rts(first, last)
      end

      Progress.progress("Generating retention times:", 100, Time.now - started)
      puts ""

      peptides
    end
  end
end
data/lib/ms/rt/weka.rb ADDED
@@ -0,0 +1,150 @@
1
+
2
+ require 'csv'
3
+
4
module MS
  # Bridges to the Weka command line tools: writes peptide features to an ARFF
  # file, runs a stored model over it and reads the predictions back.
  module Weka
    # Attribute columns of the retention-time ARFF file, in order.
    RT_ATTRIBUTES = %w[A R N D B C E Q Z G H I L K M F P S T W Y V J rt].freeze
    # Attribute columns of the intensity ARFF file, in order.
    INT_ATTRIBUTES = %w[mz charge mass rt A R N D B C E Q Z G H I L K M F P S T W Y V intensity].freeze
    # First decimal number on a line of Weka's prediction output.
    PREDICTION_PATTERN = /(\d*\.\d{0,3}){1}/

    #James Dalg
    module_function

    # Predicts a retention time for every peptide.
    #
    # peptides - objects responding to aa_counts and p_rt=.
    #
    # Writes the peptides' aa_counts to a temporary ARFF file named after the
    # current nanosecond count, runs the MultilayerPerceptron classifier on it
    # and stores the n-th prediction found in the output in the n-th peptide's
    # p_rt. Both temporary files are removed afterwards, even on error.
    # Returns peptides.
    def predict_rts(peptides)
      #mz,charge,intensity,rt,A,R,N,D,B,C,E,Q,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V,J,mass,hydro,pi
      data = peptides.map { |pep| pep.aa_counts }
      arff = make_rt_arff(Time.now.nsec.to_s, data)
      output = "#{arff}.out"
      begin
        system("java weka.classifiers.functions.MultilayerPerceptron -T #{arff} -l bin/weka/M5Rules.model -p 24 > #{output}")
        read_predictions(output).each_with_index { |rt, i| peptides[i].p_rt = rt }
      ensure
        remove_files(arff, output)
      end
      peptides
    end

    # Predicts an intensity for every peptide.
    #
    # peptides - objects responding to mono_mz, charge, mass, p_rt, aa_counts
    #            and p_int=.
    #
    # Works like predict_rts but feeds [mono_mz, charge, mass, p_rt] followed
    # by aa_counts to the M5P model and stores the predictions in p_int.
    # Returns peptides.
    def predict_ints(peptides)
      data = peptides.map do |pep|
        [pep.mono_mz, pep.charge, pep.mass, pep.p_rt].concat(pep.aa_counts)
      end
      arff = make_int_arff(Time.now.nsec.to_s, data)
      output = "#{arff}.out"
      begin
        system("java weka.classifiers.trees.M5P -T #{arff} -l bin/weka/M5P.model -p 27 > #{output}")
        read_predictions(output).each_with_index { |intensity, i| peptides[i].p_int = intensity }
      ensure
        remove_files(arff, output)
      end
      peptides
    end

    #James Dalg
    # Writes the retention-time ARFF file "<sourcefile>.arff" with one data row
    # per entry of training and returns its path. sourcefile is not modified.
    def make_rt_arff(sourcefile, training)
      write_arff(sourcefile, RT_ATTRIBUTES, training)
    end

    #James Dalg
    # Writes the intensity ARFF file "<sourcefile>.arff" with one data row per
    # entry of training and returns its path. sourcefile is not modified.
    def make_int_arff(sourcefile, training)
      write_arff(sourcefile, INT_ATTRIBUTES, training)
    end

    # Writes the ARFF header for the given attribute names followed by the
    # rows as CSV, and returns the path of the file written.
    def write_arff(sourcefile, attributes, rows)
      path = "#{sourcefile}.arff"
      header = ["%", "%", "@RELATION molecularinfo"]
      header.concat(attributes.map { |name| "@ATTRIBUTE #{name} NUMERIC" })
      header.concat(["@DATA", "%", "% "])
      File.open(path, "wb") { |f| f.puts header.join("\n") }
      # Append all rows through a single CSV handle.
      CSV.open(path, "a") do |csv|
        rows.each { |row| csv << row }
      end
      path
    end

    # Returns, as Floats, the first decimal number of every line of the file
    # that contains one.
    def read_predictions(path)
      predictions = []
      File.foreach(path) do |line|
        match = PREDICTION_PATTERN.match(line)
        predictions << match[0].to_f if match
      end
      predictions
    end

    # Deletes each of the given files that exists.
    def remove_files(*paths)
      paths.each { |path| File.delete(path) if File.exist?(path) }
    end
  end
end
@@ -0,0 +1,92 @@
1
+
2
module MS
  # Digests the proteins of a FASTA file and turns the resulting sequences
  # into MS::Peptide objects with charges derived from the configured pH.
  class Sim_Digester
    # Upper bound on the number of digested sequences that are kept.
    MAX_PEPTIDES = 50_000

    # Path of the temporary file holding one digested sequence per line.
    attr_accessor :digested_file

    # digestor - key used to look up the enzyme in Mspire::Digester.
    # pH       - pH at which peptide charges are evaluated.
    def initialize(digestor, pH)
      @digestor = digestor
      @pH = pH
      @digested_file = ".#{Time.now.nsec}"
    end

    # Digests every protein in the FASTA file and writes the unique resulting
    # sequences, one per line, to digested_file. When more than MAX_PEPTIDES
    # unique sequences result, a random subset of MAX_PEPTIDES distinct
    # sequences is kept.
    #
    # Prints and returns the number of sequences written.
    def create_digested_file(file)
      enzyme = Mspire::Digester[@digestor]

      digested = []
      read_proteins(file).each do |protein|
        digested.concat(enzyme.digest(protein).to_a)
      end
      digested.uniq!

      # sample draws without replacement, so the kept sequences stay unique.
      digested = digested.sample(MAX_PEPTIDES) if digested.length > MAX_PEPTIDES

      File.open(@digested_file, "w") do |out|
        digested.each { |sequence| out.puts(sequence) }
      end

      num_digested = digested.size
      puts "Number of peptides: #{num_digested}"
      num_digested
    end

    # Digests the FASTA file and builds the peptides.
    #
    # For every digested sequence the charge at the configured pH is computed
    # with charge_at_pH(identify_potential_charges(seq), pH). One MS::Peptide
    # is created for the floor and one for the ceiling of that charge, skipping
    # a charge of 0 and creating only one peptide when both are equal.
    #
    # The temporary digested file is deleted afterwards, even on error.
    # Returns the Array of peptides.
    def digest(file)
      start = Time.now
      num_digested = create_digested_file(file)
      peptides = []

      begin
        File.foreach(@digested_file).each_with_index do |line, i|
          peptide_seq = line.chomp
          Progress.progress("Creating peptides '#{file}':", ((i / num_digested.to_f) * 100.0).to_i)

          charge_ratio = charge_at_pH(identify_potential_charges(peptide_seq), @pH)
          [charge_ratio.floor, charge_ratio.ceil].uniq.each do |charge|
            peptides << MS::Peptide.new(peptide_seq, charge) unless charge == 0
          end
        end
      ensure
        File.delete(@digested_file) if File.exist?(@digested_file)
      end

      Progress.progress("Creating peptides '#{file}':", 100, Time.now - start)
      puts ''
      peptides
    end

    private

    # Reads the protein sequences of a FASTA file. Lines containing ">" and
    # empty lines separate records; all other lines are joined (without their
    # line ending) into the current sequence.
    def read_proteins(file)
      joined = ""
      File.foreach(file) do |line|
        if line =~ />/ || line == "\n"
          joined << ";"
        else
          joined << line.chomp
        end
      end
      joined.split(/;/).delete_if { |sequence| sequence == "" }
    end
  end
end