mspire 0.3.1 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/Rakefile +2 -2
  2. data/bin/bioworks_to_pepxml.rb +15 -3
  3. data/bin/ms_to_lmat.rb +2 -1
  4. data/bin/sqt_group.rb +26 -0
  5. data/changelog.txt +36 -0
  6. data/lib/ms/msrun.rb +3 -1
  7. data/lib/ms/parser/mzdata/dom.rb +14 -14
  8. data/lib/ms/scan.rb +3 -3
  9. data/lib/mspire.rb +1 -1
  10. data/lib/sample_enzyme.rb +39 -0
  11. data/lib/spec_id.rb +18 -0
  12. data/lib/spec_id/aa_freqs.rb +6 -9
  13. data/lib/spec_id/digestor.rb +16 -17
  14. data/lib/spec_id/mass.rb +63 -1
  15. data/lib/spec_id/parser/proph.rb +101 -2
  16. data/lib/spec_id/precision/filter.rb +3 -2
  17. data/lib/spec_id/precision/filter/cmdline.rb +3 -1
  18. data/lib/spec_id/precision/filter/output.rb +1 -0
  19. data/lib/spec_id/precision/prob.rb +88 -21
  20. data/lib/spec_id/precision/prob/cmdline.rb +28 -16
  21. data/lib/spec_id/precision/prob/output.rb +8 -2
  22. data/lib/spec_id/proph/pep_summary.rb +25 -12
  23. data/lib/spec_id/sequest.rb +28 -0
  24. data/lib/spec_id/sequest/pepxml.rb +142 -197
  25. data/lib/spec_id/sqt.rb +349 -0
  26. data/lib/spec_id/srf.rb +33 -23
  27. data/lib/validator.rb +40 -57
  28. data/lib/validator/aa.rb +3 -90
  29. data/lib/validator/aa_est.rb +112 -0
  30. data/lib/validator/cmdline.rb +163 -31
  31. data/lib/validator/decoy.rb +15 -7
  32. data/lib/validator/digestion_based.rb +5 -4
  33. data/lib/validator/q_value.rb +32 -0
  34. data/script/peps_per_bin.rb +67 -0
  35. data/script/sqt_to_meta.rb +24 -0
  36. data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
  37. data/specs/bin/fasta_shaker_spec.rb +2 -2
  38. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
  39. data/specs/bin/filter_and_validate_spec.rb +25 -6
  40. data/specs/bin/ms_to_lmat_spec.rb +2 -2
  41. data/specs/bin/prob_validate_spec.rb +5 -3
  42. data/specs/sample_enzyme_spec.rb +86 -1
  43. data/specs/spec_helper.rb +11 -9
  44. data/specs/spec_id/bioworks_spec.rb +2 -1
  45. data/specs/spec_id/precision/filter_spec.rb +5 -5
  46. data/specs/spec_id/precision/prob_spec.rb +0 -67
  47. data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
  48. data/specs/spec_id/protein_summary_spec.rb +4 -4
  49. data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
  50. data/specs/spec_id/sequest_spec.rb +38 -0
  51. data/specs/spec_id/sqt_spec.rb +111 -3
  52. data/specs/spec_id_spec.rb +2 -0
  53. data/specs/transmem/phobius_spec.rb +3 -1
  54. data/specs/transmem/toppred_spec.rb +1 -1
  55. data/specs/validator/aa_est_spec.rb +66 -0
  56. data/specs/validator/aa_spec.rb +1 -68
  57. data/specs/validator/background_spec.rb +2 -0
  58. data/specs/validator/bias_spec.rb +3 -27
  59. data/specs/validator/decoy_spec.rb +2 -2
  60. data/specs/validator/transmem_spec.rb +2 -1
  61. data/test_files/small.sqt +87 -0
  62. metadata +312 -293
@@ -3,10 +3,12 @@ require 'validator'
3
3
  class Validator::Decoy < Validator
4
4
  include Precision::Calculator::Decoy
5
5
 
6
+ # a Regexp (if concatenated) or a String (the filename of separate run)
6
7
  attr_accessor :constraint
7
8
 
8
9
  attr_accessor :decoy_on_match
9
10
  attr_accessor :correct_wins
11
+ attr_accessor :decoy_to_target_ratio
10
12
 
11
13
  attr_accessor :last_pep_was_decoy
12
14
 
@@ -16,13 +18,19 @@ class Validator::Decoy < Validator
16
18
 
17
19
  attr_reader :normal_peps_just_submitted
18
20
 
19
- def initialize(constraint=nil, decoy_on_match = true, correct_wins = true)
20
- @decoy_on_match = decoy_on_match
21
- @correct_wins = correct_wins
22
- @constraint = constraint
21
+ DEFAULTS = {
22
+ :decoy_on_match => true,
23
+ :correct_wins => true,
24
+ :decoy_to_target_ratio => 1.0,
25
+ }
26
+
27
+ def initialize(opts={})
28
+ merged = DEFAULTS.merge(opts)
29
+ @constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
23
30
  end
24
31
 
25
32
  # returns [normal, decoy]
33
+ # reads the full protein reference
26
34
  def partition(peps)
27
35
  if @decoy_on_match
28
36
  if @correct_wins
@@ -74,15 +82,15 @@ class Validator::Decoy < Validator
74
82
  @normal_peps_just_submitted = normal
75
83
  @increment_normal += normal.size
76
84
  @increment_decoy += decoy.size
77
- calc_precision(@increment_normal, @increment_decoy)
85
+ calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
78
86
  end
79
87
 
80
88
  def pephit_precision(peps, separate_peps=nil)
81
89
  if separate_peps
82
- calc_precision(peps.size, separate_peps.size)
90
+ calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
83
91
  else
84
92
  (norm, decoy) = partition(peps)
85
- calc_precision(norm.size, decoy.size)
93
+ calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
86
94
  end
87
95
  end
88
96
 
@@ -6,7 +6,8 @@ require 'spec_id/sequest/params'
6
6
  # SpecID::Pep objects using the pephit_precision method.
7
7
  class Validator::DigestionBased < Validator
8
8
  DEFAULTS = {
9
- :false_to_total_ratio => 1.0,
9
+ #:false_to_total_ratio => 1.0, # disabled because this needs to be set
10
+ # explicitly
10
11
  :background => 0.0,
11
12
  }
12
13
 
@@ -42,13 +43,13 @@ class Validator::DigestionBased < Validator
42
43
  # returns [num_tps, num_fps]
43
44
  def calc_precision_prep(num_tps, num_fps)
44
45
  total_peps_passing_partition = num_tps + num_fps
45
- num_fps = adjust_fps_for_background(num_tps, num_fps, @background)
46
+ num_fps = adjust_fps_for_background(num_tps, num_fps, background)
46
47
  ## we must use the false_to_total_ratio to estimate how many are really
47
48
  ## incorrect!
48
49
  # FALSE/TOTAL = FALSE(found)/TOTAL(found)
49
50
  # TOTAL(found) = FALSE(found) * TOTAL/FALSE
50
51
  # = FALSE(found) / (FALSE/TOTAL)
51
- total_false = num_fps / @false_to_total_ratio
52
+ total_false = num_fps / false_to_total_ratio
52
53
  # NOTE: the partition algorithm drops peptides that are transmembrane
53
54
  # under certain options. Thus, the total false estimate must be tempered
54
55
  # by this lower number of total peptides.
@@ -60,7 +61,7 @@ class Validator::DigestionBased < Validator
60
61
  # assumes partition returns (tps, fps)
61
62
  def set_false_to_total_ratio(peps)
62
63
  (tps, fps) = partition(peps)
63
- @false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
64
+ self.false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
64
65
  self
65
66
  end
66
67
 
@@ -0,0 +1,32 @@
1
+
2
+
3
+ # from percolator
4
+ # This is a trivial class (since q-values are so straightforward with regards
5
+ # to precision), but it allows us to work with q-values using the same
6
+ # interface as all other validators
7
+ class Validator::QValue
8
+
9
+ # objs should respond_to :q_value
10
+ # q-values: 0.0 means no false discoveries, 0.5 means 50% false discoveries
11
+ # 1 - (the largest q value) is the precision
12
+ def precision(objs)
13
+ return 1.0 if objs.size == 0
14
+ largest_q_value = objs.map {|v| v.q_value }.max
15
+ prec = 1.0 - largest_q_value
16
+ end
17
+
18
+
19
+ # objs should respond_to :q_value
20
+ # These should be added from low q-value to high q-value
21
+ # The last q-value added determines the precision
22
+ def increment_precision(objs)
23
+ if objs.is_a?(SpecID::Pep) or objs.is_a?(SpecID::Prot)
24
+ objs = [objs]
25
+ end
26
+ precision(objs)
27
+ end
28
+
29
+ alias_method :pephit_precision, :precision
30
+ alias_method :prothit_precision, :precision
31
+ alias_method :increment_pephits_precision, :increment_precision
32
+ end
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'generator'
4
+ require 'optparse'
5
+
6
+ require 'fasta'
7
+ require 'sample_enzyme'
8
+ require 'spec_id/digestor'
9
+ require 'spec_id/mass'
10
+ require 'vec'
11
+
12
+ opt = {}
13
+ opt[:missed_cleavages] = 0 # number of missed cleavages allowed
14
+ opt[:bin_size] = 0.001 # ~ parts per million
15
+ opt[:min] = 300.0
16
+ opt[:max] = 4500.0
17
+ opt[:h_plus] = 1.0
18
+
19
+ opts = OptionParser.new do |op|
20
+ op.banner = "usage: #{File.basename(__FILE__)} *.fasta"
21
+ op.separator "Outputs a close estimate of number of peptides per bin."
22
+ op.separator "Uses m+H+ as the peptide mass."
23
+ op.separator "[for speed, assumes that there is a peptide mass close to the extremes]"
24
+ op.on("-b", "--bin_size <F>", Float, "size of bins [#{opt[:bin_size]}]") {|v| opt[:bin_size] = v }
25
+ op.on("-x", "--max <F>", Float, "max mass to accept [#{opt[:max]}]") {|v| opt[:max] = v }
26
+ op.on("-n", "--min <F>", Float, "min mass to accept [#{opt[:min]}]") {|v| opt[:min] = v }
27
+ op.on("-h", "--h_plus <F>", Float, "value of H+ to use [#{opt[:h_plus]}]") {|v| opt[:h_plus] = v }
28
+ op.on("-m", "--missed_cleavages <N>", Integer, "num missed cleavages [#{opt[:missed_cleavages]}]") {|v| opt[:missed_cleavages] = v }
29
+ end
30
+
31
+ opts.parse!
32
+
33
+ if ARGV.size == 0
34
+ puts opts.to_s
35
+ exit
36
+ end
37
+
38
+ min_mass = opt[:min]
39
+ max_mass = opt[:max]
40
+
41
+ ARGV.each do |file|
42
+ fasta = Fasta.new(file)
43
+ uniq_aaseqs = fasta.map do |prot|
44
+ SampleEnzyme.tryptic(prot.aaseq, opt[:missed_cleavages])
45
+ end.flatten.uniq
46
+
47
+ masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs)
48
+ passing_masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs).select do |mh|
49
+ ((mh >= min_mass) and (mh <= max_mass))
50
+ end
51
+
52
+ ## warn if the masses aren't close to the end points
53
+ if (max_mass - passing_masses.max) > 1.0
54
+ warn "highest mass is not that close to max: #{passing_masses.max}"
55
+ end
56
+ if (passing_masses.min - min_mass) > 1.0
57
+ warn "lowest mass is not that close to min: #{passing_masses.min}"
58
+ end
59
+
60
+ num_bins = (max_mass - min_mass) / opt[:bin_size]
61
+
62
+ (bins, freqs) = VecD.new(passing_masses).histogram(num_bins)
63
+
64
+ # report
65
+ puts "#{file}: #{freqs.avg}"
66
+
67
+ end
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/ruby -s
2
+
3
+ require 'optparse'
4
+
5
+ $outfile = 'meta.sqm'
6
+ opts = OptionParser.new do |op|
7
+ op.banner = "usage: #{File.basename(__FILE__)} <file>.sqt ..."
8
+ op.separator "outputs meta.sqm (a sqt meta file)"
9
+ op.on("-o", "--outfile <file>", "currently: #{$outfile}") {|v| $outfile = v}
10
+ end
11
+
12
+ opts.parse!
13
+
14
+ if ARGV.size == 0
15
+ puts opts.to_s
16
+ exit
17
+ end
18
+
19
+ File.open($outfile, 'w') do |out|
20
+ ARGV.each do |file|
21
+ out.puts File.expand_path(file)
22
+ end
23
+ end
24
+
@@ -41,7 +41,7 @@ describe 'bioworks_to_pepxml.rb' do
41
41
  cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
42
42
  ## FILES EXIST:
43
43
  prc = proc {|file|
44
- file.should exist
44
+ file.exist_as_a_file?.should be_true
45
45
  beginning = IO.readlines(file)[0,50].join("\n")
46
46
  $XML_SANITY_LINES.each do |line|
47
47
  beginning.should include(line)
@@ -55,7 +55,7 @@ describe 'bioworks_to_pepxml.rb' do
55
55
  ## COPY MZXML:
56
56
  %w(000 020).each do |file|
57
57
  mzxml_file = File.join(@out_path, "#{file}.mzXML")
58
- mzxml_file.should exist
58
+ mzxml_file.exist_as_a_file?.should be_true
59
59
  end
60
60
  ## CLEANUP:
61
61
  unless @no_delete then FileUtils.rm_rf(@out_path) end
@@ -68,7 +68,7 @@ describe 'bioworks_to_pepxml.rb' do
68
68
  db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
69
69
  IO.read(@tf_params).should =~ db_re
70
70
  prc = proc {|file|
71
- file.should exist
71
+ file.exist_as_a_file?.should be_true
72
72
  IO.read(file).should_not =~ db_re
73
73
  }
74
74
  _basic(cmd, prc)
@@ -200,13 +200,13 @@ EDITPEP
200
200
  end
201
201
 
202
202
  def fastalns(fn)
203
- fn.should exist
203
+ fn.exist_as_a_file?.should be_true
204
204
  IO.read(fn).split("\n")
205
205
  end
206
206
 
207
207
  # returns the fasta object proteins
208
208
  def fastap(fn)
209
- @f.should exist
209
+ @f.exist_as_a_file?.should be_true
210
210
  Fasta.new(fn).prots
211
211
  end
212
212
 
@@ -2,10 +2,10 @@
2
2
  pephits_precision:
3
3
  - validator: decoy
4
4
  value: 0.992932862190813
5
- - validator: badAA
5
+ - validator: badAAEst
6
6
  value: 0.178006237270664
7
- - validator: badAA
8
- value: -0.0247654296463377
7
+ - validator: badAAEst
8
+ value: -0.0247654296463379
9
9
  - validator: badAA
10
10
  value: 0.301413862599215
11
11
  - validator: bias
@@ -94,22 +94,19 @@ params:
94
94
  :decoy_on_match: true
95
95
  :correct_wins: true
96
96
  - :calculated_background: 0.127208480565371
97
- :type: badAA
98
- :class: Validator::AA
97
+ :type: badAAEst
98
+ :class: Validator::AAEst
99
99
  :background: 0.001
100
100
  :frequency: 0.0147528119278054
101
- :false_to_total_ratio: 1.0
102
101
  - :calculated_background: 0.402826855123675
103
- :type: badAA
104
- :class: Validator::AA
102
+ :type: badAAEst
103
+ :class: Validator::AAEst
105
104
  :background: 0.0
106
105
  :frequency: 0.0463510332199843
107
- :false_to_total_ratio: 1.0
108
106
  - :calculated_background: 0.127208480565371
109
107
  :type: badAA
110
108
  :class: Validator::AA
111
109
  :background: 0.001
112
- :frequency:
113
110
  :false_to_total_ratio: 0.180662732637313
114
111
  - :calculated_background: 0.773851590106007
115
112
  :type: bias
@@ -1,3 +1,5 @@
1
+ require 'yaml'
2
+
1
3
  require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
4
 
3
5
  require 'spec_id/precision/filter'
@@ -80,7 +82,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
80
82
  `#{run_normal}`
81
83
  end
82
84
  structs = [ht_file, hs_file].map do |file|
83
- file.should exist
85
+ file.exist_as_a_file?.should be_true
84
86
  struct = YAML.load_file(file)
85
87
  File.unlink file
86
88
  struct
@@ -104,7 +106,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
104
106
 
105
107
  it 'handles multiple validators of the same kind (except, of course, decoy)' do
106
108
 
107
- struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa C,true,0.001 --bad_aa E,true --bad_aa C,false,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
109
+ struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa_est C,0.001 --bad_aa_est E --bad_aa C,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --fasta #{@small_fasta_file} --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
108
110
  frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
109
111
 
110
112
  ## Pephits precision:
@@ -121,8 +123,25 @@ describe 'filter_and_validate.rb on small bioworks file' do
121
123
  frp = frozen['params']
122
124
  stp = struct['params']
123
125
 
126
+ #puts "frozen validators:"
127
+ #p frp['validators']
128
+
129
+ #puts "seen validators:"
130
+ #p stp['validators']
131
+
124
132
  frp['validators'].zip(stp['validators']) do |f,s|
125
- f.should == s
133
+ if f.is_a? Hash
134
+ f.keys.each do |k|
135
+ if k == :file or k == :transmem_file
136
+ File.basename(f[k]).should == File.basename(s[k].gsub('\\','/'))
137
+ else
138
+ s[k].should == f[k]
139
+ #f[k].should == s[k]
140
+ end
141
+ end
142
+ else
143
+ f.should == s
144
+ end
126
145
  end
127
146
 
128
147
  %w(ties prefilter top_hit_by decoy_on_match postfilter include_ties_in_top_hit_postfilter hits_together proteins include_ties_in_top_hit_prefilter).each do |k|
@@ -148,9 +167,9 @@ describe 'filter_and_validate.rb on small bioworks file' do
148
167
  text_table = IO.read(@table_output_file)
149
168
 
150
169
  # frozen
151
- headings_re = Regexp.new( %w(num decoy badAA badAA badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
152
- data_re = Regexp.new( %w(peps 283 0.993 0.178 -0.025 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
153
- prot_re = Regexp.new( %w(106 0.972 0.019 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
170
+ headings_re = Regexp.new( %w(num decoy badAAEst badAAEst badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
171
+ data_re = Regexp.new( %w(peps 283 0.993 0.178006 -0.024765 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
172
+ prot_re = Regexp.new( %w(106 0.972 0.018868 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
154
173
  text_table.should =~ headings_re
155
174
  text_table.should =~ data_re
156
175
  text_table.should =~ prot_re
@@ -16,7 +16,7 @@ describe 'ms_to_lmat.rb' do
16
16
  cmd = "#{@cmd} #{@mzxml} --ascii"
17
17
  `#{cmd}`
18
18
  newfile = @mzxml.sub(".mzXML", ".lmata")
19
- newfile.should exist
19
+ newfile.exist_as_a_file?.should be_true
20
20
  IO.read(newfile).should == IO.read(@ans_lmata)
21
21
  File.unlink(newfile)
22
22
  end
@@ -26,7 +26,7 @@ describe 'ms_to_lmat.rb' do
26
26
  cmd = "#{@cmd} #{@mzxml}"
27
27
  `#{cmd}`
28
28
  newfile = @mzxml.sub(".mzXML", ".lmat")
29
- newfile.should exist
29
+ newfile.exist_as_a_file?.should be_true
30
30
  IO.read(newfile).should == IO.read(@ans_lmat)
31
31
  File.unlink(newfile)
32
32
  end
@@ -1,3 +1,5 @@
1
+ require 'yaml'
2
+
1
3
  require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
4
 
3
5
  require 'spec_id/precision/prob'
@@ -47,19 +49,19 @@ describe 'filter_and_validate.rb on small bioworks file' do
47
49
  it 'responds to --prob init' do
48
50
  normal = @st_to_yaml.call( @args + " --prob" )
49
51
 
50
- normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.996655518394649, 0.918918918918919]) do |got,exp|
52
+ normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.993333333333333, 0.85]) do |got,exp|
51
53
  got.should be_close(exp, 0.000000000001)
52
54
  end
53
55
  #normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
54
56
  #normal.should == normal_nsp
55
57
  init = @st_to_yaml.call( @args + " --prob init" )
56
58
  init.should_not == normal
57
- init[:pephits_precision].first[:values].zip([1.0, 0.974358974358974, 0.981324278438031, 0.890429958391123]) do |got,exp|
59
+ init[:pephits_precision].first[:values].zip([1.0, 0.95, 0.963333333333333, 0.8025]) do |got,exp|
58
60
  got.should be_close(exp, 0.000000000001)
59
61
  end
60
62
  with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
61
63
  # frozen
62
- with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.994974874371859, 0.996655518394649, 0.918918918918919]) do |got,exp|
64
+ with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.99, 0.993333333333333, 0.85]) do |got,exp|
63
65
  got.should be_close(exp, 0.000000000001)
64
66
  end
65
67
  end
@@ -33,9 +33,94 @@ describe SampleEnzyme, "digesting sequences" do
33
33
  peps = SampleEnzyme.new('trypsin').digest(st, 2)
34
34
  peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
35
35
  end
36
-
37
36
 
38
37
  end
39
38
 
39
+ describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
40
+
41
+ before(:each) do
42
+ @full_KRP = SampleEnzyme.new do |se|
43
+ se.name = 'trypsin'
44
+ se.cut = 'KR'
45
+ se.no_cut = 'P'
46
+ se.sense = 'C'
47
+ end
48
+ @just_KR = SampleEnzyme.new do |se|
49
+ se.name = 'trypsin'
50
+ se.cut = 'KR'
51
+ se.no_cut = ''
52
+ se.sense = 'C'
53
+ end
54
+ end
55
+
56
+ it 'calculates the number of tolerant termini' do
57
+ exp = [{
58
+ # full KR/P
59
+ 'K.EPTIDR.E' => 2,
60
+ 'K.PEPTIDR.E' => 1,
61
+ 'F.EEPTIDR.E' => 1,
62
+ 'F.PEPTIDW.R' => 0,
63
+ },
64
+ {
65
+ # just KR
66
+ 'K.EPTIDR.E' => 2,
67
+ 'K.PEPTIDR.E' => 2,
68
+ 'F.EEPTIDR.E' => 1,
69
+ 'F.PEPTIDW.R' => 0,
70
+ }
71
+ ]
72
+ scall = Sequest::PepXML::SearchHit
73
+ sample_enzyme_ar = [@full_KRP, @just_KR]
74
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
75
+ hash.each do |seq, val|
76
+ sample_enzyme.num_tol_term(seq).should == val
77
+ end
78
+ end
79
+ end
80
+
81
+ it 'calculates number of missed cleavages' do
82
+ exp = [{
83
+ "EPTIDR" => 0,
84
+ "PEPTIDR" => 0,
85
+ "EEPTIDR" => 0,
86
+ "PEPTIDW" => 0,
87
+ "PERPTIDW" => 0,
88
+ "PEPKPTIDW" => 0,
89
+ "PEPKTIDW" => 1,
90
+ "RTTIDR" => 1,
91
+ "RTTIKK" => 2,
92
+ "PKEPRTIDW" => 2,
93
+ "PKEPRTIDKP" => 2,
94
+ "PKEPRAALKPEERPTIDKW" => 3,
95
+ },
96
+ {
97
+ "EPTIDR" => 0,
98
+ "PEPTIDR" => 0,
99
+ "EEPTIDR" => 0,
100
+ "PEPTIDW" => 0,
101
+ "PERPTIDW" => 1,
102
+ "PEPKPTIDW" => 1,
103
+ "PEPKTIDW" => 1,
104
+ "RTTIDR" => 1,
105
+ "RTTIKK" => 2,
106
+ "PKEPRTIDW" => 2,
107
+ "PKEPRTIDKP" => 3,
108
+ "PKEPRAALKPEERPTIDKW" => 5,
109
+ }
110
+ ]
111
+
112
+ sample_enzyme_ar = [@full_KRP, @just_KR]
113
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
114
+ hash.each do |aaseq, val|
115
+ #first, middle, last = SpecID::Pep.split_sequence(seq)
116
+ # note that we are only using the middle section!
117
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
118
+ end
119
+ end
120
+ end
121
+
122
+ end
123
+
124
+
40
125
 
41
126