mspire 0.3.1 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/Rakefile +2 -2
  2. data/bin/bioworks_to_pepxml.rb +15 -3
  3. data/bin/ms_to_lmat.rb +2 -1
  4. data/bin/sqt_group.rb +26 -0
  5. data/changelog.txt +36 -0
  6. data/lib/ms/msrun.rb +3 -1
  7. data/lib/ms/parser/mzdata/dom.rb +14 -14
  8. data/lib/ms/scan.rb +3 -3
  9. data/lib/mspire.rb +1 -1
  10. data/lib/sample_enzyme.rb +39 -0
  11. data/lib/spec_id.rb +18 -0
  12. data/lib/spec_id/aa_freqs.rb +6 -9
  13. data/lib/spec_id/digestor.rb +16 -17
  14. data/lib/spec_id/mass.rb +63 -1
  15. data/lib/spec_id/parser/proph.rb +101 -2
  16. data/lib/spec_id/precision/filter.rb +3 -2
  17. data/lib/spec_id/precision/filter/cmdline.rb +3 -1
  18. data/lib/spec_id/precision/filter/output.rb +1 -0
  19. data/lib/spec_id/precision/prob.rb +88 -21
  20. data/lib/spec_id/precision/prob/cmdline.rb +28 -16
  21. data/lib/spec_id/precision/prob/output.rb +8 -2
  22. data/lib/spec_id/proph/pep_summary.rb +25 -12
  23. data/lib/spec_id/sequest.rb +28 -0
  24. data/lib/spec_id/sequest/pepxml.rb +142 -197
  25. data/lib/spec_id/sqt.rb +349 -0
  26. data/lib/spec_id/srf.rb +33 -23
  27. data/lib/validator.rb +40 -57
  28. data/lib/validator/aa.rb +3 -90
  29. data/lib/validator/aa_est.rb +112 -0
  30. data/lib/validator/cmdline.rb +163 -31
  31. data/lib/validator/decoy.rb +15 -7
  32. data/lib/validator/digestion_based.rb +5 -4
  33. data/lib/validator/q_value.rb +32 -0
  34. data/script/peps_per_bin.rb +67 -0
  35. data/script/sqt_to_meta.rb +24 -0
  36. data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
  37. data/specs/bin/fasta_shaker_spec.rb +2 -2
  38. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
  39. data/specs/bin/filter_and_validate_spec.rb +25 -6
  40. data/specs/bin/ms_to_lmat_spec.rb +2 -2
  41. data/specs/bin/prob_validate_spec.rb +5 -3
  42. data/specs/sample_enzyme_spec.rb +86 -1
  43. data/specs/spec_helper.rb +11 -9
  44. data/specs/spec_id/bioworks_spec.rb +2 -1
  45. data/specs/spec_id/precision/filter_spec.rb +5 -5
  46. data/specs/spec_id/precision/prob_spec.rb +0 -67
  47. data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
  48. data/specs/spec_id/protein_summary_spec.rb +4 -4
  49. data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
  50. data/specs/spec_id/sequest_spec.rb +38 -0
  51. data/specs/spec_id/sqt_spec.rb +111 -3
  52. data/specs/spec_id_spec.rb +2 -0
  53. data/specs/transmem/phobius_spec.rb +3 -1
  54. data/specs/transmem/toppred_spec.rb +1 -1
  55. data/specs/validator/aa_est_spec.rb +66 -0
  56. data/specs/validator/aa_spec.rb +1 -68
  57. data/specs/validator/background_spec.rb +2 -0
  58. data/specs/validator/bias_spec.rb +3 -27
  59. data/specs/validator/decoy_spec.rb +2 -2
  60. data/specs/validator/transmem_spec.rb +2 -1
  61. data/test_files/small.sqt +87 -0
  62. metadata +312 -293
@@ -3,10 +3,12 @@ require 'validator'
3
3
  class Validator::Decoy < Validator
4
4
  include Precision::Calculator::Decoy
5
5
 
6
+ # a Regexp (if concatenated) or a String (the filename of separate run)
6
7
  attr_accessor :constraint
7
8
 
8
9
  attr_accessor :decoy_on_match
9
10
  attr_accessor :correct_wins
11
+ attr_accessor :decoy_to_target_ratio
10
12
 
11
13
  attr_accessor :last_pep_was_decoy
12
14
 
@@ -16,13 +18,19 @@ class Validator::Decoy < Validator
16
18
 
17
19
  attr_reader :normal_peps_just_submitted
18
20
 
19
- def initialize(constraint=nil, decoy_on_match = true, correct_wins = true)
20
- @decoy_on_match = decoy_on_match
21
- @correct_wins = correct_wins
22
- @constraint = constraint
21
+ DEFAULTS = {
22
+ :decoy_on_match => true,
23
+ :correct_wins => true,
24
+ :decoy_to_target_ratio => 1.0,
25
+ }
26
+
27
+ def initialize(opts={})
28
+ merged = DEFAULTS.merge(opts)
29
+ @constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
23
30
  end
24
31
 
25
32
  # returns [normal, decoy] (?? I think ??)
33
+ # reads the full protein reference
26
34
  def partition(peps)
27
35
  if @decoy_on_match
28
36
  if @correct_wins
@@ -74,15 +82,15 @@ class Validator::Decoy < Validator
74
82
  @normal_peps_just_submitted = normal
75
83
  @increment_normal += normal.size
76
84
  @increment_decoy += decoy.size
77
- calc_precision(@increment_normal, @increment_decoy)
85
+ calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
78
86
  end
79
87
 
80
88
  def pephit_precision(peps, separate_peps=nil)
81
89
  if separate_peps
82
- calc_precision(peps.size, separate_peps.size)
90
+ calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
83
91
  else
84
92
  (norm, decoy) = partition(peps)
85
- calc_precision(norm.size, decoy.size)
93
+ calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
86
94
  end
87
95
  end
88
96
 
@@ -6,7 +6,8 @@ require 'spec_id/sequest/params'
6
6
  # SpecID::Pep objects using the pephit_precision method.
7
7
  class Validator::DigestionBased < Validator
8
8
  DEFAULTS = {
9
- :false_to_total_ratio => 1.0,
9
+ #:false_to_total_ratio => 1.0, # disable because this needs to be set
10
+ # explicitly
10
11
  :background => 0.0,
11
12
  }
12
13
 
@@ -42,13 +43,13 @@ class Validator::DigestionBased < Validator
42
43
  # returns [num_tps, num_fps]
43
44
  def calc_precision_prep(num_tps, num_fps)
44
45
  total_peps_passing_partition = num_tps + num_fps
45
- num_fps = adjust_fps_for_background(num_tps, num_fps, @background)
46
+ num_fps = adjust_fps_for_background(num_tps, num_fps, background)
46
47
  ## we must use the false_to_total_ratio to estimate how many are really
47
48
  ## incorrect!
48
49
  # FALSE/TOTAL = FALSE(found)/TOTAL(found)
49
50
  # TOTAL(found) = FALSE(found) * TOTAL/FALSE
50
51
  # = FALSE(found) / (FALSE/TOTAL)
51
- total_false = num_fps / @false_to_total_ratio
52
+ total_false = num_fps / false_to_total_ratio
52
53
  # NOTE: the partition algorithm drops peptides that are transmembrane
53
54
  # under certain options. Thus, the total false estimate must be tempered
54
55
  # by this lower number of total peptides.
@@ -60,7 +61,7 @@ class Validator::DigestionBased < Validator
60
61
  # assumes partition returns (tps, fps)
61
62
  def set_false_to_total_ratio(peps)
62
63
  (tps, fps) = partition(peps)
63
- @false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
64
+ self.false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
64
65
  self
65
66
  end
66
67
 
@@ -0,0 +1,32 @@
1
+
2
+
3
+ # from percolator
4
+ # This is a trivial class (since q-values are so straightforward with regards
5
+ # to precision), but it allows us to work with q-values using the same
6
+ # interface as all other validators
7
+ class Validator::QValue
8
+
9
+ # objs should respond_to :q_value
10
+ # q-values: 0.0 means no false discoveries, 0.5 means 50% false discoveries
11
+ # 1 - (the largest q value) is the precision
12
+ def precision(objs)
13
+ return 1.0 if objs.size == 0
14
+ largest_q_value = objs.map {|v| v.q_value }.max
15
+ prec = 1.0 - largest_q_value
16
+ end
17
+
18
+
19
+ # objs should respond_to :q_value
20
+ # These should be added from low q-value to high q-value
21
+ # The last q-value added determines the precision
22
+ def increment_precision(objs)
23
+ if objs.is_a?(SpecID::Pep) or objs.is_a?(SpecID::Prot)
24
+ objs = [objs]
25
+ end
26
+ precision(objs)
27
+ end
28
+
29
+ alias_method :pephit_precision, :precision
30
+ alias_method :prothit_precision, :precision
31
+ alias_method :increment_pephits_precision, :increment_precision
32
+ end
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'generator'
4
+ require 'optparse'
5
+
6
+ require 'fasta'
7
+ require 'sample_enzyme'
8
+ require 'spec_id/digestor'
9
+ require 'spec_id/mass'
10
+ require 'vec'
11
+
12
+ opt = {}
13
+ opt[:missed_cleavages] = 0 # ~ parts per million
14
+ opt[:bin_size] = 0.001 # ~ parts per million
15
+ opt[:min] = 300.0
16
+ opt[:max] = 4500.0
17
+ opt[:h_plus] = 1.0
18
+
19
+ opts = OptionParser.new do |op|
20
+ op.banner = "usage: #{File.basename(__FILE__)} *.fasta"
21
+ op.separator "Outputs a close estimate of number of peptides per bin."
22
+ op.separator "Uses m+H+ as the peptide mass."
23
+ op.separator "[for speed, assumes that there is a peptide mass close to the extremes]"
24
+ op.on("-b", "--bin_size <F>", Float, "size of bins [#{opt[:bin_size]}]") {|v| opt[:bin_size] = v }
25
+ op.on("-x", "--max <F>", Float, "max mass to accept [#{opt[:max]}]") {|v| opt[:max] = v }
26
+ op.on("-n", "--min <F>", Float, "min mass to accept [#{opt[:min]}]") {|v| opt[:min] = v }
27
+ op.on("-h", "--h_plus <F>", Float, "value of H+ to use [#{opt[:h_plus]}]") {|v| opt[:h_plus] = v }
28
+ op.on("-m", "--missed_cleavages <N>", Integer, "num missed cleavages [#{opt[:missed_cleavages]}]") {|v| opt[:missed_cleavages] = v }
29
+ end
30
+
31
+ opts.parse!
32
+
33
+ if ARGV.size == 0
34
+ puts opts.to_s
35
+ exit
36
+ end
37
+
38
+ min_mass = opt[:min]
39
+ max_mass = opt[:max]
40
+
41
+ ARGV.each do |file|
42
+ fasta = Fasta.new(file)
43
+ uniq_aaseqs = fasta.map do |prot|
44
+ SampleEnzyme.tryptic(prot.aaseq, opt[:missed_cleavages])
45
+ end.flatten.uniq
46
+
47
+ masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs)
48
+ passing_masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs).select do |mh|
49
+ ((mh >= min_mass) and (mh <= max_mass))
50
+ end
51
+
52
+ ## warn if the masses aren't close to the end points
53
+ if (max_mass - passing_masses.max) > 1.0
54
+ warn "highest mass is not that close to max: #{passing_masses.max}"
55
+ end
56
+ if (passing_masses.min - min_mass) > 1.0
57
+ warn "lowest mass is not that close to min: #{passing_masses.min}"
58
+ end
59
+
60
+ num_bins = (max_mass - min_mass) / opt[:bin_size]
61
+
62
+ (bins, freqs) = VecD.new(passing_masses).histogram(num_bins)
63
+
64
+ # report
65
+ puts "#{file}: #{freqs.avg}"
66
+
67
+ end
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/ruby -s
2
+
3
+ require 'optparse'
4
+
5
+ $outfile = 'meta.sqm'
6
+ opts = OptionParser.new do |op|
7
+ op.banner = "usage: #{File.basename(__FILE__)} <file>.sqt ..."
8
+ op.separator "outputs meta.sqm (a sqt meta file)"
9
+ op.on("-o", "--outfile <file>", "currently: #{$outfile}") {|v| $outfile = v}
10
+ end
11
+
12
+ opts.parse!
13
+
14
+ if ARGV.size == 0
15
+ puts opts.to_s
16
+ exit
17
+ end
18
+
19
+ File.open($outfile, 'w') do |out|
20
+ ARGV.each do |file|
21
+ out.puts File.expand_path(file)
22
+ end
23
+ end
24
+
@@ -41,7 +41,7 @@ describe 'bioworks_to_pepxml.rb' do
41
41
  cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
42
42
  ## FILES EXIST:
43
43
  prc = proc {|file|
44
- file.should exist
44
+ file.exist_as_a_file?.should be_true
45
45
  beginning = IO.readlines(file)[0,50].join("\n")
46
46
  $XML_SANITY_LINES.each do |line|
47
47
  beginning.should include(line)
@@ -55,7 +55,7 @@ describe 'bioworks_to_pepxml.rb' do
55
55
  ## COPY MZXML:
56
56
  %w(000 020).each do |file|
57
57
  mzxml_file = File.join(@out_path, "#{file}.mzXML")
58
- mzxml_file.should exist
58
+ mzxml_file.exist_as_a_file?.should be_true
59
59
  end
60
60
  ## CLEANUP:
61
61
  unless @no_delete then FileUtils.rm_rf(@out_path) end
@@ -68,7 +68,7 @@ describe 'bioworks_to_pepxml.rb' do
68
68
  db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
69
69
  IO.read(@tf_params).should =~ db_re
70
70
  prc = proc {|file|
71
- file.should exist
71
+ file.exist_as_a_file?.should be_true
72
72
  IO.read(file).should_not =~ db_re
73
73
  }
74
74
  _basic(cmd, prc)
@@ -200,13 +200,13 @@ EDITPEP
200
200
  end
201
201
 
202
202
  def fastalns(fn)
203
- fn.should exist
203
+ fn.exist_as_a_file?.should be_true
204
204
  IO.read(fn).split("\n")
205
205
  end
206
206
 
207
207
  # returns the fasta object proteins
208
208
  def fastap(fn)
209
- @f.should exist
209
+ @f.exist_as_a_file?.should be_true
210
210
  Fasta.new(fn).prots
211
211
  end
212
212
 
@@ -2,10 +2,10 @@
2
2
  pephits_precision:
3
3
  - validator: decoy
4
4
  value: 0.992932862190813
5
- - validator: badAA
5
+ - validator: badAAEst
6
6
  value: 0.178006237270664
7
- - validator: badAA
8
- value: -0.0247654296463377
7
+ - validator: badAAEst
8
+ value: -0.0247654296463379
9
9
  - validator: badAA
10
10
  value: 0.301413862599215
11
11
  - validator: bias
@@ -94,22 +94,19 @@ params:
94
94
  :decoy_on_match: true
95
95
  :correct_wins: true
96
96
  - :calculated_background: 0.127208480565371
97
- :type: badAA
98
- :class: Validator::AA
97
+ :type: badAAEst
98
+ :class: Validator::AAEst
99
99
  :background: 0.001
100
100
  :frequency: 0.0147528119278054
101
- :false_to_total_ratio: 1.0
102
101
  - :calculated_background: 0.402826855123675
103
- :type: badAA
104
- :class: Validator::AA
102
+ :type: badAAEst
103
+ :class: Validator::AAEst
105
104
  :background: 0.0
106
105
  :frequency: 0.0463510332199843
107
- :false_to_total_ratio: 1.0
108
106
  - :calculated_background: 0.127208480565371
109
107
  :type: badAA
110
108
  :class: Validator::AA
111
109
  :background: 0.001
112
- :frequency:
113
110
  :false_to_total_ratio: 0.180662732637313
114
111
  - :calculated_background: 0.773851590106007
115
112
  :type: bias
@@ -1,3 +1,5 @@
1
+ require 'yaml'
2
+
1
3
  require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
4
 
3
5
  require 'spec_id/precision/filter'
@@ -80,7 +82,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
80
82
  `#{run_normal}`
81
83
  end
82
84
  structs = [ht_file, hs_file].map do |file|
83
- file.should exist
85
+ file.exist_as_a_file?.should be_true
84
86
  struct = YAML.load_file(file)
85
87
  File.unlink file
86
88
  struct
@@ -104,7 +106,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
104
106
 
105
107
  it 'handles multiple validators of the same kind (except, of course, decoy)' do
106
108
 
107
- struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa C,true,0.001 --bad_aa E,true --bad_aa C,false,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
109
+ struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa_est C,0.001 --bad_aa_est E --bad_aa C,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --fasta #{@small_fasta_file} --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
108
110
  frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
109
111
 
110
112
  ## Pephits precision:
@@ -121,8 +123,25 @@ describe 'filter_and_validate.rb on small bioworks file' do
121
123
  frp = frozen['params']
122
124
  stp = struct['params']
123
125
 
126
+ #puts "frozen validators:"
127
+ #p frp['validators']
128
+
129
+ #puts "seen validators:"
130
+ #p stp['validators']
131
+
124
132
  frp['validators'].zip(stp['validators']) do |f,s|
125
- f.should == s
133
+ if f.is_a? Hash
134
+ f.keys.each do |k|
135
+ if k == :file or k == :transmem_file
136
+ File.basename(f[k]).should == File.basename(s[k].gsub('\\','/'))
137
+ else
138
+ s[k].should == f[k]
139
+ #f[k].should == s[k]
140
+ end
141
+ end
142
+ else
143
+ f.should == s
144
+ end
126
145
  end
127
146
 
128
147
  %w(ties prefilter top_hit_by decoy_on_match postfilter include_ties_in_top_hit_postfilter hits_together proteins include_ties_in_top_hit_prefilter).each do |k|
@@ -148,9 +167,9 @@ describe 'filter_and_validate.rb on small bioworks file' do
148
167
  text_table = IO.read(@table_output_file)
149
168
 
150
169
  # frozen
151
- headings_re = Regexp.new( %w(num decoy badAA badAA badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
152
- data_re = Regexp.new( %w(peps 283 0.993 0.178 -0.025 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
153
- prot_re = Regexp.new( %w(106 0.972 0.019 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
170
+ headings_re = Regexp.new( %w(num decoy badAAEst badAAEst badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
171
+ data_re = Regexp.new( %w(peps 283 0.993 0.178006 -0.024765 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
172
+ prot_re = Regexp.new( %w(106 0.972 0.018868 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
154
173
  text_table.should =~ headings_re
155
174
  text_table.should =~ data_re
156
175
  text_table.should =~ prot_re
@@ -16,7 +16,7 @@ describe 'ms_to_lmat.rb' do
16
16
  cmd = "#{@cmd} #{@mzxml} --ascii"
17
17
  `#{cmd}`
18
18
  newfile = @mzxml.sub(".mzXML", ".lmata")
19
- newfile.should exist
19
+ newfile.exist_as_a_file?.should be_true
20
20
  IO.read(newfile).should == IO.read(@ans_lmata)
21
21
  File.unlink(newfile)
22
22
  end
@@ -26,7 +26,7 @@ describe 'ms_to_lmat.rb' do
26
26
  cmd = "#{@cmd} #{@mzxml}"
27
27
  `#{cmd}`
28
28
  newfile = @mzxml.sub(".mzXML", ".lmat")
29
- newfile.should exist
29
+ newfile.exist_as_a_file?.should be_true
30
30
  IO.read(newfile).should == IO.read(@ans_lmat)
31
31
  File.unlink(newfile)
32
32
  end
@@ -1,3 +1,5 @@
1
+ require 'yaml'
2
+
1
3
  require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
4
 
3
5
  require 'spec_id/precision/prob'
@@ -47,19 +49,19 @@ describe 'filter_and_validate.rb on small bioworks file' do
47
49
  it 'responds to --prob init' do
48
50
  normal = @st_to_yaml.call( @args + " --prob" )
49
51
 
50
- normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.996655518394649, 0.918918918918919]) do |got,exp|
52
+ normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.993333333333333, 0.85]) do |got,exp|
51
53
  got.should be_close(exp, 0.000000000001)
52
54
  end
53
55
  #normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
54
56
  #normal.should == normal_nsp
55
57
  init = @st_to_yaml.call( @args + " --prob init" )
56
58
  init.should_not == normal
57
- init[:pephits_precision].first[:values].zip([1.0, 0.974358974358974, 0.981324278438031, 0.890429958391123]) do |got,exp|
59
+ init[:pephits_precision].first[:values].zip([1.0, 0.95, 0.963333333333333, 0.8025]) do |got,exp|
58
60
  got.should be_close(exp, 0.000000000001)
59
61
  end
60
62
  with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
61
63
  # frozen
62
- with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.994974874371859, 0.996655518394649, 0.918918918918919]) do |got,exp|
64
+ with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.99, 0.993333333333333, 0.85]) do |got,exp|
63
65
  got.should be_close(exp, 0.000000000001)
64
66
  end
65
67
  end
@@ -33,9 +33,94 @@ describe SampleEnzyme, "digesting sequences" do
33
33
  peps = SampleEnzyme.new('trypsin').digest(st, 2)
34
34
  peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
35
35
  end
36
-
37
36
 
38
37
  end
39
38
 
39
+ describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
40
+
41
+ before(:each) do
42
+ @full_KRP = SampleEnzyme.new do |se|
43
+ se.name = 'trypsin'
44
+ se.cut = 'KR'
45
+ se.no_cut = 'P'
46
+ se.sense = 'C'
47
+ end
48
+ @just_KR = SampleEnzyme.new do |se|
49
+ se.name = 'trypsin'
50
+ se.cut = 'KR'
51
+ se.no_cut = ''
52
+ se.sense = 'C'
53
+ end
54
+ end
55
+
56
+ it 'calculates the number of tolerant termini' do
57
+ exp = [{
58
+ # full KR/P
59
+ 'K.EPTIDR.E' => 2,
60
+ 'K.PEPTIDR.E' => 1,
61
+ 'F.EEPTIDR.E' => 1,
62
+ 'F.PEPTIDW.R' => 0,
63
+ },
64
+ {
65
+ # just KR
66
+ 'K.EPTIDR.E' => 2,
67
+ 'K.PEPTIDR.E' => 2,
68
+ 'F.EEPTIDR.E' => 1,
69
+ 'F.PEPTIDW.R' => 0,
70
+ }
71
+ ]
72
+ scall = Sequest::PepXML::SearchHit
73
+ sample_enzyme_ar = [@full_KRP, @just_KR]
74
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
75
+ hash.each do |seq, val|
76
+ sample_enzyme.num_tol_term(seq).should == val
77
+ end
78
+ end
79
+ end
80
+
81
+ it 'calculates number of missed cleavages' do
82
+ exp = [{
83
+ "EPTIDR" => 0,
84
+ "PEPTIDR" => 0,
85
+ "EEPTIDR" => 0,
86
+ "PEPTIDW" => 0,
87
+ "PERPTIDW" => 0,
88
+ "PEPKPTIDW" => 0,
89
+ "PEPKTIDW" => 1,
90
+ "RTTIDR" => 1,
91
+ "RTTIKK" => 2,
92
+ "PKEPRTIDW" => 2,
93
+ "PKEPRTIDKP" => 2,
94
+ "PKEPRAALKPEERPTIDKW" => 3,
95
+ },
96
+ {
97
+ "EPTIDR" => 0,
98
+ "PEPTIDR" => 0,
99
+ "EEPTIDR" => 0,
100
+ "PEPTIDW" => 0,
101
+ "PERPTIDW" => 1,
102
+ "PEPKPTIDW" => 1,
103
+ "PEPKTIDW" => 1,
104
+ "RTTIDR" => 1,
105
+ "RTTIKK" => 2,
106
+ "PKEPRTIDW" => 2,
107
+ "PKEPRTIDKP" => 3,
108
+ "PKEPRAALKPEERPTIDKW" => 5,
109
+ }
110
+ ]
111
+
112
+ sample_enzyme_ar = [@full_KRP, @just_KR]
113
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
114
+ hash.each do |aaseq, val|
115
+ #first, middle, last = SpecID::Pep.split_sequence(seq)
116
+ # note that we are only using the middle section!
117
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
118
+ end
119
+ end
120
+ end
121
+
122
+ end
123
+
124
+
40
125
 
41
126