mspire 0.3.1 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +2 -2
- data/bin/bioworks_to_pepxml.rb +15 -3
- data/bin/ms_to_lmat.rb +2 -1
- data/bin/sqt_group.rb +26 -0
- data/changelog.txt +36 -0
- data/lib/ms/msrun.rb +3 -1
- data/lib/ms/parser/mzdata/dom.rb +14 -14
- data/lib/ms/scan.rb +3 -3
- data/lib/mspire.rb +1 -1
- data/lib/sample_enzyme.rb +39 -0
- data/lib/spec_id.rb +18 -0
- data/lib/spec_id/aa_freqs.rb +6 -9
- data/lib/spec_id/digestor.rb +16 -17
- data/lib/spec_id/mass.rb +63 -1
- data/lib/spec_id/parser/proph.rb +101 -2
- data/lib/spec_id/precision/filter.rb +3 -2
- data/lib/spec_id/precision/filter/cmdline.rb +3 -1
- data/lib/spec_id/precision/filter/output.rb +1 -0
- data/lib/spec_id/precision/prob.rb +88 -21
- data/lib/spec_id/precision/prob/cmdline.rb +28 -16
- data/lib/spec_id/precision/prob/output.rb +8 -2
- data/lib/spec_id/proph/pep_summary.rb +25 -12
- data/lib/spec_id/sequest.rb +28 -0
- data/lib/spec_id/sequest/pepxml.rb +142 -197
- data/lib/spec_id/sqt.rb +349 -0
- data/lib/spec_id/srf.rb +33 -23
- data/lib/validator.rb +40 -57
- data/lib/validator/aa.rb +3 -90
- data/lib/validator/aa_est.rb +112 -0
- data/lib/validator/cmdline.rb +163 -31
- data/lib/validator/decoy.rb +15 -7
- data/lib/validator/digestion_based.rb +5 -4
- data/lib/validator/q_value.rb +32 -0
- data/script/peps_per_bin.rb +67 -0
- data/script/sqt_to_meta.rb +24 -0
- data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
- data/specs/bin/fasta_shaker_spec.rb +2 -2
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
- data/specs/bin/filter_and_validate_spec.rb +25 -6
- data/specs/bin/ms_to_lmat_spec.rb +2 -2
- data/specs/bin/prob_validate_spec.rb +5 -3
- data/specs/sample_enzyme_spec.rb +86 -1
- data/specs/spec_helper.rb +11 -9
- data/specs/spec_id/bioworks_spec.rb +2 -1
- data/specs/spec_id/precision/filter_spec.rb +5 -5
- data/specs/spec_id/precision/prob_spec.rb +0 -67
- data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
- data/specs/spec_id/protein_summary_spec.rb +4 -4
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
- data/specs/spec_id/sequest_spec.rb +38 -0
- data/specs/spec_id/sqt_spec.rb +111 -3
- data/specs/spec_id_spec.rb +2 -0
- data/specs/transmem/phobius_spec.rb +3 -1
- data/specs/transmem/toppred_spec.rb +1 -1
- data/specs/validator/aa_est_spec.rb +66 -0
- data/specs/validator/aa_spec.rb +1 -68
- data/specs/validator/background_spec.rb +2 -0
- data/specs/validator/bias_spec.rb +3 -27
- data/specs/validator/decoy_spec.rb +2 -2
- data/specs/validator/transmem_spec.rb +2 -1
- data/test_files/small.sqt +87 -0
- metadata +312 -293
data/lib/validator/decoy.rb
CHANGED
@@ -3,10 +3,12 @@ require 'validator'
|
|
3
3
|
class Validator::Decoy < Validator
|
4
4
|
include Precision::Calculator::Decoy
|
5
5
|
|
6
|
+
# a Regexp (if concatenated) or a String (the filename of separate run)
|
6
7
|
attr_accessor :constraint
|
7
8
|
|
8
9
|
attr_accessor :decoy_on_match
|
9
10
|
attr_accessor :correct_wins
|
11
|
+
attr_accessor :decoy_to_target_ratio
|
10
12
|
|
11
13
|
attr_accessor :last_pep_was_decoy
|
12
14
|
|
@@ -16,13 +18,19 @@ class Validator::Decoy < Validator
|
|
16
18
|
|
17
19
|
attr_reader :normal_peps_just_submitted
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
DEFAULTS = {
|
22
|
+
:decoy_on_match => true,
|
23
|
+
:correct_wins => true,
|
24
|
+
:decoy_to_target_ratio => 1.0,
|
25
|
+
}
|
26
|
+
|
27
|
+
def initialize(opts={})
|
28
|
+
merged = DEFAULTS.merge(opts)
|
29
|
+
@constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
|
23
30
|
end
|
24
31
|
|
25
32
|
# returns [normal, decoy] (?? I think ??)
|
33
|
+
# reads the full protein reference
|
26
34
|
def partition(peps)
|
27
35
|
if @decoy_on_match
|
28
36
|
if @correct_wins
|
@@ -74,15 +82,15 @@ class Validator::Decoy < Validator
|
|
74
82
|
@normal_peps_just_submitted = normal
|
75
83
|
@increment_normal += normal.size
|
76
84
|
@increment_decoy += decoy.size
|
77
|
-
calc_precision(@increment_normal, @increment_decoy)
|
85
|
+
calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
|
78
86
|
end
|
79
87
|
|
80
88
|
def pephit_precision(peps, separate_peps=nil)
|
81
89
|
if separate_peps
|
82
|
-
calc_precision(peps.size, separate_peps.size)
|
90
|
+
calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
|
83
91
|
else
|
84
92
|
(norm, decoy) = partition(peps)
|
85
|
-
calc_precision(norm.size, decoy.size)
|
93
|
+
calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
|
86
94
|
end
|
87
95
|
end
|
88
96
|
|
@@ -6,7 +6,8 @@ require 'spec_id/sequest/params'
|
|
6
6
|
# SpecID::Pep objects using the pephit_precision method.
|
7
7
|
class Validator::DigestionBased < Validator
|
8
8
|
DEFAULTS = {
|
9
|
-
|
9
|
+
#:false_to_total_ratio => 1.0, # disable because this needs to be set
|
10
|
+
# explicitly
|
10
11
|
:background => 0.0,
|
11
12
|
}
|
12
13
|
|
@@ -42,13 +43,13 @@ class Validator::DigestionBased < Validator
|
|
42
43
|
# returns [num_tps, num_fps]
|
43
44
|
def calc_precision_prep(num_tps, num_fps)
|
44
45
|
total_peps_passing_partition = num_tps + num_fps
|
45
|
-
num_fps = adjust_fps_for_background(num_tps, num_fps,
|
46
|
+
num_fps = adjust_fps_for_background(num_tps, num_fps, background)
|
46
47
|
## we must use the false_to_total_ratio to estimate how many are really
|
47
48
|
## incorrect!
|
48
49
|
# FALSE/TOTAL = FALSE(found)/TOTAL(found)
|
49
50
|
# TOTAL(found) = FALSE(found) * TOTAL/FALSE
|
50
51
|
# = FALSE(found) / (FALSE/TOTAL)
|
51
|
-
total_false = num_fps /
|
52
|
+
total_false = num_fps / false_to_total_ratio
|
52
53
|
# NOTE: the partition algorithm drops peptides that are transmembrane
|
53
54
|
# under certain options. Thus, the total false estimate must be tempered
|
54
55
|
# by this lower number of total peptides.
|
@@ -60,7 +61,7 @@ class Validator::DigestionBased < Validator
|
|
60
61
|
# assumes partition returns (tps, fps)
|
61
62
|
def set_false_to_total_ratio(peps)
|
62
63
|
(tps, fps) = partition(peps)
|
63
|
-
|
64
|
+
self.false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
|
64
65
|
self
|
65
66
|
end
|
66
67
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# from percolator
# This is a trivial class (since q-values are so straightforward with regards
# to precision), but it allows us to work with q-values using the same
# interface as all other validators.
class Validator::QValue

  # Precision of a set of hits, computed as 1 - (the largest q-value).
  #
  # objs:: a collection (responding to :size and :map) whose members respond
  #        to :q_value.  A q-value of 0.0 means no false discoveries, 0.5
  #        means 50% false discoveries.
  #
  # Returns a Float.  An empty collection is reported as 1.0.
  def precision(objs)
    return 1.0 if objs.size == 0
    1.0 - objs.map {|v| v.q_value }.max
  end

  # Same calculation as #precision, but a single SpecID::Pep or SpecID::Prot
  # may be handed in instead of a collection.
  #
  # Hits are expected to be submitted from low q-value to high q-value.  No
  # state is kept between calls: the result is determined solely by the
  # largest q-value among the objects passed in this call.
  def increment_precision(objs)
    if objs.is_a?(SpecID::Pep) || objs.is_a?(SpecID::Prot)
      objs = [objs]
    end
    precision(objs)
  end

  # names expected by the common validator interface
  alias_method :pephit_precision, :precision
  alias_method :prothit_precision, :precision
  alias_method :increment_pephits_precision, :increment_precision
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/ruby -w

# Reports, for each fasta file given on the command line, the average number
# of peptides per mass bin (m+H+ is used as the peptide mass).

require 'generator'
require 'optparse'

require 'fasta'
require 'sample_enzyme'
require 'spec_id/digestor'
require 'spec_id/mass'
require 'vec'

opt = {}
opt[:missed_cleavages] = 0  # number of missed cleavages passed to the digest
opt[:bin_size] = 0.001  # ~ parts per million
opt[:min] = 300.0
opt[:max] = 4500.0
opt[:h_plus] = 1.0

# NOTE(review): "-h" is claimed by --h_plus below, so it presumably no longer
# acts as the short form of --help -- confirm.
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} *.fasta"
  op.separator "Outputs a close estimate of number of peptides per bin."
  op.separator "Uses m+H+ as the peptide mass."
  op.separator "[for speed, assumes that there is a peptide mass close to the extremes]"
  op.on("-b", "--bin_size <F>", Float, "size of bins [#{opt[:bin_size]}]") {|v| opt[:bin_size] = v }
  op.on("-x", "--max <F>", Float, "max mass to accept [#{opt[:max]}]") {|v| opt[:max] = v }
  op.on("-n", "--min <F>", Float, "min mass to accept [#{opt[:min]}]") {|v| opt[:min] = v }
  op.on("-h", "--h_plus <F>", Float, "value of H+ to use [#{opt[:h_plus]}]") {|v| opt[:h_plus] = v }
  op.on("-m", "--missed_cleavages <N>", Integer, "num missed cleavages [#{opt[:missed_cleavages]}]") {|v| opt[:missed_cleavages] = v }
end

opts.parse!

# nothing to do without at least one fasta file: show usage and quit
if ARGV.size == 0
  puts opts.to_s
  exit
end

min_mass = opt[:min]
max_mass = opt[:max]

ARGV.each do |file|
  fasta = Fasta.new(file)
  # digest every protein and keep each distinct peptide sequence once
  uniq_aaseqs = fasta.map do |prot|
    SampleEnzyme.tryptic(prot.aaseq, opt[:missed_cleavages])
  end.flatten.uniq

  # the masses are calculated once and then filtered to the accepted window
  masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs)
  passing_masses = masses.select do |mh|
    (mh >= min_mass) && (mh <= max_mass)
  end

  # without any mass in the window, min/max below would be nil and there is
  # nothing to bin, so skip this file instead of crashing
  if passing_masses.empty?
    warn "no peptide masses between #{min_mass} and #{max_mass} in: #{file}"
    next
  end

  ## warn if the masses aren't close to the end points
  if (max_mass - passing_masses.max) > 1.0
    warn "highest mass is not that close to max: #{passing_masses.max}"
  end
  if (passing_masses.min - min_mass) > 1.0
    warn "lowest mass is not that close to min: #{passing_masses.min}"
  end

  # the bin count is derived from the requested window (not from the observed
  # extremes), hence the closeness warnings above
  num_bins = (max_mass - min_mass) / opt[:bin_size]

  (_bins, freqs) = VecD.new(passing_masses).histogram(num_bins)

  # report
  puts "#{file}: #{freqs.avg}"

end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/ruby

# Writes a sqt meta file: the expanded (absolute) path of every file given on
# the command line, one per line.
#
# The interpreter's -s switch is deliberately NOT used in the shebang: with
# -s, ruby itself consumes "-o" (setting $o) before OptionParser ever sees
# it, and the output file name would be left in ARGV as an input file.

require 'optparse'

outfile = 'meta.sqm'
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} <file>.sqt ..."
  op.separator "outputs meta.sqm (a sqt meta file)"
  op.on("-o", "--outfile <file>", "currently: #{outfile}") {|v| outfile = v}
end

opts.parse!

# no input files: show usage and quit
if ARGV.size == 0
  puts opts.to_s
  exit
end

File.open(outfile, 'w') do |out|
  ARGV.each do |file|
    out.puts File.expand_path(file)
  end
end
|
24
|
+
|
@@ -41,7 +41,7 @@ describe 'bioworks_to_pepxml.rb' do
|
|
41
41
|
cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
|
42
42
|
## FILES EXIST:
|
43
43
|
prc = proc {|file|
|
44
|
-
file.should
|
44
|
+
file.exist_as_a_file?.should be_true
|
45
45
|
beginning = IO.readlines(file)[0,50].join("\n")
|
46
46
|
$XML_SANITY_LINES.each do |line|
|
47
47
|
beginning.should include(line)
|
@@ -55,7 +55,7 @@ describe 'bioworks_to_pepxml.rb' do
|
|
55
55
|
## COPY MZXML:
|
56
56
|
%w(000 020).each do |file|
|
57
57
|
mzxml_file = File.join(@out_path, "#{file}.mzXML")
|
58
|
-
mzxml_file.should
|
58
|
+
mzxml_file.exist_as_a_file?.should be_true
|
59
59
|
end
|
60
60
|
## CLEANUP:
|
61
61
|
unless @no_delete then FileUtils.rm_rf(@out_path) end
|
@@ -68,7 +68,7 @@ describe 'bioworks_to_pepxml.rb' do
|
|
68
68
|
db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
|
69
69
|
IO.read(@tf_params).should =~ db_re
|
70
70
|
prc = proc {|file|
|
71
|
-
file.should
|
71
|
+
file.exist_as_a_file?.should be_true
|
72
72
|
IO.read(file).should_not =~ db_re
|
73
73
|
}
|
74
74
|
_basic(cmd, prc)
|
@@ -200,13 +200,13 @@ EDITPEP
|
|
200
200
|
end
|
201
201
|
|
202
202
|
def fastalns(fn)
|
203
|
-
fn.should
|
203
|
+
fn.exist_as_a_file?.should be_true
|
204
204
|
IO.read(fn).split("\n")
|
205
205
|
end
|
206
206
|
|
207
207
|
# returns the fasta object proteins
|
208
208
|
def fastap(fn)
|
209
|
-
@f.should
|
209
|
+
@f.exist_as_a_file?.should be_true
|
210
210
|
Fasta.new(fn).prots
|
211
211
|
end
|
212
212
|
|
@@ -2,10 +2,10 @@
|
|
2
2
|
pephits_precision:
|
3
3
|
- validator: decoy
|
4
4
|
value: 0.992932862190813
|
5
|
-
- validator:
|
5
|
+
- validator: badAAEst
|
6
6
|
value: 0.178006237270664
|
7
|
-
- validator:
|
8
|
-
value: -0.
|
7
|
+
- validator: badAAEst
|
8
|
+
value: -0.0247654296463379
|
9
9
|
- validator: badAA
|
10
10
|
value: 0.301413862599215
|
11
11
|
- validator: bias
|
@@ -94,22 +94,19 @@ params:
|
|
94
94
|
:decoy_on_match: true
|
95
95
|
:correct_wins: true
|
96
96
|
- :calculated_background: 0.127208480565371
|
97
|
-
:type:
|
98
|
-
:class: Validator::
|
97
|
+
:type: badAAEst
|
98
|
+
:class: Validator::AAEst
|
99
99
|
:background: 0.001
|
100
100
|
:frequency: 0.0147528119278054
|
101
|
-
:false_to_total_ratio: 1.0
|
102
101
|
- :calculated_background: 0.402826855123675
|
103
|
-
:type:
|
104
|
-
:class: Validator::
|
102
|
+
:type: badAAEst
|
103
|
+
:class: Validator::AAEst
|
105
104
|
:background: 0.0
|
106
105
|
:frequency: 0.0463510332199843
|
107
|
-
:false_to_total_ratio: 1.0
|
108
106
|
- :calculated_background: 0.127208480565371
|
109
107
|
:type: badAA
|
110
108
|
:class: Validator::AA
|
111
109
|
:background: 0.001
|
112
|
-
:frequency:
|
113
110
|
:false_to_total_ratio: 0.180662732637313
|
114
111
|
- :calculated_background: 0.773851590106007
|
115
112
|
:type: bias
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
4
|
|
3
5
|
require 'spec_id/precision/filter'
|
@@ -80,7 +82,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
80
82
|
`#{run_normal}`
|
81
83
|
end
|
82
84
|
structs = [ht_file, hs_file].map do |file|
|
83
|
-
file.should
|
85
|
+
file.exist_as_a_file?.should be_true
|
84
86
|
struct = YAML.load_file(file)
|
85
87
|
File.unlink file
|
86
88
|
struct
|
@@ -104,7 +106,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
104
106
|
|
105
107
|
it 'handles multiple validators of the same kind (except, of course, decoy)' do
|
106
108
|
|
107
|
-
struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --
|
109
|
+
struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa_est C,0.001 --bad_aa_est E --bad_aa C,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --fasta #{@small_fasta_file} --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
|
108
110
|
frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
|
109
111
|
|
110
112
|
## Pephits precision:
|
@@ -121,8 +123,25 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
121
123
|
frp = frozen['params']
|
122
124
|
stp = struct['params']
|
123
125
|
|
126
|
+
#puts "frozen validators:"
|
127
|
+
#p frp['validators']
|
128
|
+
|
129
|
+
#puts "seen validators:"
|
130
|
+
#p stp['validators']
|
131
|
+
|
124
132
|
frp['validators'].zip(stp['validators']) do |f,s|
|
125
|
-
f.
|
133
|
+
if f.is_a? Hash
|
134
|
+
f.keys.each do |k|
|
135
|
+
if k == :file or k == :transmem_file
|
136
|
+
File.basename(f[k]).should == File.basename(s[k].gsub('\\','/'))
|
137
|
+
else
|
138
|
+
s[k].should == f[k]
|
139
|
+
#f[k].should == s[k]
|
140
|
+
end
|
141
|
+
end
|
142
|
+
else
|
143
|
+
f.should == s
|
144
|
+
end
|
126
145
|
end
|
127
146
|
|
128
147
|
%w(ties prefilter top_hit_by decoy_on_match postfilter include_ties_in_top_hit_postfilter hits_together proteins include_ties_in_top_hit_prefilter).each do |k|
|
@@ -148,9 +167,9 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
148
167
|
text_table = IO.read(@table_output_file)
|
149
168
|
|
150
169
|
# frozen
|
151
|
-
headings_re = Regexp.new( %w(num decoy
|
152
|
-
data_re = Regexp.new( %w(peps 283 0.993 0.
|
153
|
-
prot_re = Regexp.new( %w(106 0.972 0.
|
170
|
+
headings_re = Regexp.new( %w(num decoy badAAEst badAAEst badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
|
171
|
+
data_re = Regexp.new( %w(peps 283 0.993 0.178006 -0.024765 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
|
172
|
+
prot_re = Regexp.new( %w(106 0.972 0.018868 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
|
154
173
|
text_table.should =~ headings_re
|
155
174
|
text_table.should =~ data_re
|
156
175
|
text_table.should =~ prot_re
|
@@ -16,7 +16,7 @@ describe 'ms_to_lmat.rb' do
|
|
16
16
|
cmd = "#{@cmd} #{@mzxml} --ascii"
|
17
17
|
`#{cmd}`
|
18
18
|
newfile = @mzxml.sub(".mzXML", ".lmata")
|
19
|
-
newfile.should
|
19
|
+
newfile.exist_as_a_file?.should be_true
|
20
20
|
IO.read(newfile).should == IO.read(@ans_lmata)
|
21
21
|
File.unlink(newfile)
|
22
22
|
end
|
@@ -26,7 +26,7 @@ describe 'ms_to_lmat.rb' do
|
|
26
26
|
cmd = "#{@cmd} #{@mzxml}"
|
27
27
|
`#{cmd}`
|
28
28
|
newfile = @mzxml.sub(".mzXML", ".lmat")
|
29
|
-
newfile.should
|
29
|
+
newfile.exist_as_a_file?.should be_true
|
30
30
|
IO.read(newfile).should == IO.read(@ans_lmat)
|
31
31
|
File.unlink(newfile)
|
32
32
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
4
|
|
3
5
|
require 'spec_id/precision/prob'
|
@@ -47,19 +49,19 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
47
49
|
it 'responds to --prob init' do
|
48
50
|
normal = @st_to_yaml.call( @args + " --prob" )
|
49
51
|
|
50
|
-
normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.
|
52
|
+
normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.993333333333333, 0.85]) do |got,exp|
|
51
53
|
got.should be_close(exp, 0.000000000001)
|
52
54
|
end
|
53
55
|
#normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
|
54
56
|
#normal.should == normal_nsp
|
55
57
|
init = @st_to_yaml.call( @args + " --prob init" )
|
56
58
|
init.should_not == normal
|
57
|
-
init[:pephits_precision].first[:values].zip([1.0, 0.
|
59
|
+
init[:pephits_precision].first[:values].zip([1.0, 0.95, 0.963333333333333, 0.8025]) do |got,exp|
|
58
60
|
got.should be_close(exp, 0.000000000001)
|
59
61
|
end
|
60
62
|
with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
|
61
63
|
# frozen
|
62
|
-
with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.
|
64
|
+
with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.99, 0.993333333333333, 0.85]) do |got,exp|
|
63
65
|
got.should be_close(exp, 0.000000000001)
|
64
66
|
end
|
65
67
|
end
|
data/specs/sample_enzyme_spec.rb
CHANGED
@@ -33,9 +33,94 @@ describe SampleEnzyme, "digesting sequences" do
|
|
33
33
|
peps = SampleEnzyme.new('trypsin').digest(st, 2)
|
34
34
|
peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
|
35
35
|
end
|
36
|
-
|
37
36
|
|
38
37
|
end
|
39
38
|
|
39
|
+
# Exercises SampleEnzyme#num_tol_term and SampleEnzyme#num_missed_cleavages
# with two trypsin definitions that differ only in their no_cut setting.
describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do

  before(:each) do
    # cut 'KR' with no_cut 'P'
    @full_KRP = SampleEnzyme.new do |se|
      se.name = 'trypsin'
      se.cut = 'KR'
      se.no_cut = 'P'
      se.sense = 'C'
    end
    # cut 'KR' with an empty no_cut
    @just_KR = SampleEnzyme.new do |se|
      se.name = 'trypsin'
      se.cut = 'KR'
      se.no_cut = ''
      se.sense = 'C'
    end
  end

  it 'calculates the number of tolerant termini' do
    # one hash of (sequence with flanking residues => expected value) per
    # enzyme, in the same order as sample_enzyme_ar below
    exp = [{
      # full KR/P
      'K.EPTIDR.E' => 2,
      'K.PEPTIDR.E' => 1,
      'F.EEPTIDR.E' => 1,
      'F.PEPTIDW.R' => 0,
    },
    {
      # just KR
      'K.EPTIDR.E' => 2,
      'K.PEPTIDR.E' => 2,
      'F.EEPTIDR.E' => 1,
      'F.PEPTIDW.R' => 0,
    }
    ]
    sample_enzyme_ar = [@full_KRP, @just_KR]
    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
      hash.each do |seq, val|
        sample_enzyme.num_tol_term(seq).should == val
      end
    end
  end

  it 'calculates number of missed cleavages' do
    # one hash of (aaseq => expected value) per enzyme, in the same order as
    # sample_enzyme_ar below
    exp = [{
      # full KR/P
      "EPTIDR" => 0,
      "PEPTIDR" => 0,
      "EEPTIDR" => 0,
      "PEPTIDW" => 0,
      "PERPTIDW" => 0,
      "PEPKPTIDW" => 0,
      "PEPKTIDW" => 1,
      "RTTIDR" => 1,
      "RTTIKK" => 2,
      "PKEPRTIDW" => 2,
      "PKEPRTIDKP" => 2,
      "PKEPRAALKPEERPTIDKW" => 3,
    },
    {
      # just KR
      "EPTIDR" => 0,
      "PEPTIDR" => 0,
      "EEPTIDR" => 0,
      "PEPTIDW" => 0,
      "PERPTIDW" => 1,
      "PEPKPTIDW" => 1,
      "PEPKTIDW" => 1,
      "RTTIDR" => 1,
      "RTTIKK" => 2,
      "PKEPRTIDW" => 2,
      "PKEPRTIDKP" => 3,
      "PKEPRAALKPEERPTIDKW" => 5,
    }
    ]

    sample_enzyme_ar = [@full_KRP, @just_KR]
    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
      hash.each do |aaseq, val|
        # note that we are only using the middle section (the bare aaseq,
        # without flanking residues)!
        sample_enzyme.num_missed_cleavages(aaseq).should == val
      end
    end
  end

end
|
123
|
+
|
124
|
+
|
40
125
|
|
41
126
|
|