mspire 0.3.1 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/bin/bioworks_to_pepxml.rb +15 -3
- data/bin/ms_to_lmat.rb +2 -1
- data/bin/sqt_group.rb +26 -0
- data/changelog.txt +36 -0
- data/lib/ms/msrun.rb +3 -1
- data/lib/ms/parser/mzdata/dom.rb +14 -14
- data/lib/ms/scan.rb +3 -3
- data/lib/mspire.rb +1 -1
- data/lib/sample_enzyme.rb +39 -0
- data/lib/spec_id.rb +18 -0
- data/lib/spec_id/aa_freqs.rb +6 -9
- data/lib/spec_id/digestor.rb +16 -17
- data/lib/spec_id/mass.rb +63 -1
- data/lib/spec_id/parser/proph.rb +101 -2
- data/lib/spec_id/precision/filter.rb +3 -2
- data/lib/spec_id/precision/filter/cmdline.rb +3 -1
- data/lib/spec_id/precision/filter/output.rb +1 -0
- data/lib/spec_id/precision/prob.rb +88 -21
- data/lib/spec_id/precision/prob/cmdline.rb +28 -16
- data/lib/spec_id/precision/prob/output.rb +8 -2
- data/lib/spec_id/proph/pep_summary.rb +25 -12
- data/lib/spec_id/sequest.rb +28 -0
- data/lib/spec_id/sequest/pepxml.rb +142 -197
- data/lib/spec_id/sqt.rb +349 -0
- data/lib/spec_id/srf.rb +33 -23
- data/lib/validator.rb +40 -57
- data/lib/validator/aa.rb +3 -90
- data/lib/validator/aa_est.rb +112 -0
- data/lib/validator/cmdline.rb +163 -31
- data/lib/validator/decoy.rb +15 -7
- data/lib/validator/digestion_based.rb +5 -4
- data/lib/validator/q_value.rb +32 -0
- data/script/peps_per_bin.rb +67 -0
- data/script/sqt_to_meta.rb +24 -0
- data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
- data/specs/bin/fasta_shaker_spec.rb +2 -2
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
- data/specs/bin/filter_and_validate_spec.rb +25 -6
- data/specs/bin/ms_to_lmat_spec.rb +2 -2
- data/specs/bin/prob_validate_spec.rb +5 -3
- data/specs/sample_enzyme_spec.rb +86 -1
- data/specs/spec_helper.rb +11 -9
- data/specs/spec_id/bioworks_spec.rb +2 -1
- data/specs/spec_id/precision/filter_spec.rb +5 -5
- data/specs/spec_id/precision/prob_spec.rb +0 -67
- data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
- data/specs/spec_id/protein_summary_spec.rb +4 -4
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
- data/specs/spec_id/sequest_spec.rb +38 -0
- data/specs/spec_id/sqt_spec.rb +111 -3
- data/specs/spec_id_spec.rb +2 -0
- data/specs/transmem/phobius_spec.rb +3 -1
- data/specs/transmem/toppred_spec.rb +1 -1
- data/specs/validator/aa_est_spec.rb +66 -0
- data/specs/validator/aa_spec.rb +1 -68
- data/specs/validator/background_spec.rb +2 -0
- data/specs/validator/bias_spec.rb +3 -27
- data/specs/validator/decoy_spec.rb +2 -2
- data/specs/validator/transmem_spec.rb +2 -1
- data/test_files/small.sqt +87 -0
- metadata +312 -293
data/lib/validator/decoy.rb
CHANGED
@@ -3,10 +3,12 @@ require 'validator'
|
|
3
3
|
class Validator::Decoy < Validator
|
4
4
|
include Precision::Calculator::Decoy
|
5
5
|
|
6
|
+
# a Regexp (if concatenated) or a String (the filename of a separate run)
|
6
7
|
attr_accessor :constraint
|
7
8
|
|
8
9
|
attr_accessor :decoy_on_match
|
9
10
|
attr_accessor :correct_wins
|
11
|
+
attr_accessor :decoy_to_target_ratio
|
10
12
|
|
11
13
|
attr_accessor :last_pep_was_decoy
|
12
14
|
|
@@ -16,13 +18,19 @@ class Validator::Decoy < Validator
|
|
16
18
|
|
17
19
|
attr_reader :normal_peps_just_submitted
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
DEFAULTS = {
|
22
|
+
:decoy_on_match => true,
|
23
|
+
:correct_wins => true,
|
24
|
+
:decoy_to_target_ratio => 1.0,
|
25
|
+
}
|
26
|
+
|
27
|
+
def initialize(opts={})
|
28
|
+
merged = DEFAULTS.merge(opts)
|
29
|
+
@constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
|
23
30
|
end
|
24
31
|
|
25
32
|
# returns [normal, decoy] (the order in which pephit_precision destructures it)
|
33
|
+
# reads the full protein reference
|
26
34
|
def partition(peps)
|
27
35
|
if @decoy_on_match
|
28
36
|
if @correct_wins
|
@@ -74,15 +82,15 @@ class Validator::Decoy < Validator
|
|
74
82
|
@normal_peps_just_submitted = normal
|
75
83
|
@increment_normal += normal.size
|
76
84
|
@increment_decoy += decoy.size
|
77
|
-
calc_precision(@increment_normal, @increment_decoy)
|
85
|
+
calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
|
78
86
|
end
|
79
87
|
|
80
88
|
def pephit_precision(peps, separate_peps=nil)
|
81
89
|
if separate_peps
|
82
|
-
calc_precision(peps.size, separate_peps.size)
|
90
|
+
calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
|
83
91
|
else
|
84
92
|
(norm, decoy) = partition(peps)
|
85
|
-
calc_precision(norm.size, decoy.size)
|
93
|
+
calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
|
86
94
|
end
|
87
95
|
end
|
88
96
|
|
@@ -6,7 +6,8 @@ require 'spec_id/sequest/params'
|
|
6
6
|
# SpecID::Pep objects using the pephit_precision method.
|
7
7
|
class Validator::DigestionBased < Validator
|
8
8
|
DEFAULTS = {
|
9
|
-
|
9
|
+
#:false_to_total_ratio => 1.0, # disabled because this needs to be set
|
10
|
+
# explicitly
|
10
11
|
:background => 0.0,
|
11
12
|
}
|
12
13
|
|
@@ -42,13 +43,13 @@ class Validator::DigestionBased < Validator
|
|
42
43
|
# returns [num_tps, num_fps]
|
43
44
|
def calc_precision_prep(num_tps, num_fps)
|
44
45
|
total_peps_passing_partition = num_tps + num_fps
|
45
|
-
num_fps = adjust_fps_for_background(num_tps, num_fps,
|
46
|
+
num_fps = adjust_fps_for_background(num_tps, num_fps, background)
|
46
47
|
## we must use the false_to_total_ratio to estimate how many are really
|
47
48
|
## incorrect!
|
48
49
|
# FALSE/TOTAL = FALSE(found)/TOTAL(found)
|
49
50
|
# TOTAL(found) = FALSE(found) * TOTAL/FALSE
|
50
51
|
# = FALSE(found) / (FALSE/TOTAL)
|
51
|
-
total_false = num_fps /
|
52
|
+
total_false = num_fps / false_to_total_ratio
|
52
53
|
# NOTE: the partition algorithm drops peptides that are transmembrane
|
53
54
|
# under certain options. Thus, the total false estimate must be tempered
|
54
55
|
# by this lower number of total peptides.
|
@@ -60,7 +61,7 @@ class Validator::DigestionBased < Validator
|
|
60
61
|
# assumes partition returns (tps, fps)
|
61
62
|
def set_false_to_total_ratio(peps)
|
62
63
|
(tps, fps) = partition(peps)
|
63
|
-
|
64
|
+
self.false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
|
64
65
|
self
|
65
66
|
end
|
66
67
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# from percolator
|
4
|
+
# This is a trivial class (since q-values are so straightforward with regard
|
5
|
+
# to precision), but it allows us to work with q-values using the same
|
6
|
+
# interface as all other validators
|
7
|
+
class Validator::QValue
|
8
|
+
|
9
|
+
# objs should respond_to :q_value
|
10
|
+
# q-values: 0.0 means no false discoveries, 0.5 means 50% false discoveries
|
11
|
+
# 1 - (the largest q value) is the precision
|
12
|
+
def precision(objs)
|
13
|
+
return 1.0 if objs.size == 0
|
14
|
+
largest_q_value = objs.map {|v| v.q_value }.max
|
15
|
+
prec = 1.0 - largest_q_value
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
# objs should respond_to :q_value
|
20
|
+
# These should be added from low q-value to high q-value
|
21
|
+
# The last q-value added determines the precision
|
22
|
+
def increment_precision(objs)
|
23
|
+
if objs.is_a?(SpecID::Pep) or objs.is_a?(SpecID::Prot)
|
24
|
+
objs = [objs]
|
25
|
+
end
|
26
|
+
precision(objs)
|
27
|
+
end
|
28
|
+
|
29
|
+
alias_method :pephit_precision, :precision
|
30
|
+
alias_method :prothit_precision, :precision
|
31
|
+
alias_method :increment_pephits_precision, :increment_precision
|
32
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'generator'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
require 'fasta'
|
7
|
+
require 'sample_enzyme'
|
8
|
+
require 'spec_id/digestor'
|
9
|
+
require 'spec_id/mass'
|
10
|
+
require 'vec'
|
11
|
+
|
12
|
+
opt = {}
|
13
|
+
opt[:missed_cleavages] = 0 # number of missed cleavages allowed
|
14
|
+
opt[:bin_size] = 0.001 # ~ parts per million
|
15
|
+
opt[:min] = 300.0
|
16
|
+
opt[:max] = 4500.0
|
17
|
+
opt[:h_plus] = 1.0
|
18
|
+
|
19
|
+
opts = OptionParser.new do |op|
|
20
|
+
op.banner = "usage: #{File.basename(__FILE__)} *.fasta"
|
21
|
+
op.separator "Outputs a close estimate of number of peptides per bin."
|
22
|
+
op.separator "Uses m+H+ as the peptide mass."
|
23
|
+
op.separator "[for speed, assumes that there is a peptide mass close to the extremes]"
|
24
|
+
op.on("-b", "--bin_size <F>", Float, "size of bins [#{opt[:bin_size]}]") {|v| opt[:bin_size] = v }
|
25
|
+
op.on("-x", "--max <F>", Float, "max mass to accept [#{opt[:max]}]") {|v| opt[:max] = v }
|
26
|
+
op.on("-n", "--min <F>", Float, "min mass to accept [#{opt[:min]}]") {|v| opt[:min] = v }
|
27
|
+
op.on("-h", "--h_plus <F>", Float, "value of H+ to use [#{opt[:h_plus]}]") {|v| opt[:h_plus] = v }
|
28
|
+
op.on("-m", "--missed_cleavages <N>", Integer, "num missed cleavages [#{opt[:missed_cleavages]}]") {|v| opt[:missed_cleavages] = v }
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.parse!
|
32
|
+
|
33
|
+
if ARGV.size == 0
|
34
|
+
puts opts.to_s
|
35
|
+
exit
|
36
|
+
end
|
37
|
+
|
38
|
+
min_mass = opt[:min]
|
39
|
+
max_mass = opt[:max]
|
40
|
+
|
41
|
+
ARGV.each do |file|
|
42
|
+
fasta = Fasta.new(file)
|
43
|
+
uniq_aaseqs = fasta.map do |prot|
|
44
|
+
SampleEnzyme.tryptic(prot.aaseq, opt[:missed_cleavages])
|
45
|
+
end.flatten.uniq
|
46
|
+
|
47
|
+
masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs)
|
48
|
+
passing_masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs).select do |mh|
|
49
|
+
((mh >= min_mass) and (mh <= max_mass))
|
50
|
+
end
|
51
|
+
|
52
|
+
## warn if the masses aren't close to the end points
|
53
|
+
if (max_mass - passing_masses.max) > 1.0
|
54
|
+
warn "highest mass is not that close to max: #{passing_masses.max}"
|
55
|
+
end
|
56
|
+
if (passing_masses.min - min_mass) > 1.0
|
57
|
+
warn "lowest mass is not that close to min: #{passing_masses.min}"
|
58
|
+
end
|
59
|
+
|
60
|
+
num_bins = (max_mass - min_mass) / opt[:bin_size]
|
61
|
+
|
62
|
+
(bins, freqs) = VecD.new(passing_masses).histogram(num_bins)
|
63
|
+
|
64
|
+
# report
|
65
|
+
puts "#{file}: #{freqs.avg}"
|
66
|
+
|
67
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/ruby -s
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
$outfile = 'meta.sqm'
|
6
|
+
opts = OptionParser.new do |op|
|
7
|
+
op.banner = "usage: #{File.basename(__FILE__)} <file>.sqt ..."
|
8
|
+
op.separator "outputs meta.sqm (a sqt meta file)"
|
9
|
+
op.on("-o", "--outfile <file>", "currently: #{$outfile}") {|v| $outfile = v}
|
10
|
+
end
|
11
|
+
|
12
|
+
opts.parse!
|
13
|
+
|
14
|
+
if ARGV.size == 0
|
15
|
+
puts opts.to_s
|
16
|
+
exit
|
17
|
+
end
|
18
|
+
|
19
|
+
File.open($outfile, 'w') do |out|
|
20
|
+
ARGV.each do |file|
|
21
|
+
out.puts File.expand_path(file)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
@@ -41,7 +41,7 @@ describe 'bioworks_to_pepxml.rb' do
|
|
41
41
|
cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
|
42
42
|
## FILES EXIST:
|
43
43
|
prc = proc {|file|
|
44
|
-
file.should
|
44
|
+
file.exist_as_a_file?.should be_true
|
45
45
|
beginning = IO.readlines(file)[0,50].join("\n")
|
46
46
|
$XML_SANITY_LINES.each do |line|
|
47
47
|
beginning.should include(line)
|
@@ -55,7 +55,7 @@ describe 'bioworks_to_pepxml.rb' do
|
|
55
55
|
## COPY MZXML:
|
56
56
|
%w(000 020).each do |file|
|
57
57
|
mzxml_file = File.join(@out_path, "#{file}.mzXML")
|
58
|
-
mzxml_file.should
|
58
|
+
mzxml_file.exist_as_a_file?.should be_true
|
59
59
|
end
|
60
60
|
## CLEANUP:
|
61
61
|
unless @no_delete then FileUtils.rm_rf(@out_path) end
|
@@ -68,7 +68,7 @@ describe 'bioworks_to_pepxml.rb' do
|
|
68
68
|
db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
|
69
69
|
IO.read(@tf_params).should =~ db_re
|
70
70
|
prc = proc {|file|
|
71
|
-
file.should
|
71
|
+
file.exist_as_a_file?.should be_true
|
72
72
|
IO.read(file).should_not =~ db_re
|
73
73
|
}
|
74
74
|
_basic(cmd, prc)
|
@@ -200,13 +200,13 @@ EDITPEP
|
|
200
200
|
end
|
201
201
|
|
202
202
|
def fastalns(fn)
|
203
|
-
fn.should
|
203
|
+
fn.exist_as_a_file?.should be_true
|
204
204
|
IO.read(fn).split("\n")
|
205
205
|
end
|
206
206
|
|
207
207
|
# returns the fasta object proteins
|
208
208
|
def fastap(fn)
|
209
|
-
@f.should
|
209
|
+
@f.exist_as_a_file?.should be_true
|
210
210
|
Fasta.new(fn).prots
|
211
211
|
end
|
212
212
|
|
@@ -2,10 +2,10 @@
|
|
2
2
|
pephits_precision:
|
3
3
|
- validator: decoy
|
4
4
|
value: 0.992932862190813
|
5
|
-
- validator:
|
5
|
+
- validator: badAAEst
|
6
6
|
value: 0.178006237270664
|
7
|
-
- validator:
|
8
|
-
value: -0.
|
7
|
+
- validator: badAAEst
|
8
|
+
value: -0.0247654296463379
|
9
9
|
- validator: badAA
|
10
10
|
value: 0.301413862599215
|
11
11
|
- validator: bias
|
@@ -94,22 +94,19 @@ params:
|
|
94
94
|
:decoy_on_match: true
|
95
95
|
:correct_wins: true
|
96
96
|
- :calculated_background: 0.127208480565371
|
97
|
-
:type:
|
98
|
-
:class: Validator::
|
97
|
+
:type: badAAEst
|
98
|
+
:class: Validator::AAEst
|
99
99
|
:background: 0.001
|
100
100
|
:frequency: 0.0147528119278054
|
101
|
-
:false_to_total_ratio: 1.0
|
102
101
|
- :calculated_background: 0.402826855123675
|
103
|
-
:type:
|
104
|
-
:class: Validator::
|
102
|
+
:type: badAAEst
|
103
|
+
:class: Validator::AAEst
|
105
104
|
:background: 0.0
|
106
105
|
:frequency: 0.0463510332199843
|
107
|
-
:false_to_total_ratio: 1.0
|
108
106
|
- :calculated_background: 0.127208480565371
|
109
107
|
:type: badAA
|
110
108
|
:class: Validator::AA
|
111
109
|
:background: 0.001
|
112
|
-
:frequency:
|
113
110
|
:false_to_total_ratio: 0.180662732637313
|
114
111
|
- :calculated_background: 0.773851590106007
|
115
112
|
:type: bias
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
4
|
|
3
5
|
require 'spec_id/precision/filter'
|
@@ -80,7 +82,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
80
82
|
`#{run_normal}`
|
81
83
|
end
|
82
84
|
structs = [ht_file, hs_file].map do |file|
|
83
|
-
file.should
|
85
|
+
file.exist_as_a_file?.should be_true
|
84
86
|
struct = YAML.load_file(file)
|
85
87
|
File.unlink file
|
86
88
|
struct
|
@@ -104,7 +106,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
104
106
|
|
105
107
|
it 'handles multiple validators of the same kind (except, of course, decoy)' do
|
106
108
|
|
107
|
-
struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --
|
109
|
+
struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa_est C,0.001 --bad_aa_est E --bad_aa C,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --fasta #{@small_fasta_file} --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
|
108
110
|
frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
|
109
111
|
|
110
112
|
## Pephits precision:
|
@@ -121,8 +123,25 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
121
123
|
frp = frozen['params']
|
122
124
|
stp = struct['params']
|
123
125
|
|
126
|
+
#puts "frozen validators:"
|
127
|
+
#p frp['validators']
|
128
|
+
|
129
|
+
#puts "seen validators:"
|
130
|
+
#p stp['validators']
|
131
|
+
|
124
132
|
frp['validators'].zip(stp['validators']) do |f,s|
|
125
|
-
f.
|
133
|
+
if f.is_a? Hash
|
134
|
+
f.keys.each do |k|
|
135
|
+
if k == :file or k == :transmem_file
|
136
|
+
File.basename(f[k]).should == File.basename(s[k].gsub('\\','/'))
|
137
|
+
else
|
138
|
+
s[k].should == f[k]
|
139
|
+
#f[k].should == s[k]
|
140
|
+
end
|
141
|
+
end
|
142
|
+
else
|
143
|
+
f.should == s
|
144
|
+
end
|
126
145
|
end
|
127
146
|
|
128
147
|
%w(ties prefilter top_hit_by decoy_on_match postfilter include_ties_in_top_hit_postfilter hits_together proteins include_ties_in_top_hit_prefilter).each do |k|
|
@@ -148,9 +167,9 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
148
167
|
text_table = IO.read(@table_output_file)
|
149
168
|
|
150
169
|
# frozen
|
151
|
-
headings_re = Regexp.new( %w(num decoy
|
152
|
-
data_re = Regexp.new( %w(peps 283 0.993 0.
|
153
|
-
prot_re = Regexp.new( %w(106 0.972 0.
|
170
|
+
headings_re = Regexp.new( %w(num decoy badAAEst badAAEst badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
|
171
|
+
data_re = Regexp.new( %w(peps 283 0.993 0.178006 -0.024765 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
|
172
|
+
prot_re = Regexp.new( %w(106 0.972 0.018868 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
|
154
173
|
text_table.should =~ headings_re
|
155
174
|
text_table.should =~ data_re
|
156
175
|
text_table.should =~ prot_re
|
@@ -16,7 +16,7 @@ describe 'ms_to_lmat.rb' do
|
|
16
16
|
cmd = "#{@cmd} #{@mzxml} --ascii"
|
17
17
|
`#{cmd}`
|
18
18
|
newfile = @mzxml.sub(".mzXML", ".lmata")
|
19
|
-
newfile.should
|
19
|
+
newfile.exist_as_a_file?.should be_true
|
20
20
|
IO.read(newfile).should == IO.read(@ans_lmata)
|
21
21
|
File.unlink(newfile)
|
22
22
|
end
|
@@ -26,7 +26,7 @@ describe 'ms_to_lmat.rb' do
|
|
26
26
|
cmd = "#{@cmd} #{@mzxml}"
|
27
27
|
`#{cmd}`
|
28
28
|
newfile = @mzxml.sub(".mzXML", ".lmat")
|
29
|
-
newfile.should
|
29
|
+
newfile.exist_as_a_file?.should be_true
|
30
30
|
IO.read(newfile).should == IO.read(@ans_lmat)
|
31
31
|
File.unlink(newfile)
|
32
32
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
4
|
|
3
5
|
require 'spec_id/precision/prob'
|
@@ -47,19 +49,19 @@ describe 'filter_and_validate.rb on small bioworks file' do
|
|
47
49
|
it 'responds to --prob init' do
|
48
50
|
normal = @st_to_yaml.call( @args + " --prob" )
|
49
51
|
|
50
|
-
normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.
|
52
|
+
normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.993333333333333, 0.85]) do |got,exp|
|
51
53
|
got.should be_close(exp, 0.000000000001)
|
52
54
|
end
|
53
55
|
#normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
|
54
56
|
#normal.should == normal_nsp
|
55
57
|
init = @st_to_yaml.call( @args + " --prob init" )
|
56
58
|
init.should_not == normal
|
57
|
-
init[:pephits_precision].first[:values].zip([1.0, 0.
|
59
|
+
init[:pephits_precision].first[:values].zip([1.0, 0.95, 0.963333333333333, 0.8025]) do |got,exp|
|
58
60
|
got.should be_close(exp, 0.000000000001)
|
59
61
|
end
|
60
62
|
with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
|
61
63
|
# frozen
|
62
|
-
with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.
|
64
|
+
with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.99, 0.993333333333333, 0.85]) do |got,exp|
|
63
65
|
got.should be_close(exp, 0.000000000001)
|
64
66
|
end
|
65
67
|
end
|
data/specs/sample_enzyme_spec.rb
CHANGED
@@ -33,9 +33,94 @@ describe SampleEnzyme, "digesting sequences" do
|
|
33
33
|
peps = SampleEnzyme.new('trypsin').digest(st, 2)
|
34
34
|
peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
|
35
35
|
end
|
36
|
-
|
37
36
|
|
38
37
|
end
|
39
38
|
|
39
|
+
describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
|
40
|
+
|
41
|
+
before(:each) do
|
42
|
+
@full_KRP = SampleEnzyme.new do |se|
|
43
|
+
se.name = 'trypsin'
|
44
|
+
se.cut = 'KR'
|
45
|
+
se.no_cut = 'P'
|
46
|
+
se.sense = 'C'
|
47
|
+
end
|
48
|
+
@just_KR = SampleEnzyme.new do |se|
|
49
|
+
se.name = 'trypsin'
|
50
|
+
se.cut = 'KR'
|
51
|
+
se.no_cut = ''
|
52
|
+
se.sense = 'C'
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'calculates the number of tolerant termini' do
|
57
|
+
exp = [{
|
58
|
+
# full KR/P
|
59
|
+
'K.EPTIDR.E' => 2,
|
60
|
+
'K.PEPTIDR.E' => 1,
|
61
|
+
'F.EEPTIDR.E' => 1,
|
62
|
+
'F.PEPTIDW.R' => 0,
|
63
|
+
},
|
64
|
+
{
|
65
|
+
# just KR
|
66
|
+
'K.EPTIDR.E' => 2,
|
67
|
+
'K.PEPTIDR.E' => 2,
|
68
|
+
'F.EEPTIDR.E' => 1,
|
69
|
+
'F.PEPTIDW.R' => 0,
|
70
|
+
}
|
71
|
+
]
|
72
|
+
scall = Sequest::PepXML::SearchHit
|
73
|
+
sample_enzyme_ar = [@full_KRP, @just_KR]
|
74
|
+
sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
|
75
|
+
hash.each do |seq, val|
|
76
|
+
sample_enzyme.num_tol_term(seq).should == val
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'calculates number of missed cleavages' do
|
82
|
+
exp = [{
|
83
|
+
"EPTIDR" => 0,
|
84
|
+
"PEPTIDR" => 0,
|
85
|
+
"EEPTIDR" => 0,
|
86
|
+
"PEPTIDW" => 0,
|
87
|
+
"PERPTIDW" => 0,
|
88
|
+
"PEPKPTIDW" => 0,
|
89
|
+
"PEPKTIDW" => 1,
|
90
|
+
"RTTIDR" => 1,
|
91
|
+
"RTTIKK" => 2,
|
92
|
+
"PKEPRTIDW" => 2,
|
93
|
+
"PKEPRTIDKP" => 2,
|
94
|
+
"PKEPRAALKPEERPTIDKW" => 3,
|
95
|
+
},
|
96
|
+
{
|
97
|
+
"EPTIDR" => 0,
|
98
|
+
"PEPTIDR" => 0,
|
99
|
+
"EEPTIDR" => 0,
|
100
|
+
"PEPTIDW" => 0,
|
101
|
+
"PERPTIDW" => 1,
|
102
|
+
"PEPKPTIDW" => 1,
|
103
|
+
"PEPKTIDW" => 1,
|
104
|
+
"RTTIDR" => 1,
|
105
|
+
"RTTIKK" => 2,
|
106
|
+
"PKEPRTIDW" => 2,
|
107
|
+
"PKEPRTIDKP" => 3,
|
108
|
+
"PKEPRAALKPEERPTIDKW" => 5,
|
109
|
+
}
|
110
|
+
]
|
111
|
+
|
112
|
+
sample_enzyme_ar = [@full_KRP, @just_KR]
|
113
|
+
sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
|
114
|
+
hash.each do |aaseq, val|
|
115
|
+
#first, middle, last = SpecID::Pep.split_sequence(seq)
|
116
|
+
# note that we are only using the middle section!
|
117
|
+
sample_enzyme.num_missed_cleavages(aaseq).should == val
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
|
40
125
|
|
41
126
|
|