mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,246 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
-
require 'spec_id/srf'
|
3
|
-
require 'spec_id/precision/filter'
|
4
|
-
|
5
|
-
require File.dirname(__FILE__) + '/../../spec_id_helper'
|
6
|
-
|
7
|
-
require 'set'
|
8
|
-
require 'set_from_hash'
|
9
|
-
|
10
|
-
describe SpecID::Precision::Filter::Peps do
|
11
|
-
it 'does basic top hit filtering with ties=true|false|:as_array' do
|
12
|
-
hashes = [
|
13
|
-
{:aaseq=> 'A', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 0
|
14
|
-
{:aaseq=> 'B', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 1
|
15
|
-
{:aaseq=> 'C', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 2
|
16
|
-
{:aaseq=> 'D', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.2, :ppm => 25, :charge => 2}, # 3
|
17
|
-
{:aaseq=> 'D', :first_scan => 2, :xcorr => 1.9, :deltacn => 0.1, :ppm => 25, :charge => 2}, # 4
|
18
|
-
]
|
19
|
-
pep_klass = SRF::OUT::Pep
|
20
|
-
@sequest_peps = hashes.map do |hash|
|
21
|
-
hash[:prots] = []
|
22
|
-
pep = pep_klass.new.set_from_hash(hash)
|
23
|
-
end
|
24
|
-
# no tie:
|
25
|
-
options = {
|
26
|
-
:per => [:first_scan, :charge],
|
27
|
-
:by => [:xcorr, {:down => [:xcorr]}],
|
28
|
-
:ties => false
|
29
|
-
}
|
30
|
-
peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
|
31
|
-
peps.size.should == 2
|
32
|
-
set_of_hash_xcorrs = [0,4].map {|i| hashes[i][:xcorr] }.to_set
|
33
|
-
peps.map {|v| v.xcorr }.to_set.should == set_of_hash_xcorrs
|
34
|
-
|
35
|
-
# with tie == true:
|
36
|
-
options[:ties] = true
|
37
|
-
peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
|
38
|
-
peps.size.should == 3
|
39
|
-
set_of_hash_xcorrs = [0,1,4].map {|i| hashes[i][:xcorr] }.to_set
|
40
|
-
peps.map{|v| v.xcorr}.to_set.should == set_of_hash_xcorrs
|
41
|
-
|
42
|
-
# with tie == :as_array
|
43
|
-
options[:ties] = :as_array
|
44
|
-
peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
|
45
|
-
peps.size.should == 2
|
46
|
-
peps.any? {|v| v.class == Array }.should be_true
|
47
|
-
peps.select {|v| v.is_a? pep_klass }.first.should equal(@sequest_peps[4])
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
describe 'filtering on a small bioworks file' do
|
53
|
-
before(:each) do
|
54
|
-
@file = Tfiles + '/bioworks_small.xml'
|
55
|
-
@spec_id = SpecID.new(@file)
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'filters with basic sequest filters' do
|
59
|
-
opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
|
60
|
-
ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
|
61
|
-
|
62
|
-
|
63
|
-
ans[:params][:sequest].should == opts[:sequest]
|
64
|
-
# FROZEN:
|
65
|
-
ans[:pephits].size.should == 4
|
66
|
-
|
67
|
-
ans[:pephits].each do |pephit|
|
68
|
-
pephit.pass_filters?(opts[:sequest]).should be_true
|
69
|
-
pephit.fail_filters?(opts[:sequest]).should be_false
|
70
|
-
end
|
71
|
-
before = @spec_id.peps.size
|
72
|
-
ans[:pephits].each do |pephit|
|
73
|
-
@spec_id.peps.delete(pephit)
|
74
|
-
end
|
75
|
-
@spec_id.peps.size.should == before - 4
|
76
|
-
@spec_id.peps.each do |not_passing_pep|
|
77
|
-
not_passing_pep.pass_filters?(opts[:sequest]).should_not be_true
|
78
|
-
end
|
79
|
-
|
80
|
-
ans[:pephits].map {|v| v.aaseq }.to_set.size == 4
|
81
|
-
end
|
82
|
-
|
83
|
-
it 'can exclude deltacnstar' do
|
84
|
-
opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
|
85
|
-
# make two hits have the deltacnstar deltacn of 1.1
|
86
|
-
sorted = @spec_id.peps.sort_by {|pep| [pep.xcorr, pep.deltacn, 1.0/pep.ppm, pep.first_scan, pep.aaseq] }
|
87
|
-
# for two of these indices:
|
88
|
-
[286, 287].each do |index|
|
89
|
-
sorted[index].deltacn = 1.1
|
90
|
-
sorted[index].deltacn.should == 1.1
|
91
|
-
end
|
92
|
-
ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
|
93
|
-
|
94
|
-
ans[:params][:sequest].should == opts[:sequest]
|
95
|
-
# FROZEN:
|
96
|
-
ans[:pephits].size.should == 2
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
|
101
|
-
describe 'filtering on small bioworks file with inverse prots' do
|
102
|
-
before(:each) do
|
103
|
-
@regexp = /^INV_/o
|
104
|
-
@file = Tfiles + '/bioworks_with_INV_small.xml'
|
105
|
-
@spec_id = SpecID.new(@file)
|
106
|
-
vals = [Validator::Decoy.new(:constraint => @regexp)]
|
107
|
-
@opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar=> false}, :validators => vals}
|
108
|
-
end
|
109
|
-
|
110
|
-
it 'gets decoy precision' do
|
111
|
-
ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
|
112
|
-
peps = ans[:pephits]
|
113
|
-
vals = ans[:pephits_precision]
|
114
|
-
# FROZEN:
|
115
|
-
peps.size.should == 150
|
116
|
-
peps.hash_by(:aaseq).size.should == 74
|
117
|
-
vals.first.should == 149.0/150
|
118
|
-
end
|
119
|
-
|
120
|
-
it 'gets cys precision with freq' do
|
121
|
-
# this does a minimal test to see if this functions properly
|
122
|
-
# (not for accuracy, which is done in validator_spec)
|
123
|
-
## WITH FASTA FILE:
|
124
|
-
val1 = Validator::AAEst.new('C').set_frequency(Fasta.new(Tfiles + '/small.fasta').prots)
|
125
|
-
@opts[:validators] << val1 # obviously this guy is not his
|
126
|
-
ans1 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
|
127
|
-
peps = ans1[:pephits]
|
128
|
-
vals1 = ans1[:pephits_precision]
|
129
|
-
# FROZEN:
|
130
|
-
vals1.last.should be_close(0.84432189117806, 0.0000000001)
|
131
|
-
|
132
|
-
## WITH A CYSTEINE BACKGROUND:
|
133
|
-
background_cys = 0.0172
|
134
|
-
val3 = Validator::AAEst.new('C', :background => background_cys).set_frequency(Fasta.new(Tfiles + '/small.fasta').prots)
|
135
|
-
@opts[:validators][1] = val3
|
136
|
-
ans3 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
|
137
|
-
peps = ans3[:pephits]
|
138
|
-
vals3 = ans3[:pephits_precision]
|
139
|
-
# FROZEN:
|
140
|
-
vals3.last.should be_close(0.944734271368211, 0.00000000001)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
describe 'filtering on a real srf file' do
|
145
|
-
|
146
|
-
spec_large do
|
147
|
-
it 'does tmm with a toppred file on srf' do
|
148
|
-
opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false}}
|
149
|
-
dir = Tfiles_l + '/opd1_2runs_2mods/sequest33'
|
150
|
-
tmm_file = dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
|
151
|
-
fasta_file = dir + '/ecoli_K12_ncbi_20060321.fasta'
|
152
|
-
sequest_file = dir + '/ecoli.params'
|
153
|
-
srf_file = dir + '/020.srf'
|
154
|
-
spec_id = SpecID.new(srf_file)
|
155
|
-
# :tmm -> [transmembrane file,min_tm_seqs=1,expect_soluble=true,correct_wins=true,no_include_tm_peps=0.8, bkg=0] # a toppred.out file
|
156
|
-
|
157
|
-
regexp = /FAKINGIT_OUT/
|
158
|
-
opts[:decoy] = regexp
|
159
|
-
decoy_val = Validator::Decoy.new(:constraint => regexp) # this is not real, just to test
|
160
|
-
cys_val = Validator::AAEst.new('C').set_frequency(Fasta.new(fasta_file).prots)
|
161
|
-
tmm_val = Validator::Transmem::Protein.new(tmm_file, :min_num_tms => 1, :soluble_fraction => true, :correct_wins => true, :no_include_tm_peps => false, :background => 0.0).set_false_to_total_ratio( Digestor.digest( Fasta.new(fasta_file), Sequest::Params.new(sequest_file) ) )
|
162
|
-
opts[:validators] = [decoy_val, cys_val, tmm_val]
|
163
|
-
ans = SpecID::Precision::Filter.new.filter_and_validate(spec_id, opts)
|
164
|
-
peps = ans[:pephits]
|
165
|
-
vals = ans[:pephits_precision]
|
166
|
-
|
167
|
-
# frozen:
|
168
|
-
vals[0].should == 1.0
|
169
|
-
vals[1].should be_close(0.366612274427855, 0.00000001)
|
170
|
-
#vals[2].should be_close(0.396396396396396, 0.00000001)
|
171
|
-
# if the srf file is not 'filtered' by proper sequest vals, should give
|
172
|
-
# this:
|
173
|
-
#vals[2].should be_close(-0.204031426241371, 0.00000001)
|
174
|
-
vals[2].should be_close(-0.199538771665843, 0.00000001)
|
175
|
-
peps.size.should == 444
|
176
|
-
end
|
177
|
-
end
|
178
|
-
|
179
|
-
# This is what I was doing before. I think I may have been forgetting to
|
180
|
-
# remove the INV_ peptide from these counts!
|
181
|
-
# or more likely, the peptide hits were pep+prot hits!
|
182
|
-
# SpecID::Filterer.run_from_argv([@small_inv].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
|
183
|
-
### FROZEN:
|
184
|
-
#assert_match(/pep_hits\s+151/, output)
|
185
|
-
#assert_match(/uniq_aa_hits\s+75/, output)
|
186
|
-
#assert_match(/prot_hits\s+13/, output)
|
187
|
-
|
188
|
-
end
|
189
|
-
|
190
|
-
describe SpecID::Precision::Filter::Peps do
|
191
|
-
|
192
|
-
before(:all) do
|
193
|
-
hashes = [
|
194
|
-
{:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
|
195
|
-
{:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
|
196
|
-
{:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
|
197
|
-
{:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
|
198
|
-
{:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
|
199
|
-
{:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
|
200
|
-
]
|
201
|
-
@sequest_peps = hashes.map do |hash|
|
202
|
-
pep = SRF::OUT::Pep.new.set_from_hash(hash)
|
203
|
-
end
|
204
|
-
#sp = GenericSpecID.new.set_from_hash({:peps => peps})
|
205
|
-
|
206
|
-
end
|
207
|
-
|
208
|
-
it 'filters sequest peptides' do
|
209
|
-
args_and_expected = {
|
210
|
-
#deltacnstar false
|
211
|
-
[1.2, 1.2, 1.2, 0.1, 50, false] => 5, # "all passing"
|
212
|
-
[1.6, 1.6, 1.6, 0.1, 50, false] => 0, # "xcorrs too high"
|
213
|
-
[1.6, 1.0, 1.0, 0.1, 50, false] => 4, # "one xcorr too high"
|
214
|
-
[1.0, 1.6, 1.0, 0.1, 50, false] => 2, # "one xcorr too high"
|
215
|
-
[1.0, 1.0, 1.6, 0.1, 50, false] => 4, # "one xcorr too high"
|
216
|
-
[1.2, 1.2, 1.2, 0.2, 50, false] => 0, # "high deltacn"
|
217
|
-
|
218
|
-
## includedeltcnstars :
|
219
|
-
[1.2, 1.2, 1.2, 0.1, 50, true] => 6, # "all passing"
|
220
|
-
[1.2, 1.2, 1.2, 0.2, 50, true] => 1, # "high deltacn"
|
221
|
-
[1.0, 1.0, 1.6, 0.1, 50, true] => 5, # "one xcorr too high"
|
222
|
-
##
|
223
|
-
[1.0, 1.0, 1.0, 0.05, 60, true] => 6, ## testing ppm filtering:
|
224
|
-
[1.0, 1.0, 1.0, 0.05, 10, true] => 0,
|
225
|
-
}
|
226
|
-
args_and_expected.each do |args,exp|
|
227
|
-
filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
|
228
|
-
filt.filter(@sequest_peps).size.should == exp
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
it 'can change the pep array permanently' do
|
233
|
-
args_and_expected = {[1.2, 1.2, 1.2, 0.2, 50, true] => 1} # "high deltacn"
|
234
|
-
array_to_change = @sequest_peps.dup
|
235
|
-
array_to_change.size.should == @sequest_peps.size
|
236
|
-
args_and_expected.each do |args,exp|
|
237
|
-
filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
|
238
|
-
filt.filter!(array_to_change)
|
239
|
-
end
|
240
|
-
array_to_change.size.should_not == @sequest_peps.size
|
241
|
-
end
|
242
|
-
|
243
|
-
end
|
244
|
-
|
245
|
-
|
246
|
-
|
@@ -1,44 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
-
|
3
|
-
require 'spec_id/precision/prob'
|
4
|
-
require 'spec_id'
|
5
|
-
require 'spec_id/proph'
|
6
|
-
require 'validator'
|
7
|
-
require 'fasta'
|
8
|
-
require 'spec_id/sequest/params'
|
9
|
-
|
10
|
-
|
11
|
-
describe 'finding precision Proph::Prot::Pep objects' do
|
12
|
-
before(:each) do
|
13
|
-
@spec_id = GenericSpecID.new
|
14
|
-
# actual sort order: 3, 0, 4, 1, 2
|
15
|
-
peps = [
|
16
|
-
# 0: canonical
|
17
|
-
{:peptide_sequence => '0', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
|
18
|
-
# 1: lower init prob
|
19
|
-
{:peptide_sequence => '1', :initial_probability => 0.60, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
|
20
|
-
# 2: lower nsp prob
|
21
|
-
{:peptide_sequence => '2', :initial_probability => 0.63, :nsp_adjusted_probability => 0.52, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
|
22
|
-
# extra instances! (best hit)
|
23
|
-
{:peptide_sequence => '3', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 5, :is_contributing_evidence => true},
|
24
|
-
# is nondegen = false
|
25
|
-
{:peptide_sequence => '4', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => false, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},].map {|v| Proph::Prot::Pep.new(v) }
|
26
|
-
@spec_id.peps = peps
|
27
|
-
end
|
28
|
-
|
29
|
-
it 'runs without any validator' do
|
30
|
-
answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
|
31
|
-
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits", "pephits_precision", "probabilities"]
|
32
|
-
answer[:aaseqs].should == %w(3 0 4 1 2)
|
33
|
-
end
|
34
|
-
|
35
|
-
it 'returns modified peptides if any modified peptides' do
|
36
|
-
@spec_id.peps[1].mod_info = Sequest::PepXML::SearchHit::ModificationInfo.new(['MODIFIED', []])
|
37
|
-
answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
|
38
|
-
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits", "pephits_precision", "probabilities"]
|
39
|
-
end
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
|
File without changes
|
@@ -1,98 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
-
|
3
|
-
require 'spec_id/proph/pep_summary'
|
4
|
-
|
5
|
-
ToCheck = {
|
6
|
-
:spectrum_query => {:first => {:spectrum => "020.42.42.3", :start_scan=>42, :end_scan=>42, :precursor_neutral_mass=>1015.77285654469, :assumed_charge=>3, :index=>1 },
|
7
|
-
:last => {:spectrum=>"020.344.344.3", :start_scan=>344, :end_scan=>344, :precursor_neutral_mass=>1447.6040333025, :assumed_charge=>3, :index=>18 },
|
8
|
-
},
|
9
|
-
|
10
|
-
:search_hit => {:first => {:hit_rank=>1, :peptide=>"GTGVSVTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"S", :protein=>"gi|49176370|ref|YP_026228.1|", :num_tot_proteins=>1, :num_matched_ions=>10, :tot_num_ions=>70, :calc_neutral_pep_mass=>1015.79382542, :massdiff=>-0.0209688753124055, :num_tol_term=>2, :num_missed_cleavages=>0, :is_rejected=>0, :xcorr=>1.06543827056885, :deltacn => 0.192325830459595, :deltacnstar=>0, :spscore=>77.8397979736328, :sprank=>3, :probability=>0.07881571, :fval=>0.1592, :ntt=>2, :nmc=> 0, :massd=>-0.021},
|
11
|
-
:last => { :hit_rank=>1, :peptide=>"VAALRVPGGASLTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"K", :protein=>"gi|16129819|ref|NP_416380.1|", :num_tot_proteins=>1, :num_matched_ions=>16, :tot_num_ions=>78, :calc_neutral_pep_mass=>1447.58289842, :massdiff=> 0.0211348825000641, :num_tol_term=>2, :num_missed_cleavages=>1, :is_rejected=>0, :xcorr=>1.3090912103653, :deltacn => 0.259967535734177, :deltacnstar => 0, :spscore => 118.513412475586, :sprank => 4, :probability=>0.27738378, :fval=>1.3810, :ntt=>2, :nmc=>1, :massd=>0.021 },
|
12
|
-
}
|
13
|
-
}
|
14
|
-
|
15
|
-
|
16
|
-
describe Proph::PepSummary, "reading a small .xml file" do
|
17
|
-
before(:each) do
|
18
|
-
file = Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml'
|
19
|
-
@obj = Proph::PepSummary.new(file)
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should raise an error if not a peptide prophet file' do
|
23
|
-
lambda { Proph::PepSummary.new(Tfiles + '/opd1/000.tpp_2.9.2.first10.xml')}.should raise_error(ArgumentError)
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'has msms_run_summary objects with spectrum_queries' do
|
27
|
-
@obj.msms_run_summaries.size.should == 1
|
28
|
-
sqs = @obj.msms_run_summaries.first.spectrum_queries
|
29
|
-
sqs.size.should == 18
|
30
|
-
|
31
|
-
[:first, :last].each do |mth|
|
32
|
-
ToCheck[:spectrum_query][mth].each do |k,v|
|
33
|
-
if v.is_a? Float
|
34
|
-
sqs.send(mth).send(k).should be_close(v, 0.0000000001)
|
35
|
-
else
|
36
|
-
sqs.send(mth).send(k).should == v
|
37
|
-
end
|
38
|
-
end
|
39
|
-
ToCheck[:search_hit][mth].each do |k,v|
|
40
|
-
if v.is_a? Float
|
41
|
-
sqs.send(mth).search_results.first.search_hits.first.send(k).should be_close(v, 0.0000000001)
|
42
|
-
else
|
43
|
-
sqs.send(mth).search_results.first.search_hits.first.send(k).should == v
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
it 'has pephits (which are descended from SearchHit)' do
|
50
|
-
@obj.peps.size.should == 18
|
51
|
-
[:hit_rank, :probability, :fval, :ntt, :nmc, :massd].each do |guy|
|
52
|
-
@obj.peps.first.should respond_to(guy)
|
53
|
-
end
|
54
|
-
|
55
|
-
[:first, :last].each do |mth|
|
56
|
-
ToCheck[:search_hit][mth].each do |k,v|
|
57
|
-
if v.is_a? Float
|
58
|
-
@obj.peps.send(mth).send(k).should be_close(v, 0.0000000001)
|
59
|
-
else
|
60
|
-
@obj.peps.send(mth).send(k).should == v
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|
66
|
-
|
67
|
-
end
|
68
|
-
|
69
|
-
describe Proph::PepSummary, 'reading a large .xml file' do
|
70
|
-
spec_large do
|
71
|
-
before(:all) do
|
72
|
-
file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods.xml'
|
73
|
-
@obj = Proph::PepSummary.new(file)
|
74
|
-
end
|
75
|
-
|
76
|
-
it 'has peps of class Proph::PepSummary::Pep' do
|
77
|
-
@obj.peps.first.class.to_s.should == 'Proph::PepSummary::Pep'
|
78
|
-
@obj.peps.size.should == 1643
|
79
|
-
end
|
80
|
-
|
81
|
-
it 'contains peps that respond_to :aaseq' do
|
82
|
-
@obj.peps.first.should respond_to(:aaseq)
|
83
|
-
end
|
84
|
-
|
85
|
-
it 'has prots (also callable from peps)' do
|
86
|
-
(@obj.prots.size > 0).should be_true
|
87
|
-
@obj.peps.all? {|v| v.prots.size > 0 }.should be_true
|
88
|
-
peps_with_prots = @obj.peps.select {|v| v.prots.size > 1 }
|
89
|
-
# frozen:
|
90
|
-
peps_with_prots.first.prots.size.should == 3
|
91
|
-
peps_with_prots.first.prots.first.name.should == "gi|16128676|ref|NP_415229.1|"
|
92
|
-
peps_with_prots.first.prots.first.protein_descr.should == "RhsC protein in RhsC element [Escherichia coli K12]"
|
93
|
-
peps_with_prots.first.prots.first.reference.should == "gi|16128676|ref|NP_415229.1| RhsC protein in RhsC element [Escherichia coli K12]"
|
94
|
-
peps_with_prots.first.prots.last.protein_descr.should == "RhsA protein in RhsA element [Escherichia coli K12]"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
@@ -1,128 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
-
|
3
|
-
require 'spec_id/proph/prot_summary'
|
4
|
-
|
5
|
-
describe Proph::ProtSummary, "reading a -prot.xml file" do
|
6
|
-
before(:each) do
|
7
|
-
file = Tfiles + '/opd1/000_020_3prots-prot.xml'
|
8
|
-
@obj = Proph::ProtSummary.new(file)
|
9
|
-
end
|
10
|
-
|
11
|
-
it 'extracts protein groups with probabilities' do
|
12
|
-
@obj.prot_groups.size.should == 3
|
13
|
-
@obj.prot_groups.first.probability.should == 1.0
|
14
|
-
@obj.prot_groups[2].probability == 0.98
|
15
|
-
end
|
16
|
-
|
17
|
-
it 'extracts protein hit attributes' do
|
18
|
-
prot = @obj.prot_groups[1].prots.first
|
19
|
-
%w(protein_name n_indistinguishable_proteins probability percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids).zip(["gi|16132019|ref|NP_418618.1|", 1, 1.0, 13.0, "FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR".split('+'), "a", 2, 0.41]) do |name, val|
|
20
|
-
prot.send(name).should == val
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
it 'can detect -prot.xml version' do
|
25
|
-
answer = ['1.9', '4']
|
26
|
-
files = ['/yeast_gly_small-prot.xml', '/interact-opd1_mods_small-prot.xml'].map {|v| Tfiles + v}
|
27
|
-
files.zip(answer) do |file,answ|
|
28
|
-
Proph::ProtSummary.new.get_version(file).should == answ
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'has prots, peps, and prot_groups ' do
|
33
|
-
@obj.peps.should_not be_nil
|
34
|
-
@obj.prots.should_not be_nil
|
35
|
-
@obj.prot_groups.should_not be_nil
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
####################################################
|
41
|
-
# OTHER TESTS NOT IMPLEMENTED (do we need these??)
|
42
|
-
####################################################
|
43
|
-
|
44
|
-
=begin
|
45
|
-
|
46
|
-
require 'test/unit'
|
47
|
-
require 'spec_id'
|
48
|
-
require 'ms/scan'
|
49
|
-
|
50
|
-
class ProphTest < Test::Unit::TestCase
|
51
|
-
|
52
|
-
def initialize(arg)
|
53
|
-
super(arg)
|
54
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
55
|
-
@pepproph_xml = @tfiles + 'pepproph_small.xml'
|
56
|
-
end
|
57
|
-
|
58
|
-
def Xtest_filter_by_min_pep_prob
|
59
|
-
obj = Proph::Pep::Parser.new
|
60
|
-
new_file = "tfiles/tmp.xml"
|
61
|
-
assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
|
62
|
-
obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
|
63
|
-
assert_no_match(/peptideprophet_result probability="0.[0-5]/, IO.read(new_file))
|
64
|
-
assert_match(/<peptideprophet_result[^>]*probability="0.[6-9][^>]*>/, IO.read(new_file))
|
65
|
-
File.unlink new_file
|
66
|
-
end
|
67
|
-
|
68
|
-
def Xtest_uniq_by_seqcharge
|
69
|
-
cls = Proph::Pep
|
70
|
-
p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
71
|
-
p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
|
72
|
-
p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
73
|
-
p4 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
|
74
|
-
p5 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
|
75
|
-
un_peps = cls.uniq_by_seqcharge([p1,p2,p3,p4,p5])
|
76
|
-
## WHY ISn't that working? below!
|
77
|
-
##assert_equal([p1,p2,p4].to_set, un_peps.to_set)
|
78
|
-
assert(equal_sets([p1,p2,p4], un_peps))
|
79
|
-
end
|
80
|
-
|
81
|
-
def Xequal_sets(arr1, arr2)
|
82
|
-
c1 = arr1.dup
|
83
|
-
c2 = arr2.dup
|
84
|
-
arr1.each do |c|
|
85
|
-
arr2.each do |d|
|
86
|
-
if c == d
|
87
|
-
c1.delete c
|
88
|
-
c2.delete d
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
if (c1.size == c2.size) && (c1.size == 0)
|
93
|
-
true
|
94
|
-
else
|
95
|
-
false
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def Xtest_arithmetic_avg_scan_by_parent_time
|
100
|
-
i1 = 100015.0
|
101
|
-
i2 = 30000.0
|
102
|
-
i3 = 100.0
|
103
|
-
t1 = 0.13
|
104
|
-
t2 = 0.23
|
105
|
-
t3 = 0.33
|
106
|
-
p1 = MS::Scan.new(1,1, t1)
|
107
|
-
p2 = MS::Scan.new(2,1, t2)
|
108
|
-
p3 = MS::Scan.new(3,1, t3)
|
109
|
-
s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
|
110
|
-
s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
|
111
|
-
s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
|
112
|
-
scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
|
113
|
-
tot_inten = i1 + i2 + i3
|
114
|
-
tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
|
115
|
-
{:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
|
116
|
-
if k == :prec_mz # not sure why this is bugging out, but..
|
117
|
-
assert_equal(v.to_s, scan.send(k).to_s)
|
118
|
-
else
|
119
|
-
assert_equal(v, scan.send(k))
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
end
|
124
|
-
|
125
|
-
|
126
|
-
end
|
127
|
-
|
128
|
-
=end
|