mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper'
|
|
2
|
)
|
|
3
|
+
require 'spec_id/aa_freqs'
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
describe SpecID::AAFreqs, "given a small fasta file" do
|
|
8
|
+
before(:all) do
|
|
9
|
+
@sf = Tfiles + "/small.fasta"
|
|
10
|
+
@fobj = Fasta.new(@sf)
|
|
11
|
+
@obj = SpecID::AAFreqs.new(@fobj)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it 'calculates AA freqs properly' do
|
|
15
|
+
expect = {:I=>0.0628918621937819, :S=>0.0539719475147049, :D=>0.0526145691939758, :Z=>0.0, :L=>0.102772929998061, :T=>0.0491888048607071, :E=>0.0609527503070261, :O=>0.0, :C=>0.0157714433456144, :K=>0.0471850559110594, :U=>0.0, :Q=>0.0382651412319824, :W=>0.0137030573330748, :A=>0.101997285243359, :M=>0.0294745006786892, :J=>0.0, :G=>0.0811195139292871, :Y=>0.0254670027793937, :X=>0.0, :F=>0.0418201796910348, :R=>0.0546829552065154, :V=>0.0702604873634542, :H=>0.0213302307543145, :B=>0.0, :N=>0.03471010277293, :P=>0.0418201796910348}
|
|
16
|
+
aaf = @obj.aafreqs
|
|
17
|
+
expect.each do |k,v|
|
|
18
|
+
#aaf.key?(k).should be_true
|
|
19
|
+
aaf.should have_key(k)
|
|
20
|
+
aaf[k].should be_close(v, 0.00000001)
|
|
21
|
+
end
|
|
22
|
+
sum = 0.0
|
|
23
|
+
aaf.values.each do |v|
|
|
24
|
+
sum += v
|
|
25
|
+
end
|
|
26
|
+
sum.should be_close(1.0, 0.0000000000001)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it 'gets actual and expected nums for at least 1 amino acid' do
|
|
30
|
+
peptide_aaseqs = @fobj.prots.map do |prot|
|
|
31
|
+
prot.aaseq[0..12]
|
|
32
|
+
end
|
|
33
|
+
peptide_aaseqs.size.should == 50
|
|
34
|
+
(ac,ex) = @obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
|
|
35
|
+
ac.should == 9
|
|
36
|
+
ex.should be_close(9.33530631238985, 0.0000000001)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe SpecID::AAFreqs, "with class methods" do
|
|
41
|
+
it 'creates a probability of length lookup table' do
|
|
42
|
+
expecting = [0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001]
|
|
43
|
+
SpecID::AAFreqs.probability_of_length_table(0.01, 4).zip(expecting) do |answ, exp|
|
|
44
|
+
answ.should be_close(exp, 0.0000000001)
|
|
45
|
+
end
|
|
46
|
+
expecting = [0.0, 0.2, 0.36, 0.488, 0.5904]
|
|
47
|
+
SpecID::AAFreqs.probability_of_length_table(0.2, 4).zip(expecting) do |answ, exp|
|
|
48
|
+
answ.should be_close(exp, 0.0000000001)
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
|
|
@@ -1,78 +1,51 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
|
1
2
|
|
|
2
|
-
require 'test/unit'
|
|
3
3
|
require 'spec_id'
|
|
4
|
-
require '
|
|
5
|
-
|
|
6
|
-
class BioworksTest < Test::Unit::TestCase
|
|
7
|
-
|
|
8
|
-
def initialize(arg)
|
|
9
|
-
super(arg)
|
|
10
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
|
11
|
-
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
|
12
|
-
@tf_bioworks_xml = @tfiles_l + "bioworks.xml"
|
|
13
|
-
@tf_bioworks_xml_small = @tfiles + "bioworks_small.xml"
|
|
14
|
-
@tf_bioworks_xml_really_small = @tfiles + "bioworks_with_INV_small.xml"
|
|
15
|
-
@tf_params = @tfiles + "bioworks32.params"
|
|
16
|
-
@tf_bioworks_single_xml_small = @tfiles + 'bioworks_single_run_small.xml'
|
|
17
|
-
@tf_bioworks_to_excel = @tfiles + 'tf_bioworks2excel.bioXML'
|
|
18
|
-
@tf_bioworks_to_excel_actual = @tfiles + 'tf_bioworks2excel.txt.actual'
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def test_bioworks_pep
|
|
22
|
-
hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
|
|
23
|
-
pep = Bioworks::Pep.new(hash)
|
|
24
|
-
hash.each do |k,v|
|
|
25
|
-
assert_equal(v, pep.send(k))
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
4
|
+
require 'spec_id/bioworks'
|
|
5
|
+
#require 'benchmark'
|
|
29
6
|
|
|
7
|
+
describe Bioworks, 'set from an xml file' do
|
|
30
8
|
# NEED TO DEBUG THIS PROB!
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def Xtest_xml_parsing_speed
|
|
39
|
-
if File.exist? @tfiles_l
|
|
40
|
-
#puts Benchmark.bm {|b|
|
|
41
|
-
obj = Bioworks.new(@tf_bioworks_xml)
|
|
42
|
-
#}
|
|
43
|
-
else
|
|
44
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
|
45
|
-
end
|
|
9
|
+
it 'can set one with labeled proteins' do
|
|
10
|
+
file = Tfiles + "/bioworks_with_INV_small.xml"
|
|
11
|
+
obj = Bioworks.new(file)
|
|
12
|
+
obj.prots.size.should == 19
|
|
13
|
+
file = Tfiles + '/bioworks_small.xml'
|
|
14
|
+
obj = Bioworks.new(file)
|
|
15
|
+
obj.prots.size.should == 106
|
|
46
16
|
end
|
|
47
17
|
|
|
48
|
-
|
|
49
|
-
|
|
18
|
+
it 'can parse an xml file NOT derived from multi-concensus' do
|
|
19
|
+
tf_bioworks_single_xml_small = Tfiles + '/bioworks_single_run_small.xml'
|
|
20
|
+
obj = Bioworks.new(tf_bioworks_single_xml_small)
|
|
50
21
|
gfn = '5prot_mix_michrom_20fmol_200pmol'
|
|
51
22
|
origfilename = '5prot_mix_michrom_20fmol_200pmol.RAW'
|
|
52
23
|
origfilepath = 'C:\Xcalibur\sequest'
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
24
|
+
obj.global_filename.should == gfn
|
|
25
|
+
obj.origfilename.should == origfilename
|
|
26
|
+
obj.origfilepath.should == origfilepath
|
|
27
|
+
obj.prots.size.should == 7
|
|
28
|
+
obj.prots.first.peps.first.base_name.should == gfn
|
|
29
|
+
obj.prots.first.peps.first.file.should == "152"
|
|
30
|
+
obj.prots.first.peps.first.charge.should == 2
|
|
60
31
|
# @TODO: add more tests here
|
|
61
32
|
end
|
|
62
33
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
34
|
+
it 'can output in excel format (**semi-verified right now)' do
|
|
35
|
+
tf_bioworks_to_excel = Tfiles + '/tf_bioworks2excel.bioXML'
|
|
36
|
+
tf_bioworks_to_excel_actual = Tfiles + '/tf_bioworks2excel.txt.actual'
|
|
37
|
+
tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
|
|
38
|
+
bio = Bioworks.new(tf_bioworks_to_excel)
|
|
39
|
+
bio.to_excel(tmpfile)
|
|
40
|
+
File.should exist(tmpfile)
|
|
41
|
+
exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
|
|
69
42
|
act = _arr_of_arrs(tmpfile)
|
|
70
43
|
exp.each_index do |i|
|
|
71
44
|
break if i == 23 ## this is where the ordering becomes arbitrary between guys with the same scans, but different filenames
|
|
72
45
|
_assert_equal_pieces(exp[i], act[i], exp[i][0] =~ /\d/)
|
|
73
46
|
end
|
|
74
47
|
|
|
75
|
-
|
|
48
|
+
File.unlink tmpfile
|
|
76
49
|
end
|
|
77
50
|
|
|
78
51
|
# prot is boolean if this is a protein line!
|
|
@@ -80,22 +53,21 @@ class BioworksTest < Test::Unit::TestCase
|
|
|
80
53
|
# equal as floats (by delta)
|
|
81
54
|
exp.each_index do |i|
|
|
82
55
|
if i == 5 # both prots and peps
|
|
83
|
-
|
|
56
|
+
act[i].to_f.should be_close(exp[i].to_f, 0.1)
|
|
84
57
|
elsif i == 3 && !prot
|
|
85
|
-
|
|
58
|
+
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
|
86
59
|
elsif i == 6 && !prot
|
|
87
|
-
|
|
60
|
+
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
|
88
61
|
elsif i == 9 && prot
|
|
89
62
|
## NEED TO GET THESE BACK (for consistency):
|
|
90
|
-
|
|
63
|
+
#act[i].split(" ")[0].should =~ exp[i].split(" ")[0]
|
|
91
64
|
else
|
|
92
65
|
## NEED TO GET THESE BACK (for consistency):
|
|
93
|
-
|
|
66
|
+
#act[i].should == exp[i]
|
|
94
67
|
end
|
|
95
68
|
end
|
|
96
69
|
end
|
|
97
70
|
|
|
98
|
-
|
|
99
71
|
# takes a bioworks excel (in txt format) and outputs an arr of arrs
|
|
100
72
|
def _arr_of_arrs(file)
|
|
101
73
|
IO.readlines(file).collect do |line|
|
|
@@ -104,7 +76,7 @@ class BioworksTest < Test::Unit::TestCase
|
|
|
104
76
|
end
|
|
105
77
|
end
|
|
106
78
|
|
|
107
|
-
|
|
79
|
+
it 'can return unique peptides and proteins by sequence+charge (private)' do
|
|
108
80
|
cnt = 0
|
|
109
81
|
answer = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(3 PEPY), %w(2 PEPY)]
|
|
110
82
|
exp_peps = answer.collect! do |arr|
|
|
@@ -125,7 +97,7 @@ class BioworksTest < Test::Unit::TestCase
|
|
|
125
97
|
both[0].prots = [both[1]]
|
|
126
98
|
both[0]
|
|
127
99
|
end
|
|
128
|
-
|
|
100
|
+
|
|
129
101
|
peptides = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPTIDE), %w(3 PEPY), %w(3 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPY)].collect do |arr|
|
|
130
102
|
pep = Bioworks::Pep.new
|
|
131
103
|
pep.charge = arr[0]
|
|
@@ -136,27 +108,40 @@ class BioworksTest < Test::Unit::TestCase
|
|
|
136
108
|
pep
|
|
137
109
|
end
|
|
138
110
|
peptides, proteins = Bioworks.new._uniq_peps_by_sequence_charge(peptides)
|
|
139
|
-
|
|
111
|
+
proteins.size.should == peptides.size
|
|
140
112
|
exp_peps.each_with_index do |pep, i|
|
|
141
|
-
|
|
142
|
-
|
|
113
|
+
peptides[i].charge.should == pep.charge
|
|
114
|
+
peptides[i].sequence.should == pep.sequence
|
|
143
115
|
end
|
|
144
116
|
|
|
145
117
|
exp_prots.each_index do |i|
|
|
146
118
|
exp_prots[i].each_index do |j|
|
|
147
|
-
|
|
119
|
+
proteins[i][j].reference.should == exp_prots[i][j].reference
|
|
148
120
|
end
|
|
149
121
|
end
|
|
150
122
|
end
|
|
151
123
|
|
|
152
|
-
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
describe Bioworks::Pep do
|
|
127
|
+
it 'can be initialized from a hash' do
|
|
128
|
+
hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
|
|
129
|
+
pep = Bioworks::Pep.new(hash)
|
|
130
|
+
hash.each do |k,v|
|
|
131
|
+
pep.send(k).should == v
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
it 'correctly extracts file information' do
|
|
153
136
|
pep = Bioworks::Pep.new
|
|
154
137
|
testing = ['005a, 1131', '005b, 1131 - 1133', '1131', '1131 - 1133']
|
|
155
138
|
answers = [%w(005a 1131 1131), %w(005b 1131 1133), [nil, '1131', '1131'], [nil, '1131', '1133']]
|
|
156
139
|
testing.zip(answers) do |ar|
|
|
157
140
|
ans = pep.class.extract_file_info(ar[0])
|
|
158
|
-
|
|
141
|
+
ans.join(" ").should == ar[1].join(" ")
|
|
159
142
|
end
|
|
160
143
|
end
|
|
161
144
|
|
|
162
145
|
end
|
|
146
|
+
|
|
147
|
+
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
|
|
3
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
|
4
|
+
require 'spec_id/digestor'
|
|
5
|
+
require 'spec_id/sequest/params'
|
|
6
|
+
require 'fasta'
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
describe 'selecting peptides based on size' do
|
|
10
|
+
before(:each) do
|
|
11
|
+
# (M+H)+ PEPTIDE
|
|
12
|
+
# http://db.systemsbiology.net:8080/proteomicsToolkit/FragIonServlet.html
|
|
13
|
+
mono = {
|
|
14
|
+
'AACK' => 392.19681,
|
|
15
|
+
'PEPTIDE' => 800.36783,
|
|
16
|
+
'TTTYW' => 671.72767,
|
|
17
|
+
'AGGGGGGLKNADEEEP' => 1457.65088,
|
|
18
|
+
'IMNDR' => 648.31396
|
|
19
|
+
|
|
20
|
+
}
|
|
21
|
+
avg = {
|
|
22
|
+
'AACK' => 392.49375,
|
|
23
|
+
'PEPTIDE' => 800.84071,
|
|
24
|
+
'TTTYW' => 671.30411,
|
|
25
|
+
'AGGGGGGLKNADEEEP' => 1458.48147,
|
|
26
|
+
'IMNDR' => 648.75518, # 648.76, thermo
|
|
27
|
+
}
|
|
28
|
+
@pepseqs = [%w(AACK PEPTIDE TTTYW), %w(AGGGGGGLKNADEEEP IMNDR)]
|
|
29
|
+
# basically the protein sequence ONLY matters if the peptide is n or c
|
|
30
|
+
# terminal and there is an n or c terminal modification for ONLY the
|
|
31
|
+
# protein.
|
|
32
|
+
@protseqs = %w(LLLLAACKLLLLLLLPEPTIDELLLLLLTTTYWLLL LLLLAGGGGGGLKNADEEEPLLLLLLIMNDRLLL)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'is sensitive to mono/avg' do
|
|
36
|
+
h_plus = false
|
|
37
|
+
|
|
38
|
+
expect = [%w(PEPTIDE TTTYW), %w(IMNDR)]
|
|
39
|
+
masses_hash = Mass::MONO
|
|
40
|
+
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
|
|
41
|
+
answ.to_set.should == expect.to_set
|
|
42
|
+
masses_hash = Mass::AVG
|
|
43
|
+
expect = [%w(TTTYW), %w(IMNDR)]
|
|
44
|
+
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
|
|
45
|
+
answ.to_set.should == expect.to_set
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it 'is sensitive to static mass changes' do
|
|
49
|
+
expect_before = [%w(PEPTIDE TTTYW), %w(IMNDR)]
|
|
50
|
+
h_plus = false
|
|
51
|
+
masses_hash = Mass::MONO
|
|
52
|
+
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, Mass::MONO, h_plus)
|
|
53
|
+
answ.to_set.should == expect_before.to_set
|
|
54
|
+
|
|
55
|
+
static = {:C => 20.0}
|
|
56
|
+
expect_after = [%w(AACK PEPTIDE TTTYW), %w(IMNDR)]
|
|
57
|
+
masses_hash = Mass::MONO.dup
|
|
58
|
+
masses_hash[:C] = masses_hash[:C] + 20.0
|
|
59
|
+
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
|
|
60
|
+
#answ.to_set.should == expect_before.to_set
|
|
61
|
+
answ.to_set.should == expect_after.to_set
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it 'returns peptides linked to their proteins given fasta and params' do
|
|
65
|
+
fasta_obj = Fasta.new(Tfiles + '/small.fasta')
|
|
66
|
+
params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
|
|
67
|
+
peps = Digestor.digest(fasta_obj, params_obj)
|
|
68
|
+
peps.first.is_a?(SpecID::Pep).should be_true
|
|
69
|
+
# frozen
|
|
70
|
+
peps.size.should == 2843
|
|
71
|
+
# frozen
|
|
72
|
+
peps.select {|v| v.prots.size > 1 }.size.should == 10
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
|
|
2
|
+
|
|
3
|
+
require 'spec_id/precision/filter'
|
|
4
|
+
|
|
5
|
+
describe SpecID::Precision::Filter::CmdlineParser, 'getting all command line options correct' do
|
|
6
|
+
|
|
7
|
+
before(:all) do
|
|
8
|
+
@bioworks_file = Tfiles + '/bioworks_small.xml'
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it_should 'gets all defaults correct with nothing passed in' do
|
|
12
|
+
(spec_id_obj, options, option_parser) = SpecID::Precision::Filter::CmdlineParser.new.parse([@bioworks_file])
|
|
13
|
+
p options
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it_should 'gets all passed in params correct' do
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
end
|
|
20
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
|
|
2
|
+
require 'spec_id/precision/filter'
|
|
3
|
+
require 'spec_id/precision/filter/output'
|
|
4
|
+
|
|
5
|
+
describe 'transforming hash with symbols into strings' do
|
|
6
|
+
it 'works' do
|
|
7
|
+
hash = {:one=>2, :this=>{:one=>"string", 3=>{:four=>5}}}
|
|
8
|
+
new_hash = SpecID::Precision::Output.symbol_keys_to_string(hash)
|
|
9
|
+
new_hash.should == {'one'=>2, 'this'=>{'one'=>"string", 3=>{'four'=>5}}}
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
describe 'outputs' do
|
|
14
|
+
before(:each) do
|
|
15
|
+
@file = Tfiles + '/bioworks_with_INV_small.xml'
|
|
16
|
+
@opts = {}
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'makes a table' do
|
|
20
|
+
my_file = Tfiles + '/filtering_tmp.tmp'
|
|
21
|
+
File.unlink my_file if File.exist? my_file
|
|
22
|
+
@opts[:output] = [[:text_table, my_file]]
|
|
23
|
+
SpecID::Precision::Filter.new.filter_and_validate(SpecID.new(@file), @opts)
|
|
24
|
+
#reply = capture_stdout {
|
|
25
|
+
# SpecID::Precision::Filter.new.filter_and_validate(SpecID.new(@file), @opts)
|
|
26
|
+
#}
|
|
27
|
+
# frozen
|
|
28
|
+
IO.read(my_file) =~ /138/
|
|
29
|
+
File.unlink my_file if File.exist? my_file
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
|
2
|
+
require 'spec_id/srf'
|
|
3
|
+
require 'spec_id/precision/filter'
|
|
4
|
+
|
|
5
|
+
require File.dirname(__FILE__) + '/../../spec_id_helper'
|
|
6
|
+
|
|
7
|
+
require 'set'
|
|
8
|
+
require 'set_from_hash'
|
|
9
|
+
|
|
10
|
+
describe SpecID::Precision::Filter::Peps do
|
|
11
|
+
it 'does basic top hit filtering with ties=true|false|:as_array' do
|
|
12
|
+
hashes = [
|
|
13
|
+
{:aaseq=> 'A', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 0
|
|
14
|
+
{:aaseq=> 'B', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 1
|
|
15
|
+
{:aaseq=> 'C', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 2
|
|
16
|
+
{:aaseq=> 'D', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.2, :ppm => 25, :charge => 2}, # 3
|
|
17
|
+
{:aaseq=> 'D', :first_scan => 2, :xcorr => 1.9, :deltacn => 0.1, :ppm => 25, :charge => 2}, # 4
|
|
18
|
+
]
|
|
19
|
+
pep_klass = SRF::OUT::Pep
|
|
20
|
+
@sequest_peps = hashes.map do |hash|
|
|
21
|
+
hash[:prots] = []
|
|
22
|
+
pep = pep_klass.new.set_from_hash(hash)
|
|
23
|
+
end
|
|
24
|
+
# no tie:
|
|
25
|
+
options = {
|
|
26
|
+
:per => [:first_scan, :charge],
|
|
27
|
+
:by => [:xcorr, {:down => [:xcorr]}],
|
|
28
|
+
:ties => false
|
|
29
|
+
}
|
|
30
|
+
peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
|
|
31
|
+
peps.size.should == 2
|
|
32
|
+
set_of_hash_xcorrs = [0,4].map {|i| hashes[i][:xcorr] }.to_set
|
|
33
|
+
peps.map {|v| v.xcorr }.to_set.should == set_of_hash_xcorrs
|
|
34
|
+
|
|
35
|
+
# with tie == true:
|
|
36
|
+
options[:ties] = true
|
|
37
|
+
peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
|
|
38
|
+
peps.size.should == 3
|
|
39
|
+
set_of_hash_xcorrs = [0,1,4].map {|i| hashes[i][:xcorr] }.to_set
|
|
40
|
+
peps.map{|v| v.xcorr}.to_set.should == set_of_hash_xcorrs
|
|
41
|
+
|
|
42
|
+
# with tie == :as_array
|
|
43
|
+
options[:ties] = :as_array
|
|
44
|
+
peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
|
|
45
|
+
peps.size.should == 2
|
|
46
|
+
peps.any? {|v| v.class == Array }.should be_true
|
|
47
|
+
peps.select {|v| v.is_a? pep_klass }.first.should equal(@sequest_peps[4])
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
describe 'filtering on a small bioworks file' do
|
|
53
|
+
before(:each) do
|
|
54
|
+
@file = Tfiles + '/bioworks_small.xml'
|
|
55
|
+
@spec_id = SpecID.new(@file)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
it 'filters with basic sequest filters' do
|
|
59
|
+
opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
|
|
60
|
+
ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
ans[:params][:sequest].should == opts[:sequest]
|
|
64
|
+
# FROZEN:
|
|
65
|
+
ans[:pephits].size.should == 4
|
|
66
|
+
|
|
67
|
+
ans[:pephits].each do |pephit|
|
|
68
|
+
pephit.pass_filters?(opts[:sequest]).should be_true
|
|
69
|
+
pephit.fail_filters?(opts[:sequest]).should be_false
|
|
70
|
+
end
|
|
71
|
+
before = @spec_id.peps.size
|
|
72
|
+
ans[:pephits].each do |pephit|
|
|
73
|
+
@spec_id.peps.delete(pephit)
|
|
74
|
+
end
|
|
75
|
+
@spec_id.peps.size.should == before - 4
|
|
76
|
+
@spec_id.peps.each do |not_passing_pep|
|
|
77
|
+
not_passing_pep.pass_filters?(opts[:sequest]).should_not be_true
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
ans[:pephits].map {|v| v.aaseq }.to_set.size == 4
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it 'can exclude deltacnstar' do
|
|
84
|
+
opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
|
|
85
|
+
# make two hits have the deltacnstar deltacn of 1.1
|
|
86
|
+
sorted = @spec_id.peps.sort_by {|pep| [pep.xcorr, pep.deltacn, 1.0/pep.ppm, pep.first_scan, pep.aaseq] }
|
|
87
|
+
# for two of these indices:
|
|
88
|
+
[286, 287].each do |index|
|
|
89
|
+
sorted[index].deltacn = 1.1
|
|
90
|
+
sorted[index].deltacn.should == 1.1
|
|
91
|
+
end
|
|
92
|
+
ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
|
|
93
|
+
|
|
94
|
+
ans[:params][:sequest].should == opts[:sequest]
|
|
95
|
+
# FROZEN:
|
|
96
|
+
ans[:pephits].size.should == 2
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Runs SpecID::Precision::Filter#filter_and_validate against the small
# Bioworks fixture, using a Validator::Decoy built from /^INV_/ (and, in the
# second example, an additional cysteine Validator::AA).
describe 'filtering on small bioworks file with inverse prots' do
  before(:each) do
    @regexp = /^INV_/o
    @file = Tfiles + '/bioworks_with_INV_small.xml'
    @spec_id = SpecID.new(@file)
    decoy_validators = [Validator::Decoy.new(@regexp)]
    sequest_cutoffs = {
      :xcorr1 => 1.0,
      :xcorr2 => 1.0,
      :xcorr3 => 1.0,
      :deltacn => 0.1,
      :ppm => 1000.0,
      :include_deltacnstar => false
    }
    @opts = {:sequest => sequest_cutoffs, :validators => decoy_validators}
  end

  it 'gets decoy precision' do
    result = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
    pephits = result[:pephits]
    precisions = result[:pephits_precision]
    # FROZEN:
    pephits.size.should == 150
    pephits.hash_by(:aaseq).size.should == 74
    precisions.first.should == 149.0/150
  end

  it 'gets cys precision with freq' do
    # Minimal check that the pieces function together; accuracy itself is
    # covered in validator_spec.

    ## WITH FASTA FILE:
    fasta_cys_validator = Validator::AA.new('C').set_frequency(Fasta.new(Tfiles + '/small.fasta'))
    # Appended after the decoy validator; the assertion below reads the last
    # precision value.
    @opts[:validators] << fasta_cys_validator
    fasta_result = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
    _fasta_pephits = fasta_result[:pephits]
    fasta_precisions = fasta_result[:pephits_precision]
    # FROZEN:
    fasta_precisions.last.should be_close(0.84432189117806, 0.0000000001)

    ## WITH A CYSTEINE BACKGROUND:
    background_cys = 0.0172
    background_cys_validator = Validator::AA.new('C', :background => background_cys).set_frequency(Fasta.new(Tfiles + '/small.fasta'))
    # Overwrites the cysteine validator added above (index 1).
    @opts[:validators][1] = background_cys_validator
    background_result = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
    _background_pephits = background_result[:pephits]
    background_precisions = background_result[:pephits_precision]
    # FROZEN:
    background_precisions.last.should be_close(0.944734271368211, 0.00000000001)
  end
end
|
|
143
|
+
|
|
144
|
+
# Runs SpecID::Precision::Filter#filter_and_validate on an .srf file from the
# large test-file directory (Tfiles_l) with decoy, cysteine and transmembrane
# validators. The example is wrapped in spec_large.
describe 'filtering on a real srf file' do

  spec_large do
    it 'does tmm with a toppred file on srf' do
      sequest_cutoffs = {
        :xcorr1 => 1.0,
        :xcorr2 => 1.0,
        :xcorr3 => 1.0,
        :deltacn => 0.1,
        :ppm => 1000.0,
        :include_deltacnstar => false
      }
      opts = {:sequest => sequest_cutoffs}

      dir = Tfiles_l + '/opd1_2runs_2mods/sequest'
      tmm_file = dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
      fasta_file = dir + '/ecoli_K12_ncbi_20060321.fasta'
      sequest_file = dir + '/ecoli.params'
      srf_file = dir + '/020.srf'
      spec_id = SpecID.new(srf_file)
      # :tmm -> [transmembrane file,min_tm_seqs=1,expect_soluble=true,correct_wins=true,no_include_tm_peps=0.8, bkg=0] # a toppred.out file

      regexp = /FAKINGIT_OUT/
      opts[:decoy] = regexp
      decoy_val = Validator::Decoy.new(regexp) # this is not real, just to test
      cys_val = Validator::AA.new('C').set_frequency(Fasta.new(fasta_file))
      # Build the transmembrane validator first, then hand it the digest of
      # the fasta file (same evaluation order as a single chained call).
      tmm_protein = Validator::Transmem::Protein.new(
        tmm_file,
        :min_num_tms => 1,
        :soluble_fraction => true,
        :correct_wins => true,
        :no_include_tm_peps => false,
        :background => 0.0
      )
      tmm_val = tmm_protein.set_false_to_total_ratio(
        Digestor.digest(Fasta.new(fasta_file), Sequest::Params.new(sequest_file))
      )
      opts[:validators] = [decoy_val, cys_val, tmm_val]

      result = SpecID::Precision::Filter.new.filter_and_validate(spec_id, opts)
      pephits = result[:pephits]
      precisions = result[:pephits_precision]

      # frozen:
      precisions[0].should == 1.0
      precisions[1].should be_close(0.366612274427855, 0.00000001)
      #vals[2].should be_close(0.396396396396396, 0.00000001)
      # if the srf file is not 'filtered' by proper sequest vals, should give
      # this:
      #vals[2].should be_close(-0.204031426241371, 0.00000001)
      precisions[2].should be_close(-0.199538771665843, 0.00000001)
      pephits.size.should == 444
    end
  end

  # This is what I was doing before. I think I may have been forgetting to
  # remove the INV_ peptide from these counts!
  # or more likely, the peptide hits were pep+prot hits!
  # SpecID::Filterer.run_from_argv([@small_inv].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
  ### FROZEN:
  #assert_match(/pep_hits\s+151/, output)
  #assert_match(/uniq_aa_hits\s+75/, output)
  #assert_match(/prot_hits\s+13/, output)

end
|
|
189
|
+
|
|
190
|
+
# Specs for SpecID::Precision::Filter::Peps using the
# :standard_sequest_filter on six hand-built SRF::OUT::Pep objects.
describe SpecID::Precision::Filter::Peps do

  before(:all) do
    hashes = [
      {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
      {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
      {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
      {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
      {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
      {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
    ]
    # One peptide per attribute hash; shared by every example below.
    @sequest_peps = hashes.map do |attrs|
      SRF::OUT::Pep.new.set_from_hash(attrs)
    end
    #sp = GenericSpecID.new.set_from_hash({:peps => peps})

  end

  it 'filters sequest peptides' do
    # Each key is the argument list passed after :standard_sequest_filter
    # (presumably xcorr1, xcorr2, xcorr3, deltacn, ppm, include_deltacnstar,
    # matching the :sequest option keys used elsewhere in this spec -- TODO
    # confirm against Filter::Peps); each value is the number of peptides
    # expected to survive the filter.
    args_and_expected = {
      #deltacnstar false
      [1.2, 1.2, 1.2, 0.1, 50, false] => 5, # "all passing"
      [1.6, 1.6, 1.6, 0.1, 50, false] => 0, # "xcorrs too high"
      [1.6, 1.0, 1.0, 0.1, 50, false] => 4, # "one xcorr too high"
      [1.0, 1.6, 1.0, 0.1, 50, false] => 2, # "one xcorr too high"
      [1.0, 1.0, 1.6, 0.1, 50, false] => 4, # "one xcorr too high"
      [1.2, 1.2, 1.2, 0.2, 50, false] => 0, # "high deltacn"

      ## include deltacnstars:
      [1.2, 1.2, 1.2, 0.1, 50, true] => 6, # "all passing"
      [1.2, 1.2, 1.2, 0.2, 50, true] => 1, # "high deltacn"
      [1.0, 1.0, 1.6, 0.1, 50, true] => 5, # "one xcorr too high"
    }
    args_and_expected.each do |args, exp|
      filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
      filt.filter(@sequest_peps).size.should == exp
    end
  end

  it 'can change the pep array permanently' do
    args = [1.2, 1.2, 1.2, 0.2, 50, true] # "high deltacn"
    # Same count the non-destructive #filter yields for these arguments in
    # the example above.
    expected_size = 1

    # Work on a shallow copy so the shared @sequest_peps array keeps its size.
    array_to_change = @sequest_peps.dup
    array_to_change.size.should == @sequest_peps.size

    filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
    filt.filter!(array_to_change)

    array_to_change.size.should_not == @sequest_peps.size
    # Pin the exact surviving count, not merely "the size changed".
    array_to_change.size.should == expected_size
  end

end
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
|