mspire 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
@@ -0,0 +1,111 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
+
|
3
|
+
require 'spec_id/precision/prob'
|
4
|
+
require 'spec_id'
|
5
|
+
require 'spec_id/proph'
|
6
|
+
require 'validator'
|
7
|
+
require 'fasta'
|
8
|
+
require 'spec_id/sequest/params'
|
9
|
+
|
10
|
+
|
11
|
+
describe 'finding precision Proph::Prot::Pep objects' do
|
12
|
+
before(:each) do
|
13
|
+
@spec_id = GenericSpecID.new
|
14
|
+
# actual sort order: 3, 0, 4, 1, 2
|
15
|
+
peps = [
|
16
|
+
# 0: canonical
|
17
|
+
{:peptide_sequence => '0', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
|
18
|
+
# 1: lower init prob
|
19
|
+
{:peptide_sequence => '1', :initial_probability => 0.60, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
|
20
|
+
# 2: lower nsp prob
|
21
|
+
{:peptide_sequence => '2', :initial_probability => 0.63, :nsp_adjusted_probability => 0.52, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
|
22
|
+
# 3: extra instances! (best hit)
|
23
|
+
{:peptide_sequence => '3', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 5, :is_contributing_evidence => true},
|
24
|
+
# 4: is nondegen = false
|
25
|
+
{:peptide_sequence => '4', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => false, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},].map {|v| Proph::Prot::Pep.new(v) }
|
26
|
+
@spec_id.peps = peps
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'runs without any validator' do
|
30
|
+
answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
|
31
|
+
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits_precision", "probabilities"]
|
32
|
+
answer[:aaseqs].should == %w(3 0 4 1 2)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns modified peptides if any modified peptides' do
|
36
|
+
@spec_id.peps[1].mod_info = Sequest::PepXML::SearchHit::ModificationInfo.new(['MODIFIED', []])
|
37
|
+
answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
|
38
|
+
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits_precision", "probabilities"]
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
=begin
|
45
|
+
it 'gets precision with all validators (including probability and decoy)' do
|
46
|
+
## create some decoy peptides!
|
47
|
+
@spec_id.peps.sort_by {|pep| pep.probability }[100..-1].each_with_index do |pep,i|
|
48
|
+
if i % 3 == 0
|
49
|
+
pep.prots.each {|prot| prot.protein_name = 'DECOY_' + prot.protein_name }
|
50
|
+
end
|
51
|
+
end
|
52
|
+
# check which ones are ACTUALLY normal and decoy
|
53
|
+
(decoy, normal) = @spec_id.peps.partition do |pep|
|
54
|
+
pep.prots.all? {|prot| prot.protein_name =~ /^DECOY_/}
|
55
|
+
end
|
56
|
+
num_decoy = decoy.size
|
57
|
+
num_normal = normal.size
|
58
|
+
end
|
59
|
+
|
60
|
+
prob_spec_helper = File.expand_path( File.dirname(__FILE__) + '/prob_spec_helper' )
|
61
|
+
|
62
|
+
# this does a minimal test to see if this functions properly
|
63
|
+
# (not for accuracy, which is done in validator_spec)
|
64
|
+
## WITH FASTA FILE:
|
65
|
+
base_dir = Tfiles_l + '/opd1_2runs_2mods/sequest'
|
66
|
+
fasta_file = base_dir + '/ecoli_K12_ncbi_20060321.fasta'
|
67
|
+
params_file = base_dir + '/ecoli.params'
|
68
|
+
bias_file = base_dir + '/ecoli_K12_ncbi_20060321.bias.fasta'
|
69
|
+
toppred_file = base_dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
|
70
|
+
|
71
|
+
fasta_file.should exist
|
72
|
+
|
73
|
+
prob = Validator::Probability.new
|
74
|
+
badaa_freq = Validator::AA.new('C', :frequency => 0.0115866200193321)
|
75
|
+
badaa_dig = Validator::AA.new('C')
|
76
|
+
bias = Validator::Bias.new(Fasta.new(bias_file))
|
77
|
+
transmem = Validator::Transmem::Protein.new(toppred_file)
|
78
|
+
decoy = Validator::Decoy.new(/^DECOY_/)
|
79
|
+
|
80
|
+
turn_on_digestion = true
|
81
|
+
if turn_on_digestion
|
82
|
+
# digestion based validators need this set!
|
83
|
+
digested_peps = Digestor.digest(Fasta.new(fasta_file), Sequest::Params.new(params_file))
|
84
|
+
[badaa_dig, bias, transmem].each do |val|
|
85
|
+
val.set_false_to_total_ratio(digested_peps)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
transmem.transmem_status_hash = transmem.create_transmem_status_hash(@spec_id.peps)
|
89
|
+
val_list = [decoy, badaa_freq, badaa_dig, prob, bias, transmem]
|
90
|
+
|
91
|
+
opts = { :validators => val_list }
|
92
|
+
|
93
|
+
|
94
|
+
hash = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id, opts)
|
95
|
+
#puts "OUTPUT: "
|
96
|
+
#puts hash.to_yaml
|
97
|
+
|
98
|
+
|
99
|
+
# frozen
|
100
|
+
e_hash = ProbMSHelper::Answer2
|
101
|
+
# hash[:pephits_precision].size.should == e_hash[:pephits_precision].size
|
102
|
+
# other data types are tested above, just testing validators
|
103
|
+
hash[:pephits_precision].zip( e_hash[:pephits_precision] ) do |val_hash, val_hash_e|
|
104
|
+
val_hash[:values].size.should == num_normal
|
105
|
+
#val_hash[:validator].should == val_hash_e[:validator]
|
106
|
+
val_hash[:values].zip(val_hash_e[:values]) {|v,e| v.should be_close(e, 0.000000001)}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
=end
|
File without changes
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
+
|
3
|
+
require 'spec_id/proph/pep_summary'
|
4
|
+
|
5
|
+
ToCheck = {
|
6
|
+
:spectrum_query => {:first => {:spectrum => "020.42.42.3", :start_scan=>42, :end_scan=>42, :precursor_neutral_mass=>1015.77285654469, :assumed_charge=>3, :index=>1 },
|
7
|
+
:last => {:spectrum=>"020.344.344.3", :start_scan=>344, :end_scan=>344, :precursor_neutral_mass=>1447.6040333025, :assumed_charge=>3, :index=>18 },
|
8
|
+
},
|
9
|
+
|
10
|
+
:search_hit => {:first => {:hit_rank=>1, :peptide=>"GTGVSVTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"S", :protein=>"gi|49176370|ref|YP_026228.1|", :num_tot_proteins=>1, :num_matched_ions=>10, :tot_num_ions=>70, :calc_neutral_pep_mass=>1015.79382542, :massdiff=>-0.0209688753124055, :num_tol_term=>2, :num_missed_cleavages=>0, :is_rejected=>0, :xcorr=>1.06543827056885, :deltacn => 0.192325830459595, :deltacnstar=>0, :spscore=>77.8397979736328, :sprank=>3, :probability=>0.07881571, :fval=>0.1592, :ntt=>2, :nmc=> 0, :massd=>-0.021},
|
11
|
+
:last => { :hit_rank=>1, :peptide=>"VAALRVPGGASLTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"K", :protein=>"gi|16129819|ref|NP_416380.1|", :num_tot_proteins=>1, :num_matched_ions=>16, :tot_num_ions=>78, :calc_neutral_pep_mass=>1447.58289842, :massdiff=> 0.0211348825000641, :num_tol_term=>2, :num_missed_cleavages=>1, :is_rejected=>0, :xcorr=>1.3090912103653, :deltacn => 0.259967535734177, :deltacnstar => 0, :spscore => 118.513412475586, :sprank => 4, :probability=>0.27738378, :fval=>1.3810, :ntt=>2, :nmc=>1, :massd=>0.021 },
|
12
|
+
}
|
13
|
+
}
|
14
|
+
|
15
|
+
|
16
|
+
describe Proph::PepSummary, "reading a .xml file" do
|
17
|
+
before(:each) do
|
18
|
+
file = Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml'
|
19
|
+
@obj = Proph::PepSummary.new(file)
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should raise an error if not a peptide prophet file' do
|
23
|
+
lambda { Proph::PepSummary.new(Tfiles + '/opd1/000.tpp_2.9.2.first10.xml')}.should raise_error(ArgumentError)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'has spectrum queries' do
|
27
|
+
@obj.spectrum_queries.size.should == 18
|
28
|
+
|
29
|
+
[:first, :last].each do |mth|
|
30
|
+
ToCheck[:spectrum_query][mth].each do |k,v|
|
31
|
+
@obj.spectrum_queries.send(mth).send(k).should == v
|
32
|
+
end
|
33
|
+
ToCheck[:search_hit][mth].each do |k,v|
|
34
|
+
@obj.spectrum_queries.send(mth).search_results.first.search_hits.first.send(k).should == v
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'has pephits (which are descended from SearchHit)' do
|
40
|
+
@obj.peps.size.should == 18
|
41
|
+
[:hit_rank, :probability, :fval, :ntt, :nmc, :massd].each do |guy|
|
42
|
+
@obj.peps.first.should respond_to(guy)
|
43
|
+
end
|
44
|
+
|
45
|
+
[:first, :last].each do |mth|
|
46
|
+
ToCheck[:search_hit][mth].each do |k,v|
|
47
|
+
@obj.peps.send(mth).send(k).should == v
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
####################################################
|
56
|
+
# OTHER TESTS NOT IMPLEMENTED (do we need these??)
|
57
|
+
####################################################
|
58
|
+
|
59
|
+
=begin
|
60
|
+
|
61
|
+
require 'test/unit'
|
62
|
+
require 'spec_id'
|
63
|
+
require 'ms/scan'
|
64
|
+
|
65
|
+
class ProphTest < Test::Unit::TestCase
|
66
|
+
|
67
|
+
def initialize(arg)
|
68
|
+
super(arg)
|
69
|
+
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
70
|
+
@pepproph_xml = @tfiles + 'pepproph_small.xml'
|
71
|
+
end
|
72
|
+
|
73
|
+
def Xtest_filter_by_min_pep_prob
|
74
|
+
obj = Proph::Pep::Parser.new
|
75
|
+
new_file = "tfiles/tmp.xml"
|
76
|
+
assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
|
77
|
+
obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
|
78
|
+
assert_no_match(/peptideprophet_result probability="0.[0-5]/, IO.read(new_file))
|
79
|
+
assert_match(/<peptideprophet_result[^>]*probability="0.[6-9][^>]*>/, IO.read(new_file))
|
80
|
+
File.unlink new_file
|
81
|
+
end
|
82
|
+
|
83
|
+
def Xtest_uniq_by_seqcharge
|
84
|
+
cls = Proph::Pep
|
85
|
+
p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
86
|
+
p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
|
87
|
+
p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
88
|
+
p4 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
|
89
|
+
p5 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
|
90
|
+
un_peps = cls.uniq_by_seqcharge([p1,p2,p3,p4,p5])
|
91
|
+
## Why isn't the set-based assertion below working?
|
92
|
+
##assert_equal([p1,p2,p4].to_set, un_peps.to_set)
|
93
|
+
assert(equal_sets([p1,p2,p4], un_peps))
|
94
|
+
end
|
95
|
+
|
96
|
+
def Xequal_sets(arr1, arr2)
|
97
|
+
c1 = arr1.dup
|
98
|
+
c2 = arr2.dup
|
99
|
+
arr1.each do |c|
|
100
|
+
arr2.each do |d|
|
101
|
+
if c == d
|
102
|
+
c1.delete c
|
103
|
+
c2.delete d
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
if (c1.size == c2.size) && (c1.size == 0)
|
108
|
+
true
|
109
|
+
else
|
110
|
+
false
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def Xtest_arithmetic_avg_scan_by_parent_time
|
115
|
+
i1 = 100015.0
|
116
|
+
i2 = 30000.0
|
117
|
+
i3 = 100.0
|
118
|
+
t1 = 0.13
|
119
|
+
t2 = 0.23
|
120
|
+
t3 = 0.33
|
121
|
+
p1 = MS::Scan.new(1,1, t1)
|
122
|
+
p2 = MS::Scan.new(2,1, t2)
|
123
|
+
p3 = MS::Scan.new(3,1, t3)
|
124
|
+
s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
|
125
|
+
s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
|
126
|
+
s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
|
127
|
+
scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
|
128
|
+
tot_inten = i1 + i2 + i3
|
129
|
+
tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
|
130
|
+
{:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
|
131
|
+
if k == :prec_mz # not sure why this is bugging out, but..
|
132
|
+
assert_equal(v.to_s, scan.send(k).to_s)
|
133
|
+
else
|
134
|
+
assert_equal(v, scan.send(k))
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
end
|
139
|
+
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
=end
|
@@ -1,10 +1,51 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
1
2
|
|
3
|
+
require 'spec_id/proph/prot_summary'
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
|
5
|
+
describe Proph::ProtSummary, "reading a -prot.xml file" do
|
6
|
+
before(:each) do
|
7
|
+
file = Tfiles + '/opd1/000_020_3prots-prot.xml'
|
8
|
+
@obj = Proph::ProtSummary.new(file)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'extracts protein groups with probabilities' do
|
12
|
+
@obj.prot_groups.size.should == 3
|
13
|
+
@obj.prot_groups.first.probability.should == 1.0
|
14
|
+
@obj.prot_groups[2].probability.should == 0.98
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'extracts protein hit attributes' do
|
18
|
+
prot = @obj.prot_groups[1].prots.first
|
19
|
+
%w(protein_name n_indistinguishable_proteins probability percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids).zip(["gi|16132019|ref|NP_418618.1|", 1, 1.0, 13.0, "FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR".split('+'), "a", 2, 0.41]) do |name, val|
|
20
|
+
prot.send(name).should == val
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'can detect -prot.xml version' do
|
25
|
+
answer = ['1.9', '4']
|
26
|
+
files = ['/yeast_gly_small-prot.xml', '/interact-opd1_mods_small-prot.xml'].map {|v| Tfiles + v}
|
27
|
+
files.zip(answer) do |file,answ|
|
28
|
+
Proph::ProtSummary.new.get_version(file).should == answ
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'has prots, peps, and prot_groups ' do
|
33
|
+
@obj.peps.should_not be_nil
|
34
|
+
@obj.prots.should_not be_nil
|
35
|
+
@obj.prot_groups.should_not be_nil
|
36
|
+
end
|
6
37
|
|
38
|
+
end
|
39
|
+
|
40
|
+
####################################################
|
41
|
+
# OTHER TESTS NOT IMPLEMENTED (do we need these??)
|
42
|
+
####################################################
|
7
43
|
|
44
|
+
=begin
|
45
|
+
|
46
|
+
require 'test/unit'
|
47
|
+
require 'spec_id'
|
48
|
+
require 'ms/scan'
|
8
49
|
|
9
50
|
class ProphTest < Test::Unit::TestCase
|
10
51
|
|
@@ -14,29 +55,6 @@ class ProphTest < Test::Unit::TestCase
|
|
14
55
|
@pepproph_xml = @tfiles + 'pepproph_small.xml'
|
15
56
|
end
|
16
57
|
|
17
|
-
def test_parse_protxml_file
|
18
|
-
file = @tfiles + 'opd1/000_020_3prots-prot.xml'
|
19
|
-
#obj = Proph::ProtSummary.new
|
20
|
-
obj = Proph::ProtSummary.new(file)
|
21
|
-
assert_equal(3, obj.prot_groups.size)
|
22
|
-
assert_equal("1.00", obj.prot_groups.first.probability)
|
23
|
-
assert_equal("0.98", obj.prot_groups[2].probability)
|
24
|
-
assert_equal_xml_atts_to_obj('protein_name="gi|16132019|ref|NP_418618.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="13.0" unique_stripped_peptides="FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.41"', obj.prot_groups[1].prots.first)
|
25
|
-
end
|
26
|
-
|
27
|
-
def assert_equal_xml_atts_to_obj(string, obj, msg=nil)
|
28
|
-
parts = string.split(/\s+/)
|
29
|
-
parts.each do |part|
|
30
|
-
pi = part.split('=')
|
31
|
-
value = pi[1].sub(/^"/,'').sub(/"$/,'')
|
32
|
-
if pi[0] == "probability"
|
33
|
-
value = value.to_f
|
34
|
-
end
|
35
|
-
assert_equal(value, obj.send(pi[0].to_sym))
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
58
|
def Xtest_filter_by_min_pep_prob
|
41
59
|
obj = Proph::Pep::Parser.new
|
42
60
|
new_file = "tfiles/tmp.xml"
|
@@ -85,12 +103,12 @@ class ProphTest < Test::Unit::TestCase
|
|
85
103
|
t1 = 0.13
|
86
104
|
t2 = 0.23
|
87
105
|
t3 = 0.33
|
88
|
-
p1 =
|
89
|
-
p2 =
|
90
|
-
p3 =
|
91
|
-
s1 =
|
92
|
-
s2 =
|
93
|
-
s3 =
|
106
|
+
p1 = MS::Scan.new(1,1, t1)
|
107
|
+
p2 = MS::Scan.new(2,1, t2)
|
108
|
+
p3 = MS::Scan.new(3,1, t3)
|
109
|
+
s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
|
110
|
+
s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
|
111
|
+
s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
|
94
112
|
scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
|
95
113
|
tot_inten = i1 + i2 + i3
|
96
114
|
tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
|
@@ -106,3 +124,5 @@ class ProphTest < Test::Unit::TestCase
|
|
106
124
|
|
107
125
|
|
108
126
|
end
|
127
|
+
|
128
|
+
=end
|
@@ -1,3 +1,87 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
+
require 'spec_id/protein_summary'
|
3
|
+
|
4
|
+
xdescribe ProteinSummary do
|
5
|
+
|
6
|
+
before(:all) do
|
7
|
+
@tf_proph = Tfiles_l + "/opd1/000_020-prot.xml"
|
8
|
+
@tf_summary = Tfiles_l + "/opd1/000_020-prot.summary.html"
|
9
|
+
@tf_bioworks_small = Tfiles + '/bioworks_small.xml'
|
10
|
+
@tf_bioworks_small_summary_html = Tfiles + '/bioworks_small.summary.html'
|
11
|
+
@tf_proph_cat_inv = Tfiles + '/opd1/opd1_cat_inv_small-prot.xml'
|
12
|
+
@tf_proph_cat_inv_summary_html = Tfiles + '/opd1/opd1_cat_inv_small-prot.summary.html'
|
13
|
+
@tf_proph_cat_inv_summary_png = Tfiles + '/opd1/opd1_cat_inv_small-prot.summary.png'
|
14
|
+
@tf_peptide_count = Tfiles + "/peptide_counts.tmp.txt"
|
15
|
+
@no_delete = false
|
16
|
+
end
|
17
|
+
|
18
|
+
spec_large do
|
19
|
+
it 'does basic summary on prophet file' do
|
20
|
+
runit "-c 5.0 #{@tf_proph}"
|
21
|
+
@tf_summary.should exist
|
22
|
+
string = IO.read(@tf_summary)
|
23
|
+
string.should =~ /gi\|16132176\|ref\|NP_418775\.1\|/
|
24
|
+
string.should =~ /16132176/
|
25
|
+
File.unlink(@tf_summary) unless @no_delete
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'does basic summary on bioworks.xml file' do
|
30
|
+
runit "#{@tf_bioworks_small}"
|
31
|
+
@tf_bioworks_small_summary_html.should exist
|
32
|
+
File.unlink @tf_bioworks_small_summary_html unless @no_delete
|
33
|
+
# @TODO: need to freeze the output here
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
it 'calculates precision values with bioworks files' do
|
38
|
+
## Could reimplement a separate file approach?
|
39
|
+
#reply = `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
|
40
|
+
runit "#{@tf_bioworks_small} --precision"
|
41
|
+
IO.read(@tf_bioworks_small_summary_html).should =~ /# hits.*106/m
|
42
|
+
# should add more tests here...
|
43
|
+
@tf_bioworks_small_summary_html.should exist
|
44
|
+
File.unlink @tf_bioworks_small_summary_html unless @no_delete
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'calculates precision values with prophet files' do
|
48
|
+
runit "#{@tf_proph_cat_inv} -f INV_ --prefix --precision"
|
49
|
+
html = IO.read(@tf_proph_cat_inv_summary_html)
|
50
|
+
html.should =~ /# hits/
|
51
|
+
html.should =~ /2.*0\.0000/m
|
52
|
+
html.should =~ /3.*0\.3333/m
|
53
|
+
html.should =~ /7.*0\.5714/m
|
54
|
+
|
55
|
+
File.unlink @tf_proph_cat_inv_summary_html unless @no_delete
|
56
|
+
File.unlink @tf_proph_cat_inv_summary_png unless @no_delete
|
57
|
+
end
|
58
|
+
|
59
|
+
spec_large do
|
60
|
+
it 'gives correct peptide counts' do
|
61
|
+
runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
|
62
|
+
@tf_peptide_count.should exist
|
63
|
+
file = IO.read(@tf_peptide_count)
|
64
|
+
file.should include("gi|16132176|ref|NP_418775.1|\t2")
|
65
|
+
file.should include("gi|16131996|ref|NP_418595.1|\t1")
|
66
|
+
file.should include("gi|16131692|ref|NP_418288.1|\t4")
|
67
|
+
File.unlink @tf_peptide_count unless @no_delete
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def runit(string_or_args)
|
72
|
+
args = if string_or_args.is_a? String
|
73
|
+
string_or_args.split(/\s+/)
|
74
|
+
else
|
75
|
+
string_or_args
|
76
|
+
end
|
77
|
+
ProteinSummary.new.create_from_command_line_args(args)
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
=begin
|
1
85
|
|
2
86
|
require 'test/unit'
|
3
87
|
require 'spec_id/protein_summary'
|
@@ -102,3 +186,4 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
102
186
|
|
103
187
|
end
|
104
188
|
|
189
|
+
=end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
+
require 'spec_id/sequest/params'
|
3
|
+
|
4
|
+
|
5
|
+
describe "a sequest params object", :shared => true do
|
6
|
+
before(:each) do
|
7
|
+
@obj = Sequest::Params.new(@file)
|
8
|
+
end
|
9
|
+
it 'gives enzyme_specificity' do
|
10
|
+
ar = @obj.enzyme_specificity
|
11
|
+
ar.size.should == 3
|
12
|
+
ar.should == @enzyme_specificity
|
13
|
+
end
|
14
|
+
it 'returns static mods callable by key' do
|
15
|
+
@obj.add_Cterm_peptide.should == @add_Cterm_peptide
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
describe Sequest::Params, "with a bioworks 3.1 params" do
|
21
|
+
before(:all) do
|
22
|
+
@file = Tfiles + '/bioworks31.params'
|
23
|
+
@obj = Sequest::Params.new(@file)
|
24
|
+
@enzyme_specificity = [1, 'KR', '']
|
25
|
+
@add_Cterm_peptide = '0.0000'
|
26
|
+
end
|
27
|
+
it_should_behave_like 'a sequest params object'
|
28
|
+
end
|
29
|
+
|
30
|
+
describe Sequest::Params, "with a bioworks 3.2 params" do
|
31
|
+
before(:all) do
|
32
|
+
@file = Tfiles + '/bioworks32.params'
|
33
|
+
@obj = Sequest::Params.new(@file)
|
34
|
+
@enzyme_specificity = [1, 'KR', 'P']
|
35
|
+
@add_Cterm_peptide = '0.0000'
|
36
|
+
end
|
37
|
+
it_should_behave_like 'a sequest params object'
|
38
|
+
end
|
39
|
+
|
40
|
+
describe Sequest::Params, "with a bioworks 3.3 params" do
|
41
|
+
before(:all) do
|
42
|
+
@file = Tfiles + '/bioworks33.params'
|
43
|
+
@obj = Sequest::Params.new(@file)
|
44
|
+
@enzyme_specificity = [1, 'KR', '']
|
45
|
+
@add_Cterm_peptide = '0.0000'
|
46
|
+
end
|
47
|
+
it_should_behave_like 'a sequest params object'
|
48
|
+
end
|
49
|
+
|
50
|
+
describe Sequest::Params, "given a bioworks 3.2 params (from .srf file)" do
|
51
|
+
before(:all) do
|
52
|
+
@file = Tfiles + '/7MIX_STD_110802_1.sequest_params_fragment.srf'
|
53
|
+
@obj = Sequest::Params.new(@file)
|
54
|
+
@enzyme_specificity = [1, 'KR', 'P']
|
55
|
+
@add_Cterm_peptide = '0.0000'
|
56
|
+
end
|
57
|
+
it_should_behave_like 'a sequest params object'
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
describe Sequest::Params do
|
62
|
+
it '(private) can give a system independent basename' do
|
63
|
+
Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta").should == "hello.fasta"
|
64
|
+
Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta").should == "hello.fasta"
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|