mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
|
2
|
+
|
|
3
|
+
require 'spec_id'
|
|
4
|
+
require 'spec_id/srf'
|
|
5
|
+
|
|
6
|
+
# we use this to set the values of generic proteins below
|
|
7
|
+
require 'set_from_hash'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
describe 'creating a list of proteins from peptides', :shared => true do
|
|
11
|
+
before(:each) do
  # Builds @peps, the peptide fixtures used by every example in this group.
  #
  # EXPECTS the including group to define:
  #   @prots - an array of proteins (indices 0..4 are referenced here)
  #   @meth  - a proc taking two args: an array of peptides and the kind of
  #            list creation (:no_update, :update or :new)
  #
  # The proteins are read from @prots; a bare `prots` is not defined
  # anywhere in this file.
  hashes = [
    {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [@prots[0], @prots[1]]},
    {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [@prots[1], @prots[2]]},
    {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [@prots[3]]},
    {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [@prots[4]]},
    {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [@prots[0]]},
    # @prots[1,2] is a two-element slice: prot_1 and prot_2
    {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => @prots[1,2]},
  ]

  @peps = hashes.map do |hash|
    SRF::OUT::Pep.new.set_from_hash(hash)
  end
end
|
|
28
|
+
|
|
29
|
+
it 'compiles protein lists from peps not touching peps attr (:no_update)' do
  # With :no_update the proteins are collected but their peps stay empty.
  proteins = @meth.call(@peps, :no_update)

  expected_refs = %w(prot_0 prot_1 prot_2 prot_3 prot_4)
  sorted_refs = proteins.map { |protein| protein.reference }.sort
  sorted_refs.should == expected_refs

  proteins.each { |protein| protein.peps.should == [] }
end
|
|
41
|
+
|
|
42
|
+
it 'compiles protein lists with updated peps attribute (:update)' do
  # With :update each protein's peps attribute lists the peptides that hit it,
  # and the returned proteins are the very objects the peptides pointed at.
  original_prot_0 = @prots.find { |v| v.reference == 'prot_0' }

  prts = @meth.call(@peps, :update)
  protein_match(prts, 'prot_0', %w(PEP0 PEP4))
  protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
  protein_match(prts, 'prot_2', %w(PEP1 PEP5))
  protein_match(prts, 'prot_3', %w(PEP2))
  protein_match(prts, 'prot_4', %w(PEP3))

  srt_ref = prts.map { |v| v.reference }.sort
  %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots

  # Compare against the source protein; comparing two lookups in the same
  # result list would always succeed.
  prot_0 = prts.find { |v| v.reference == 'prot_0' }
  prot_0.__id__.should == original_prot_0.__id__ # proteins are identical
end
|
|
59
|
+
|
|
60
|
+
it 'compiles protein lists of new proteins (:new)' do
  # With :new the returned proteins carry the right peptides but are fresh
  # objects, distinct from the proteins the peptides pointed at.
  original_prot_0 = @prots.find { |v| v.reference == 'prot_0' }

  # Go through @meth like the other examples so the including group decides
  # which method is under test.
  prts = @meth.call(@peps, :new)
  protein_match(prts, 'prot_0', %w(PEP0 PEP4))
  protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
  protein_match(prts, 'prot_2', %w(PEP1 PEP5))
  protein_match(prts, 'prot_3', %w(PEP2))
  protein_match(prts, 'prot_4', %w(PEP3))

  srt_ref = prts.map { |v| v.reference }.sort
  %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots

  # Compare object ids on both sides; comparing an object with an Integer id
  # can never fail.
  prot_0 = prts.find { |v| v.reference == 'prot_0' }
  prot_0.__id__.should_not == original_prot_0.__id__ # proteins are not identical
end
|
|
75
|
+
|
|
76
|
+
# checks that among prts, the protein with ref has peptides with pepseqs
|
|
77
|
+
# aaseqs
|
|
78
|
+
# Asserts that the protein in +prts+ whose reference equals +ref+ holds
# exactly the peptides whose aaseqs are given in +pepseqs+.
#
# prts    - array of proteins responding to #reference and #peps
# ref     - reference string of the protein to inspect
# pepseqs - expected peptide aaseqs, in any order
#
# Both lists are sorted before comparison so neither the order of +pepseqs+
# nor the order of the protein's peps matters.
def protein_match(prts, ref, pepseqs)
  prt = prts.find { |v| v.reference == ref }
  sorted_prt_peps_aaseqs = prt.peps.map { |v| v.aaseq }.sort
  pepseqs.sort.should == sorted_prt_peps_aaseqs
end
|
|
84
|
+
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
describe SpecID, 'with generic proteins' do
  # Supplies the @prots / @meth fixtures the shared group expects, using
  # SpecID::GenericProt as the protein class.
  before(:all) do
    @prots = (0..7).map do |index|
      attributes = {:reference => "prot_" + index.to_s, :peps => []}
      SpecID::GenericProt.new.set_from_hash(attributes)
    end
    @meth = proc { |pep_list, list_kind| SpecID.protein_list(pep_list, list_kind) }
  end

  it_should_behave_like 'creating a list of proteins from peptides'
end
|
|
96
|
+
|
|
97
|
+
describe SpecID, 'with array based proteins' do
  # Supplies the @prots / @meth fixtures the shared group expects, using
  # SRF::OUT::Prot as the protein class.
  before(:all) do
    @prots = (0..7).map do |index|
      attributes = {:reference => "prot_" + index.to_s, :peps => []}
      SRF::OUT::Prot.new.set_from_hash(attributes)
    end
    @meth = proc { |pep_list, list_kind| SpecID.protein_list(pep_list, list_kind) }
  end

  it_should_behave_like 'creating a list of proteins from peptides'
end
|
|
106
|
+
|
|
107
|
+
# Marker module mixed into TrueClass and FalseClass so that both true and
# false answer is_a?(Boolean); Ruby has no built-in Boolean class.
module Boolean; end
[TrueClass, FalseClass].each { |klass| klass.send(:include, Boolean) }
|
|
110
|
+
|
|
111
|
+
describe SpecID, 'being created' do
  it 'can be from small bioworks.xml' do
    spec_id = SpecID.new(Tfiles + '/bioworks_small.xml')
    spec_id.prots.size.should == 106
  end

  it 'can be from small -prot.xml (newer prophet versions)' do
    spec_id = SpecID.new(Tfiles + '/interact-opd1_mods_small-prot.xml')
    spec_id.is_a?(SpecID).should be_true
    spec_id.is_a?(Proph::ProtSummary).should be_true
    spec_id.prots.size.should == 20
    spec_id.peps.size.should == 31

    # every protein attribute must come back as the listed class
    prot_types = {
      :protein_name => String,
      :n_indistinguishable_proteins => Integer,
      :probability => Float,
      :percent_coverage => Float,
      :unique_stripped_peptides => Array,
      :group_sibling_id => String,
      :total_number_peptides => Integer,
      :pct_spectrum_ids => Float,
      :peps => Array,
    }
    spec_id.prots.each do |prot|
      prot_types.each do |attribute, klass|
        prot.send(attribute).is_a?(klass).should be_true
      end
    end

    # every peptide attribute must come back as the listed class
    pep_types = {
      :aaseq => String,
      :peptide_sequence => String,
      :charge => Integer,
      :initial_probability => Float,
      :nsp_adjusted_probability => Float,
      :weight => Float,
      :is_nondegenerate_evidence => Boolean, # Boolean is the marker module mixed into true/false
      :n_enzymatic_termini => Integer,
      :n_sibling_peptides => Float,
      :n_sibling_peptides_bin => Integer,
      :n_instances => Integer,
      :is_contributing_evidence => Boolean,
      :calc_neutral_pep_mass => Float,
      :modification_info => Object,
      :mod_info => Object,
    }
    spec_id.peps.each do |pep|
      pep_types.each do |attribute, klass|
        pep.send(attribute).is_a?(klass).should be_true
      end
    end

    # peptides mapping to several proteins: each entry is a protein or nil
    # (nil because this is a small file and some proteins are not found)
    shared_prot_lists = spec_id.peps.map { |pep| pep.prots }.select { |list| list.size > 1 }
    shared_prot_lists.each do |prot_list|
      prot_list.each do |prt|
        (prt.nil? || prt.is_a?(SpecID::Prot)).should be_true
        (prt.nil? || prt.is_a?(Proph::Prot)).should be_true
      end
    end

    # number of peptides carrying modification info (frozen value)
    mod_objects = spec_id.peps.map { |pep| pep.mod_info }.compact
    mod_objects.size.should == 23
  end

  spec_large do
    it 'works on a large file' do
      file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods-prot.xml'
      start = Time.now
      spec_id = SpecID.new(file)
      puts "- Took #{Time.now - start} seconds to read"

      # unlike the small-file example, nil proteins are not tolerated here
      shared_prot_lists = spec_id.peps.map { |pep| pep.prots }.select { |list| list.size > 1 }
      shared_prot_lists.each do |prot_list|
        prot_list.each do |prt|
          prt.is_a?(SpecID::Prot).should be_true
          prt.is_a?(Proph::Prot).should be_true
        end
      end
    end
  end

  # NOTE(review): it_should is not defined in this file; presumably it marks
  # a not-yet-implemented example - confirm in spec_helper. The body makes no
  # assertions yet.
  it_should 'can be from -prot.xml (older prophet versions)' do
    prot_xml = Tfiles + '/4-03-03_small-prot.xml'
    prot_xml = Tfiles + '/yeast_gly_small-prot.xml'
  end
end
|
|
212
|
+
|
|
213
|
+
describe SpecID, 'class methods' do

  it 'determines filetype (small files)' do
    # expected file type (as a symbol) => file handed to SpecID.file_type
    files_by_type = {
      :bioworks => Tfiles + "/bioworks_small.xml",
      :protproph => Tfiles + '/opd1/000_020_3prots-prot.xml',
      :pepproph => Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml',
      :srf => Tfiles + '/head_of_7MIX.srf',
      :srg => 'whatever.srg',
    }
    files_by_type.each do |expected_type, path|
      SpecID.file_type(path).should == expected_type.to_s
    end
    ## WOULD BE NICE TO GET THIS WORKING, TOO
    # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
  end

  it 'can remove non-standard amino acids' do
    # raw sequence => sequence expected after removal
    expectations = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
    expectations.each do |raw, cleaned|
      SpecID::Pep.remove_non_amino_acids(raw).should == cleaned
    end
  end

end
|
|
239
|
+
|
|
240
|
+
describe SpecID, "determining the minimum set of proteins from pephits" do

  # Minimal protein / peptide classes built on the SpecID mixins.
  before(:all) do
    class MyProt
      include SpecID::Prot
    end
    class MyPep
      include SpecID::Pep
      attr_accessor :xcorr
    end
  end

  it 'can do occams razor on small set' do
    prots = (0..6).map do |index|
      fresh = MyProt.new
      fresh.reference = "ref_#{index}"
      fresh
    end

    # index:     0   1   2   3   4   5   6   7   8   9   10  11    12
    aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
    xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]

    peps = (0..12).map { MyPep.new }
    peps.each_with_index do |pep, index|
      pep.aaseq = aaseqs[index]
      pep.xcorr = xcorrs[index]
    end

    prots[0].peps = peps[0,4]
    prots[1].peps = [peps[2]]  ## should be missing

    test_prots = prots[0,2]
    answ = SpecID.occams_razor(test_prots)
    answ.each { |pair| pair[0].is_a?(SpecID::Prot).should be_true }

    top = answ.first
    top[0].should == prots[0]
    equal_array_content(prots[0].peps, top[1])

    require 'pp'
    #pp answ

    prots[0].peps = peps[0,4]
    prots[1].peps = [peps[2]]  ## should be missing
    prots[2].peps = []         ## should be missing

    # second run passes true as the extra argument; only checked for not raising
    answ = SpecID.occams_razor(test_prots, true)
    puts '- NEED MORE tests HERE!' if $specdoc
    #pp answ

    #prots[2].peps = [peps[2]]
    #prots[2].peps.push( peps[3] )  ## should be there since it has 2
    #prots[3].peps = [peps[3]]  ## should be missing
  end

  # Expects every item of exp1 to be included in ans (one direction only;
  # extra items in ans are not reported). message is accepted but unused.
  def equal_array_content(exp1, ans, message='')
    exp1.each { |expected_item| ans.should include(expected_item) }
  end

end
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
require 'fasta'
|
|
307
|
+
|
|
308
|
+
describe SpecID::Pep, "with a small fasta object" do
  # Builds five small proteins (full alphabet, its reverse, an identical copy,
  # and short forward/reverse variants) and wraps them in a Fasta object.
  before(:each) do
    full = ('A'..'Z').to_a
    short = ('A'..'E').to_a
    entries = [
      ["prot1", full.join('')],
      ["prot1_reverse", full.reverse.join('')],
      ["prot1_identical", full.join('')],
      ["prot1_short", short.join('')],
      ["prot1_reverse_short", short.reverse.join('')],
    ]

    @prots = []
    entries.each do |header, aaseq|
      @prots << Fasta::Prot.new(header, aaseq)
    end

    @fasta = Fasta.new(@prots)
  end

  it "can find protein groups from a fasta object" do
    pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
    groups = SpecID::Pep.protein_groups_by_sequence(pep_seqs, @fasta)

    p0, p1, p2, p3, p4 = @prots
    # one expected protein group per entry of pep_seqs, in the same order
    expected_groups = [
      [p0, p2, p3],  # ABCD
      [p0, p2],      # DEFG
      [p0, p2, p3],  # ABCD
      [p0, p2],      # DEFG
      [p1, p4],      # EDCB
      [p1],          # FEDCB
      [p0, p2],      # XYZ
      [],            # RANDOM
      [],            # AEABA
    ]

    groups.should == expected_groups
  end
end
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
###########################
|
|
348
|
+
# old tests
|
|
349
|
+
###########################
|
|
350
|
+
|
|
351
|
+
=begin
|
|
352
|
+
def test_classify_by_false_flag
|
|
353
|
+
file = @tfiles + "bioworks_with_INV_small.xml"
|
|
354
|
+
sp = SpecID.new(file)
|
|
355
|
+
assert_equal(19, sp.prots.size)
|
|
356
|
+
(tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
|
|
357
|
+
assert_equal(4, fp.size, "num false pos")
|
|
358
|
+
assert_equal(15, tp.size, "num true pos")
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
=end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
|
2
|
+
|
|
3
|
+
require 'spec_id_xml'
|
|
4
|
+
|
|
5
|
+
describe SpecIDXML, 'included with a simple object' do
  # Bob is a minimal host class: two instance variables plus the mixin.
  before(:all) do
    class Bob
      include SpecIDXML
      def initialize(first=nil, second=nil)
        @first = first ; @second = second
      end
    end
  end

  it 'creates short element xmls using an objects instance variables' do
    obj = Bob.new(1, 2)
    st = obj.short_element_xml_from_instance_vars("bob")
    # the ordering is arbitrary: "<bob first=\"1\" second=\"2\"/>\n"
    st.should =~ /second="2"/
    st.should =~ /first="1"/
    st.should =~ /^<bob /
    st.should =~ />$/
  end

  it 'escapes special characters' do
    obj = Bob.new
    # The expected values are the XML entity forms of & > < " '.
    # NOTE(review): &apos; is assumed for the single quote - confirm against
    # SpecIDXML#escape_special_chars.
    obj.escape_special_chars("&><\"'").should == "&amp;&gt;&lt;&quot;&apos;"
    obj.escape_special_chars("PE&PT>I<D\"E'").should == "PE&amp;PT&gt;I&lt;D&quot;E&apos;"
  end

end
|
|
32
|
+
|
|
33
|
+
|