mspire 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/test/tc_sequest.rb
DELETED
@@ -1,336 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
require 'spec_id'
|
5
|
-
require 'spec_id/sequest'
|
6
|
-
require 'test/unit'
|
7
|
-
require 'spec/mzxml'
|
8
|
-
|
9
|
-
|
10
|
-
NODELETE = false
|
11
|
-
|
12
|
-
class SequestTest < Test::Unit::TestCase
|
13
|
-
|
14
|
-
def initialize(arg)
|
15
|
-
super(arg)
|
16
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
17
|
-
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
18
|
-
@tf_params = @tfiles + "bioworks32.params"
|
19
|
-
@tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
|
20
|
-
@tf_bioworks_xml = @tfiles + "bioworks_small.xml"
|
21
|
-
end
|
22
|
-
|
23
|
-
def Xtest_set_from_bioworks
|
24
|
-
if File.exist? @tfiles_l
|
25
|
-
out_path = '.'
|
26
|
-
pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(@tf_bioworks_xml, @tf_params, {:ms_path => @tf_mzxml_path, :out_path => out_path})
|
27
|
-
pepxml_objs.each do |obj|
|
28
|
-
assert(obj.spectrum_queries.size > 2)
|
29
|
-
assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
|
30
|
-
end
|
31
|
-
else
|
32
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# assert_equal_by_pairs (really any old array)
|
37
|
-
def aep(obj, arrs)
|
38
|
-
arrs.each do |arr|
|
39
|
-
assert_equal(arr[0], obj.send(arr[1]), "#{arr[1]}")
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
#swap the first to guys first
|
44
|
-
def aeps(obj, arrs)
|
45
|
-
arrs.each do |arr|
|
46
|
-
arr[0], arr[1] = arr[1], arr[0]
|
47
|
-
end
|
48
|
-
aep(obj, arrs)
|
49
|
-
end
|
50
|
-
|
51
|
-
## turn this off if you are doing lots of tests
|
52
|
-
def Xtest_set_from_bioworks ## new one for opd1
|
53
|
-
if File.exist? @tfiles_l
|
54
|
-
st = Time.new
|
55
|
-
params = @tfiles + "opd1/sequest.3.2.params"
|
56
|
-
bioworks_xml = @tfiles_l + "opd1/bioworks.000.oldparams.xml"
|
57
|
-
mzxml_path = @tfiles + "opd1"
|
58
|
-
out_path = @tfiles
|
59
|
-
pepxml_version = 18
|
60
|
-
pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => pepxml_version, :sample_enzyme => "trypsin"})
|
61
|
-
puts "TOOK #{Time.new - st}secs"
|
62
|
-
po = pepxml_objs.first
|
63
|
-
assert_equal(pepxml_version, Sequest::PepXML.pepxml_version)
|
64
|
-
|
65
|
-
# MSMSPipelineAnalysis
|
66
|
-
pipe = po.msms_pipeline_analysis
|
67
|
-
aep(pipe, [
|
68
|
-
['http://regis-web.systemsbiology.net/pepXML', :xmlns],
|
69
|
-
['http://www.w3.org/2001/XMLSchema-instance', :xmlns_xsi],
|
70
|
-
['http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd', :xsi_schema_location],
|
71
|
-
['000.xml', :summary_xml],
|
72
|
-
])
|
73
|
-
|
74
|
-
# MSMSRunSummary
|
75
|
-
rs = pipe.msms_run_summary
|
76
|
-
assert_match(/test\/tfiles\/000/, rs.base_name)
|
77
|
-
aep(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
|
78
|
-
|
79
|
-
# SampleEnzyme
|
80
|
-
se = rs.sample_enzyme
|
81
|
-
aep(se, [ ['trypsin', :name], ['KR', :cut], ['P', :no_cut], ['C', :sense], ])
|
82
|
-
|
83
|
-
# SearchSummary
|
84
|
-
ss = rs.search_summary
|
85
|
-
assert_match(/test\/tfiles\/000/, ss.base_name)
|
86
|
-
assert_match(/1\.500/, ss.peptide_mass_tol)
|
87
|
-
aeps(ss, [ # normal attributes
|
88
|
-
[:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
|
89
|
-
|
90
|
-
# enzymatic_search_constraint
|
91
|
-
[:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
|
92
|
-
|
93
|
-
# parameters
|
94
|
-
[:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
|
95
|
-
])
|
96
|
-
|
97
|
-
# SearchDatabase
|
98
|
-
sd = ss.search_database
|
99
|
-
aeps(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
|
100
|
-
|
101
|
-
# SpectrumQueries
|
102
|
-
sq = rs.spectrum_queries
|
103
|
-
spec = sq.first
|
104
|
-
aeps(spec, [
|
105
|
-
[:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
|
106
|
-
#[:precursor_neutral_mass, "1074.5920"], # out2summary
|
107
|
-
[:precursor_neutral_mass, "1074.666926"], # mine
|
108
|
-
[:assumed_charge, "1"], [:index, "1"],
|
109
|
-
])
|
110
|
-
sh = spec.search_results.first.search_hits.first
|
111
|
-
aeps(sh, [
|
112
|
-
# normal attributes
|
113
|
-
[:hit_rank, "1"],
|
114
|
-
[:peptide, "SIYFRNFK"],
|
115
|
-
[:peptide_prev_aa, "R"],
|
116
|
-
[:peptide_next_aa, "G"],
|
117
|
-
[:protein, "gi|16130084|ref|NP_416651.1|"],
|
118
|
-
[:num_tot_proteins, "1"],
|
119
|
-
[:num_matched_ions, "4"],
|
120
|
-
[:tot_num_ions, "14"],
|
121
|
-
#[:calc_neutral_pep_mass, "1074.1920"], # out2summary
|
122
|
-
[:calc_neutral_pep_mass, "1074.23261"], # mine
|
123
|
-
#[:massdiff, "+0.400000"], # out2summary
|
124
|
-
[:massdiff, "+0.434316000000081"], # mine
|
125
|
-
[:num_tol_term, "2"], [:num_missed_cleavages, "1"], [:is_rejected, "0"],
|
126
|
-
|
127
|
-
# search_score
|
128
|
-
[:xcorr, "0.400"], [:deltacn, "0.023"], [:deltacnstar, "0"], [:spscore, "78.8"], [:sprank, "1"],
|
129
|
-
])
|
130
|
-
|
131
|
-
spec = sq[1]
|
132
|
-
aeps(spec, [
|
133
|
-
[:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
|
134
|
-
[:precursor_neutral_mass, "663.206111"], # mine
|
135
|
-
[:assumed_charge, "1"], [:index, "2"],
|
136
|
-
])
|
137
|
-
|
138
|
-
sh = spec.search_results.first.search_hits.first
|
139
|
-
aeps(sh, [
|
140
|
-
# normal attributes
|
141
|
-
[:hit_rank, "1"], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "10"],
|
142
|
-
[:num_tol_term, "2"], [:num_missed_cleavages, "0"], [:is_rejected, "0"],
|
143
|
-
#[:massdiff, "-0.600000"], # out2summary
|
144
|
-
[:massdiff, "-0.556499000000031"], # mine
|
145
|
-
#[:calc_neutral_pep_mass, "663.7920"], # out2summary
|
146
|
-
[:calc_neutral_pep_mass, "663.76261"], # mine
|
147
|
-
|
148
|
-
# search_score
|
149
|
-
[:xcorr, "0.965"], [:deltacn, "0.132"], [:deltacnstar, "0"], [:spscore, "81.1"], [:sprank, "1"],
|
150
|
-
])
|
151
|
-
|
152
|
-
spec = sq[9]
|
153
|
-
aeps(spec, [
|
154
|
-
[:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, "2"], [:index, "10"],
|
155
|
-
#[:precursor_neutral_mass, "691.0920"], # out2summary
|
156
|
-
[:precursor_neutral_mass, "691.150992"], # mine
|
157
|
-
])
|
158
|
-
|
159
|
-
sh = spec.search_results.first.search_hits.first
|
160
|
-
aeps(sh, [
|
161
|
-
# normal attributes
|
162
|
-
[:hit_rank, "1"], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "8"], [:num_tol_term, "2"],
|
163
|
-
|
164
|
-
#[:num_missed_cleavages, "0"], # out2summary misses this!
|
165
|
-
[:num_missed_cleavages, "1"],
|
166
|
-
[:is_rejected, "0"],
|
167
|
-
#[:calc_neutral_pep_mass, "691.7920"], # out2summary
|
168
|
-
[:calc_neutral_pep_mass, "691.82261"], # mine
|
169
|
-
#[:massdiff, "-0.700000"], # out2summary
|
170
|
-
[:massdiff, "-0.67161800000008"], # mine
|
171
|
-
|
172
|
-
# search_score
|
173
|
-
[:xcorr, "0.903"], [:deltacn, "0.333"], [:deltacnstar, "0"], [:spscore, "172.8"], [:sprank, "1"],
|
174
|
-
])
|
175
|
-
|
176
|
-
## IF ARE OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
|
177
|
-
string = po.to_pepxml
|
178
|
-
|
179
|
-
ans_lines = IO.read(@tfiles + "opd1/000.my_answer.100lines.xml").split("\n")
|
180
|
-
string.split("\n").each_with_index do |line,i|
|
181
|
-
base_name_re = /base_name=".*?\/test/o
|
182
|
-
if i > 99 ; break end
|
183
|
-
if i == 1
|
184
|
-
assert_equal(ans_lines[i].sub(/date=".*?"/,''), line.sub(/date=".*?"/,''))
|
185
|
-
elsif i == 2
|
186
|
-
assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
|
187
|
-
else
|
188
|
-
assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
|
189
|
-
#assert_equal(ans_lines[i], line)
|
190
|
-
end
|
191
|
-
end
|
192
|
-
else
|
193
|
-
assert_nil(puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})"))
|
194
|
-
end
|
195
|
-
|
196
|
-
#assert_match(/#{Regexp.escape("")}/, string)
|
197
|
-
|
198
|
-
end
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
def Xtest_calc_num_tol_term
|
203
|
-
params = Sequest::Params.new(@tf_params)
|
204
|
-
scall = Sequest::PepXML::SearchHit
|
205
|
-
sym = :calc_num_tol_term
|
206
|
-
assert_equal(2, scall.send(sym, params, "K.EPTIDR.E"))
|
207
|
-
assert_equal(1, scall.send(sym, params, "K.PEPTIDR.E"))
|
208
|
-
assert_equal(1, scall.send(sym, params, "F.EEPTIDR.E"))
|
209
|
-
assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
|
210
|
-
end
|
211
|
-
|
212
|
-
def Xtest_calc_num_missed_cleavages
|
213
|
-
params = Sequest::Params.new(@tf_params)
|
214
|
-
scall = Sequest::PepXML::SearchHit
|
215
|
-
sym = :calc_num_missed_cleavages
|
216
|
-
assert_equal(0, scall.send(sym, params, "K.EPTIDR.E"))
|
217
|
-
assert_equal(0, scall.send(sym, params, "K.PEPTIDR.E"))
|
218
|
-
assert_equal(0, scall.send(sym, params, "F.EEPTIDR.E"))
|
219
|
-
assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
|
220
|
-
assert_equal(1, scall.send(sym, params, "F.PEPRTIDW.R"))
|
221
|
-
assert_equal(1, scall.send(sym, params, "F.PEPKTIDW.R"))
|
222
|
-
assert_equal(2, scall.send(sym, params, "F.PKEPRTIDW.R"))
|
223
|
-
assert_equal(3, scall.send(sym, params, "F.PKEPRTIDKW.R"))
|
224
|
-
assert_equal(3, scall.send(sym, params, "F.PKEPRAALKPEERPTIDKW.R"))
|
225
|
-
assert_equal(1, scall.send(sym, params, "K.RTTIDR.E"))
|
226
|
-
assert_equal(2, scall.send(sym, params, "K.RTTIKK.E"))
|
227
|
-
end
|
228
|
-
|
229
|
-
|
230
|
-
def Xtest_sys_ind_basename
|
231
|
-
assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
|
232
|
-
assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
|
233
|
-
end
|
234
|
-
|
235
|
-
def Xtest_modifications
|
236
|
-
obj = Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
|
237
|
-
answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
|
238
|
-
assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
|
239
|
-
|
240
|
-
## need more here
|
241
|
-
end
|
242
|
-
|
243
|
-
def Xtest_modification_info
|
244
|
-
hash = {
|
245
|
-
:mod_nterm_mass => 520.2,
|
246
|
-
:modified_peptide => "MOD*IFI^E&D",
|
247
|
-
:mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
|
248
|
-
}
|
249
|
-
answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
|
250
|
-
string = Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
|
251
|
-
assert_match(_re('<modification_info'), answ)
|
252
|
-
assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
|
253
|
-
assert_match(_re(" modified_peptide=\"MOD*IFI^E&D\""), answ)
|
254
|
-
assert_match(_re("<mod_aminoacid_mass"), answ)
|
255
|
-
assert_match(_re(" position=\"3\""), answ)
|
256
|
-
assert_match(_re(" mass=\"150.3\""), answ)
|
257
|
-
assert_match(_re(" position=\"6\""), answ)
|
258
|
-
assert_match(_re(" mass=\"345.2\""), answ)
|
259
|
-
assert_match(_re("</modification_info>"), answ)
|
260
|
-
end
|
261
|
-
|
262
|
-
def _re(st)
|
263
|
-
/#{Regexp.escape(st)}/
|
264
|
-
end
|
265
|
-
|
266
|
-
def test_modifications
|
267
|
-
params = Sequest::Params.new(@tf_params)
|
268
|
-
mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
|
269
|
-
params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
|
270
|
-
params.term_diff_search_options = "14.20000 12.33000"
|
271
|
-
mod = Sequest::PepXML::Modifications.new(params, mod_string)
|
272
|
-
## no mods
|
273
|
-
peptide = "PEPTIDE"
|
274
|
-
assert_equal(nil, mod.modification_info(peptide))
|
275
|
-
peptide = "]M*EC^S@IDM#M*EMSCM["
|
276
|
-
modinfo = mod.modification_info(peptide)
|
277
|
-
assert_equal(peptide, modinfo.modified_peptide)
|
278
|
-
assert_in_delta(146.40054, modinfo.mod_nterm_mass, 0.000001)
|
279
|
-
assert_in_delta(160.52994, modinfo.mod_cterm_mass, 0.000001)
|
280
|
-
end
|
281
|
-
|
282
|
-
# splits string on ' 'and matches the line found by find_line_regexp in
|
283
|
-
# lines
|
284
|
-
def match_modline_pieces(lines, find_line_regexp, string)
|
285
|
-
pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
|
286
|
-
lines.each do |line|
|
287
|
-
if line =~ find_line_regexp
|
288
|
-
pieces.each do |piece|
|
289
|
-
assert_match(piece, line)
|
290
|
-
end
|
291
|
-
end
|
292
|
-
end
|
293
|
-
end
|
294
|
-
|
295
|
-
def test_modifications_in_run
|
296
|
-
if File.exist? @tfiles_l
|
297
|
-
modfiles_sequest_dir = @tfiles_l + 'opd1_2runs_2mods/sequest/'
|
298
|
-
modfiles_data_dir = @tfiles_l + 'opd1_2runs_2mods/data/'
|
299
|
-
srgfile = modfiles_sequest_dir + 'tmp.srg'
|
300
|
-
out_path = modfiles_sequest_dir + 'pepxml'
|
301
|
-
modfiles = %w(020 040).map do |file|
|
302
|
-
modfiles_sequest_dir + file + ".srf"
|
303
|
-
end
|
304
|
-
objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(srgfile), {:ms_data => modfiles_data_dir, :out_path => out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
|
305
|
-
%w(020 040).each do |file|
|
306
|
-
fn = out_path + '/' + file + '.xml'
|
307
|
-
assert(File.exist?(fn), "file #{fn} exists")
|
308
|
-
beginning = IO.read(fn)
|
309
|
-
lines = beginning.split("\n")
|
310
|
-
[
|
311
|
-
[/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
|
312
|
-
|
313
|
-
[/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
|
314
|
-
[/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
|
315
|
-
[/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
|
316
|
-
[/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
|
317
|
-
].each do |a,b|
|
318
|
-
match_modline_pieces(lines, a, b)
|
319
|
-
end
|
320
|
-
[
|
321
|
-
'<modification_info modified_peptide="Y#RLGGS#T#K">',
|
322
|
-
'<mod_aminoacid_mass position="1" mass="243.1559"/>',
|
323
|
-
'<mod_aminoacid_mass position="7" mass="167.0581"/>',
|
324
|
-
'</modification_info>',
|
325
|
-
'<mod_aminoacid_mass position="9" mass="181.085"/>'
|
326
|
-
].each do |line|
|
327
|
-
assert_match(/#{Regexp.escape(line)}/, beginning, "a modification info for a peptide")
|
328
|
-
end
|
329
|
-
File.unlink(fn) unless NODELETE
|
330
|
-
end
|
331
|
-
else
|
332
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
333
|
-
end
|
334
|
-
end
|
335
|
-
end
|
336
|
-
|
data/test/tc_spec.rb
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require 'spec/mzxml/parser'
|
4
|
-
require 'benchmark'
|
5
|
-
|
6
|
-
$SPEED_TEST = false
|
7
|
-
|
8
|
-
class SpecTest < Test::Unit::TestCase
|
9
|
-
|
10
|
-
def initialize(arg)
|
11
|
-
super(arg)
|
12
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
13
|
-
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
14
|
-
@tscans = @tfiles + "opd1/twenty_scans.mzXML"
|
15
|
-
@tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
|
16
|
-
#@big_file = "/work/john/ISB_Proteomics_18Set/mzXML/sergei_digest_A_full_01.mzXML"
|
17
|
-
@big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_mzxml_path_precursor_mz_by_scan
|
21
|
-
if File.exist? @tfiles_l
|
22
|
-
hash = Spec::MzXML::Parser.new.precursor_mz_by_scan_for_path(@tf_mzxml_path, "*.mzXML")
|
23
|
-
assert_equal(%w(000 020), hash.keys.sort)
|
24
|
-
assert(hash["000"].size > 0)
|
25
|
-
assert(hash["020"].size > 0)
|
26
|
-
else
|
27
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_mzxml_precursor_mz_by_scan
|
32
|
-
answ = {11=>"1122.119141", 6=>"390.947449", 12=>"444.804504", 7=>"1221.905518", 8=>"1322.036621", 14=>"446.796082", 15=>"1122.041260", 16=>"1421.951416", 18=>"358.676636", 2=>"391.045410", 20=>"1422.277100", 19=>"1460.548340", 3=>"446.009033", 10=>"1322.000732", 4=>"1222.033203"}
|
33
|
-
|
34
|
-
loaded_xmlparser = false
|
35
|
-
$".each do |lib| if lib =~ /xmlparser/ then loaded_xmlparser = true end end
|
36
|
-
|
37
|
-
types = %w(regex rexml)
|
38
|
-
if loaded_xmlparser
|
39
|
-
types.push("xmlparser")
|
40
|
-
else
|
41
|
-
puts "'xmlparser' not loaded: SKIPPING 'xmlparser' testing'"
|
42
|
-
end
|
43
|
-
types.unshift(nil)
|
44
|
-
|
45
|
-
types.each do |ty|
|
46
|
-
arr = Spec::MzXML::Parser.new.precursor_mz_by_scan(@tscans, ty)
|
47
|
-
assert_hash_equal_arr(answ, arr)
|
48
|
-
# On my linux box these are the speed comparisons:
|
49
|
-
# REXMLStreamParser ~ 28.5 sec
|
50
|
-
# REGEX ~ 3.7 sec
|
51
|
-
# REGEX without procs??
|
52
|
-
# XMLParser ~0.85 sec
|
53
|
-
|
54
|
-
# Speed test
|
55
|
-
if $SPEED_TEST
|
56
|
-
puts "PARSETYPE = #{ty ? ty : "DEFAULT"}: "
|
57
|
-
puts Benchmark.measure {
|
58
|
-
arr = Spec::MzXML::Parser.new.precursor_mz_by_scan(@big_file, ty ? ty : nil)
|
59
|
-
}
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
def Xtest_mzxml_precursor_mz_and_inten_by_scan
|
65
|
-
arr = Spec::MzXML.precursor_mz_and_inten_by_scan(@tscans)
|
66
|
-
answ = {11=>["1122.119141", "1188303.000000"], 6=>["390.947449", "6191130.000000"], 12=>["444.804504", "716303.000000"], 7=>["1221.905518", "2245001.000000"], 8=>["1322.036621", "1946525.000000"], 14=>["446.796082", "1472386.000000"], 15=>["1122.041260", "1411827.000000"], 16=>["1421.951416", "1187501.000000"] , 18=>["358.676636", "826186.000000"], 2=>["391.045410", "6986078.000000"], 20=>["1422.277100", "709884.000000"], 19=>["1460.548340", "720317.000000"], 3=>["446.009033", "1531503.000000"], 10=>["1322.000732", "1475536.000000"], 4=>["1222.033203", "1520220.000000"]}
|
67
|
-
|
68
|
-
assert_hash_equal_arr(answ, arr)
|
69
|
-
end
|
70
|
-
|
71
|
-
def assert_hash_equal_arr(hash,arr)
|
72
|
-
hash.each do |k,v|
|
73
|
-
assert_equal(hash[k], arr[k])
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
|
data/test/tc_spec_id.rb
DELETED
@@ -1,201 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require 'spec_id'
|
4
|
-
|
5
|
-
|
6
|
-
class SpecIDTest < Test::Unit::TestCase
|
7
|
-
|
8
|
-
def initialize(arg)
|
9
|
-
super(arg)
|
10
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
11
|
-
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
12
|
-
@bw = @tfiles + "bioworks_small.xml"
|
13
|
-
@old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
|
14
|
-
@prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
|
15
|
-
@srf = @tfiles_l + '7MIX_STD_110802_1.srf'
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_spec_id_creation
|
19
|
-
sp = SpecID.new(@bw)
|
20
|
-
assert_equal(106, sp.prots.size)
|
21
|
-
end
|
22
|
-
|
23
|
-
def test_classify_by_false_flag
|
24
|
-
file = @tfiles + "bioworks_with_INV_small.xml"
|
25
|
-
sp = SpecID.new(file)
|
26
|
-
assert_equal(19, sp.prots.size)
|
27
|
-
(tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
|
28
|
-
assert_equal(4, fp.size, "num false pos")
|
29
|
-
assert_equal(15, tp.size, "num true pos")
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
def test_precision
|
34
|
-
require 'roc'
|
35
|
-
file = @tfiles + "bioworks_with_INV_small.xml"
|
36
|
-
# 4 INV and 15 non-inv for 19 total prots
|
37
|
-
answ = %w( t t t t t t t t t t F t t t t F t F F )
|
38
|
-
index = 0
|
39
|
-
answ.collect! do |bool|
|
40
|
-
bo = false
|
41
|
-
if bool == 't'; bo = true end
|
42
|
-
index += 1
|
43
|
-
write_index = index
|
44
|
-
## in the bioworks_with_INV_small.xml, protein 8 and 9 have the same
|
45
|
-
## probability as protein 7
|
46
|
-
if write_index == 8 || write_index == 9
|
47
|
-
write_index = 7
|
48
|
-
end
|
49
|
-
[write_index, bo]
|
50
|
-
end
|
51
|
-
roc = ROC.new
|
52
|
-
tp, fp = ROC.new.prep_list(answ)
|
53
|
-
(exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
|
54
|
-
|
55
|
-
sp = SpecID.new(file)
|
56
|
-
assert_equal(19, sp.prots.size)
|
57
|
-
tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
|
58
|
-
(tps, ys) = roc.tps_and_ppv(tp, fp)
|
59
|
-
assert_equal(exp_tp, tps)
|
60
|
-
assert_equal(exp_fp, ys)
|
61
|
-
(num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_", true)
|
62
|
-
# @TODO: assert these guys for consistencies sake:
|
63
|
-
assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
|
64
|
-
# Consistency check only:
|
65
|
-
assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
|
66
|
-
end
|
67
|
-
|
68
|
-
def assert_in_delta_arrays(one, two, delta, message=nil)
|
69
|
-
one.each_with_index do |v,i|
|
70
|
-
assert_in_delta(v, two[i], delta, message)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_file_type
|
75
|
-
assert_equal('bioworks', SpecID.file_type(@bw))
|
76
|
-
assert_equal('protproph', SpecID.file_type(@prot_proph))
|
77
|
-
assert_equal('srg', SpecID.file_type('whatever.srg'))
|
78
|
-
## WOULD BE NICE TO GET THIS WORKING, TOO
|
79
|
-
# assert_equal('protproph', SpecID.file_type(@old_prot_proph))
|
80
|
-
if File.exist? @tfiles_l
|
81
|
-
assert File.exist?(@srf), "file #{@srf} is there"
|
82
|
-
assert_equal('srf', SpecID.file_type(@srf))
|
83
|
-
else
|
84
|
-
assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_non_standard_aa_removal
|
89
|
-
hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
|
90
|
-
cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
|
91
|
-
hash.each do |k,v|
|
92
|
-
assert_equal(v, cl.call(k))
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
end
|
99
|
-
|
100
|
-
class MyProt ; include SpecID::Prot ; end
|
101
|
-
class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
|
102
|
-
|
103
|
-
|
104
|
-
class TestOccamsRazor < Test::Unit::TestCase
|
105
|
-
|
106
|
-
def test_small
|
107
|
-
|
108
|
-
prots = (0..6).to_a.map do |n|
|
109
|
-
prot = MyProt.new
|
110
|
-
prot.reference = "ref_#{n}"
|
111
|
-
prot
|
112
|
-
end
|
113
|
-
|
114
|
-
peps = (0..12).to_a.map {|v| MyPep.new }
|
115
|
-
|
116
|
-
# 0 1 2 3 4 5 6 7 8 9 10 11 12
|
117
|
-
aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
|
118
|
-
xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
|
119
|
-
|
120
|
-
peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
|
121
|
-
pep.aaseq = aaseq
|
122
|
-
pep.xcorr = xcorr
|
123
|
-
end
|
124
|
-
|
125
|
-
prots[0].peps = peps[0,4]
|
126
|
-
prots[1].peps = [peps[2]] ## should be missing
|
127
|
-
|
128
|
-
test_prots = prots[0,2]
|
129
|
-
answ = SpecID.occams_razor(test_prots)
|
130
|
-
answ.each do |an|
|
131
|
-
assert( an[0].is_a?(SpecID::Prot), "prots are there")
|
132
|
-
end
|
133
|
-
first = answ.first
|
134
|
-
assert_equal( prots[0], first[0])
|
135
|
-
assert_equal_array_content( prots[0].peps, first[1])
|
136
|
-
|
137
|
-
require 'pp'
|
138
|
-
#pp answ
|
139
|
-
|
140
|
-
|
141
|
-
prots[0].peps = peps[0,4]
|
142
|
-
prots[1].peps = [peps[2]] ## should be missing
|
143
|
-
prots[2].peps = [] ## should be missing
|
144
|
-
|
145
|
-
answ = SpecID.occams_razor(test_prots, true)
|
146
|
-
#pp answ
|
147
|
-
|
148
|
-
|
149
|
-
#prots[2].peps = [peps[2]]
|
150
|
-
#prots[2].peps.push( peps[3] ) ## should be there since it has 2
|
151
|
-
#prots[3].peps = [peps[3]] ## should be missing
|
152
|
-
end
|
153
|
-
|
154
|
-
def assert_equal_array_content(exp1, ans, message='')
|
155
|
-
exp1.each do |item|
|
156
|
-
assert(ans.include?(item), "finding #{item}: #{message}")
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
160
|
-
|
161
|
-
|
162
|
-
require 'fasta'
|
163
|
-
|
164
|
-
class TestProteinGroups < Test::Unit::TestCase
|
165
|
-
|
166
|
-
def test_small
|
167
|
-
prots = []
|
168
|
-
|
169
|
-
aaseq = ('A'..'Z').to_a.join('')
|
170
|
-
header = "prot1"
|
171
|
-
prots << Fasta::Prot.new(header, aaseq)
|
172
|
-
|
173
|
-
aaseq = ('A'..'Z').to_a.reverse.join('')
|
174
|
-
header = "prot1_reverse"
|
175
|
-
prots << Fasta::Prot.new(header, aaseq)
|
176
|
-
|
177
|
-
aaseq = ('A'..'Z').to_a.join('')
|
178
|
-
header = "prot1_identical"
|
179
|
-
prots << Fasta::Prot.new(header, aaseq)
|
180
|
-
|
181
|
-
aaseq = ('A'..'E').to_a.join('')
|
182
|
-
header = "prot1_short"
|
183
|
-
prots << Fasta::Prot.new(header, aaseq)
|
184
|
-
|
185
|
-
aaseq = ('A'..'E').to_a.reverse.join('')
|
186
|
-
header = "prot1_reverse_short"
|
187
|
-
prots << Fasta::Prot.new(header, aaseq)
|
188
|
-
|
189
|
-
fasta = Fasta.new(prots)
|
190
|
-
|
191
|
-
pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
|
192
|
-
|
193
|
-
arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
|
194
|
-
|
195
|
-
exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
|
196
|
-
|
197
|
-
assert_equal(exp, arr)
|
198
|
-
end
|
199
|
-
|
200
|
-
end
|
201
|
-
|
data/test/tc_spec_id_xml.rb
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'spec_id'
|
3
|
-
require 'ostruct'
|
4
|
-
|
5
|
-
class Bob
|
6
|
-
include SpecIDXML
|
7
|
-
def initialize(first, second)
|
8
|
-
@first = first ; @second = second
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
|
13
|
-
class SpecIDXMLTest < Test::Unit::TestCase
|
14
|
-
include SpecIDXML
|
15
|
-
|
16
|
-
def initialize(*args)
|
17
|
-
super(*args)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_short_element_xml_from_instance_vars
|
21
|
-
obj = Bob.new(1, 2)
|
22
|
-
st = obj.short_element_xml_from_instance_vars("bob")
|
23
|
-
assert_match(/second="2"/, st)
|
24
|
-
assert_match(/first="1"/, st)
|
25
|
-
assert_match(/^<bob /, st)
|
26
|
-
assert_match(/>$/, st)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_escape_special_chars
|
30
|
-
assert_equal("&amp;&gt;&lt;&quot;&apos;" , escape_special_chars("&><\"'"))
|
31
|
-
assert_equal("PE&amp;PT&gt;I&lt;D&quot;E&apos;", escape_special_chars("PE&PT>I<D\"E'"))
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
|