mspire 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/test/tc_fasta_shaker.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require 'fasta'
|
4
|
-
require File.dirname(__FILE__) + '/load_bin_path.rb'
|
5
|
-
|
6
|
-
Filestring = ">gi|P1
|
7
|
-
AMKRGAN
|
8
|
-
>gi|P2
|
9
|
-
CRGATKKTAGRPMEK
|
10
|
-
>gi|P3
|
11
|
-
PEPTIDE
|
12
|
-
"
|
13
|
-
|
14
|
-
Rev = ">gi|P1
|
15
|
-
NAGRKMA
|
16
|
-
>gi|P2
|
17
|
-
KEMPRGATKKTAGRC
|
18
|
-
>gi|P3
|
19
|
-
EDITPEP
|
20
|
-
"
|
21
|
-
|
22
|
-
RevTryptic = ">gi|P1
|
23
|
-
MAKRNAG
|
24
|
-
>gi|P2
|
25
|
-
CRTAGKKEMPRGATK
|
26
|
-
>gi|P3
|
27
|
-
EDITPEP
|
28
|
-
"
|
29
|
-
|
30
|
-
ShuffTryptic = ">gi|P1
|
31
|
-
MAKRNAG
|
32
|
-
>gi|P2
|
33
|
-
CRTAGKKEMPRGATK
|
34
|
-
>gi|P3
|
35
|
-
EDITPEP
|
36
|
-
"
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
class TestBasic < Test::Unit::TestCase
|
41
|
-
|
42
|
-
def setup
|
43
|
-
testdir = File.dirname(__FILE__)
|
44
|
-
libdir = testdir + '/../lib'
|
45
|
-
bindir = testdir + '/../bin'
|
46
|
-
progname = "fasta_shaker.rb"
|
47
|
-
@cmd = "ruby -I #{libdir} #{bindir}/#{progname} "
|
48
|
-
@tfiles = testdir + '/tfiles/'
|
49
|
-
@tmpfile = @tfiles + "littlefasta.trash.fasta"
|
50
|
-
File.open(@tmpfile, "w") {|fh| fh.print Filestring }
|
51
|
-
@f = @tfiles + "trash.fasta"
|
52
|
-
end
|
53
|
-
|
54
|
-
def teardown
|
55
|
-
File.unlink @tmpfile if File.exist? @tmpfile
|
56
|
-
File.unlink @f if File.exist? @f
|
57
|
-
end
|
58
|
-
|
59
|
-
def Xtest_reverse
|
60
|
-
cmd = @cmd + "reverse #{@tmpfile} -o #{@f}"
|
61
|
-
system cmd
|
62
|
-
assert_equal(Rev, fastap(@f).to_s)
|
63
|
-
end
|
64
|
-
|
65
|
-
def Xtest_reverse_tryptic
|
66
|
-
cmd = @cmd + "reverse #{@tmpfile} -o #{@f} --tryptic_peptides"
|
67
|
-
system cmd
|
68
|
-
assert_equal(RevTryptic, fastap(@f).to_s)
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_shuff_tryptic
|
72
|
-
cmd = @cmd + "shuffle #{@tmpfile} -o #{@f} --tryptic_peptides"
|
73
|
-
|
74
|
-
system cmd
|
75
|
-
lns = fastap(@f).to_s.split("\n")
|
76
|
-
assert_equal('KR', lns[1][2..3])
|
77
|
-
assert_equal('R', lns[3][1..1])
|
78
|
-
assert_equal('CRGATKKTAGRPMEK'.size, lns[3].size, "sequence is same size")
|
79
|
-
assert_not_equal('CRGATKKTAGRPMEK', lns[3], "sequence is randomised from original [remote chance of failure] rerun to make sure")
|
80
|
-
end
|
81
|
-
|
82
|
-
def Xtest_shuffle
|
83
|
-
cmd = @cmd + "shuffle #{@tmpfile} -o #{@f}"
|
84
|
-
system cmd
|
85
|
-
clines = strlns(Filestring)
|
86
|
-
lns = fastalns(@f)
|
87
|
-
lns.each_with_index do |line,i|
|
88
|
-
assert_equal(clines[i].size, line.size, "same size lines: A: <<#{clines[i]}>> B: <<#{line}>>")
|
89
|
-
end
|
90
|
-
assert_equal('CRGATKKTAGRPMEK'.size, lns[3].size, "sequence is same size")
|
91
|
-
assert_not_equal('CRGATKKTAGRPMEK', lns[3], "sequence is randomised from original [remote chance of failure] rerun to make sure")
|
92
|
-
end
|
93
|
-
|
94
|
-
def Xtest_cat
|
95
|
-
cmd = @cmd + "reverse #{@tmpfile} -c -o #{@f}"
|
96
|
-
`#{cmd}` ## suppress warning
|
97
|
-
lns = fastalns(@f)
|
98
|
-
assert_equal(strlns(Filestring), lns[0..5], "first part equal")
|
99
|
-
assert_equal(strlns(Rev), lns[6..-1], "second part equal")
|
100
|
-
end
|
101
|
-
|
102
|
-
def Xtest_fraction
|
103
|
-
cmd = @cmd + "reverse #{@tmpfile} -f 2.6 -o #{@f}"
|
104
|
-
`#{cmd}`
|
105
|
-
assert_equal(8, fastap(@f).size)
|
106
|
-
|
107
|
-
cmd = @cmd + "shuffle #{@tmpfile} -f 2.0 -c -p MINE_ -o #{@f}"
|
108
|
-
`#{cmd}`
|
109
|
-
assert_equal(9, fastap(@f).size)
|
110
|
-
fp = fastap(@f)
|
111
|
-
fp[0..2].each do |prt|
|
112
|
-
assert_match(/^>/, prt.header, "prefix matches")
|
113
|
-
end
|
114
|
-
fp[3..5].each do |prt|
|
115
|
-
assert_match(/^>MINE_f0_/, prt.header, "prefix matches")
|
116
|
-
end
|
117
|
-
fp[6..8].each do |prt|
|
118
|
-
assert_match(/^>MINE_f1_/, prt.header, "prefix matches")
|
119
|
-
end
|
120
|
-
#cmd = @cmd + "reverse #{@tmpfile} -c -f 2.0 -o #{@f}"
|
121
|
-
end
|
122
|
-
|
123
|
-
def Xtest_prefix
|
124
|
-
cmd = @cmd + "reverse #{@tmpfile} -p SILLY_ -o #{@f}"
|
125
|
-
`#{cmd}`
|
126
|
-
fp = fastap(@f)
|
127
|
-
fp.each do |prt|
|
128
|
-
assert_match(/^>SILLY_.+/, prt.header)
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
|
133
|
-
private
|
134
|
-
def strlns(str)
|
135
|
-
str.split("\n")
|
136
|
-
end
|
137
|
-
|
138
|
-
def fastalns(fn)
|
139
|
-
assert(File.exist?(fn), "FILE: #{fn} exists")
|
140
|
-
IO.read(fn).split("\n")
|
141
|
-
end
|
142
|
-
|
143
|
-
# returns the fasta object proteins
|
144
|
-
def fastap(fn)
|
145
|
-
assert(File.exist?(fn), "FILE: #{fn} exists")
|
146
|
-
Fasta.new.read_file(fn).prots
|
147
|
-
end
|
148
|
-
|
149
|
-
end
|
data/test/tc_filter.rb
DELETED
@@ -1,203 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require 'spec_id/filter'
|
4
|
-
require 'spec_id/srf'
|
5
|
-
require 'set_from_hash'
|
6
|
-
require File.dirname(__FILE__) + '/test_helper'
|
7
|
-
|
8
|
-
$VERBOSE = false
|
9
|
-
|
10
|
-
|
11
|
-
class TestFilter < Test::Unit::TestCase
|
12
|
-
|
13
|
-
def initialize(arg)
|
14
|
-
super(arg)
|
15
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
16
|
-
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
17
|
-
@small_inv = @tfiles + 'bioworks_with_INV_small.xml'
|
18
|
-
@small = @tfiles + 'bioworks_small.xml'
|
19
|
-
## SRF:
|
20
|
-
@zero_srf = @tfiles_l + 'opd1_cat_inv/000.srf'
|
21
|
-
@twenty_srf = @tfiles_l + 'opd1_cat_inv/020.srf'
|
22
|
-
@zero_srg = @tfiles_l + 'bioworks_000.srg'
|
23
|
-
@both_srg = @tfiles_l + 'bioworks_both.srg'
|
24
|
-
## FASTA:
|
25
|
-
@opd1_fasta = @tfiles_l + 'opd1_cat_inv/ecoli_K12_ncbi_20060321.fasta'
|
26
|
-
@opd1_correct_fasta = @tfiles_l + 'opd1_cat_inv/correct_fictitious_314.fasta'
|
27
|
-
if File.exist? @tfiles_l
|
28
|
-
File.open(@zero_srg, 'w') {|fh| fh.puts( File.expand_path(@zero_srf) ) }
|
29
|
-
File.open(@both_srg, 'w') {|fh| fh.puts( File.expand_path(@zero_srf) ); fh.puts( File.expand_path(@twenty_srf) ) }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_protein_fppr
|
34
|
-
peps_per_prot = [4,4,3,2,2]
|
35
|
-
(num, mean_fppr, std_num, std_fppr) = SpecID::Filter.new.protein_fppr(peps_per_prot, 1, 10)
|
36
|
-
assert_equal(0, mean_fppr, "no prots completely wrong")
|
37
|
-
assert_equal(0, std_fppr, "no prots completely wrong")
|
38
|
-
(num, mean_fppr, std_num, std_fppr) = SpecID::Filter.new.protein_fppr(peps_per_prot, 14, 10)
|
39
|
-
assert_equal(4.0/5, mean_fppr, "only one prot right")
|
40
|
-
assert_equal(0.0, std_fppr, "only one prot right")
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_filter_sequest
|
44
|
-
hashes = [
|
45
|
-
{:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
|
46
|
-
{:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
|
47
|
-
{:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
|
48
|
-
{:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
|
49
|
-
{:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
|
50
|
-
{:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
|
51
|
-
]
|
52
|
-
peps = hashes.map do |hash|
|
53
|
-
pep = SRF::OUT::Pep.new.set_from_hash(hash)
|
54
|
-
end
|
55
|
-
sp = GenericSpecID.new.set_from_hash({:peps => peps})
|
56
|
-
before_size = sp.peps.size
|
57
|
-
assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 5, "all passing")
|
58
|
-
assert_filter([1.6, 1.6, 1.6, 0.1, 50], sp, 0, "xcorrs too high")
|
59
|
-
assert_filter([1.6, 1.0, 1.0, 0.1, 50], sp, 4, "one xcorr too high")
|
60
|
-
assert_filter([1.0, 1.6, 1.0, 0.1, 50], sp, 2, "one xcorr too high")
|
61
|
-
assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 4, "one xcorr too high")
|
62
|
-
assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 0, "high deltacn")
|
63
|
-
|
64
|
-
## with deltcnstars:
|
65
|
-
assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 6, "all passing", true)
|
66
|
-
assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 1, "high deltacn", true)
|
67
|
-
assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 5, "one xcorr too high", true)
|
68
|
-
end
|
69
|
-
|
70
|
-
def assert_filter(filter_args, spec_id, expected_passing, message, include_deltcn=false)
|
71
|
-
npeps = spec_id.filter_sequest(filter_args, include_deltcn)
|
72
|
-
assert_equal(expected_passing, npeps.size, message)
|
73
|
-
end
|
74
|
-
|
75
|
-
def test_passing_proteins
|
76
|
-
hash_prots = (0..7).map do |n|
|
77
|
-
SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
|
78
|
-
end
|
79
|
-
arr_prots = (0..7).map do |n|
|
80
|
-
SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
|
81
|
-
end
|
82
|
-
[hash_prots, arr_prots].each do |prots|
|
83
|
-
|
84
|
-
hashes = [
|
85
|
-
{:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
|
86
|
-
{:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
|
87
|
-
{:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
|
88
|
-
{:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
|
89
|
-
{:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
|
90
|
-
{:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
|
91
|
-
]
|
92
|
-
|
93
|
-
peps = hashes.map do |hash|
|
94
|
-
SRF::OUT::Pep.new.set_from_hash(hash)
|
95
|
-
end
|
96
|
-
|
97
|
-
|
98
|
-
prts = SpecID.passing_proteins(peps)
|
99
|
-
exp = (0..4).map do |n|
|
100
|
-
"prot_" + n.to_s
|
101
|
-
end
|
102
|
-
refs = prts.map { |v| v.reference }.sort
|
103
|
-
assert_equal(exp, refs)
|
104
|
-
|
105
|
-
|
106
|
-
prts = SpecID.passing_proteins(peps, :update)
|
107
|
-
prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
|
108
|
-
assert_protein_match(prts, 'prot_0', %w(PEP0 PEP4))
|
109
|
-
assert_protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
|
110
|
-
assert_protein_match(prts, 'prot_2', %w(PEP1 PEP5))
|
111
|
-
assert_protein_match(prts, 'prot_3', %w(PEP2))
|
112
|
-
assert_protein_match(prts, 'prot_4', %w(PEP3))
|
113
|
-
srt_ref = prts.map {|v| v.reference}.sort
|
114
|
-
assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
|
115
|
-
prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
|
116
|
-
assert_equal(prot_0_before.__id__, prot_0.__id__, "proteins are identical")
|
117
|
-
|
118
|
-
|
119
|
-
prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first.__id__
|
120
|
-
|
121
|
-
prts = SpecID.passing_proteins(peps, :new)
|
122
|
-
assert_protein_match(prts, 'prot_0', %w(PEP0 PEP4))
|
123
|
-
assert_protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
|
124
|
-
assert_protein_match(prts, 'prot_2', %w(PEP1 PEP5))
|
125
|
-
assert_protein_match(prts, 'prot_3', %w(PEP2))
|
126
|
-
assert_protein_match(prts, 'prot_4', %w(PEP3))
|
127
|
-
srt_ref = prts.map {|v| v.reference}.sort
|
128
|
-
assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
|
129
|
-
prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
|
130
|
-
assert_not_equal(prot_0_before, prot_0.__id__, "proteins are not identical")
|
131
|
-
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
def assert_protein_match(prts, ref, pepseqs, message='')
|
136
|
-
prt = prts.select{|v| v.reference == ref }.first
|
137
|
-
sorted_prt_peps_aaseqs = prt.peps.map {|v| v.aaseq }.sort
|
138
|
-
sorted_pepseqs = pepseqs.sort
|
139
|
-
assert_equal(pepseqs, sorted_prt_peps_aaseqs, message)
|
140
|
-
end
|
141
|
-
|
142
|
-
def test_usage
|
143
|
-
output = capture_stdout {
|
144
|
-
SpecID::Filter.run_from_argv([])
|
145
|
-
}
|
146
|
-
assert_match('usage:', output)
|
147
|
-
end
|
148
|
-
|
149
|
-
def test_basic_bioworks_xml
|
150
|
-
|
151
|
-
output = capture_stdout {
|
152
|
-
SpecID::Filter.run_from_argv([@small].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000))) )
|
153
|
-
}
|
154
|
-
## FROZEN:
|
155
|
-
assert_match(/pep_hits\s+4/, output)
|
156
|
-
assert_match(/uniq_aa_hits\s+4/, output)
|
157
|
-
assert_match(/prot_hits\s+4/, output)
|
158
|
-
|
159
|
-
|
160
|
-
output = capture_stdout {
|
161
|
-
SpecID::Filter.run_from_argv([@small_inv].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
|
162
|
-
}
|
163
|
-
#puts ""
|
164
|
-
#puts output
|
165
|
-
## FROZEN:
|
166
|
-
assert_match(/pep_hits\s+151/, output)
|
167
|
-
assert_match(/uniq_aa_hits\s+75/, output)
|
168
|
-
assert_match(/prot_hits\s+13/, output)
|
169
|
-
end
|
170
|
-
|
171
|
-
def test_srf
|
172
|
-
if File.exist? @tfiles_l
|
173
|
-
## dcy
|
174
|
-
output = capture_stdout {
|
175
|
-
SpecID::Filter.run_from_argv([@zero_srg].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
|
176
|
-
}
|
177
|
-
## FROZEN:
|
178
|
-
#puts ""
|
179
|
-
#puts output
|
180
|
-
assert_match(/pep_hits\s+2111\s+107\.2/, output)
|
181
|
-
assert_match(/uniq_aa_hits\s+2034\s+106\.6/, output)
|
182
|
-
assert_match(/prot_hits\s+1454\s+100\.0/, output)
|
183
|
-
|
184
|
-
## cys tps fps COMBINED
|
185
|
-
# tps are fictitious!
|
186
|
-
output = capture_stdout {
|
187
|
-
# that's the background freq for ecoli that this file's from
|
188
|
-
SpecID::Filter.run_from_argv([@zero_srg].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 --occams_razor --cys 0.0115866200193321 --t).push(@opd1_correct_fasta))))
|
189
|
-
}
|
190
|
-
#puts ""
|
191
|
-
#puts output
|
192
|
-
## FROZEN:
|
193
|
-
assert_match(/num\s+tps%\s+cys%/, output, "header")
|
194
|
-
assert_match(/pep_hits\s+4374\s+9\d\.\d.*\s+83\.7/, output)
|
195
|
-
assert_match(/uniq_aa_hits\s+4203\s+9\d\.\d.*\s+82\.8/, output)
|
196
|
-
assert_match(/prot_hits\s+2986\s+9\d\..*\s+7\d\./, output)
|
197
|
-
assert_match(/occams.*\s+2986\s+8\d\..*\s+7\d\./, output)
|
198
|
-
else
|
199
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})" ))
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
end
|
data/test/tc_filter_peps.rb
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class TestFilter < Test::Unit::TestCase
|
6
|
-
ROOT_DIR = File.join(File.dirname(__FILE__), '..')
|
7
|
-
|
8
|
-
def test_filter_results
|
9
|
-
@tfiles = File.dirname(File.expand_path(__FILE__)) + '/tfiles/'
|
10
|
-
@bfile = @tfiles + "bioworks_with_SHUFF_small.xml"
|
11
|
-
|
12
|
-
cmd_core = "ruby -I #{File.join(ROOT_DIR, 'lib')} #{File.join(ROOT_DIR, 'script', 'filter-peps.rb')} "
|
13
|
-
#puts `#{cmd_core}`
|
14
|
-
cmd = cmd_core + "SHUFF_ #{@bfile}"
|
15
|
-
output = `#{cmd}`
|
16
|
-
|
17
|
-
freeze = %{FILENAME\tPepProts\tScanChargeBest\tScanChargeTop10\tScanBest\tScanTop10\tSeqChargeBest\tSeqChargeTop10
|
18
|
-
TP: #{@tfiles}bioworks_with_SHUFF_small.xml\t3\t3\t3\t3\t3\t3\t3
|
19
|
-
FP: #{@tfiles}bioworks_with_SHUFF_small.xml\t3\t3\t3\t3\t3\t2\t3
|
20
|
-
DIFF: #{@tfiles}bioworks_with_SHUFF_small.xml\t0\t0\t0\t0\t0\t1\t0
|
21
|
-
}
|
22
|
-
assert_equal(freeze, output)
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
cmd = cmd_core + "SHUFF_ #{@bfile} -1 1.0 -2 2.0 -3 3.0"
|
28
|
-
output = `#{cmd}`
|
29
|
-
|
30
|
-
freeze = %{FILENAME\tPepProts\tScanChargeBest\tScanChargeTop10\tScanBest\tScanTop10\tSeqChargeBest\tSeqChargeTop10
|
31
|
-
TP: #{@tfiles}bioworks_with_SHUFF_small.xml\t3\t3\t3\t3\t3\t3\t3
|
32
|
-
FP: #{@tfiles}bioworks_with_SHUFF_small.xml\t4\t4\t4\t4\t4\t3\t4
|
33
|
-
DIFF: #{@tfiles}bioworks_with_SHUFF_small.xml\t-1\t-1\t-1\t-1\t-1\t0\t-1
|
34
|
-
}
|
35
|
-
assert_equal(freeze, output)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
data/test/tc_gi.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require 'gi'
|
4
|
-
|
5
|
-
|
6
|
-
class Gi2AnnotTest < Test::Unit::TestCase
|
7
|
-
ROOT_DIR = File.join(File.dirname(__FILE__), '..')
|
8
|
-
|
9
|
-
def test_single_query
|
10
|
-
annot = GI.gi2annot([836805])
|
11
|
-
if annot
|
12
|
-
assert_equal('proteosome component PRE4 [Saccharomyces cerevisiae]', annot.first)
|
13
|
-
else
|
14
|
-
assert_nil( puts("SKIPPING gi test (no internet connection available)") )
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
data/test/tc_id_class_anal.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
|
4
|
-
|
5
|
-
|
6
|
-
class IDClassAnalTest < Test::Unit::TestCase
|
7
|
-
|
8
|
-
def initialize(arg)
|
9
|
-
super(arg)
|
10
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
11
|
-
@tf_bioworks_esmall_xml = @tfiles + "bioworks_with_INV_small.xml"
|
12
|
-
@tf_bioworks_small_xml = @tfiles + "bioworks_small.xml"
|
13
|
-
@tf_bioworks_shuff = @tfiles + "bioworks_with_SHUFF_small.xml"
|
14
|
-
@tf_proph_inv = @tfiles + "opd1/opd1_cat_inv_small-prot.xml"
|
15
|
-
@cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S id_class_anal.rb "
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_usage
|
19
|
-
assert_match(/usage:/, `#{@cmd}`)
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_proph_basic
|
23
|
-
output = `#{@cmd} -p INV_ #{@tf_proph_inv}`
|
24
|
-
fps = [1.00, 1.00, 0.97]
|
25
|
-
tps = [1.00, 1.00, 0.98, 0.97, 0.97, 0.97, 0.97]
|
26
|
-
#File.open("tmp.csv","w") do |fh| fh.print output end
|
27
|
-
assert 1
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_basic
|
31
|
-
output = `#{@cmd} -p INV_ #{@tf_bioworks_esmall_xml}`
|
32
|
-
exp = [
|
33
|
-
[1, 1.0, 0.0],
|
34
|
-
[2, 1.0, 0.0],
|
35
|
-
[3, 1.0, 0.0],
|
36
|
-
[4, 1.0, 0.0],
|
37
|
-
[5, 1.0, 0.0],
|
38
|
-
[6, 1.0, 0.0],
|
39
|
-
[9, 1.0, 0.0],
|
40
|
-
[10, 1.0, 0.0],
|
41
|
-
[11, 0.909090909090909],
|
42
|
-
[12, 0.916666666666667],
|
43
|
-
[13, 0.923076923076923],
|
44
|
-
[14, 0.928571428571429],
|
45
|
-
[15, 0.866666666666667],
|
46
|
-
]
|
47
|
-
outarr = output.split($/)
|
48
|
-
exp.each_with_index do |line,i|
|
49
|
-
outfloats = outarr[i+1].split("\t").collect {|v| v.to_f }
|
50
|
-
line.each_with_index do |v,j|
|
51
|
-
assert_in_delta(v, outfloats[j], 0.00000000000000001)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_multiple_output
|
57
|
-
myplot = 'class_anal.toplot'
|
58
|
-
output = `#{@cmd} -j -p INV_,SHUFF_ #{@tf_bioworks_esmall_xml} #{@tf_bioworks_shuff}`
|
59
|
-
assert(output.size > 10) ## @TODO: BETTER HERE
|
60
|
-
assert(File.exist?(myplot), "file #{myplot} exists")
|
61
|
-
File.unlink myplot
|
62
|
-
end
|
63
|
-
|
64
|
-
def test_jtplot_output
|
65
|
-
myplot = 'class_anal.toplot'
|
66
|
-
output = `#{@cmd} -p INV_ -j #{@tf_bioworks_esmall_xml}`
|
67
|
-
assert(File.exist?(myplot), "file #{myplot} exists")
|
68
|
-
File.unlink myplot
|
69
|
-
end
|
70
|
-
end
|
data/test/tc_id_precision.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
|
4
|
-
|
5
|
-
class IDPrecisionTest < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def initialize(arg)
|
8
|
-
super(arg)
|
9
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
10
|
-
@tf_bioworks_inv_xml = @tfiles + "bioworks_with_INV_small.xml"
|
11
|
-
@tf_bioworks_shuff = @tfiles + "bioworks_with_SHUFF_small.xml"
|
12
|
-
@cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S id_precision.rb "
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_usage
|
16
|
-
#puts "RUNNING: #{@cmd}"
|
17
|
-
assert_match(/usage:/, `#{@cmd}`)
|
18
|
-
end
|
19
|
-
|
20
|
-
## freeze the output
|
21
|
-
def test_basic
|
22
|
-
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml}"
|
23
|
-
#puts "RUNNING: #{cmd}"
|
24
|
-
reply = `#{cmd}`
|
25
|
-
string =<<END
|
26
|
-
# NH = number of hits
|
27
|
-
# TP = true positives
|
28
|
-
# FP = false positives
|
29
|
-
# PR = precision = TP/(TP+FP)
|
30
|
-
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
31
|
-
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
32
|
-
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
33
|
-
155, 1.0, 67, 1.0, 123, 1.0, 155, 1.0, 125, 1.0, 155, 1.0
|
34
|
-
186, 1.0, 85, 1.0, 154, 1.0, 186, 1.0, 156, 1.0, 186, 1.0
|
35
|
-
196, 1.0, 90, 1.0, 161, 1.0, 196, 1.0, 163, 1.0, 196, 1.0
|
36
|
-
214, 1.0, 94, 1.0, 168, 1.0, 214, 1.0, 170, 1.0, 214, 1.0
|
37
|
-
215, 1.0, 95, 1.0, 169, 1.0, 215, 1.0, 171, 1.0, 215, 1.0
|
38
|
-
217, 0.995391705069124, 97, 0.989690721649485, 171, 0.994152046783626, 217, 0.995391705069124, 173, 0.994219653179191, 217, 0.995391705069124
|
39
|
-
219, 0.995433789954338, 99, 0.98989898989899, 172, 0.994186046511628, 219, 0.995433789954338, 175, 0.994285714285714, 219, 0.995433789954338
|
40
|
-
227, 0.995594713656388, 106, 0.990566037735849, 180, 0.994444444444444, 227, 0.995594713656388, 183, 0.994535519125683, 227, 0.995594713656388
|
41
|
-
228, 0.995614035087719, 107, 0.990654205607477, 181, 0.994475138121547, 228, 0.995614035087719, 184, 0.994565217391304, 228, 0.995614035087719
|
42
|
-
229, 0.991266375545852, 108, 0.981481481481482, 182, 0.989010989010989, 229, 0.991266375545852, 185, 0.989189189189189, 229, 0.991266375545852
|
43
|
-
END
|
44
|
-
|
45
|
-
# This was the result we were getting before first hashing on protein
|
46
|
-
# sequences and doing uniqe peptide hits. It is very similar ( but not
|
47
|
-
# exactly the same) to what we are doing now). Must have something to do
|
48
|
-
# with the way things are hashed out.
|
49
|
-
before_doing_uniq_peptides=<<END
|
50
|
-
# NH = number of hits
|
51
|
-
# TP = true positives
|
52
|
-
# FP = false positives
|
53
|
-
# PR = precision = TP/(TP+FP)
|
54
|
-
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
55
|
-
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
56
|
-
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
57
|
-
125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
|
58
|
-
155, 1.0, 85, 1.0, 154, 1.0, 155, 1.0, 156, 1.0, 155, 1.0
|
59
|
-
186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
|
60
|
-
193, 1.0, 94, 1.0, 168, 1.0, 193, 1.0, 170, 1.0, 193, 1.0
|
61
|
-
204, 1.0, 95, 1.0, 169, 1.0, 204, 1.0, 171, 1.0, 204, 1.0
|
62
|
-
212, 1.0, 97, 0.989690721649485, 171, 0.994152046783626, 212, 1.0, 173, 0.994219653179191, 212, 1.0
|
63
|
-
214, 0.995327102803738, 99, 0.98989898989899, 172, 0.994186046511628, 214, 0.995327102803738, 175, 0.994285714285714, 214, 0.995327102803738
|
64
|
-
216, 0.99537037037037, 106, 0.990566037735849, 180, 0.994444444444444, 216, 0.99537037037037, 183, 0.994535519125683, 216, 0.99537037037037
|
65
|
-
227, 0.995594713656388, 107, 0.990654205607477, 181, 0.994475138121547, 227, 0.995594713656388, 184, 0.994565217391304, 227, 0.995594713656388
|
66
|
-
228, 0.995614035087719, 108, 0.981481481481482, 182, 0.989010989010989, 228, 0.995614035087719, 185, 0.989189189189189, 228, 0.995614035087719
|
67
|
-
229, 0.991266375545852, , , , , 229, 0.991266375545852, , , 229, 0.991266375545852
|
68
|
-
END
|
69
|
-
assert_equal(string, reply)
|
70
|
-
end
|
71
|
-
|
72
|
-
# Runs the command under test with the "INV_" argument, the bioworks INV xml
# file and the -a flag, then compares the captured stdout against the
# expected "area under the curve" table.
def test_basic_with_area
  cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
  #puts "RUNNING: #{cmd}"
  reply = `#{cmd}`
  # Historical output, from before hashing for unique peptides. It is very
  # similar (but not identical) to the current output. It is kept here for
  # reference only and is NOT asserted (it used to be assigned to `string`
  # and then immediately overwritten):
  #
  #   Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
  #   ./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
  #
  # NOTE(review): the column separators below are reproduced as they appear in
  # this listing; confirm they match the command's real output byte-for-byte.
  string =<<NEWEND
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
./test/tfiles/bioworks_with_INV_small.xml 228.939375794224 107.877585995136 181.929045912105 228.939375794224 184.924437525838 228.939375794224
NEWEND
  assert_equal(string, reply, "area under the curve")
end
|
89
|
-
end
|
data/test/tc_msrun.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require 'spec/msrun'
|
4
|
-
require 'spec/mzxml/parser'
|
5
|
-
|
6
|
-
|
7
|
-
# Unit tests for Spec::MSRun and Spec::MSRunIndex, driven by fixture files
# under the tfiles/ directory next to this test file.
class MSRunTest < Test::Unit::TestCase

  # Sets up the fixture paths used by the tests below.
  def initialize(arg)
    super(arg)
    @tfiles = File.dirname(__FILE__) + '/tfiles/'
    @ti_file = @tfiles + "020a.mzXML.timeIndex"
    @mzxml_file = @tfiles + "opd1/twenty_scans.mzXML"
  end

  # Spec::MSRun.precursor_mz_by_scan should return an array indexed by scan
  # number holding the precursor m/z as a string (nil where there is none).
  def test_precursor_mz_by_scan
    answer = [nil, nil, "391.045410", "446.009033", "1222.033203", nil, "390.947449", "1221.905518", "1322.036621", nil, "1322.000732", "1122.119141", "444.804504", nil, "446.796082", "1122.041260", "1421.951416", nil, "358.676636", "1460.548340", "1422.277100"]
    array = Spec::MSRun.precursor_mz_by_scan(@mzxml_file)
    assert_equal(answer, array)
  end

  # Assigning a path to basename_noext should store the file's base name
  # with the expected extension(s) stripped.
  def test_basename_noext
    obj = Spec::MSRunIndex.new
    {'path/to/file.mzXML' => 'file', 'other/path/file1.mzXML.timeIndex' => 'file1', 'path2/path3/file2.timeIndex' => 'file2', 'other/path/file3.weird' => 'file3', '/path/file4.ext1.ext2'=> 'file4.ext1'}.each do |k,v|
      obj.basename_noext = k
      assert_equal(v, obj.basename_noext)
    end
  end

  # Checks the first, second and last scans read from a .timeIndex file.
  def test_msrun_index_from_file
    guy = Spec::MSRunIndex.new(@ti_file)
    sbn = guy.scans_by_num
    s1 = sbn[1]
    s2 = sbn[2]
    sl = sbn[-1]
    # These were previously `assert expected, actual`, which always passes:
    # the expected array is truthy and the actual array was only being used
    # as the failure message. assert_equal performs the intended comparison.
    assert_equal [1, 1, 600.020000], [s1.num, s1.ms_level, s1.time]
    assert_equal [2, 2, 601.280000], [s2.num, s2.ms_level, s2.time]
    assert_equal [3496, 2, 4802.130000], [sl.num, sl.ms_level, sl.time]
  end

  # Builds an MSRunIndex from scans parsed out of an mzXML file (once per
  # parser backend) and compares each scan's index-file line to `exp`.
  def test_msrun_index_from_mzXML
    exp = [
      "1 1 0.44",
      "2 2 1.9 391.045410 6986078.0",
      "2 3 2.75 446.009033 1531503.0",
      "2 4 3.64 1222.033203 1520220.0",
      "1 5 5.15",
      "2 6 6.5 390.947449 6191130.0",
      "2 7 7.47 1221.905518 2245001.0",
      "2 8 9.14 1322.036621 1946525.0",
      "1 9 10.69",
      "2 10 12.0 1322.000732 1475536.0",
      "2 11 13.66 1122.119141 1188303.0",
      "2 12 15.37 444.804504 716303.0",
      "1 13 16.4",
      "2 14 17.77 446.796082 1472386.0",
      "2 15 18.77 1122.041260 1411827.0",
      "2 16 20.65 1421.951416 1187501.0",
      "1 17 22.37",
      "2 18 23.74 358.676636 826186.0",
      "2 19 25.23 1460.548340 720317.0",
      "2 20 27.05 1422.277100 709884.0",
    ]
    %w(xmlparser rexml).each do |parser|
      scans = Spec::MzXML::Parser.new.scans_by_num(@mzxml_file, parser)
      obj = Spec::MSRunIndex.new
      obj.scans_by_num = scans
      assert_equal(Spec::MSRunIndex, obj.class)
      obj.scans_by_num.each_with_index do |scan,i|
        # index 0 is skipped; exp[i-1] lines up with the scan at index i
        next if i == 0
        assert_equal(Spec::Scan, scan.class)
        assert_equal_values(exp[i-1], scan.to_index_file_string)
      end
    end
  end

  # Takes two space delimited strings and asserts that they hold the same
  # values when each field is converted to a float.
  # Only the fields present in string1 are compared; any extra trailing
  # fields in string2 are not examined.
  def assert_equal_values(string1, string2)
    arr1 = string1.split(" ")
    arr2 = string2.split(" ")
    arr1.each_with_index do |val,i|
      assert_equal(val.to_f, arr2[i].to_f)
    end
  end

end
|
88
|
-
|