mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/lib/spec_id/precision.rb
DELETED
|
@@ -1,421 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
require 'optparse'
|
|
3
|
-
require 'ostruct'
|
|
4
|
-
require 'generator'
|
|
5
|
-
require 'roc'
|
|
6
|
-
|
|
7
|
-
## silence this bad boy
|
|
8
|
-
tmp = $VERBOSE ; $VERBOSE = nil
|
|
9
|
-
require 'gnuplot'
|
|
10
|
-
$VERBOSE = tmp
|
|
11
|
-
|
|
12
|
-
class String
|
|
13
|
-
def margin
|
|
14
|
-
self.gsub(/^\s*\|/,'')
|
|
15
|
-
end
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
class Prec ; end
|
|
19
|
-
|
|
20
|
-
module Prec::PlotHelper
|
|
21
|
-
|
|
22
|
-
PLOT_TYPE = 'XYData'
|
|
23
|
-
TITLE = 'Precision vs. Num Hits [ Precision = Positive Predictive Value = TP/(TP+FP) ]'
|
|
24
|
-
XAXIS = 'Num Hits (excludes known false positives)'
|
|
25
|
-
EXT = '.toplot'
|
|
26
|
-
IMAGE_EXT = '.png'
|
|
27
|
-
|
|
28
|
-
def create_to_plot_file(all_arrs, key, files, filename_noext)
|
|
29
|
-
## CREATE the PLOT IMAGE:
|
|
30
|
-
to_plot = filename_noext + EXT
|
|
31
|
-
png = filename_noext + IMAGE_EXT
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
File.open(to_plot,'w') do |out|
|
|
35
|
-
out.puts PLOT_TYPE
|
|
36
|
-
out.puts filename_noext
|
|
37
|
-
out.puts TITLE
|
|
38
|
-
out.puts XAXIS
|
|
39
|
-
out.puts escape_to_gnuplot(y_axis_label(key))
|
|
40
|
-
files.each_with_index do |file,i|
|
|
41
|
-
#p key[i]
|
|
42
|
-
#p all_arrs[i]
|
|
43
|
-
|
|
44
|
-
key[i].each_with_index do |k,j|
|
|
45
|
-
out.puts(escape_to_gnuplot("#{file}: #{k[1][1]}"))
|
|
46
|
-
out.puts all_arrs[i][j][0].join(' ')
|
|
47
|
-
out.puts all_arrs[i][j][1].join(' ')
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
## outputs a .toplot file based on filename_noext, creates a png file, and
|
|
55
|
-
## writes html to fh that will load the png file up
|
|
56
|
-
## This is a self contained module that can be swapped out for a
|
|
57
|
-
## completely different plotting program if desired.
|
|
58
|
-
def plot_figure(all_arrs, key, files, filename_noext)
|
|
59
|
-
|
|
60
|
-
## CREATE the PLOT IMAGE:
|
|
61
|
-
to_plot = filename_noext+'.toplot'
|
|
62
|
-
png = filename_noext+'.png'
|
|
63
|
-
|
|
64
|
-
tmp = $VERBOSE ; $VERBOSE = nil
|
|
65
|
-
Gnuplot.open do |gp|
|
|
66
|
-
Gnuplot::Plot.new( gp ) do |plot|
|
|
67
|
-
plot.terminal "png noenhanced"
|
|
68
|
-
plot.output png
|
|
69
|
-
plot.title TITLE
|
|
70
|
-
plot.xlabel XAXIS
|
|
71
|
-
plot.ylabel escape_to_gnuplot(y_axis_label(key))
|
|
72
|
-
plot.style "line 1 lt 1"
|
|
73
|
-
plot.style "line 2 lt 12"
|
|
74
|
-
#plot.style "line 1 lt 1 lw #{opts.lw} pt 7 ps #{opts.ps}",
|
|
75
|
-
plot.yrange "[-0.05:#{1.05 + 0.020*files.size}]"
|
|
76
|
-
files.each_with_index do |file,i|
|
|
77
|
-
key[i].each_with_index do |k,j|
|
|
78
|
-
plot.data << Gnuplot::DataSet.new( [ all_arrs[i][j][0], all_arrs[i][j][1] ] ) do |ds|
|
|
79
|
-
ds.with = "lines"
|
|
80
|
-
ds.title = escape_to_gnuplot("#{file}: #{k[1][1]}")
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
$VERBOSE = tmp
|
|
87
|
-
|
|
88
|
-
## CREATE the HTML to load the plot:
|
|
89
|
-
basename_filename_noext = File.basename(filename_noext)
|
|
90
|
-
output = "<div id=\"plot\"><table class=\"image\" align=\"center\">\n"
|
|
91
|
-
#output << "<caption align=\"bottom\">Additional views of this data may be obtained by using the <span class=\"code\">plot.rb</span> command on '#{to_plot}' (type <span class=\"code\">plot.rb</span> for more details). Plot generated with command: <span class=\"code\">#{plot_cmd}</span></caption>\n"
|
|
92
|
-
output << "<tr><td><img src=\"#{basename_filename_noext}.png\" title=\"File #{basename_filename_noext} must be in the same directory as this html.\"/></td></tr>\n"
|
|
93
|
-
output << "</table></div>\n"
|
|
94
|
-
output
|
|
95
|
-
end # plot_figure
|
|
96
|
-
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
module Prec::HTML
|
|
100
|
-
|
|
101
|
-
# html and body tags
|
|
102
|
-
def html
|
|
103
|
-
"|<html>
|
|
104
|
-
|#{yield}
|
|
105
|
-
|</html>\n".margin
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
def body
|
|
109
|
-
"|<body>
|
|
110
|
-
| #{yield}
|
|
111
|
-
|</body>\n".margin
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
def header
|
|
115
|
-
"|<head>
|
|
116
|
-
| #{style}
|
|
117
|
-
|</head>\n".margin
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
def td
|
|
121
|
-
"<td>#{yield}</td>"
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def style
|
|
126
|
-
'
|
|
127
|
-
<style type="text/css">
|
|
128
|
-
div#tp_table {
|
|
129
|
-
text-align: center;
|
|
130
|
-
margin-top: 50px;
|
|
131
|
-
margin-bottom: 50px;
|
|
132
|
-
}
|
|
133
|
-
span.code {
|
|
134
|
-
font-family: Courier,Monospace;
|
|
135
|
-
font-size: 80%;
|
|
136
|
-
}
|
|
137
|
-
table {
|
|
138
|
-
border-width:1px;
|
|
139
|
-
border-color:#CCCCCC;
|
|
140
|
-
border-collapse: collapse;
|
|
141
|
-
}
|
|
142
|
-
caption {
|
|
143
|
-
font-size: 90%;
|
|
144
|
-
}
|
|
145
|
-
td,th {
|
|
146
|
-
padding-top: 2px;
|
|
147
|
-
padding-bottom: 2px;
|
|
148
|
-
padding-left: 1;
|
|
149
|
-
padding-right: 1;
|
|
150
|
-
}
|
|
151
|
-
th.small {
|
|
152
|
-
font-size: 80%;
|
|
153
|
-
font-weight: normal;
|
|
154
|
-
padding: 1px;
|
|
155
|
-
}
|
|
156
|
-
td.redline {
|
|
157
|
-
background-color: #FF0000;
|
|
158
|
-
color: #FFFFFF
|
|
159
|
-
}
|
|
160
|
-
div#plot {
|
|
161
|
-
margin: 30px;
|
|
162
|
-
text-align:center
|
|
163
|
-
}
|
|
164
|
-
hr {color: sienna}
|
|
165
|
-
body { font-size: 8pt; font-family: Arial,Helvetica,Times}
|
|
166
|
-
</style>
|
|
167
|
-
'
|
|
168
|
-
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
def table
|
|
172
|
-
"|<table border=\"1\" align=\"center\" style=\"font-size:100%\">
|
|
173
|
-
| #{yield}
|
|
174
|
-
|</table>\n".margin
|
|
175
|
-
end
|
|
176
|
-
|
|
177
|
-
def tr
|
|
178
|
-
"|<tr>
|
|
179
|
-
| #{yield}
|
|
180
|
-
|</tr>\n".margin
|
|
181
|
-
end
|
|
182
|
-
end # module HTML
|
|
183
|
-
|
|
184
|
-
class Prec
|
|
185
|
-
include Prec::PlotHelper
|
|
186
|
-
|
|
187
|
-
###########################################################
|
|
188
|
-
# GLOBAL SETTINGS:
|
|
189
|
-
DATA_PREC = 4 # decimal places of precision for ppv data
|
|
190
|
-
STDOUT_JTPLOT_BASE = "ppv" # if there is no outfile
|
|
191
|
-
###########################################################
|
|
192
|
-
|
|
193
|
-
include Prec::HTML
|
|
194
|
-
|
|
195
|
-
## returns an html string
|
|
196
|
-
def precision(argv)
|
|
197
|
-
opt = parse_args(argv)
|
|
198
|
-
files = argv.to_a
|
|
199
|
-
out_string = create_precision_data(files, opt)
|
|
200
|
-
[out_string, opt]
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
def run_cmd_line(argv)
|
|
204
|
-
output_string, opt, file_as_decoy = precision(argv)
|
|
205
|
-
if file_as_decoy
|
|
206
|
-
puts output_string
|
|
207
|
-
else
|
|
208
|
-
## open file and write to it..
|
|
209
|
-
if opt.o == 'STDOUT'
|
|
210
|
-
print output_string
|
|
211
|
-
else
|
|
212
|
-
File.open(opt.o,'w') do |fh| fh.print output_string end
|
|
213
|
-
end
|
|
214
|
-
end
|
|
215
|
-
end
|
|
216
|
-
|
|
217
|
-
# returns the outfile with no extension
|
|
218
|
-
def outfile_noext(opt)
|
|
219
|
-
if opt == 'STDOUT'
|
|
220
|
-
"#{STDOUT_JTPLOT_BASE}"
|
|
221
|
-
else
|
|
222
|
-
opt.sub(/#{Regexp.escape(File.extname(opt))}$/, '')
|
|
223
|
-
end
|
|
224
|
-
end
|
|
225
|
-
|
|
226
|
-
def file_noext(file)
|
|
227
|
-
file.sub(/#{Regexp.escape(File.extname(file))}$/, '')
|
|
228
|
-
end
|
|
229
|
-
|
|
230
|
-
def parse_args(argv)
|
|
231
|
-
|
|
232
|
-
opt = OpenStruct.new
|
|
233
|
-
opt.o = 'STDOUT'
|
|
234
|
-
opts = OptionParser.new do |op|
|
|
235
|
-
op.banner = "Usage: #{File.basename(__FILE__)} [options] bioworks.xml|proph-prot.xml ..."
|
|
236
|
-
op.separator ""
|
|
237
|
-
op.separator "Abbreviations and Definitions:"
|
|
238
|
-
op.separator " TP = True Positives"
|
|
239
|
-
op.separator " FP = False Positives"
|
|
240
|
-
op.separator " Precision = Positive Predictive Value = [TP/(TP+FP)]"
|
|
241
|
-
op.separator ""
|
|
242
|
-
op.separator "Output: "
|
|
243
|
-
op.separator " 1. Decoy as separate search: PPV to STDOUT"
|
|
244
|
-
op.separator " 2. Decoy proteins from concatenated database: '.html'"
|
|
245
|
-
op.separator ""
|
|
246
|
-
op.separator "Options:"
|
|
247
|
-
|
|
248
|
-
op.on("-f", "--fp_data <prefix_or_file>", "flag -or- decoy FILE") {|v| opt.f = v }
|
|
249
|
-
op.separator ""
|
|
250
|
-
op.separator " If searched with a concatenated DB, give a false flag to decoy proteins."
|
|
251
|
-
op.separator " If files have different flags, separate with commas."
|
|
252
|
-
op.separator " If searched with a separate decoy DB, give the FILE name of decoy data"
|
|
253
|
-
op.on("--prefix", "false flag as prefix only") {|v| opt.prefix = v }
|
|
254
|
-
op.separator ""
|
|
255
|
-
## NOT YET FUNCTIONAL: op.on("-e", "--peptides", "do peptides instead of proteins")
|
|
256
|
-
op.separator ""
|
|
257
|
-
op.on("-o", "--outfile <file>", "write output to file (def: #{opt.o})") {|v| opt.o = v}
|
|
258
|
-
op.on("-a", "--area", "output area under the curve instead of the plot") {|v| opt.a = v}
|
|
259
|
-
op.on("-j", "--plot_file", "output to_plot file") {|v| opt.j = v}
|
|
260
|
-
op.on_tail("
|
|
261
|
-
Example:
|
|
262
|
-
For a search on a concatenated database where the decoy proteins have
|
|
263
|
-
been flagged with the prefix 'INV_' for both Bioworks and ProteinProphet
|
|
264
|
-
output:
|
|
265
|
-
|
|
266
|
-
#{File.basename(__FILE__)} -f INV_ bioworks.xml proph-prot.xml
|
|
267
|
-
|
|
268
|
-
")
|
|
269
|
-
end
|
|
270
|
-
opts.parse!(argv)
|
|
271
|
-
|
|
272
|
-
if argv.size < 1
|
|
273
|
-
puts opts
|
|
274
|
-
exit
|
|
275
|
-
end
|
|
276
|
-
|
|
277
|
-
opt
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
## collapses arrays to one level deep so we can sync them up
|
|
282
|
-
def arrays_to_one_level_deep(all_arrs)
|
|
283
|
-
mostly_flat = []
|
|
284
|
-
all_arrs.each do |per_file|
|
|
285
|
-
per_file.each do |per_style|
|
|
286
|
-
mostly_flat << per_style[0]
|
|
287
|
-
mostly_flat << per_style[1]
|
|
288
|
-
end
|
|
289
|
-
end
|
|
290
|
-
mostly_flat
|
|
291
|
-
end
|
|
292
|
-
|
|
293
|
-
# prints rows and th for the data
|
|
294
|
-
def table_cells(all_arrs, key)
|
|
295
|
-
## columns specific headings:
|
|
296
|
-
all_string = ""
|
|
297
|
-
all_string << tr do
|
|
298
|
-
line = ""
|
|
299
|
-
key.each do |per_file|
|
|
300
|
-
per_file.each do |per_ds|
|
|
301
|
-
line << "<th class=\"small\">#{per_ds[1][0]}</th><th class=\"small\">#{per_ds[1][1]}</th>"
|
|
302
|
-
end
|
|
303
|
-
end
|
|
304
|
-
line
|
|
305
|
-
end
|
|
306
|
-
mostly_flat = arrays_to_one_level_deep(all_arrs)
|
|
307
|
-
SyncEnumerator.new(*mostly_flat).each do |row|
|
|
308
|
-
all_string << tr do
|
|
309
|
-
string = row.map {|it|
|
|
310
|
-
sty="%d"
|
|
311
|
-
if it.class == Float ; sty="%.#{DATA_PREC}f" end
|
|
312
|
-
td{ sprintf(sty,it)}
|
|
313
|
-
}.join
|
|
314
|
-
end
|
|
315
|
-
end
|
|
316
|
-
all_string
|
|
317
|
-
end
|
|
318
|
-
|
|
319
|
-
def html_table_output(all_arrs, key, files, filename_noext)
|
|
320
|
-
num_datasets_per_file = all_arrs.first.size
|
|
321
|
-
num_cols_per_dataset = 2
|
|
322
|
-
big_colspan = num_datasets_per_file * num_cols_per_dataset
|
|
323
|
-
output = table do
|
|
324
|
-
tr do
|
|
325
|
-
files.map do |file|
|
|
326
|
-
"<th colspan=\"#{big_colspan}\">#{file}</th>"
|
|
327
|
-
end.join
|
|
328
|
-
end +
|
|
329
|
-
tr do
|
|
330
|
-
key.map do |arr|
|
|
331
|
-
arr.map do |ds|
|
|
332
|
-
"<th colspan=\"2\">#{ds.first}</th>"
|
|
333
|
-
end
|
|
334
|
-
end
|
|
335
|
-
end +
|
|
336
|
-
table_cells(all_arrs, key)
|
|
337
|
-
end
|
|
338
|
-
"<div id=\"tp_table\">" + output + "</div>"
|
|
339
|
-
end
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
def y_axis_label(key)
|
|
343
|
-
## We only take the keys for the first file, as it's assumed that the major
|
|
344
|
-
## labels will be identical for all of them
|
|
345
|
-
labels = key.first.map {|tp| tp.first }.uniq
|
|
346
|
-
labels.join " | "
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
# escapes any ' chars
|
|
350
|
-
def escape_to_gnuplot(string)
|
|
351
|
-
# long way, but it works.
|
|
352
|
-
new_string = ""
|
|
353
|
-
string.split(//).each do |chr|
|
|
354
|
-
if chr == "'" ; new_string << "\\" end
|
|
355
|
-
new_string << chr
|
|
356
|
-
end
|
|
357
|
-
new_string
|
|
358
|
-
end
|
|
359
|
-
|
|
360
|
-
# if opt.f, then a prefix is assumed.
|
|
361
|
-
# if a file =~ /-prot.xml$/ then a precision plot based on probability is
|
|
362
|
-
# also created
|
|
363
|
-
def create_precision_data(files, opt)
|
|
364
|
-
#$stderr.puts "using prefix #{opt.f} ..."
|
|
365
|
-
|
|
366
|
-
if opt.f
|
|
367
|
-
prefix_arr = SpecID.extend_args(opt.f, files.size)
|
|
368
|
-
end
|
|
369
|
-
all_arrs = []
|
|
370
|
-
key = []
|
|
371
|
-
out_noext = outfile_noext(opt.o)
|
|
372
|
-
files.each_with_index do |file,i|
|
|
373
|
-
all_arrs[i] = []
|
|
374
|
-
key[i] = []
|
|
375
|
-
sp = SpecID.new(file)
|
|
376
|
-
#headers = ["#{file_noext(file)} Precision [TP/(TP+FP)]", "#{file_noext(file)} FPR [FP/(FP+TP)]"]
|
|
377
|
-
if opt.f
|
|
378
|
-
(num_hits, ppv) = sp.num_hits_and_ppv_for_prob(prefix_arr[i], opt.prefix)
|
|
379
|
-
all_arrs[i] << [num_hits,ppv]
|
|
380
|
-
key[i] << ["Precision", ["# hits", "Prec (decoy)"]]
|
|
381
|
-
end
|
|
382
|
-
if file =~ /-prot\.xml$/
|
|
383
|
-
## These are just from protein prophet probabilities:
|
|
384
|
-
(num_hits, ppv) = sp.num_hits_and_ppv_for_protein_prophet_probabilities
|
|
385
|
-
all_arrs[i] << [num_hits,ppv]
|
|
386
|
-
key[i] << ["Precision", ["# hits", "Prec (prob)"]]
|
|
387
|
-
end
|
|
388
|
-
end
|
|
389
|
-
|
|
390
|
-
string = ''
|
|
391
|
-
if opt.a
|
|
392
|
-
roc = ROC.new
|
|
393
|
-
#string << "***********************************************************\n"
|
|
394
|
-
#string << "AREA UNDER CURVE:\n"
|
|
395
|
-
key.each_with_index do |file,i|
|
|
396
|
-
string << "#{files[i]} (area under curve)\n"
|
|
397
|
-
key[i].each_index do |j|
|
|
398
|
-
string << "#{key[i][j][0]} [#{ key[i][j][1]}]:\t"
|
|
399
|
-
num_hits = all_arrs[i][j][0]
|
|
400
|
-
oth = all_arrs[i][j][1]
|
|
401
|
-
string << roc.area_under_curve(num_hits, oth).to_s << "\n"
|
|
402
|
-
end
|
|
403
|
-
end
|
|
404
|
-
#string << "***********************************************************\n"
|
|
405
|
-
else
|
|
406
|
-
if opt.j
|
|
407
|
-
create_to_plot_file(all_arrs, key, files, out_noext)
|
|
408
|
-
end
|
|
409
|
-
string = html do
|
|
410
|
-
header +
|
|
411
|
-
body do
|
|
412
|
-
plot_figure(all_arrs, key, files, out_noext) +
|
|
413
|
-
html_table_output(all_arrs, key, files, out_noext)
|
|
414
|
-
end
|
|
415
|
-
end
|
|
416
|
-
end
|
|
417
|
-
string
|
|
418
|
-
end
|
|
419
|
-
|
|
420
|
-
end # class SpecID
|
|
421
|
-
|
data/lib/toppred.rb
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
# reader for the http://bioweb.pasteur.fr/seqanal/interfaces/toppred.html
|
|
3
|
-
# output
|
|
4
|
-
class TopPred
|
|
5
|
-
|
|
6
|
-
attr_accessor :hmmmm
|
|
7
|
-
|
|
8
|
-
def initialize(toppred_out_file=nil)
|
|
9
|
-
if toppred_out_file
|
|
10
|
-
from_file(toppred_out_file)
|
|
11
|
-
end
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def from_file(toppred_out_file)
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
end
|
|
18
|
-
|
data/script/filter-peps.rb
DELETED
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/ruby -w
|
|
2
|
-
|
|
3
|
-
require 'spec_id'
|
|
4
|
-
require 'optparse'
|
|
5
|
-
require 'ostruct'
|
|
6
|
-
|
|
7
|
-
DELIMITER = "\t"
|
|
8
|
-
|
|
9
|
-
$opt = OpenStruct.new
|
|
10
|
-
$opt.deltacn = 0.2
|
|
11
|
-
$opt.charge1 = 1.5
|
|
12
|
-
$opt.charge2 = 2.0
|
|
13
|
-
$opt.charge3 = 2.5
|
|
14
|
-
|
|
15
|
-
opts = OptionParser.new do |op|
|
|
16
|
-
op.banner = "usage: #{File.basename(__FILE__)} [options] prefixlist bioworks.xml ..."
|
|
17
|
-
op.on("-1", "--charge1 <cutoff>", "xcorr <= cutoff for charge (#{$opt.charge1})") { |v| $opt.charge1 = v.to_f }
|
|
18
|
-
op.on("-2", "--charge2 <cutoff>", "xcorr <= cutoff for charge (#{$opt.charge2})") { |v| $opt.charge2 = v.to_f }
|
|
19
|
-
op.on("-3", "--charge3 <cutoff>", "xcorr <= cutoff for charge (#{$opt.charge3})") { |v| $opt.charge3 = v.to_f }
|
|
20
|
-
op.on("-d", "--deltacn <cutoff>", "deltacn >= cutoff (#{$opt.deltacn})") { |v| $opt.deltacn = v.to_f }
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
opts.parse!
|
|
24
|
-
|
|
25
|
-
if ARGV.size < 2
|
|
26
|
-
puts opts
|
|
27
|
-
exit
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
prefix_list = ARGV.shift
|
|
32
|
-
prefixes = prefix_list.split ","
|
|
33
|
-
files = ARGV.to_a
|
|
34
|
-
|
|
35
|
-
## Fill in the prefix array with the last prefix given
|
|
36
|
-
last_prefix = prefixes.first
|
|
37
|
-
if files.size > prefixes.size
|
|
38
|
-
files.each_with_index do |file,i|
|
|
39
|
-
if prefixes[i]
|
|
40
|
-
last_prefix = prefixes[i]
|
|
41
|
-
else
|
|
42
|
-
prefixes[i] = last_prefix
|
|
43
|
-
end
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
###############################
|
|
48
|
-
#CH1 = 1.0
|
|
49
|
-
#CH2 = 2.0
|
|
50
|
-
#CH3 = 3.0
|
|
51
|
-
#DELTACN = 0.2
|
|
52
|
-
###############################
|
|
53
|
-
|
|
54
|
-
def passes(pep)
|
|
55
|
-
if pep.deltacn <= $opt.deltacn
|
|
56
|
-
case pep.charge
|
|
57
|
-
when 1
|
|
58
|
-
pep.xcorr >= $opt.charge1
|
|
59
|
-
when 2
|
|
60
|
-
pep.xcorr >= $opt.charge2
|
|
61
|
-
when 3
|
|
62
|
-
pep.xcorr >= $opt.charge3
|
|
63
|
-
end
|
|
64
|
-
else
|
|
65
|
-
false
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
# adds two categories with results from the hash
|
|
71
|
-
def analyze(pep_groups, category, hash)
|
|
72
|
-
best = best_xcorr(pep_groups)
|
|
73
|
-
top10 = top10_xcorr(pep_groups)
|
|
74
|
-
hash[category+"Best"] = filter(best).size
|
|
75
|
-
hash[category+"Top10"] = filter(top10).size
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
# returns a hash containing the number of peptides passing the thresholds
|
|
79
|
-
def number_passing(peps)
|
|
80
|
-
np = {}
|
|
81
|
-
np["PepProts"] = filter(peps).size
|
|
82
|
-
|
|
83
|
-
by_scan_charge = peps.hash_by(:base_name, :first_scan, :last_scan, :charge).values
|
|
84
|
-
analyze(by_scan_charge, "ScanCharge", np)
|
|
85
|
-
|
|
86
|
-
by_scan = peps.hash_by(:base_name, :first_scan, :last_scan).values
|
|
87
|
-
analyze(by_scan, "Scan", np)
|
|
88
|
-
|
|
89
|
-
by_seq_charge = peps.hash_by(:base_name, :sequence, :charge).values
|
|
90
|
-
analyze(by_seq_charge, "SeqCharge", np)
|
|
91
|
-
|
|
92
|
-
np
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
# key = :symbol, val = [:lt|:gt|:lte|:gte, val]
|
|
97
|
-
def filter(peps)
|
|
98
|
-
peps.select {|pep| passes(pep)}
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
def top10_xcorr(pep_groups)
|
|
102
|
-
peptides_by_tens = []
|
|
103
|
-
pep_groups.each do |group|
|
|
104
|
-
arr = group.sort {|a,b| b.xcorr <=> a.xcorr }.slice(0,10)
|
|
105
|
-
peptides_by_tens.push(*arr)
|
|
106
|
-
end
|
|
107
|
-
peptides_by_tens
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
def best_xcorr(pep_groups)
|
|
111
|
-
min_peptides = pep_groups.collect do |group|
|
|
112
|
-
group.max {|a,b| a.xcorr <=> b.xcorr }
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
headers = %w(PepProts ScanChargeBest ScanChargeTop10 ScanBest ScanTop10 SeqChargeBest SeqChargeTop10)
|
|
117
|
-
csv_headers = headers.dup
|
|
118
|
-
csv_headers.unshift "FILENAME"
|
|
119
|
-
|
|
120
|
-
puts csv_headers.join(DELIMITER)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
files.each_with_index do |file,i|
|
|
124
|
-
|
|
125
|
-
obj = SpecID.new(file)
|
|
126
|
-
obj.peps = obj.pep_prots
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
obj.peps.each do |pep|
|
|
130
|
-
pep.charge = pep.charge.to_i
|
|
131
|
-
pep.xcorr = pep.xcorr.to_f
|
|
132
|
-
pep.deltacn = pep.deltacn.to_f
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
re_prefix = /^#{Regexp.escape(prefixes[i])}/
|
|
137
|
-
prc = proc {|it| it.prots.first.reference =~ re_prefix }
|
|
138
|
-
#(match, nomatch) = obj.classify(:peps, prc)
|
|
139
|
-
(fp, tp) = obj.classify(:peps, prc)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
(fp_pass, tp_pass) = [fp,tp].map {|v| number_passing(v) }
|
|
143
|
-
|
|
144
|
-
# print to file out
|
|
145
|
-
|
|
146
|
-
tp = headers.map do |head|
|
|
147
|
-
tp_pass[head]
|
|
148
|
-
end
|
|
149
|
-
fp = headers.map do |head|
|
|
150
|
-
fp_pass[head]
|
|
151
|
-
end
|
|
152
|
-
diffs = []
|
|
153
|
-
tp.each_index do |i|
|
|
154
|
-
diffs << (tp[i] - fp[i])
|
|
155
|
-
end
|
|
156
|
-
tp.unshift("TP: " + file)
|
|
157
|
-
fp.unshift("FP: " + file)
|
|
158
|
-
diffs.unshift("DIFF: " + file)
|
|
159
|
-
puts tp.join(DELIMITER)
|
|
160
|
-
puts fp.join(DELIMITER)
|
|
161
|
-
puts diffs.join(DELIMITER)
|
|
162
|
-
|
|
163
|
-
end
|
|
164
|
-
|
data/test/tc_aa_freqs.rb
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
require 'test/unit'
|
|
4
|
-
require 'spec_id/aa_freqs'
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class FastaTest < Test::Unit::TestCase
|
|
8
|
-
|
|
9
|
-
def initialize(arg)
|
|
10
|
-
super(arg)
|
|
11
|
-
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
|
12
|
-
@sf = @tfiles + "small.fasta"
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
def test_basic
|
|
16
|
-
obj = SpecID::AAFreqs.new(@sf)
|
|
17
|
-
expect = {:I=>0.0628918621937819, :S=>0.0539719475147049, :D=>0.0526145691939758, :Z=>0.0, :L=>0.102772929998061, :T=>0.0491888048607071, :E=>0.0609527503070261, :O=>0.0, :C=>0.0157714433456144, :K=>0.0471850559110594, :U=>0.0, :Q=>0.0382651412319824, :W=>0.0137030573330748, :A=>0.101997285243359, :M=>0.0294745006786892, :J=>0.0, :G=>0.0811195139292871, :Y=>0.0254670027793937, :X=>0.0, :F=>0.0418201796910348, :R=>0.0546829552065154, :V=>0.0702604873634542, :H=>0.0213302307543145, :B=>0.0, :N=>0.03471010277293, :P=>0.0418201796910348}
|
|
18
|
-
aaf = obj.aafreqs
|
|
19
|
-
expect.each do |k,v|
|
|
20
|
-
assert(aaf.key?(k))
|
|
21
|
-
assert_in_delta(v, aaf[k], 0.00000001, "freqs match up")
|
|
22
|
-
end
|
|
23
|
-
sum = 0.0
|
|
24
|
-
aaf.values.each do |v|
|
|
25
|
-
sum += v
|
|
26
|
-
end
|
|
27
|
-
assert_in_delta(1.0, sum, 0.0000000000001, "all freqs add to 1")
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def test_probability_of_length_table
|
|
31
|
-
# p SpecID::AAFreqs.probability_of_length_table(0.01, 4)
|
|
32
|
-
assert_equal_arrs_in_delta([0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001], SpecID::AAFreqs.probability_of_length_table(0.01, 4), 0.000000001)
|
|
33
|
-
|
|
34
|
-
assert_equal_arrs_in_delta([0.0, 0.2, 0.36, 0.488, 0.5904], SpecID::AAFreqs.probability_of_length_table(0.2, 4), 0.000000001)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def test_actual_and_expected_number
|
|
38
|
-
fobj = Fasta.new.read_file(@sf)
|
|
39
|
-
obj = SpecID::AAFreqs.new
|
|
40
|
-
obj.aafreqs = obj.calculate_frequencies(fobj)
|
|
41
|
-
|
|
42
|
-
peptide_aaseqs = fobj.prots.map do |prot|
|
|
43
|
-
prot.aaseq[0..12]
|
|
44
|
-
end
|
|
45
|
-
assert_equal(50, peptide_aaseqs.size, 'sanity check')
|
|
46
|
-
(ac,ex) = obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
|
|
47
|
-
assert_equal(9, ac)
|
|
48
|
-
assert_in_delta( 9.33530631238985, ex, 0.0000000001)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
private
|
|
52
|
-
def assert_equal_arrs_in_delta(expect, actual, delta)
|
|
53
|
-
expect.each_with_index do |v,i|
|
|
54
|
-
assert_in_delta(v, actual[i], delta)
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
end
|