mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
|
@@ -1,489 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
require 'axml'
|
|
4
|
-
require 'hash_by'
|
|
5
|
-
require 'optparse'
|
|
6
|
-
require 'ostruct'
|
|
7
|
-
require 'spec_id'
|
|
8
|
-
#require 'spec_id/precision' # gone now
|
|
9
|
-
require 'gi'
|
|
10
|
-
|
|
11
|
-
#############################################################
|
|
12
|
-
# GLOBALS:
|
|
13
|
-
PRECISION_PROGRAM_BASE = 'precision' # base name used to build the "--precision" option and its help text
|
|
14
|
-
DEF_PREFIX = "INV_" # default value of opt.f (the flag marking false/decoy proteins)
|
|
15
|
-
DEF_PERCENT_FP = "5.0" # NOTE(review): not referenced anywhere in the visible part of this file
|
|
16
|
-
#############################################################
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# @TODO: add group probability title (showing all group probabilities) for protein prob
|
|
20
|
-
|
|
21
|
-
#class String
|
|
22
|
-
# def margin
|
|
23
|
-
# self.gsub(/^\s*\|/,'')
|
|
24
|
-
# end
|
|
25
|
-
#end
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class ProteinSummary
  # Helpers that build the HTML fragments of a summary page.
  module HTML
    # Opening of the document: <html>, a <head> holding the stylesheet from
    # #style, the opening <body> tag and the toggle_vis javascript helper
    # (shows/hides the element with the given id).
    def header
      # the <head> tag was previously left unclosed ("<head"), which made the
      # following <style> tag part of the malformed head tag
      %Q{<html>
<head>
#{style}
</head>
<body>
<script type="text/javascript">
<!--
function toggle_vis(id) {
var e = document.getElementById(id);
if(e.style.display == 'none')
e.style.display = 'block';
else
e.style.display = 'none';
}
//-->
</script>
}
    end

    # The inline stylesheet placed in the document head.
    def style
      # horizontal padding carries an explicit unit (a bare "5" is not a
      # valid CSS length)
      '
<style type="text/css">
table {
border-width:1px;
border-color:#DDDDDD;
border-collapse: collapse;
}
td,th {
padding-top: 2px;
padding-bottom: 2px;
padding-left: 5px;
padding-right: 5px;
}
td.redline {
background-color: #FF0000;
color: #FFFFFF
}
div.file_info, div.software, div.fppr, div.num_proteins{
margin-left: 20px;
margin-top: 20px;
}
div.main {
margin-left: 10px;
margin-right: 10px;
margin-top: 50px;
margin-bottom: 50px;
}
div#error {
margin: 30px;
text-align:center
}
hr {color: sienna}
body { font-size: 8pt; font-family: Arial,Helvetica,Times}
</style>
'
    end

    # an anchor and a title
    def at(display, title)
      "<a title=\"#{title}\">#{display}</a>"
    end

    # Closes the <body> and <html> tags opened by #header.
    def trailer
      %q{
</body>
</html>
}
    end

    # Wraps the block's return value in a table row.
    # NOTE(review): relies on String#margin, which is not defined in the
    # visible source (only a commented-out version is) -- presumably it
    # strips the leading "|" margin of each line; confirm where it lives.
    def tr
      "|<tr>
| #{yield}
|</tr>\n".margin
    end

    # Wraps the block's return value in the centered main table.
    # NOTE(review): relies on String#margin (see #tr).
    def table
      "|<div class=\"main\"><table align=\"center\" border=\"1\" style=\"font-size:100%\" width=\"800px\">
| #{yield}
|</table></div>\n".margin
    end

    # Each element of arr becomes one <td> cell; returns the cells joined.
    def tds(arr)
      arr.map {|v| "<td>#{v}</td>"}.join
    end

    # Each element of arr becomes one <th> cell; returns the cells joined,
    # followed by a newline.
    def ths(arr)
      str = arr.map {|v| "<th>#{v}</th>"}.join
      str << "\n"
    end
  end

end
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
class ProteinSummary
|
|
126
|
-
|
|
127
|
-
include ProteinSummary::HTML
|
|
128
|
-
|
|
129
|
-
# Link to the NCBI entrez protein viewer for +gi+; the link text is +gi+
# and +name+ becomes the title (tooltip) of the link.
def ref_html(gi, name)
  url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=#{gi}"
  %Q{<a href="#{url}" title="#{name}">#{gi}</a>}
end
|
|
132
|
-
|
|
133
|
-
# Takes the -prot.xml filename and builds the <div id="error"> block that
# references the matching png image (same base name, .png extension).
# Only the trailing ".xml" extension is swapped; other occurrences of
# ".xml" inside the file name are left alone.
def error_info(prot_file_name)
  xml_bn = File.basename(prot_file_name)
  img_bn = xml_bn.sub(/\.xml\z/, '.png')
  "<div id=\"error\"><img src=\"#{img_bn}\" alt=\"[ Optional: To view error/sensitivity image, put #{img_bn} in the same directory as #{xml_bn} ]\"/>\n</div>"
end
|
|
139
|
-
|
|
140
|
-
# Attempts to get the NCBI gi code: for names starting with 'gi|' the
# second '|'-separated field is returned, any other name is returned as is.
def accession(name)
  return name unless name[0, 3] == 'gi|'
  name.split('|')[1]
end
|
|
148
|
-
|
|
149
|
-
# Turns a literal flag string into a regular expression (the flag is
# escaped). With +prefix+ the expression is anchored with ^; without it
# the flag may match anywhere. Returns nil when no flag is given.
def flag_to_regex(flag, prefix=false)
  return nil unless flag
  escaped = Regexp.escape(flag)
  prefix ? /^#{escaped}/ : /#{escaped}/
end
|
|
160
|
-
|
|
161
|
-
# Writes one line per protein to +filename+ (overwriting it): the protein
# name and its total number of peptides, separated by a tab.
def output_peptide_counts_file(prots, filename)
  File.open(filename, "w") do |out|
    prots.each do |protein|
      columns = [protein._protein_name, protein._total_number_peptides]
      out.puts columns.join("\t")
    end
  end
end
|
|
170
|
-
|
|
171
|
-
# Sorts the proteins from highest to lowest [protein probability, parent
# probability] and removes those whose name matches the false positive
# flag.
#
# flag   - literal string marking false positives (nil: nothing is removed)
# prefix - when true the flag must be at the start of the protein name,
#          otherwise it may appear anywhere in the name
#
# The filter is applied whenever a flag is given; it used to be applied
# only when +prefix+ was set, so a flag without --prefix was ignored.
def filter_and_sort(uniq_prots, flag=nil, prefix=false)
  false_flag_re = flag_to_regex(flag, prefix)
  sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability] }.reverse
  return sorted unless false_flag_re
  sorted.reject {|prot| prot._protein_name =~ false_flag_re }
end
|
|
181
|
-
|
|
182
|
-
# Counts how many of the leading proteins stay within the desired false
# positive predictive rate (FPPR).
#
# prots        - proteins responding to _probability (converted with
#                to_f); assumes that these are sorted on probability
# desired_fppr - the cutoff, a float (percent)
#
# The running rate after k proteins is SUM(1 - prob) / k * 100.
#
# returns [number_of_prots, actual_fppr] where number_of_prots is the
# number of leading proteins whose running rate does not exceed the cutoff
# and actual_fppr is the running rate (percent) over exactly those
# proteins (0.0 when there are none). When the cutoff is never exceeded
# all proteins are counted.
def num_prots_above_fppr(prots, desired_fppr)
  sum_one_minus_prob = 0.0
  proteins_within_fppr = 0
  actual_fppr = 0.0
  prots.each_with_index do |prot, index|
    sum_one_minus_prob += 1.0 - prot._probability.to_f
    running_rate_percent = (sum_one_minus_prob / (index + 1)) * 100
    # the protein that pushes the rate over the cutoff is not counted
    break if running_rate_percent > desired_fppr
    proteins_within_fppr = index + 1
    actual_fppr = running_rate_percent
  end
  [proteins_within_fppr, actual_fppr]
end
|
|
208
|
-
|
|
209
|
-
#### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
|
|
210
|
-
|
|
211
|
-
# Builds the body rows of the protein table and returns them as one string
# (one <tr> per protein, in the order given).
#
# Each row holds: running number (from 1), probability, link to the
# protein, description, percent coverage, the show/hide peptide cell,
# total number of peptides and percent spectrum ids.
#
# annotations - optional array of descriptions, taken by position; without
#               it the first annotation of the protein is used ('NA' when
#               the protein has none)
#
# The remaining arguments (prefix, false_positive_rate_percent, num_cols,
# desired_fppr, actual_percent_fp, peptide_count_filename) are accepted
# but not used by this method.
def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, annotations=nil, peptide_count_filename=nil)
  rows = []
  uniq_prots.each_with_index do |prot, index|
    number = index + 1
    rows << tr do
      gi = accession(prot._protein_name)
      description =
        if annotations
          annotations[index]
        elsif prot.annotation.size > 0
          prot.annotation.first._protein_description
        else
          'NA'
        end
      peptides = prot._unique_stripped_peptides.split('+')
      tds([number, prot._probability, ref_html(gi, prot._protein_name), description, prot._percent_coverage, peptide_cell(number, peptides), prot._total_number_peptides, prot._pct_spectrum_ids])
    end
  end
  rows.join
end
|
|
237
|
-
|
|
238
|
-
# Writes the given pieces, one after the other and without separators, to
# +file+ (the file is overwritten).
def print_html_pieces(file, *pieces)
  File.open(file, "w") do |html|
    pieces.each {|fragment| html.print(fragment) }
  end
end
|
|
245
|
-
|
|
246
|
-
# The "Source File Information" block for +file+: its expanded path, its
# modification time and its size in bytes divided by 1000 (integer
# division), labelled KB.
def file_info(file)
  full_path = File.expand_path(file)
  modified = File.mtime(file)
  size_kb = File.size(file) / 1000
  [
    "<div class=\"file_info\"><h3>Source File Information</h3>File: #{full_path}",
    "<br/>Last Modified: #{modified}",
    "<br/>Size: #{size_kb} KB",
    "</div>"
  ].join("\n")
end
|
|
252
|
-
|
|
253
|
-
# Software information block for Bioworks; the version is taken from
# obj.version, falling back to "3.2??" when that is not set.
def bioworks_script_info(obj)
  version = obj.version ? obj.version : "3.2??"
  script_info { "Bioworks version #{version}" }
end
|
|
260
|
-
|
|
261
|
-
# Software information block for ProteinProphet.
#
# Locates xinteract with `which`, runs it without arguments and looks for
# "xinteract ... (TPP ...)" in what it prints; when that is not found (or
# xinteract cannot be located or run) the version is reported as unknown.
def protproph_script_info
  reply =
    begin
      # `which` ends its answer with a newline; strip it before the path is
      # used as a command, and run nothing when xinteract was not found
      where = `which xinteract`.strip
      where.empty? ? "" : `#{where}`
    rescue StandardError
      # only ordinary errors (e.g. command not found) are treated as "no
      # answer"; signals and exit requests are no longer swallowed
      ""
    end
  prophet = reply[/xinteract.*?\((TPP .*)\)/, 1] || "TPP (version unknown)" # put your version here if you can't get it dynamically
  script_info { "ProteinProphet from: #{prophet}" }
end
|
|
274
|
-
|
|
275
|
-
# Name of this package for display: "mspire v<version>" when
# `gem list --local mspire` reports an installed version, plain "mspire"
# otherwise (including when the gem command cannot be run).
def mspire_version
  listing =
    begin
      `gem list --local mspire`
    rescue StandardError
      # best effort only; signals and exit requests are not swallowed
      ""
    end
  version = listing[/mspire \((.*?)\)/, 1]
  version ? "mspire v#{version}" : "mspire"
end
|
|
285
|
-
|
|
286
|
-
# The "Software Information" block: the block's return value, then the
# package version from mspire_version, then the command line (this file's
# base name followed by the arguments saved in @orig_argv).
def script_info
  software = yield
  package = mspire_version
  command = [File.basename(__FILE__), *@orig_argv].join(" ")
  "<div class=\"software\"><h3>Software Information</h3>#{software}<br/>Ruby package: #{package}<br/>Command: #{command}</div>"
end
|
|
289
|
-
|
|
290
|
-
# Writes the html summary for a ProteinProphet (-prot.xml) file to outfn.
#
# file                - the -prot.xml file (parsed with AXML.parse_file)
# outfn               - name of the html file to write
# opt                 - options object; the code reads c, cut_at, f, prefix,
#                       peptide_count, get_annotation and precision
# fppr_output_as_html - html placed right after the page header
#
# Steps: collect the proteins of every protein group, keep the first
# protein seen per protein name, filter and sort them, optionally compute
# the FPPR summary (c or cut_at) and truncate the list (cut_at),
# optionally write the peptide count file, then write the page.
def proph_output(file, outfn, opt, fppr_output_as_html)
  column_titles = [at('#', 'number'), at('prob','protein probability (for Prophet, higher is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (includes non-contributing peptides). Click number to show/hide'), at('#peps', 'total number of corresponding peptides that contributed to protein probability'), at('%ids', 'fraction of correct dataset peptide identifications corresponding to protein')]
  num_cols = column_titles.size
  theaders = ths(column_titles)

  root = AXML.parse_file(file)

  # the cutoff percentage comes from -c when given, otherwise from --cut_at
  cutoff = opt.c || opt.cut_at
  actual_percent_fp = cutoff ? cutoff.to_f : nil

  prots = []
  root.protein_group.each do |group|
    group.protein.each {|prt| prots << prt }
  end
  uniq_prots = prots.hash_by(:_protein_name).map {|_name, same_name| same_name.first }
  shown_prots = filter_and_sort(uniq_prots, opt.f, opt.prefix)

  ## num proteins above cutoff (if opt.c or opt.cut_at)
  num_prots_html = ''
  num_prots = nil
  if cutoff
    num_prots, actual_fppr = num_prots_above_fppr(shown_prots, actual_percent_fp)
    num_prots_html = num_prots_to_html(actual_percent_fp, actual_fppr, num_prots)
  end
  shown_prots = shown_prots[0, num_prots] if opt.cut_at

  output_peptide_counts_file(shown_prots, opt.peptide_count) if opt.peptide_count

  # an array of annotations (or nil if no option)
  annotations = nil
  if opt.get_annotation
    gis = shown_prots.map {|prot| accession(prot._protein_name) }
    annotations = GI.gi2annot(gis)
  end

  table_string = table do
    tr { theaders } + table_rows(shown_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, annotations, opt.peptide_count)
  end
  er_info = opt.precision ? error_info(file) : ""
  print_html_pieces(outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer)
end # proph_output
|
|
341
|
-
|
|
342
|
-
# Table cell content for the peptides of protein number prot_num: a link
# showing the number of sequences that toggles (via the toggle_vis
# javascript function) a hidden div listing the sequences, comma separated.
def peptide_cell(prot_num, peptide_sequences)
  count = peptide_sequences.size
  listing = peptide_sequences.join(', ')
  %Q{<a href="#prot#{prot_num}" onclick="toggle_vis('#{prot_num}');">#{count}</a><div id="#{prot_num}" style="display:none;">#{listing}</div>}
end
|
|
346
|
-
|
|
347
|
-
# Writes the html summary for Bioworks results.
#
# spec_id             - object whose prots are listed (it is also handed to
#                       bioworks_script_info); each protein is read through
#                       reference, peps, accession, protein_probability and
#                       coverage
# outfn               - the output filename
# file                - source file described in the file information
#                       block; when nil (the default) that block is left
#                       out instead of failing
# false_flag_re       - proteins whose reference matches are skipped
# fppr_output_as_html - html placed right after the page header
def bioworks_output(spec_id, outfn, file=nil, false_flag_re=nil, fppr_output_as_html=nil)
  fppr_output_as_html ||= ''
  header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')]
  theaders = ths(header_anchors)
  protein_num = 0
  rows = ""
  spec_id.prots.each do |prot|
    next if false_flag_re && prot.reference =~ false_flag_re
    protein_num += 1
    # the middle part of "X.SEQUENCE.Y", each distinct sequence once
    uniq_peps = prot.peps.map {|pep| pep.sequence.split('.')[1] }.uniq
    # first word of the reference is the protein name, the rest its annotation
    long_prot_name, *annotation_words = prot.reference.split(' ')
    annotation = annotation_words.join(' ')
    # an accession of '0' is replaced by the protein name
    link_id = prot.accession
    link_id = long_prot_name if link_id == '0'
    rows << tr { tds([protein_num, prot.protein_probability, ref_html(link_id, long_prot_name), annotation, prot.coverage, peptide_cell(protein_num, uniq_peps), prot.peps.size]) }
  end
  table_string = table do
    tr { theaders } + rows
  end
  source_info = file ? file_info(file) : ''
  print_html_pieces(outfn, header, fppr_output_as_html, source_info, bioworks_script_info(spec_id), table_string, trailer)
end # bioworks_output
|
|
379
|
-
|
|
380
|
-
# The "False Positive Predictive Rate" block: the desired and the actual
# cutoff (percent, three decimals) and the number of proteins.
#
# actual_cutoff may be nil (no actual rate available); it is then shown as
# "NA" instead of raising from sprintf.
def num_prots_to_html(desired_cutoff, actual_cutoff, num_proteins)
  actual_text = actual_cutoff.nil? ? "NA" : "#{sprintf("%.3f", actual_cutoff)} %"
  desired_text = sprintf("%.3f", desired_cutoff)
  "<div class=\"num_proteins\"><h3>False Positive Predictive Rate [ FP/(TP+FP) ]</h3>
Desired FPPR: #{desired_text} %<br/>
Actual FPPR: #{actual_text}<br/>
Number of Proteins at Actual FPPR: #{num_proteins}
</div>"
end
|
|
389
|
-
|
|
390
|
-
# transforms the output string of file_as_decoy into html: lines holding a
# run of ten or more asterisks are dropped, every remaining line gets a
# trailing <br/>, and the result is wrapped in the "Classification
# Analysis" block.
def file_as_decoy_to_html(string)
  kept = string.split("\n").reject {|line| line =~ /\*{10}/ }
  body = kept.map {|line| "#{line}<br/>" }.join("\n")
  "<div class=\"fppr\">\n<h3>Classification Analysis</h3>\n#{body}\n</div>"
end
|
|
401
|
-
|
|
402
|
-
# Puts the "Classification Analysis" heading block in front of +string+;
# the string itself is appended unchanged, after the closing </div>.
def prefix_as_decoy_to_html(string)
  heading = "<div class=\"fppr\">\n<h3>Classification Analysis</h3>\n</div>"
  heading + string
end
|
|
409
|
-
|
|
410
|
-
# Command line entry point: parses the options in argv and writes one
# <file>.summary.html for every remaining argument.
#
# argv - array of command line arguments; the options are removed from it
#        while parsing (an untouched copy is kept in @orig_argv for the
#        software information block)
#
# Prints the usage and returns when no file is given. Aborts on a file
# whose type (per SpecID.file_type) is neither "protproph" nor "bioworks".
def create_from_command_line_args(argv)
  @orig_argv = argv.dup

  opt = OpenStruct.new
  opt.f = DEF_PREFIX
  opts = OptionParser.new do |op|
    op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
    op.separator " where file = bioworks -or- <run>-prot (prophet output)"
    op.separator " outputs: <file>.summary.html"
    op.separator ""
    op.on("-f", "--false <prefix>", "ignore proteins with flag (def: #{DEF_PREFIX})") {|v| opt.f = v }
    op.on("--prefix", "false flag for prefixes only") {|v| opt.prefix = v }
    # -p has to switch on opt.precision (the value that is actually read
    # below); it used to set only opt.p, so the short flag did nothing
    op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = opt.precision = v }
    op.separator(" if --precision then -f is used to specify a file or prefix")
    op.separator(" that indicates the false positives.")
    op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
    op.separator ""
    op.separator "Options for #{PRECISION_PROGRAM_BASE}.rb :"
    op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
    op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
    op.separator ""
    op.separator "specific to ProteinProphet (with no concatenated DB):"
    op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
    op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
    op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v}
    op.separator " (use if your proteins have gi's but no annotation) "
  end

  opts.parse!(argv)

  if argv.size < 1
    puts opts
    return
  end

  fppr_output_as_html = ''
  files = argv.to_a
  files.each do |file|
    outfn = file.sub(/\.xml$/, '.summary.html')
    outfn = outfn.sub(/\.srg$/, '.summary.html')
    ## False Positive Rate Calculation:
    if opt.precision
      opt.o = outfn # won't actually be written over, but used
      to_use_argv = create_precision_argv(file, opt)
      # NOTE(review): this replaces opt with the options object returned by
      # Prec for the rest of this file and for all following files --
      # confirm that this is intended
      (out_string, opt) = Prec.new.precision(to_use_argv)
      fppr_output_as_html = prefix_as_decoy_to_html(out_string)
    end

    case SpecID.file_type(file)
    when "protproph"
      proph_output(file, outfn, opt, fppr_output_as_html)
    when "bioworks"
      spec_id = SpecID.new(file)

      false_regex = flag_to_regex(opt.f, opt.prefix)
      bioworks_output(spec_id, outfn, file, false_regex, fppr_output_as_html)
    else
      abort "filetype for #{file} not recognized!"
    end
  end

end # method create_from_command_line
|
|
473
|
-
|
|
474
|
-
# Builds the argument list handed to the precision program: the file,
# followed by only those options that are set -- '--prefix', '-f <flag>'
# and '-o <output file>'.
def create_precision_argv(file, opt)
  precision_argv = [file]
  precision_argv.push('--prefix') if opt.prefix
  precision_argv.push('-f', opt.f) if opt.f
  precision_argv.push('-o', opt.o) if opt.o
  precision_argv
end
|
|
482
|
-
|
|
483
|
-
end # ProteinSummary
|
|
484
|
-
|
|
485
|
-
##################################################################
|
|
486
|
-
# MAIN
|
|
487
|
-
##################################################################
|
|
488
|
-
|
|
489
|
-
|