mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,489 +0,0 @@
1
-
2
-
3
- require 'axml'
4
- require 'hash_by'
5
- require 'optparse'
6
- require 'ostruct'
7
- require 'spec_id'
8
- #require 'spec_id/precision' # gone now
9
- require 'gi'
10
-
11
- #############################################################
12
- # GLOBALS:
13
- PRECISION_PROGRAM_BASE = 'precision'
14
- DEF_PREFIX = "INV_"
15
- DEF_PERCENT_FP = "5.0"
16
- #############################################################
17
-
18
-
19
- # @TODO: add group probability title (showing all group probabilities) for protein prob
20
-
21
- #class String
22
- # def margin
23
- # self.gsub(/^\s*\|/,'')
24
- # end
25
- #end
26
-
27
-
28
- class ProteinSummary
29
- module HTML
30
- def header
31
- %Q{<html>
32
- <head
33
- #{style}
34
- </head>
35
- <body>
36
- <script type="text/javascript">
37
- <!--
38
- function toggle_vis(id) {
39
- var e = document.getElementById(id);
40
- if(e.style.display == 'none')
41
- e.style.display = 'block';
42
- else
43
- e.style.display = 'none';
44
- }
45
- //-->
46
- </script>
47
- }
48
- end
49
-
50
- def style
51
- '
52
- <style type="text/css">
53
- table {
54
- border-width:1px;
55
- border-color:#DDDDDD;
56
- border-collapse: collapse;
57
- }
58
- td,th {
59
- padding-top: 2px;
60
- padding-bottom: 2px;
61
- padding-left: 5;
62
- padding-right: 5;
63
- }
64
- td.redline {
65
- background-color: #FF0000;
66
- color: #FFFFFF
67
- }
68
- div.file_info, div.software, div.fppr, div.num_proteins{
69
- margin-left: 20px;
70
- margin-top: 20px;
71
- }
72
- div.main {
73
- margin-left: 10px;
74
- margin-right: 10px;
75
- margin-top: 50px;
76
- margin-bottom: 50px;
77
- }
78
- div#error {
79
- margin: 30px;
80
- text-align:center
81
- }
82
- hr {color: sienna}
83
- body { font-size: 8pt; font-family: Arial,Helvetica,Times}
84
- </style>
85
- '
86
- end
87
-
88
- # an anchor and a title
89
- def at(display, title)
90
- "<a title=\"#{title}\">#{display}</a>"
91
- end
92
-
93
- def trailer
94
- %q{
95
- </body>
96
- </html>
97
- }
98
- end
99
-
100
- def tr
101
- "|<tr>
102
- | #{yield}
103
- |</tr>\n".margin
104
- end
105
-
106
- def table
107
- "|<div class=\"main\"><table align=\"center\" border=\"1\" style=\"font-size:100%\" width=\"800px\">
108
- | #{yield}
109
- |</table></div>\n".margin
110
- end
111
-
112
- def tds(arr)
113
- arr.map {|v| "<td>#{v}</td>"}.join
114
- end
115
-
116
- def ths(arr)
117
- str = arr.map {|v| "<th>#{v}</th>"}.join
118
- str << "\n"
119
- end
120
- end
121
-
122
- end
123
-
124
-
125
- class ProteinSummary
126
-
127
- include ProteinSummary::HTML
128
-
129
- def ref_html(gi, name)
130
- "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=#{gi}\" title=\"#{name}\">#{gi}</a>"
131
- end
132
-
133
- # Takes the -prot.xml filename and grabs the png file (if available)
134
- def error_info(prot_file_name)
135
- img = prot_file_name.gsub('.xml', '.png')
136
- img_bn = File.basename(img)
137
- "<div id=\"error\"><img src=\"#{img_bn}\" alt=\"[ Optional: To view error/sensitivity image, put #{img_bn} in the same directory as #{File.basename(prot_file_name)} ]\"/>\n</div>"
138
- end
139
-
140
- # attempts to get the NCBI gi code
141
- def accession(name)
142
- if (name.include? '|') && (name[0,3] == 'gi|')
143
- name.split('|')[1]
144
- else
145
- name
146
- end
147
- end
148
-
149
- def flag_to_regex(flag, prefix=false)
150
- if flag
151
- if prefix
152
- /^#{Regexp.escape(flag)}/
153
- else
154
- /#{Regexp.escape(flag)}/
155
- end
156
- else
157
- nil
158
- end
159
- end
160
-
161
- # given a list of proteins, output a tab delimited textfile with protein
162
- # name and the total number of peptides found
163
- def output_peptide_counts_file(prots, filename)
164
- File.open(filename, "w") do |fh_out|
165
- prots.each do |prot|
166
- fh_out.puts [prot._protein_name, prot._total_number_peptides].join("\t")
167
- end
168
- end
169
- end
170
-
171
- # filters on the false positive regex and sorts by prot probability
172
- def filter_and_sort(uniq_prots, flag=nil, prefix=false)
173
- false_flag_re = flag_to_regex(flag, prefix)
174
- sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability]}.reverse
175
- ## filter on prefix
176
- if prefix
177
- sorted = sorted.reject {|prot| prot._protein_name =~ false_flag_re }
178
- end
179
- sorted
180
- end
181
-
182
- # assumes that these are sorted on probability
183
- # desired_fppr is a float
184
- # returns [number_of_prots, actual_fppr]
185
- def num_prots_above_fppr(prots, desired_fppr)
186
- current_fppr_rate_percent = 0.0
187
- previous_fppr_rate_percent = 0.0
188
- current_sum_one_minus_prob = 0.0
189
- proteins_within_fppr = 0
190
- actual_fppr = nil
191
- already_found = false
192
- prot_cnt = 0
193
- prots.each do |prot|
194
- prot_cnt += 1
195
- # SUM(1-probX)/#prots
196
- current_sum_one_minus_prob += 1.0 - prot._probability.to_f
197
- current_fppr_rate_percent = (current_sum_one_minus_prob / prot_cnt) * 100
198
-
199
- if current_fppr_rate_percent > desired_fppr && !already_found
200
- actual_fppr = previous_fppr_rate_percent
201
- proteins_within_fppr = prot_cnt
202
- already_found = true
203
- end
204
- previous_fppr_rate_percent = current_fppr_rate_percent
205
- end
206
- [proteins_within_fppr, actual_fppr]
207
- end
208
-
209
- #### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
210
-
211
- # returns a string of the table rows
212
- # false_positive_rate (give as a %) is the cutoff mark
213
- # returns the number of proteins at the desired_fppr (if given)
214
- def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, annotations=nil, peptide_count_filename=nil)
215
- prot_cnt = 0
216
- an_cnt = 0
217
-
218
- uniq_prots.map do |prot|
219
- tr do
220
- prot_cnt += 1
221
- gi = accession(prot._protein_name)
222
-
223
- if annotations
224
- protein_description = annotations[an_cnt]
225
- an_cnt += 1
226
- else
227
- if prot.annotation.size > 0
228
- protein_description = prot.annotation.first._protein_description
229
- else
230
- protein_description = 'NA'
231
- end
232
- end
233
- tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids])
234
- end
235
- end.join
236
- end
237
-
238
- def print_html_pieces(file, *pieces)
239
- File.open(file, "w") do |out|
240
- pieces.each do |piece|
241
- out.print piece
242
- end
243
- end
244
- end
245
-
246
- def file_info(file)
247
- "<div class=\"file_info\"><h3>Source File Information</h3>File: #{File.expand_path(file)}
248
- <br/>Last Modified: #{File.mtime(file)}
249
- <br/>Size: #{File.size(file)/1000} KB
250
- </div>"
251
- end
252
-
253
- def bioworks_script_info(obj)
254
- version = "3.2??"
255
- if obj.version
256
- version = obj.version
257
- end
258
- script_info{"Bioworks version #{version}"}
259
- end
260
-
261
- def protproph_script_info
262
- begin
263
- where = `which xinteract`
264
- reply = `#{where}`
265
- rescue Exception
266
- reply = ""
267
- end
268
- prophet = "TPP (version unknown)" # put your version here if you can't get it dynamically
269
- if reply =~ /xinteract.*?\((TPP .*)\)/
270
- prophet = $1.dup
271
- end
272
- script_info { "ProteinProphet from: #{prophet}" }
273
- end
274
-
275
- def mspire_version
276
- string = "mspire"
277
- begin
278
- if `gem list --local mspire` =~ /mspire \((.*?)\)/
279
- string << (" v" + $1)
280
- end
281
- rescue Exception
282
- end
283
- string
284
- end
285
-
286
- def script_info
287
- "<div class=\"software\"><h3>Software Information</h3>#{yield}<br/>Ruby package: #{mspire_version}<br/>Command: #{[File.basename(__FILE__), *@orig_argv].join(" ")}</div>"
288
- end
289
-
290
- def proph_output(file, outfn, opt, fppr_output_as_html)
291
- header_anchors = [at('#', 'number'), at('prob','protein probability (for Prophet, higher is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (includes non-contributing peptides). Click number to show/hide'), at('#peps', 'total number of corresponding peptides that contributed to protein probability'), at('%ids', 'fraction of correct dataset peptide identifications corresponding to protein')]
292
- num_cols = header_anchors.size
293
- theaders = ths(header_anchors)
294
-
295
- root = AXML.parse_file(file)
296
- prots = []
297
- ## find the min_prob at a fppr of XX
298
- min_prob_redline = 1.01 # if no fppr is less than what they give, then all are redlined!
299
-
300
- if opt.c
301
- actual_percent_fp = opt.c.to_f
302
- elsif opt.cut_at
303
- actual_percent_fp = opt.cut_at.to_f
304
- else
305
- actual_percent_fp = nil
306
- end
307
- root.protein_group.each do |group|
308
- group.protein.each do |prt|
309
- prots << prt
310
- end
311
- end
312
- uniq_prots = prots.hash_by(:_protein_name).map{|name,prot_arr| prot_arr.first }
313
- filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f, opt.prefix)
314
-
315
- ## num proteins above cutoff (if opt.c)
316
- num_prots_html = ''
317
- if opt.c || opt.cut_at
318
- (num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp)
319
- num_prots_html = num_prots_to_html(actual_percent_fp, actual_fppr, num_prots)
320
- end
321
- if opt.cut_at
322
- filtered_sorted_prots = filtered_sorted_prots[0,num_prots]
323
- end
324
-
325
- output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count
326
-
327
- # get an array of annotations (or nil if no option)
328
- annotations =
329
- if opt.get_annotation
330
- gis = filtered_sorted_prots.map {|prot| accession(prot._protein_name) }
331
- GI.gi2annot(gis)
332
- end
333
-
334
- table_string = table do
335
- tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, annotations, opt.peptide_count)
336
- end
337
- er_info = opt.precision ? error_info(file) : ""
338
- html_pieces = [outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer]
339
- print_html_pieces(*html_pieces)
340
- end # proph_output
341
-
342
- # given a list of peptide sequences creates javascript to hide/show them
343
- def peptide_cell(prot_num, peptide_sequences)
344
- "<a href=\"#prot#{prot_num}\" onclick=\"toggle_vis('#{prot_num}');\">#{peptide_sequences.size}</a><div id=\"#{prot_num}\" style=\"display:none;\">#{peptide_sequences.join(', ')}</div>"
345
- end
346
-
347
- # takes spec_id object
348
- # the outfn is the output filename
349
- # false_flag_re (optional) is a regex; proteins whose reference matches it are skipped
350
- def bioworks_output(spec_id, outfn, file=nil, false_flag_re=nil, fppr_output_as_html=nil)
351
- fppr_output_as_html ||= ''
352
- header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')]
353
- num_cols = header_anchors.size
354
- theaders = ths(header_anchors)
355
- proteins = spec_id.prots
356
- protein_num = 0
357
- rows = ""
358
- proteins.each do |prot|
359
- if false_flag_re && prot.reference =~ false_flag_re
360
- next
361
- end
362
- uniq_peps = Hash.new {|h,k| h[k] = true; }
363
- protein_num += 1
364
- prot.peps.each do |pep|
365
- uniq_peps[pep.sequence.split('.')[1]] = true
366
- end
367
- pieces = prot.reference.split(' ')
368
- long_prot_name = pieces.shift
369
- annotation = pieces.join(' ')
370
- accession = prot.accession
371
- if accession == '0' ; accession = long_prot_name end
372
- rows << tr{ tds([protein_num, prot.protein_probability, ref_html(accession, long_prot_name), annotation, prot.coverage, peptide_cell(protein_num, uniq_peps.keys), prot.peps.size]) }
373
- end
374
- table_string = table do
375
- tr{theaders} + rows
376
- end
377
- print_html_pieces(outfn, header, fppr_output_as_html, file_info(file), bioworks_script_info(spec_id), table_string, trailer)
378
- end # bioworks_output
379
-
380
- def num_prots_to_html(desired_cutoff, actual_cutoff, num_proteins)
381
- actual_cutoff = sprintf("%.3f", actual_cutoff)
382
- desired_cutoff = sprintf("%.3f", desired_cutoff)
383
- "<div class=\"num_proteins\"><h3>False Positive Predictive Rate [ FP/(TP+FP) ]</h3>
384
- Desired FPPR: #{desired_cutoff} %<br/>
385
- Actual FPPR: #{actual_cutoff} %<br/>
386
- Number of Proteins at Actual FPPR: #{num_proteins}
387
- </div>"
388
- end
389
-
390
- # transforms the output string of file_as_decoy into html
391
- def file_as_decoy_to_html(string)
392
- lines = string.split("\n")
393
- #puts lines ?? is this supposed to be commented out?
394
- lines = lines.reject do |obj| obj =~ /\*{10}/ end
395
- lines.map! do |line| "#{line}<br/>" end
396
- "<div class=\"fppr\">
397
- <h3>Classification Analysis</h3>
398
- #{lines.join("\n")}
399
- </div>"
400
- end
401
-
402
- # prepends a "Classification Analysis" heading div to the given string
403
- def prefix_as_decoy_to_html(string)
404
- "<div class=\"fppr\">
405
- <h3>Classification Analysis</h3>
406
- </div>" +
407
- string
408
- end
409
-
410
- def create_from_command_line_args(argv)
411
- @orig_argv = argv.dup
412
-
413
- opt = OpenStruct.new
414
- opt.f = DEF_PREFIX
415
- opts = OptionParser.new do |op|
416
- op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
417
- op.separator " where file = bioworks -or- <run>-prot (prophet output)"
418
- op.separator " outputs: <file>.summary.html"
419
- op.separator ""
420
- op.on("-f", "--false <prefix>", "ignore proteins with flag (def: #{DEF_PREFIX})") {|v| opt.f = v }
421
- op.on("--prefix", "false flag for prefixes only") {|v| opt.prefix = v }
422
- op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = v }
423
- op.separator(" if --precision then -f is used to specify a file or prefix")
424
- op.separator(" that indicates the false positives.")
425
- op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
426
- op.separator ""
427
- op.separator "Options for #{PRECISION_PROGRAM_BASE}.rb :"
428
- op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
429
- op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
430
- op.separator ""
431
- op.separator "specific to ProteinProphet (with no concatenated DB):"
432
- op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
433
- op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
434
- op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v}
435
- op.separator " (use if your proteins have gi's but no annotation) "
436
- end
437
-
438
- opts.parse!(argv)
439
-
440
- if argv.size < 1
441
- puts opts
442
- return
443
- end
444
-
445
- fppr_output_as_html = ''
446
- files = argv.to_a
447
- files.each do |file|
448
- outfn = file.sub(/\.xml$/, '.summary.html')
449
- outfn = outfn.sub(/\.srg$/, '.summary.html')
450
- ## False Positive Rate Calculation:
451
- if opt.precision
452
- opt.o = outfn # won't actually be written over, but used
453
- to_use_argv = create_precision_argv(file, opt)
454
- (out_string, opt) = Prec.new.precision(to_use_argv)
455
- fppr_output_as_html = prefix_as_decoy_to_html(out_string)
456
- end
457
-
458
- case SpecID.file_type(file)
459
- when "protproph"
460
- #spec_id = SpecID.new(file)
461
- proph_output(file, outfn, opt, fppr_output_as_html)
462
- when "bioworks"
463
- spec_id = SpecID.new(file)
464
-
465
- false_regex = flag_to_regex(opt.f, opt.prefix)
466
- bioworks_output(spec_id, outfn, file, false_regex, fppr_output_as_html)
467
- else
468
- abort "filetype for #{file} not recognized!"
469
- end
470
- end
471
-
472
- end # method create_from_command_line
473
-
474
- def create_precision_argv(file, opt)
475
- # include only those options specific
476
- new_argv = [file]
477
- if opt.prefix ; new_argv << '--prefix' end
478
- if opt.f ; new_argv << '-f' << opt.f end
479
- if opt.o ; new_argv << '-o' << opt.o end
480
- new_argv
481
- end
482
-
483
- end # ProteinSummary
484
-
485
- ##################################################################
486
- # MAIN
487
- ##################################################################
488
-
489
-