mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,489 +0,0 @@
1
-
2
-
3
- require 'axml'
4
- require 'hash_by'
5
- require 'optparse'
6
- require 'ostruct'
7
- require 'spec_id'
8
- #require 'spec_id/precision' # gone now
9
- require 'gi'
10
-
11
- #############################################################
12
- # GLOBALS:
13
- PRECISION_PROGRAM_BASE = 'precision'
14
- DEF_PREFIX = "INV_"
15
- DEF_PERCENT_FP = "5.0"
16
- #############################################################
17
-
18
-
19
- # @TODO: add group probability title (showin all group probabilities) for protein prob
20
-
21
- #class String
22
- # def margin
23
- # self.gsub(/^\s*\|/,'')
24
- # end
25
- #end
26
-
27
-
28
- class ProteinSummary
29
- module HTML
30
- def header
31
- %Q{<html>
32
- <head
33
- #{style}
34
- </head>
35
- <body>
36
- <script type="text/javascript">
37
- <!--
38
- function toggle_vis(id) {
39
- var e = document.getElementById(id);
40
- if(e.style.display == 'none')
41
- e.style.display = 'block';
42
- else
43
- e.style.display = 'none';
44
- }
45
- //-->
46
- </script>
47
- }
48
- end
49
-
50
- def style
51
- '
52
- <style type="text/css">
53
- table {
54
- border-width:1px;
55
- border-color:#DDDDDD;
56
- border-collapse: collapse;
57
- }
58
- td,th {
59
- padding-top: 2px;
60
- padding-bottom: 2px;
61
- padding-left: 5;
62
- padding-right: 5;
63
- }
64
- td.redline {
65
- background-color: #FF0000;
66
- color: #FFFFFF
67
- }
68
- div.file_info, div.software, div.fppr, div.num_proteins{
69
- margin-left: 20px;
70
- margin-top: 20px;
71
- }
72
- div.main {
73
- margin-left: 10px;
74
- margin-right: 10px;
75
- margin-top: 50px;
76
- margin-bottom: 50px;
77
- }
78
- div#error {
79
- margin: 30px;
80
- text-align:center
81
- }
82
- hr {color: sienna}
83
- body { font-size: 8pt; font-family: Arial,Helvetica,Times}
84
- </style>
85
- '
86
- end
87
-
88
- # an anchor and a title
89
- def at(display, title)
90
- "<a title=\"#{title}\">#{display}</a>"
91
- end
92
-
93
- def trailer
94
- %q{
95
- </body>
96
- </html>
97
- }
98
- end
99
-
100
- def tr
101
- "|<tr>
102
- | #{yield}
103
- |</tr>\n".margin
104
- end
105
-
106
- def table
107
- "|<div class=\"main\"><table align=\"center\" border=\"1\" style=\"font-size:100%\" width=\"800px\">
108
- | #{yield}
109
- |</table></div>\n".margin
110
- end
111
-
112
- def tds(arr)
113
- arr.map {|v| "<td>#{v}</td>"}.join
114
- end
115
-
116
- def ths(arr)
117
- str = arr.map {|v| "<th>#{v}</th>"}.join
118
- str << "\n"
119
- end
120
- end
121
-
122
- end
123
-
124
-
125
- class ProteinSummary
126
-
127
- include ProteinSummary::HTML
128
-
129
- def ref_html(gi, name)
130
- "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=#{gi}\" title=\"#{name}\">#{gi}</a>"
131
- end
132
-
133
- # Takes the -prot.xml filename and grabs the png file (if available)
134
- def error_info(prot_file_name)
135
- img = prot_file_name.gsub('.xml', '.png')
136
- img_bn = File.basename(img)
137
- "<div id=\"error\"><img src=\"#{img_bn}\" alt=\"[ Optional: To view error/sensitivity image, put #{img_bn} in the same directory as #{File.basename(prot_file_name)} ]\"/>\n</div>"
138
- end
139
-
140
- # attempts to get the NCBI gi code
141
- def accession(name)
142
- if (name.include? '|') && (name[0,3] == 'gi|')
143
- name.split('|')[1]
144
- else
145
- name
146
- end
147
- end
148
-
149
- def flag_to_regex(flag, prefix=false)
150
- if flag
151
- if prefix
152
- /^#{Regexp.escape(flag)}/
153
- else
154
- /#{Regexp.escape(flag)}/
155
- end
156
- else
157
- nil
158
- end
159
- end
160
-
161
- # given a list of proteins, output a tab delimited textfile with protein
162
- # name and the total number of peptides found
163
- def output_peptide_counts_file(prots, filename)
164
- File.open(filename, "w") do |fh_out|
165
- prots.each do |prot|
166
- fh_out.puts [prot._protein_name, prot._total_number_peptides].join("\t")
167
- end
168
- end
169
- end
170
-
171
- # filters on the false positive regex and sorts by prot probability
172
- def filter_and_sort(uniq_prots, flag=nil, prefix=false)
173
- false_flag_re = flag_to_regex(flag, prefix)
174
- sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability]}.reverse
175
- ## filter on prefix
176
- if prefix
177
- sorted = sorted.reject {|prot| prot._protein_name =~ false_flag_re }
178
- end
179
- sorted
180
- end
181
-
182
- # assumes that these are sorted on probability
183
- # desired_fppr is a float
184
- # returns [number_of_prots, actual_fppr]
185
- def num_prots_above_fppr(prots, desired_fppr)
186
- current_fppr_rate_percent = 0.0
187
- previous_fppr_rate_percent = 0.0
188
- current_sum_one_minus_prob = 0.0
189
- proteins_within_fppr = 0
190
- actual_fppr = nil
191
- already_found = false
192
- prot_cnt = 0
193
- prots.each do |prot|
194
- prot_cnt += 1
195
- # SUM(1-probX)/#prots
196
- current_sum_one_minus_prob += 1.0 - prot._probability.to_f
197
- current_fppr_rate_percent = (current_sum_one_minus_prob / prot_cnt) * 100
198
-
199
- if current_fppr_rate_percent > desired_fppr && !already_found
200
- actual_fppr = previous_fppr_rate_percent
201
- proteins_within_fppr = prot_cnt
202
- already_found = true
203
- end
204
- previous_fppr_rate_percent = current_fppr_rate_percent
205
- end
206
- [proteins_within_fppr, actual_fppr]
207
- end
208
-
209
- #### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
210
-
211
- # returns a string of the table rows
212
- # false_positive_rate (give as a %) is the cutoff mark
213
- # returns the number of proteins at the desired_fppr (if given)
214
- def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, annotations=nil, peptide_count_filename=nil)
215
- prot_cnt = 0
216
- an_cnt = 0
217
-
218
- uniq_prots.map do |prot|
219
- tr do
220
- prot_cnt += 1
221
- gi = accession(prot._protein_name)
222
-
223
- if annotations
224
- protein_description = annotations[an_cnt]
225
- an_cnt += 1
226
- else
227
- if prot.annotation.size > 0
228
- protein_description = prot.annotation.first._protein_description
229
- else
230
- protein_description = 'NA'
231
- end
232
- end
233
- tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids])
234
- end
235
- end.join
236
- end
237
-
238
- def print_html_pieces(file, *pieces)
239
- File.open(file, "w") do |out|
240
- pieces.each do |piece|
241
- out.print piece
242
- end
243
- end
244
- end
245
-
246
- def file_info(file)
247
- "<div class=\"file_info\"><h3>Source File Information</h3>File: #{File.expand_path(file)}
248
- <br/>Last Modified: #{File.mtime(file)}
249
- <br/>Size: #{File.size(file)/1000} KB
250
- </div>"
251
- end
252
-
253
- def bioworks_script_info(obj)
254
- version = "3.2??"
255
- if obj.version
256
- version = obj.version
257
- end
258
- script_info{"Bioworks version #{version}"}
259
- end
260
-
261
- def protproph_script_info
262
- begin
263
- where = `which xinteract`
264
- reply = `#{where}`
265
- rescue Exception
266
- reply = ""
267
- end
268
- prophet = "TPP (version unknown)" # put your version here if you can't get it dynamically
269
- if reply =~ /xinteract.*?\((TPP .*)\)/
270
- prophet = $1.dup
271
- end
272
- script_info { "ProteinProphet from: #{prophet}" }
273
- end
274
-
275
- def mspire_version
276
- string = "mspire"
277
- begin
278
- if `gem list --local mspire` =~ /mspire \((.*?)\)/
279
- string << (" v" + $1)
280
- end
281
- rescue Exception
282
- end
283
- string
284
- end
285
-
286
- def script_info
287
- "<div class=\"software\"><h3>Software Information</h3>#{yield}<br/>Ruby package: #{mspire_version}<br/>Command: #{[File.basename(__FILE__), *@orig_argv].join(" ")}</div>"
288
- end
289
-
290
- def proph_output(file, outfn, opt, fppr_output_as_html)
291
- header_anchors = [at('#', 'number'), at('prob','protein probability (for Prophet, higher is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (includes non-contributing peptides). Click number to show/hide'), at('#peps', 'total number of corresponding peptides that contributed to protein probability'), at('%ids', 'fraction of correct dataset peptide identifications corresponding to protein')]
292
- num_cols = header_anchors.size
293
- theaders = ths(header_anchors)
294
-
295
- root = AXML.parse_file(file)
296
- prots = []
297
- ## find the min_prob at a fppr of XX
298
- min_prob_redline = 1.01 # if no fppr is less than what they give, then all are redlined!
299
-
300
- if opt.c
301
- actual_percent_fp = opt.c.to_f
302
- elsif opt.cut_at
303
- actual_percent_fp = opt.cut_at.to_f
304
- else
305
- actual_percent_fp = nil
306
- end
307
- root.protein_group.each do |group|
308
- group.protein.each do |prt|
309
- prots << prt
310
- end
311
- end
312
- uniq_prots = prots.hash_by(:_protein_name).map{|name,prot_arr| prot_arr.first }
313
- filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f, opt.prefix)
314
-
315
- ## num proteins above cutoff (if opt.c)
316
- num_prots_html = ''
317
- if opt.c || opt.cut_at
318
- (num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp)
319
- num_prots_html = num_prots_to_html(actual_percent_fp, actual_fppr, num_prots)
320
- end
321
- if opt.cut_at
322
- filtered_sorted_prots = filtered_sorted_prots[0,num_prots]
323
- end
324
-
325
- output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count
326
-
327
- # get an array of annotations (or nil if no option)
328
- annotations =
329
- if opt.get_annotation
330
- gis = filtered_sorted_prots.map {|prot| accession(prot._protein_name) }
331
- GI.gi2annot(gis)
332
- end
333
-
334
- table_string = table do
335
- tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, annotations, opt.peptide_count)
336
- end
337
- er_info = opt.precision ? error_info(file) : ""
338
- html_pieces = [outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer]
339
- print_html_pieces(*html_pieces)
340
- end # proph_output
341
-
342
- # given a list of peptide sequences creates javascript to hide/show them
343
- def peptide_cell(prot_num, peptide_sequences)
344
- "<a href=\"#prot#{prot_num}\" onclick=\"toggle_vis('#{prot_num}');\">#{peptide_sequences.size}</a><div id=\"#{prot_num}\" style=\"display:none;\">#{peptide_sequences.join(', ')}</div>"
345
- end
346
-
347
- # takes spec_id object
348
- # the outfn is the output filename
349
- # opt is an OpenStruct that holds opt.f = the false prefix
350
- def bioworks_output(spec_id, outfn, file=nil, false_flag_re=nil, fppr_output_as_html=nil)
351
- fppr_output_as_html ||= ''
352
- header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')]
353
- num_cols = header_anchors.size
354
- theaders = ths(header_anchors)
355
- proteins = spec_id.prots
356
- protein_num = 0
357
- rows = ""
358
- proteins.each do |prot|
359
- if false_flag_re && prot.reference =~ false_flag_re
360
- next
361
- end
362
- uniq_peps = Hash.new {|h,k| h[k] = true; }
363
- protein_num += 1
364
- prot.peps.each do |pep|
365
- uniq_peps[pep.sequence.split('.')[1]] = true
366
- end
367
- pieces = prot.reference.split(' ')
368
- long_prot_name = pieces.shift
369
- annotation = pieces.join(' ')
370
- accession = prot.accession
371
- if accession == '0' ; accession = long_prot_name end
372
- rows << tr{ tds([protein_num, prot.protein_probability, ref_html(accession, long_prot_name), annotation, prot.coverage, peptide_cell(protein_num, uniq_peps.keys), prot.peps.size]) }
373
- end
374
- table_string = table do
375
- tr{theaders} + rows
376
- end
377
- print_html_pieces(outfn, header, fppr_output_as_html, file_info(file), bioworks_script_info(spec_id), table_string, trailer)
378
- end # bioworks_output
379
-
380
- def num_prots_to_html(desired_cutoff, actual_cutoff, num_proteins)
381
- actual_cutoff = sprintf("%.3f", actual_cutoff)
382
- desired_cutoff = sprintf("%.3f", desired_cutoff)
383
- "<div class=\"num_proteins\"><h3>False Positive Predictive Rate [ FP/(TP+FP) ]</h3>
384
- Desired FPPR: #{desired_cutoff} %<br/>
385
- Actual FPPR: #{actual_cutoff} %<br/>
386
- Number of Proteins at Actual FPPR: #{num_proteins}
387
- </div>"
388
- end
389
-
390
- # transforms the output string of file_as_decoy into html
391
- def file_as_decoy_to_html(string)
392
- lines = string.split("\n")
393
- #puts lines ?? is this supposed to be commented out?
394
- lines = lines.reject do |obj| obj =~ /\*{10}/ end
395
- lines.map! do |line| "#{line}<br/>" end
396
- "<div class=\"fppr\">
397
- <h3>Classification Analysis</h3>
398
- #{lines.join("\n")}
399
- </div>"
400
- end
401
-
402
- # transforms the output string of file_as_decoy into html
403
- def prefix_as_decoy_to_html(string)
404
- "<div class=\"fppr\">
405
- <h3>Classification Analysis</h3>
406
- </div>" +
407
- string
408
- end
409
-
410
- def create_from_command_line_args(argv)
411
- @orig_argv = argv.dup
412
-
413
- opt = OpenStruct.new
414
- opt.f = DEF_PREFIX
415
- opts = OptionParser.new do |op|
416
- op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
417
- op.separator " where file = bioworks -or- <run>-prot (prophet output)"
418
- op.separator " outputs: <file>.summary.html"
419
- op.separator ""
420
- op.on("-f", "--false <prefix>", "ignore proteins with flag (def: #{DEF_PREFIX})") {|v| opt.f = v }
421
- op.on("--prefix", "false flag for prefixes only") {|v| opt.prefix = v }
422
- op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = v }
423
- op.separator(" if --precision then -f is used to specify a file or prefix")
424
- op.separator(" that indicates the false positives.")
425
- op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
426
- op.separator ""
427
- op.separator "Options for #{PRECISION_PROGRAM_BASE}.rb :"
428
- op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
429
- op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
430
- op.separator ""
431
- op.separator "specific to ProteinProphet (with no concatenated DB):"
432
- op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
433
- op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
434
- op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v}
435
- op.separator " (use if your proteins have gi's but no annotation) "
436
- end
437
-
438
- opts.parse!(argv)
439
-
440
- if argv.size < 1
441
- puts opts
442
- return
443
- end
444
-
445
- fppr_output_as_html = ''
446
- files = argv.to_a
447
- files.each do |file|
448
- outfn = file.sub(/\.xml$/, '.summary.html')
449
- outfn = outfn.sub(/\.srg$/, '.summary.html')
450
- ## False Positive Rate Calculation:
451
- if opt.precision
452
- opt.o = outfn # won't actually be written over, but used
453
- to_use_argv = create_precision_argv(file, opt)
454
- (out_string, opt) = Prec.new.precision(to_use_argv)
455
- fppr_output_as_html = prefix_as_decoy_to_html(out_string)
456
- end
457
-
458
- case SpecID.file_type(file)
459
- when "protproph"
460
- #spec_id = SpecID.new(file)
461
- proph_output(file, outfn, opt, fppr_output_as_html)
462
- when "bioworks"
463
- spec_id = SpecID.new(file)
464
-
465
- false_regex = flag_to_regex(opt.f, opt.prefix)
466
- bioworks_output(spec_id, outfn, file, false_regex, fppr_output_as_html)
467
- else
468
- abort "filetype for #{file} not recognized!"
469
- end
470
- end
471
-
472
- end # method create_from_command_line
473
-
474
- def create_precision_argv(file, opt)
475
- # include only those options specific
476
- new_argv = [file]
477
- if opt.prefix ; new_argv << '--prefix' end
478
- if opt.f ; new_argv << '-f' << opt.f end
479
- if opt.o ; new_argv << '-o' << opt.o end
480
- new_argv
481
- end
482
-
483
- end # ProteinSummary
484
-
485
- ##################################################################
486
- # MAIN
487
- ##################################################################
488
-
489
-