mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -2,22 +2,25 @@
2
2
  require 'optparse'
3
3
  require 'ostruct'
4
4
  require 'generator'
5
- require 'gnuplot'
6
5
  require 'roc'
7
6
 
7
+ ## silence this bad boy
8
+ tmp = $VERBOSE ; $VERBOSE = nil
9
+ require 'gnuplot'
10
+ $VERBOSE = tmp
11
+
8
12
# Reopens String to add the #margin helper used when building indented,
# multi-line string literals.
class String
  # Returns a copy with every line-leading run of whitespace that is
  # followed by a "|" removed, together with that "|".
  def margin
    gsub(/^\s*\|/, '')
  end
end
13
17
 
14
- class SpecID ; end
15
- class SpecID::Precision ; end
18
+ class Prec ; end
16
19
 
17
- module SpecID::Precision::PlotHelper
20
+ module Prec::PlotHelper
18
21
 
19
22
  PLOT_TYPE = 'XYData'
20
- TITLE = 'Precision (Positive Predictive Value)'
23
+ TITLE = 'Precision vs. Num Hits [ Precision = Positive Predictive Value = TP/(TP+FP) ]'
21
24
  XAXIS = 'Num Hits (excludes known false positives)'
22
25
  EXT = '.toplot'
23
26
  IMAGE_EXT = '.png'
@@ -26,6 +29,8 @@ module SpecID::Precision::PlotHelper
26
29
  ## CREATE the PLOT IMAGE:
27
30
  to_plot = filename_noext + EXT
28
31
  png = filename_noext + IMAGE_EXT
32
+
33
+
29
34
  File.open(to_plot,'w') do |out|
30
35
  out.puts PLOT_TYPE
31
36
  out.puts filename_noext
@@ -55,6 +60,8 @@ module SpecID::Precision::PlotHelper
55
60
  ## CREATE the PLOT IMAGE:
56
61
  to_plot = filename_noext+'.toplot'
57
62
  png = filename_noext+'.png'
63
+
64
+ tmp = $VERBOSE ; $VERBOSE = nil
58
65
  Gnuplot.open do |gp|
59
66
  Gnuplot::Plot.new( gp ) do |plot|
60
67
  plot.terminal "png noenhanced"
@@ -76,6 +83,7 @@ module SpecID::Precision::PlotHelper
76
83
  end
77
84
  end
78
85
  end
86
+ $VERBOSE = tmp
79
87
 
80
88
  ## CREATE the HTML to load the plot:
81
89
  basename_filename_noext = File.basename(filename_noext)
@@ -88,7 +96,7 @@ module SpecID::Precision::PlotHelper
88
96
 
89
97
  end
90
98
 
91
- module SpecID::Precision::HTML
99
+ module Prec::HTML
92
100
 
93
101
  # html and body tags
94
102
  def html
@@ -173,23 +181,22 @@ module SpecID::Precision::HTML
173
181
  end
174
182
  end # module HTML
175
183
 
176
- class SpecID::Precision
177
- include SpecID::Precision::PlotHelper
184
+ class Prec
185
+ include Prec::PlotHelper
178
186
 
179
187
  ###########################################################
180
188
  # GLOBAL SETTINGS:
181
- DEF_PREFIX = "INV_"
182
189
  DATA_PREC = 4 # decimal places of precision for ppv data
183
190
  STDOUT_JTPLOT_BASE = "ppv" # if there is no outfile
184
191
  ###########################################################
185
192
 
186
- include SpecID::Precision::HTML
193
+ include Prec::HTML
187
194
 
188
195
  ## returns an html string
189
196
  def precision(argv)
190
197
  opt = parse_args(argv)
191
198
  files = argv.to_a
192
- out_string = prefix_as_decoy(files, opt)
199
+ out_string = create_precision_data(files, opt)
193
200
  [out_string, opt]
194
201
  end
195
202
 
@@ -270,24 +277,6 @@ Example:
270
277
  end
271
278
 
272
279
 
273
- # takes a comma separated list and extends the last to create an array of
274
- # desired size
275
- def prefixes(arg, desired_size)
276
- arg_arr = arg.split(',')
277
- new_arr = []
278
- last_arg = arg_arr[0]
279
- desired_size.times do |i|
280
- if arg_arr[i]
281
- new_arr[i] = arg_arr[i]
282
- last_arg = new_arr[i]
283
- else
284
- new_arr[i] = last_arg
285
- end
286
- end
287
- new_arr
288
- end
289
-
290
-
291
280
  ## collapses arrays to one level deep so we can sync them up
292
281
  def arrays_to_one_level_deep(all_arrs)
293
282
  mostly_flat = []
@@ -352,7 +341,7 @@ Example:
352
341
  def y_axis_label(key)
353
342
  ## We only take the keys for the first file, as it's assumed that the major
354
343
  ## labels will be identical for all of them
355
- labels = key.first.map {|tp| tp.first }
344
+ labels = key.first.map {|tp| tp.first }.uniq
356
345
  labels.join " | "
357
346
  end
358
347
 
@@ -367,11 +356,14 @@ Example:
367
356
  new_string
368
357
  end
369
358
 
370
- def prefix_as_decoy(files, opt)
371
- $stderr.puts "using prefix #{opt.f} ..."
359
+ # if opt.f, then a prefix is assumed.
360
+ # if a file =~ /-prot.xml$/ then a precision plot based on probability is
361
+ # also created
362
+ def create_precision_data(files, opt)
363
+ #$stderr.puts "using prefix #{opt.f} ..."
372
364
 
373
365
  if opt.f
374
- prefix_arr = prefixes(opt.f, files.size)
366
+ prefix_arr = SpecID.extend_args(opt.f, files.size)
375
367
  end
376
368
  all_arrs = []
377
369
  key = []
@@ -384,12 +376,13 @@ Example:
384
376
  if opt.f
385
377
  (num_hits, ppv) = sp.num_hits_and_ppv_for_prob(prefix_arr[i])
386
378
  all_arrs[i] << [num_hits,ppv]
387
- key[i] << ["Precision", ["#TP", "Prec = TP/(TP+FP)"]]
388
- else
379
+ key[i] << ["Precision", ["# hits", "Prec (decoy)"]]
380
+ end
381
+ if file =~ /-prot\.xml$/
389
382
  ## These are just from protein prophet probabilities:
390
383
  (num_hits, ppv) = sp.num_hits_and_ppv_for_protein_prophet_probabilities
391
384
  all_arrs[i] << [num_hits,ppv]
392
- key[i] << ["Precision", ["#TP", "Prec = TP/(TP+FP)"]]
385
+ key[i] << ["Precision", ["# hits", "Prec (prob)"]]
393
386
  end
394
387
  end
395
388
 
data/lib/spec_id/proph.rb CHANGED
@@ -5,7 +5,6 @@ require 'instance_var_set_from_hash'
5
5
  require 'axml'
6
6
  require 'spec_id'
7
7
 
8
- class SpecID
9
8
  class Proph
10
9
 
11
10
 
@@ -20,6 +19,8 @@ end
20
19
 
21
20
 
22
21
  class ProtSummary
22
+ include SpecID
23
+
23
24
  attr_writer :prots
24
25
  attr_accessor :prot_groups
25
26
 
@@ -102,7 +103,8 @@ class ProtGroup
102
103
  end
103
104
  end
104
105
 
105
- class Prot < SpecID::Prot
106
+ class Prot
107
+ include SpecID::Prot
106
108
 
107
109
  ## probability and reference accessors are inherited
108
110
  attr_accessor :peps, :protein_name, :cutoff, :group_sibling_id, :n_indistinguishable_proteins, :percent_coverage, :unique_stripped_peptides, :total_number_peptides, :pct_spectrum_ids, :description
@@ -137,6 +139,7 @@ class Prot < SpecID::Prot
137
139
  end # class Prot
138
140
 
139
141
  class Pep
142
+ include SpecID::Pep
140
143
 
141
144
  attr_accessor :sequence, :probability, :filenames, :charge, :precursor_neutral_mass, :nsp_cutoff, :scans
142
145
  attr_writer :arithmetic_avg_scan_by_parent_time
@@ -458,4 +461,3 @@ end # Prot::Parser
458
461
  ################ --END
459
462
 
460
463
  end # Proph
461
- end # SpecID
@@ -0,0 +1,459 @@
1
+
2
+
3
+ require 'axml'
4
+ require 'hash_by'
5
+ require 'optparse'
6
+ require 'ostruct'
7
+ require 'spec_id'
8
+ require 'spec_id/precision'
9
+
10
+ #############################################################
11
+ # GLOBALS:
12
+ PRECISION_PROGRAM_BASE = 'precision'
13
+ DEF_PREFIX = "INV_"
14
+ DEF_PERCENT_FP = "5.0"
15
+ #############################################################
16
+
17
+
18
+ # @TODO: add group probability title (showing all group probabilities) for protein prob
19
+
20
+ #class String
21
+ # def margin
22
+ # self.gsub(/^\s*\|/,'')
23
+ # end
24
+ #end
25
+
26
+
27
# Reopens ProteinSummary to define the HTML-fragment helpers that the
# summary page is assembled from.
class ProteinSummary
  # String builders for the pieces of the summary page. #tr and #table rely
  # on String#margin, which is defined outside this block.
  module HTML
    # Returns the opening markup of the page: <html>, a <head> wrapping the
    # stylesheet from #style, the opening <body> tag and the toggle_vis()
    # javascript used to show/hide elements by id.
    def header
      # The <head> tag is now properly closed; previously it read "<head"
      # which merged the tag with the following <style> element.
      %Q{<html>
<head>
#{style}
</head>
<body>
<script type="text/javascript">
<!--
function toggle_vis(id) {
var e = document.getElementById(id);
if(e.style.display == 'none')
e.style.display = 'block';
else
e.style.display = 'none';
}
//-->
</script>
}
    end

    # Returns the inline <style> element for the page. Horizontal cell
    # padding carries an explicit px unit (a bare "5" is not a valid CSS
    # length).
    def style
      '
<style type="text/css">
table {
border-width:1px;
border-color:#DDDDDD;
border-collapse: collapse;
}
td,th {
padding-top: 2px;
padding-bottom: 2px;
padding-left: 5px;
padding-right: 5px;
}
td.redline {
background-color: #FF0000;
color: #FFFFFF
}
div.file_info, div.software, div.fppr, div.num_proteins{
margin-left: 20px;
margin-top: 20px;
}
div.main {
margin-left: 10px;
margin-right: 10px;
margin-top: 50px;
margin-bottom: 50px;
}
div#error {
margin: 30px;
text-align:center
}
hr {color: sienna}
body { font-size: 8pt; font-family: Arial,Helvetica,Times}
</style>
'
    end

    # An anchor showing +display+ with +title+ as its hover text.
    def at(display, title)
      "<a title=\"#{title}\">#{display}</a>"
    end

    # Returns the closing </body> and </html> tags.
    def trailer
      %q{
</body>
</html>
}
    end

    # Wraps the block's return value in a <tr> element.
    def tr
      "|<tr>
      | #{yield}
      |</tr>\n".margin
    end

    # Wraps the block's return value in the centered main <table>.
    def table
      "|<div class=\"main\"><table align=\"center\" border=\"1\" style=\"font-size:100%\" width=\"800px\">
      | #{yield}
      |</table></div>\n".margin
    end

    # One <td> cell per element of +arr+, concatenated.
    def tds(arr)
      arr.map { |v| "<td>#{v}</td>" }.join
    end

    # One <th> cell per element of +arr+, concatenated and newline-terminated.
    def ths(arr)
      str = arr.map { |v| "<th>#{v}</th>" }.join
      str << "\n"
    end
  end
end
122
+
123
+
124
+ class ProteinSummary
125
+
126
+ include ProteinSummary::HTML
127
+
128
# Builds a link to the NCBI Entrez protein viewer for +gi+; +name+ becomes
# the link's hover title and +gi+ its visible text.
def ref_html(gi, name)
  url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=#{gi}"
  %Q{<a href="#{url}" title="#{name}">#{gi}</a>}
end
131
+
132
# Takes the -prot.xml filename and returns a <div id="error"> holding an
# <img> that points at the matching .png (same basename); the alt text
# tells the user where to put the image if it is missing.
#
# Only a trailing ".xml" extension is swapped for ".png"; other
# occurrences of ".xml" inside the name are left alone.
def error_info(prot_file_name)
  img_bn = File.basename(prot_file_name.sub(/\.xml\z/, '.png'))
  "<div id=\"error\"><img src=\"#{img_bn}\" alt=\"[ Optional: To view error/sensitivity image, put #{img_bn} in the same directory as #{File.basename(prot_file_name)} ]\"/>\n</div>"
end
138
+
139
# Attempts to pull the NCBI gi number out of a protein name: for names that
# begin with "gi|" the second "|"-separated field is returned, any other
# name is returned unchanged.
def accession(name)
  return name unless name[0, 3] == 'gi|'
  name.split('|')[1]
end
147
+
148
# Turns a literal prefix into a regexp anchored at line start (the prefix
# is escaped, so regexp metacharacters match literally). Returns nil when
# no prefix is given.
def prefix_to_regex(prefix)
  return nil unless prefix
  /^#{Regexp.escape(prefix)}/
end
155
+
156
# Writes a tab-delimited text file to +filename+ with one line per protein:
# the protein name, a tab, and its total number of peptides.
def output_peptide_counts_file(prots, filename)
  File.open(filename, "w") do |out|
    prots.each do |entry|
      out.puts([entry._protein_name, entry._total_number_peptides].join("\t"))
    end
  end
end
165
+
166
# Orders proteins from highest to lowest by [protein probability, parent
# probability] and, when +prefix+ is given, drops every protein whose name
# starts with that prefix (the false-positive marker).
def filter_and_sort(uniq_prots, prefix=nil)
  decoy_re = prefix_to_regex(prefix)
  ranked = uniq_prots.sort_by { |prt| [prt._probability, prt.parent._probability] }.reverse
  return ranked unless prefix
  ranked.reject { |prot| prot._protein_name =~ decoy_re }
end
176
+
177
# Finds how many of the top proteins can be accepted while keeping the
# false positive predictive rate (FPPR) at or below +desired_fppr+.
#
# The FPPR (in percent) over the first n proteins is
# SUM(1 - prob) / n * 100.
#
# prots::        proteins responding to _probability; assumed to be sorted
#                from most to least probable
# desired_fppr:: the cutoff, as a float percentage
#
# Returns [number_of_prots, actual_fppr], where actual_fppr is the rate
# over exactly those number_of_prots proteins. The protein that first
# pushes the rate above the cutoff is not counted. If the cutoff is never
# exceeded, every protein is counted and the final rate is returned
# (previously this case yielded [0, nil]). An empty list gives [0, 0.0].
def num_prots_above_fppr(prots, desired_fppr)
  sum_one_minus_prob = 0.0
  accepted = 0
  accepted_fppr = 0.0 # rate over the proteins accepted so far
  prots.each do |prot|
    sum_one_minus_prob += 1.0 - prot._probability.to_f
    candidate_fppr = (sum_one_minus_prob / (accepted + 1)) * 100
    # stop before counting the protein that breaks the cutoff
    break if candidate_fppr > desired_fppr
    accepted += 1
    accepted_fppr = candidate_fppr
  end
  [accepted, accepted_fppr]
end
203
+
204
+ #### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
205
+
206
# Returns a single string holding one <tr> per protein in +uniq_prots+.
# Each row lists: running number, probability, linked reference, first
# annotation's description, percent coverage, the show/hide peptide cell,
# total number of peptides and pct spectrum ids.
#
# Only +uniq_prots+ is read; the remaining parameters are accepted but not
# used by this method.
def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, peptide_count_filename=nil)
  row_number = 0
  rows = uniq_prots.map do |prot|
    tr do
      row_number += 1
      gi = accession(prot._protein_name)
      cells = [
        row_number,
        prot._probability,
        ref_html(gi, prot._protein_name),
        prot.annotation.first._protein_description,
        prot._percent_coverage,
        peptide_cell(row_number, prot._unique_stripped_peptides.split('+')),
        prot._total_number_peptides,
        prot._pct_spectrum_ids
      ]
      tds(cells)
    end
  end
  rows.join
end
219
+
220
# Writes every piece, in the order given, to +file+ (opened for writing,
# so existing contents are replaced).
def print_html_pieces(file, *pieces)
  File.open(file, "w") do |out|
    pieces.each { |fragment| out.print fragment }
  end
end
227
+
228
# Returns a <div class="file_info"> describing the source file: its
# absolute path, last modification time and size (bytes / 1000, shown as
# KB).
#
# Returns an empty string when +file+ is nil, since bioworks_output
# declares its file argument as optional and forwards it here.
def file_info(file)
  return '' if file.nil?
  "<div class=\"file_info\"><h3>Source File Information</h3>File: #{File.expand_path(file)}
<br/>Last Modified: #{File.mtime(file)}
<br/>Size: #{File.size(file)/1000} KB
</div>"
end
234
+
235
# Software-information section for Bioworks input. Uses obj.version when
# it is set and falls back to the placeholder "3.2??" otherwise.
def bioworks_script_info(obj)
  version = obj.version ? obj.version : "3.2??"
  script_info { "Bioworks version #{version}" }
end
242
+
243
# Software-information section for ProteinProphet input.
#
# Locates xinteract with `which`, runs it without arguments and looks for
# "xinteract ... (TPP ...)" in its output to report the TPP version. Falls
# back to "TPP (version unknown)" when xinteract is not found, cannot be
# run, or prints nothing recognizable.
def protproph_script_info
  reply =
    begin
      where = `which xinteract`.strip
      # nothing to run when `which` came back empty
      where.empty? ? "" : `#{where}`
    rescue StandardError
      # best effort only: a missing or unrunnable binary is not an error
      # here (signals and exit requests are no longer swallowed)
      ""
    end
  prophet = "TPP (version unknown)" # put your version here if you can't get it dynamically
  if reply =~ /xinteract.*?\((TPP .*)\)/
    prophet = $1.dup
  end
  script_info { "ProteinProphet from: #{prophet}" }
end
256
+
257
# Returns "mspire v<version>" using the version reported by
# `gem list --local mspire`, or plain "mspire" when the gem command fails
# or does not list the package.
def mspire_version
  listing =
    begin
      `gem list --local mspire`
    rescue StandardError
      # best effort: e.g. the gem executable is not on the PATH
      ""
    end
  # a new string is built on every call; no string literal is mutated
  listing =~ /mspire \((.*?)\)/ ? "mspire v#{$1}" : "mspire"
end
267
+
268
# Returns the <div class="software"> section: the block's text, the mspire
# package version and the command line (this file's basename followed by
# the arguments saved in @orig_argv).
def script_info
  program_text = yield
  package = mspire_version
  command_line = [File.basename(__FILE__), *@orig_argv].join(" ")
  "<div class=\"software\"><h3>Software Information</h3>#{program_text}<br/>Ruby package: #{package}<br/>Command: #{command_line}</div>"
end
271
+
272
# Writes the summary page for a ProteinProphet (-prot.xml) file to +outfn+.
#
# file::                 the -prot.xml file, parsed with AXML
# outfn::                the html file to write
# opt::                  options; reads opt.c, opt.cut_at, opt.f,
#                        opt.peptide_count and opt.precision
# fppr_output_as_html::  html placed right after the page header
#
# Proteins from all groups are collected, reduced to one entry per protein
# name, then filtered and sorted. With opt.c or opt.cut_at an FPPR summary
# is added; with opt.cut_at the list is also truncated to the reported
# number of proteins.
def proph_output(file, outfn, opt, fppr_output_as_html)
  header_anchors = [
    at('#', 'number'),
    at('prob','protein probability (for Prophet, higher is better)'),
    at('ref', 'gi number if available (or complete reference)'),
    at('annotation', 'annotation from the fasta file'),
    at('%cov', 'percent of protein sequence covered by corresponding peptides'),
    at('peps', 'unique peptides identified (includes non-contributing peptides). Click number to show/hide'),
    at('#peps', 'total number of corresponding peptides that contributed to protein probability'),
    at('%ids', 'fraction of correct dataset peptide identifications corresponding to protein')
  ]
  num_cols = header_anchors.size
  theaders = ths(header_anchors)

  root = AXML.parse_file(file)

  # the cutoff percentage: opt.c wins over opt.cut_at; nil when neither is set
  actual_percent_fp =
    if opt.c
      opt.c.to_f
    elsif opt.cut_at
      opt.cut_at.to_f
    end

  prots = []
  root.protein_group.each do |group|
    group.protein.each { |prt| prots << prt }
  end
  uniq_prots = prots.hash_by(:_protein_name).map { |_name, prot_arr| prot_arr.first }
  filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f)

  ## num proteins above cutoff (if opt.c or opt.cut_at)
  num_prots_html = ''
  if opt.c || opt.cut_at
    (num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp)
    num_prots_html = num_prots_to_html(actual_percent_fp, actual_fppr, num_prots)
  end
  filtered_sorted_prots = filtered_sorted_prots[0, num_prots] if opt.cut_at

  output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count

  table_string = table do
    tr { theaders } + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, opt.peptide_count)
  end
  er_info = opt.precision ? error_info(file) : ""
  print_html_pieces(outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer)
end # proph_output
316
+
317
# Given a list of peptide sequences, returns a link showing how many there
# are plus an initially hidden <div> listing them; clicking the link calls
# the page's toggle_vis() with +prot_num+ as the div's id.
def peptide_cell(prot_num, peptide_sequences)
  toggle_link = "<a href=\"#prot#{prot_num}\" onclick=\"toggle_vis('#{prot_num}');\">#{peptide_sequences.size}</a>"
  hidden_list = "<div id=\"#{prot_num}\" style=\"display:none;\">#{peptide_sequences.join(', ')}</div>"
  toggle_link + hidden_list
end
321
+
322
# Writes the summary page for Bioworks results to +outfn+.
#
# spec_id::              object whose #prots are listed (also handed to
#                        bioworks_script_info)
# outfn::                the output filename
# file::                 the source file, passed to file_info
# false_prefix::         proteins whose reference starts with this prefix
#                        are left out
# fppr_output_as_html::  html placed right after the page header ('' if nil)
#
# Each row shows: running number, protein probability, linked reference,
# annotation (the reference after its first word), coverage, the unique
# peptide sequences and the number of peptide hits.
def bioworks_output(spec_id, outfn, file=nil, false_prefix=nil, fppr_output_as_html=nil)
  fppr_output_as_html ||= ''
  header_anchors = [
    at('#', 'number'),
    at('prob','protein probability (for Bioworks, lower is better)'),
    at('ref', 'gi number if available (or complete reference)'),
    at('annotation', 'annotation from the fasta file'),
    at('%cov', 'percent of protein sequence covered by corresponding peptides'),
    at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'),
    at('#peps', 'total number of peptides seen (not unique)')
  ]
  theaders = ths(header_anchors)
  proteins = spec_id.prots
  decoy_re = prefix_to_regex(false_prefix)

  rows = ""
  protein_num = 0
  proteins.each do |prot|
    next if false_prefix && prot.reference =~ decoy_re
    protein_num += 1
    # the middle part of "X.SEQUENCE.X", first occurrence of each kept
    stripped_seqs = prot.peps.map { |pep| pep.sequence.split('.')[1] }.uniq
    long_prot_name, *annotation_words = prot.reference.split(' ')
    annotation = annotation_words.join(' ')
    accession = prot.accession
    # an accession of '0' means none was found; show the full name instead
    accession = long_prot_name if accession == '0'
    rows << tr { tds([protein_num, prot.protein_probability, ref_html(accession, long_prot_name), annotation, prot.coverage, peptide_cell(protein_num, stripped_seqs), prot.peps.size]) }
  end

  table_string = table { tr { theaders } + rows }
  print_html_pieces(outfn, header, fppr_output_as_html, file_info(file), bioworks_script_info(spec_id), table_string, trailer)
end # bioworks_output
355
+
356
# Returns the <div class="num_proteins"> section reporting the desired
# FPPR, the actual FPPR (both with three decimals) and the number of
# proteins at the actual FPPR.
#
# actual_cutoff may be nil (no rate available); it is then shown as "n/a"
# instead of raising from sprintf.
def num_prots_to_html(desired_cutoff, actual_cutoff, num_proteins)
  desired = sprintf("%.3f", desired_cutoff)
  actual = actual_cutoff.nil? ? "n/a" : "#{sprintf('%.3f', actual_cutoff)} %"
  "<div class=\"num_proteins\"><h3>False Positive Predictive Rate [ FP/(TP+FP) ]</h3>
Desired FPPR: #{desired} %<br/>
Actual FPPR: #{actual}<br/>
Number of Proteins at Actual FPPR: #{num_proteins}
</div>"
end
365
+
366
# Transforms the output string of file_as_decoy into html: lines containing
# a run of ten asterisks are dropped, every remaining line gets a trailing
# <br/>, and the result is wrapped in the "Classification Analysis"
# <div class="fppr"> section.
def file_as_decoy_to_html(string)
  kept = string.split("\n").reject { |line| line =~ /\*{10}/ }
  body = kept.map { |line| "#{line}<br/>" }.join("\n")
  "<div class=\"fppr\">\n<h3>Classification Analysis</h3>\n#{body}\n</div>"
end
377
+
378
# Puts the "Classification Analysis" heading section in front of +string+,
# which is appended unchanged (it is expected to be html already).
def prefix_as_decoy_to_html(string)
  heading = "<div class=\"fppr\">\n<h3>Classification Analysis</h3>\n</div>"
  heading + string
end
385
+
386
# Command-line entry point: parses +argv+ and writes a <file>.summary.html
# for every remaining file argument.
#
# The original arguments are kept in @orig_argv (shown on the page by
# script_info). With no file arguments the usage text is printed and nil is
# returned. For each file the output name is derived by replacing a
# trailing .xml or .srg with .summary.html; the file type reported by
# SpecID.file_type selects proph_output or bioworks_output, and an
# unrecognized type aborts.
#
# Fixes relative to the previous version:
# * -p now really enables the precision output. It used to set only opt.p,
#   which nothing reads, while the long form --precision set opt.precision.
# * The option struct returned by Prec#precision no longer replaces +opt+.
#   Overwriting it discarded -c, --cut_at, --peptide_count and --precision
#   for the output step and for every following file.
def create_from_command_line_args(argv)
  @orig_argv = argv.dup

  opt = OpenStruct.new
  opt.f = DEF_PREFIX
  opts = OptionParser.new do |op|
    op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
    op.separator " where file = bioworks -or- <run>-prot (prophet output)"
    op.separator " outputs: <file>.summary.html"
    op.separator ""
    op.on("-f", "--false <prefix>", "ignore proteins with prefix (def: #{DEF_PREFIX})") {|v| opt.f = v }
    op.on("-p", "--precision", "include the output from precision.rb") do |v|
      opt.p = v
      # keep the short form in step with --precision below
      opt.precision = v
    end
    op.separator(" if --precision then -f is used to specify a file or prefix")
    op.separator(" that indicates the false positives.")
    op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
    op.separator ""
    op.separator "Options for #{PRECISION_PROGRAM_BASE}.rb :"
    op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
    op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
    op.separator ""
    op.separator "Specific to ProteinProphet (with no concatenated DB):"
    op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
    op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
  end

  opts.parse!(argv)

  if argv.size < 1
    puts opts
    return
  end

  fppr_output_as_html = ''
  files = argv.to_a
  files.each do |file|
    outfn = file.sub(/\.xml$/, '.summary.html')
    outfn = outfn.sub(/\.srg$/, '.summary.html')
    ## False Positive Rate Calculation:
    if opt.precision
      opt.o = outfn # won't actually be written over, but used
      to_use_argv = create_precision_argv(file, opt)
      # only the html is needed; our own options must stay intact
      (out_string, _precision_opt) = Prec.new.precision(to_use_argv)
      fppr_output_as_html = prefix_as_decoy_to_html(out_string)
    end

    case SpecID.file_type(file)
    when "protproph"
      proph_output(file, outfn, opt, fppr_output_as_html)
    when "bioworks"
      spec_id = SpecID.new(file)
      bioworks_output(spec_id, outfn, file, opt.f, fppr_output_as_html)
    else
      abort "filetype for #{file} not recognized!"
    end
  end

end # method create_from_command_line
444
+
445
# Builds the argument list handed to the precision program: the file,
# followed by "-f <opt.f>" and "-o <opt.o>" for whichever of the two
# options are set.
def create_precision_argv(file, opt)
  [['-f', opt.f], ['-o', opt.o]].each_with_object([file]) do |(flag, value), new_argv|
    new_argv.push(flag, value) if value
  end
end
452
+
453
+ end # ProteinSummary
454
+
455
+ ##################################################################
456
+ # MAIN
457
+ ##################################################################
458
+
459
+