mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -2,22 +2,25 @@
2
2
  require 'optparse'
3
3
  require 'ostruct'
4
4
  require 'generator'
5
- require 'gnuplot'
6
5
  require 'roc'
7
6
 
7
+ ## silence this bad boy
8
+ tmp = $VERBOSE ; $VERBOSE = nil
9
+ require 'gnuplot'
10
+ $VERBOSE = tmp
11
+
8
12
  class String
9
13
  def margin
10
14
  self.gsub(/^\s*\|/,'')
11
15
  end
12
16
  end
13
17
 
14
- class SpecID ; end
15
- class SpecID::Precision ; end
18
+ class Prec ; end
16
19
 
17
- module SpecID::Precision::PlotHelper
20
+ module Prec::PlotHelper
18
21
 
19
22
  PLOT_TYPE = 'XYData'
20
- TITLE = 'Precision (Positive Predictive Value)'
23
+ TITLE = 'Precision vs. Num Hits [ Precision = Positive Predictive Value = TP/(TP+FP) ]'
21
24
  XAXIS = 'Num Hits (excludes known false positives)'
22
25
  EXT = '.toplot'
23
26
  IMAGE_EXT = '.png'
@@ -26,6 +29,8 @@ module SpecID::Precision::PlotHelper
26
29
  ## CREATE the PLOT IMAGE:
27
30
  to_plot = filename_noext + EXT
28
31
  png = filename_noext + IMAGE_EXT
32
+
33
+
29
34
  File.open(to_plot,'w') do |out|
30
35
  out.puts PLOT_TYPE
31
36
  out.puts filename_noext
@@ -55,6 +60,8 @@ module SpecID::Precision::PlotHelper
55
60
  ## CREATE the PLOT IMAGE:
56
61
  to_plot = filename_noext+'.toplot'
57
62
  png = filename_noext+'.png'
63
+
64
+ tmp = $VERBOSE ; $VERBOSE = nil
58
65
  Gnuplot.open do |gp|
59
66
  Gnuplot::Plot.new( gp ) do |plot|
60
67
  plot.terminal "png noenhanced"
@@ -76,6 +83,7 @@ module SpecID::Precision::PlotHelper
76
83
  end
77
84
  end
78
85
  end
86
+ $VERBOSE = tmp
79
87
 
80
88
  ## CREATE the HTML to load the plot:
81
89
  basename_filename_noext = File.basename(filename_noext)
@@ -88,7 +96,7 @@ module SpecID::Precision::PlotHelper
88
96
 
89
97
  end
90
98
 
91
- module SpecID::Precision::HTML
99
+ module Prec::HTML
92
100
 
93
101
  # html and body tags
94
102
  def html
@@ -173,23 +181,22 @@ module SpecID::Precision::HTML
173
181
  end
174
182
  end # module HTML
175
183
 
176
- class SpecID::Precision
177
- include SpecID::Precision::PlotHelper
184
+ class Prec
185
+ include Prec::PlotHelper
178
186
 
179
187
  ###########################################################
180
188
  # GLOBAL SETTINGS:
181
- DEF_PREFIX = "INV_"
182
189
  DATA_PREC = 4 # decimal places of precision for ppv data
183
190
  STDOUT_JTPLOT_BASE = "ppv" # if there is no outfile
184
191
  ###########################################################
185
192
 
186
- include SpecID::Precision::HTML
193
+ include Prec::HTML
187
194
 
188
195
  ## returns an html string
189
196
  def precision(argv)
190
197
  opt = parse_args(argv)
191
198
  files = argv.to_a
192
- out_string = prefix_as_decoy(files, opt)
199
+ out_string = create_precision_data(files, opt)
193
200
  [out_string, opt]
194
201
  end
195
202
 
@@ -270,24 +277,6 @@ Example:
270
277
  end
271
278
 
272
279
 
273
- # takes a comma separated list and extends the last to create an array of
274
- # desired size
275
- def prefixes(arg, desired_size)
276
- arg_arr = arg.split(',')
277
- new_arr = []
278
- last_arg = arg_arr[0]
279
- desired_size.times do |i|
280
- if arg_arr[i]
281
- new_arr[i] = arg_arr[i]
282
- last_arg = new_arr[i]
283
- else
284
- new_arr[i] = last_arg
285
- end
286
- end
287
- new_arr
288
- end
289
-
290
-
291
280
  ## collapses arrays to one level deep so we can sync them up
292
281
  def arrays_to_one_level_deep(all_arrs)
293
282
  mostly_flat = []
@@ -352,7 +341,7 @@ Example:
352
341
  def y_axis_label(key)
353
342
  ## We only take the keys for the first file, as it's assumed that the major
354
343
  ## labels will be identical for all of them
355
- labels = key.first.map {|tp| tp.first }
344
+ labels = key.first.map {|tp| tp.first }.uniq
356
345
  labels.join " | "
357
346
  end
358
347
 
@@ -367,11 +356,14 @@ Example:
367
356
  new_string
368
357
  end
369
358
 
370
- def prefix_as_decoy(files, opt)
371
- $stderr.puts "using prefix #{opt.f} ..."
359
+ # if opt.f, then a prefix is assumed.
360
+ # if a file =~ /-prot.xml$/ then a precision plot based on probability is
361
+ # also created
362
+ def create_precision_data(files, opt)
363
+ #$stderr.puts "using prefix #{opt.f} ..."
372
364
 
373
365
  if opt.f
374
- prefix_arr = prefixes(opt.f, files.size)
366
+ prefix_arr = SpecID.extend_args(opt.f, files.size)
375
367
  end
376
368
  all_arrs = []
377
369
  key = []
@@ -384,12 +376,13 @@ Example:
384
376
  if opt.f
385
377
  (num_hits, ppv) = sp.num_hits_and_ppv_for_prob(prefix_arr[i])
386
378
  all_arrs[i] << [num_hits,ppv]
387
- key[i] << ["Precision", ["#TP", "Prec = TP/(TP+FP)"]]
388
- else
379
+ key[i] << ["Precision", ["# hits", "Prec (decoy)"]]
380
+ end
381
+ if file =~ /-prot\.xml$/
389
382
  ## These are just from protein prophet probabilities:
390
383
  (num_hits, ppv) = sp.num_hits_and_ppv_for_protein_prophet_probabilities
391
384
  all_arrs[i] << [num_hits,ppv]
392
- key[i] << ["Precision", ["#TP", "Prec = TP/(TP+FP)"]]
385
+ key[i] << ["Precision", ["# hits", "Prec (prob)"]]
393
386
  end
394
387
  end
395
388
 
data/lib/spec_id/proph.rb CHANGED
@@ -5,7 +5,6 @@ require 'instance_var_set_from_hash'
5
5
  require 'axml'
6
6
  require 'spec_id'
7
7
 
8
- class SpecID
9
8
  class Proph
10
9
 
11
10
 
@@ -20,6 +19,8 @@ end
20
19
 
21
20
 
22
21
  class ProtSummary
22
+ include SpecID
23
+
23
24
  attr_writer :prots
24
25
  attr_accessor :prot_groups
25
26
 
@@ -102,7 +103,8 @@ class ProtGroup
102
103
  end
103
104
  end
104
105
 
105
- class Prot < SpecID::Prot
106
+ class Prot
107
+ include SpecID::Prot
106
108
 
107
109
  ## probability and reference accessors are inherited
108
110
  attr_accessor :peps, :protein_name, :cutoff, :group_sibling_id, :n_indistinguishable_proteins, :percent_coverage, :unique_stripped_peptides, :total_number_peptides, :pct_spectrum_ids, :description
@@ -137,6 +139,7 @@ class Prot < SpecID::Prot
137
139
  end # class Prot
138
140
 
139
141
  class Pep
142
+ include SpecID::Pep
140
143
 
141
144
  attr_accessor :sequence, :probability, :filenames, :charge, :precursor_neutral_mass, :nsp_cutoff, :scans
142
145
  attr_writer :arithmetic_avg_scan_by_parent_time
@@ -458,4 +461,3 @@ end # Prot::Parser
458
461
  ################ --END
459
462
 
460
463
  end # Proph
461
- end # SpecID
@@ -0,0 +1,459 @@
1
+
2
+
3
+ require 'axml'
4
+ require 'hash_by'
5
+ require 'optparse'
6
+ require 'ostruct'
7
+ require 'spec_id'
8
+ require 'spec_id/precision'
9
+
10
+ #############################################################
11
+ # GLOBALS:
12
+ PRECISION_PROGRAM_BASE = 'precision'
13
+ DEF_PREFIX = "INV_"
14
+ DEF_PERCENT_FP = "5.0"
15
+ #############################################################
16
+
17
+
18
+ # @TODO: add group probability title (showing all group probabilities) for protein prob
19
+
20
+ #class String
21
+ # def margin
22
+ # self.gsub(/^\s*\|/,'')
23
+ # end
24
+ #end
25
+
26
+
27
+ class ProteinSummary
28
+ module HTML
29
+ def header
30
+ %Q{<html>
31
+ <head
32
+ #{style}
33
+ </head>
34
+ <body>
35
+ <script type="text/javascript">
36
+ <!--
37
+ function toggle_vis(id) {
38
+ var e = document.getElementById(id);
39
+ if(e.style.display == 'none')
40
+ e.style.display = 'block';
41
+ else
42
+ e.style.display = 'none';
43
+ }
44
+ //-->
45
+ </script>
46
+ }
47
+ end
48
+
49
+ def style
50
+ '
51
+ <style type="text/css">
52
+ table {
53
+ border-width:1px;
54
+ border-color:#DDDDDD;
55
+ border-collapse: collapse;
56
+ }
57
+ td,th {
58
+ padding-top: 2px;
59
+ padding-bottom: 2px;
60
+ padding-left: 5;
61
+ padding-right: 5;
62
+ }
63
+ td.redline {
64
+ background-color: #FF0000;
65
+ color: #FFFFFF
66
+ }
67
+ div.file_info, div.software, div.fppr, div.num_proteins{
68
+ margin-left: 20px;
69
+ margin-top: 20px;
70
+ }
71
+ div.main {
72
+ margin-left: 10px;
73
+ margin-right: 10px;
74
+ margin-top: 50px;
75
+ margin-bottom: 50px;
76
+ }
77
+ div#error {
78
+ margin: 30px;
79
+ text-align:center
80
+ }
81
+ hr {color: sienna}
82
+ body { font-size: 8pt; font-family: Arial,Helvetica,Times}
83
+ </style>
84
+ '
85
+ end
86
+
87
+ # an anchor and a title
88
+ def at(display, title)
89
+ "<a title=\"#{title}\">#{display}</a>"
90
+ end
91
+
92
+ def trailer
93
+ %q{
94
+ </body>
95
+ </html>
96
+ }
97
+ end
98
+
99
+ def tr
100
+ "|<tr>
101
+ | #{yield}
102
+ |</tr>\n".margin
103
+ end
104
+
105
+ def table
106
+ "|<div class=\"main\"><table align=\"center\" border=\"1\" style=\"font-size:100%\" width=\"800px\">
107
+ | #{yield}
108
+ |</table></div>\n".margin
109
+ end
110
+
111
+ def tds(arr)
112
+ arr.map {|v| "<td>#{v}</td>"}.join
113
+ end
114
+
115
+ def ths(arr)
116
+ str = arr.map {|v| "<th>#{v}</th>"}.join
117
+ str << "\n"
118
+ end
119
+ end
120
+
121
+ end
122
+
123
+
124
+ class ProteinSummary
125
+
126
+ include ProteinSummary::HTML
127
+
128
+ def ref_html(gi, name)
129
+ "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=#{gi}\" title=\"#{name}\">#{gi}</a>"
130
+ end
131
+
132
+ # Takes the -prot.xml filename and grabs the png file (if available)
133
+ def error_info(prot_file_name)
134
+ img = prot_file_name.gsub('.xml', '.png')
135
+ img_bn = File.basename(img)
136
+ "<div id=\"error\"><img src=\"#{img_bn}\" alt=\"[ Optional: To view error/sensitivity image, put #{img_bn} in the same directory as #{File.basename(prot_file_name)} ]\"/>\n</div>"
137
+ end
138
+
139
+ # attempts to get the NCBI gi code
140
+ def accession(name)
141
+ if (name.include? '|') && (name[0,3] == 'gi|')
142
+ name.split('|')[1]
143
+ else
144
+ name
145
+ end
146
+ end
147
+
148
+ def prefix_to_regex(prefix)
149
+ if prefix
150
+ /^#{Regexp.escape(prefix)}/
151
+ else
152
+ nil
153
+ end
154
+ end
155
+
156
+ # given a list of proteins, output a tab delimited textfile with protein
157
+ # name and the total number of peptides found
158
+ def output_peptide_counts_file(prots, filename)
159
+ File.open(filename, "w") do |fh_out|
160
+ prots.each do |prot|
161
+ fh_out.puts [prot._protein_name, prot._total_number_peptides].join("\t")
162
+ end
163
+ end
164
+ end
165
+
166
+ # filters on the false positive regex and sorts by prot probability
167
+ def filter_and_sort(uniq_prots, prefix=nil)
168
+ prefix_re = prefix_to_regex(prefix)
169
+ sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability]}.reverse
170
+ ## filter on prefix
171
+ if prefix
172
+ sorted = sorted.reject {|prot| prot._protein_name =~ prefix_re }
173
+ end
174
+ sorted
175
+ end
176
+
177
+ # assumes that these are sorted on probability
178
+ # desired_fppr is a float
179
+ # returns [number_of_prots, actual_fppr]
180
+ def num_prots_above_fppr(prots, desired_fppr)
181
+ current_fppr_rate_percent = 0.0
182
+ previous_fppr_rate_percent = 0.0
183
+ current_sum_one_minus_prob = 0.0
184
+ proteins_within_fppr = 0
185
+ actual_fppr = nil
186
+ already_found = false
187
+ prot_cnt = 0
188
+ prots.each do |prot|
189
+ prot_cnt += 1
190
+ # SUM(1-probX)/#prots
191
+ current_sum_one_minus_prob += 1.0 - prot._probability.to_f
192
+ current_fppr_rate_percent = (current_sum_one_minus_prob / prot_cnt) * 100
193
+
194
+ if current_fppr_rate_percent > desired_fppr && !already_found
195
+ actual_fppr = previous_fppr_rate_percent
196
+ proteins_within_fppr = prot_cnt
197
+ already_found = true
198
+ end
199
+ previous_fppr_rate_percent = current_fppr_rate_percent
200
+ end
201
+ [proteins_within_fppr, actual_fppr]
202
+ end
203
+
204
+ #### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
205
+
206
+ # returns a string of the table rows
207
+ # false_positive_rate (give as a %) is the cutoff mark
208
+ # (the number of proteins at the desired fppr is computed separately by num_prots_above_fppr)
209
+ def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, peptide_count_filename=nil)
210
+ prot_cnt = 0
211
+ uniq_prots.map do |prot|
212
+ tr do
213
+ prot_cnt += 1
214
+ gi = accession(prot._protein_name)
215
+ tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), prot.annotation.first._protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids])
216
+ end
217
+ end.join
218
+ end
219
+
220
+ def print_html_pieces(file, *pieces)
221
+ File.open(file, "w") do |out|
222
+ pieces.each do |piece|
223
+ out.print piece
224
+ end
225
+ end
226
+ end
227
+
228
+ def file_info(file)
229
+ "<div class=\"file_info\"><h3>Source File Information</h3>File: #{File.expand_path(file)}
230
+ <br/>Last Modified: #{File.mtime(file)}
231
+ <br/>Size: #{File.size(file)/1000} KB
232
+ </div>"
233
+ end
234
+
235
+ def bioworks_script_info(obj)
236
+ version = "3.2??"
237
+ if obj.version
238
+ version = obj.version
239
+ end
240
+ script_info{"Bioworks version #{version}"}
241
+ end
242
+
243
+ def protproph_script_info
244
+ begin
245
+ where = `which xinteract`
246
+ reply = `#{where}`
247
+ rescue Exception
248
+ reply = ""
249
+ end
250
+ prophet = "TPP (version unknown)" # put your version here if you can't get it dynamically
251
+ if reply =~ /xinteract.*?\((TPP .*)\)/
252
+ prophet = $1.dup
253
+ end
254
+ script_info { "ProteinProphet from: #{prophet}" }
255
+ end
256
+
257
+ def mspire_version
258
+ string = "mspire"
259
+ begin
260
+ if `gem list --local mspire` =~ /mspire \((.*?)\)/
261
+ string << (" v" + $1)
262
+ end
263
+ rescue Exception
264
+ end
265
+ string
266
+ end
267
+
268
+ def script_info
269
+ "<div class=\"software\"><h3>Software Information</h3>#{yield}<br/>Ruby package: #{mspire_version}<br/>Command: #{[File.basename(__FILE__), *@orig_argv].join(" ")}</div>"
270
+ end
271
+
272
+ def proph_output(file, outfn, opt, fppr_output_as_html)
273
+ header_anchors = [at('#', 'number'), at('prob','protein probability (for Prophet, higher is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (includes non-contributing peptides). Click number to show/hide'), at('#peps', 'total number of corresponding peptides that contributed to protein probability'), at('%ids', 'fraction of correct dataset peptide identifications corresponding to protein')]
274
+ num_cols = header_anchors.size
275
+ theaders = ths(header_anchors)
276
+
277
+ root = AXML.parse_file(file)
278
+ prots = []
279
+ ## find the min_prob at a fppr of XX
280
+ min_prob_redline = 1.01 # if no fppr is less than what they give, then all are redlined!
281
+
282
+ if opt.c
283
+ actual_percent_fp = opt.c.to_f
284
+ elsif opt.cut_at
285
+ actual_percent_fp = opt.cut_at.to_f
286
+ else
287
+ actual_percent_fp = nil
288
+ end
289
+ root.protein_group.each do |group|
290
+ group.protein.each do |prt|
291
+ prots << prt
292
+ end
293
+ end
294
+ uniq_prots = prots.hash_by(:_protein_name).map{|name,prot_arr| prot_arr.first }
295
+ filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f)
296
+
297
+ ## num proteins above cutoff (if opt.c)
298
+ num_prots_html = ''
299
+ if opt.c || opt.cut_at
300
+ (num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp)
301
+ num_prots_html = num_prots_to_html(actual_percent_fp, actual_fppr, num_prots)
302
+ end
303
+ if opt.cut_at
304
+ filtered_sorted_prots = filtered_sorted_prots[0,num_prots]
305
+ end
306
+
307
+ output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count
308
+
309
+ table_string = table do
310
+ tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, opt.peptide_count)
311
+ end
312
+ er_info = opt.precision ? error_info(file) : ""
313
+ html_pieces = [outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer]
314
+ print_html_pieces(*html_pieces)
315
+ end # proph_output
316
+
317
+ # given a list of peptide sequences creates javascript to hide/show them
318
+ def peptide_cell(prot_num, peptide_sequences)
319
+ "<a href=\"#prot#{prot_num}\" onclick=\"toggle_vis('#{prot_num}');\">#{peptide_sequences.size}</a><div id=\"#{prot_num}\" style=\"display:none;\">#{peptide_sequences.join(', ')}</div>"
320
+ end
321
+
322
+ # takes spec_id object
323
+ # the outfn is the output filename
324
+ # false_prefix is the prefix marking false positive proteins (proteins whose reference starts with it are skipped)
325
+ def bioworks_output(spec_id, outfn, file=nil, false_prefix=nil, fppr_output_as_html=nil)
326
+ fppr_output_as_html ||= ''
327
+ header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')]
328
+ num_cols = header_anchors.size
329
+ theaders = ths(header_anchors)
330
+ proteins = spec_id.prots
331
+ protein_num = 0
332
+ rows = ""
333
+ prefix_re = prefix_to_regex(false_prefix)
334
+ proteins.each do |prot|
335
+ if false_prefix && prot.reference =~ prefix_re
336
+ next
337
+ end
338
+ uniq_peps = Hash.new {|h,k| h[k] = true; }
339
+ protein_num += 1
340
+ prot.peps.each do |pep|
341
+ uniq_peps[pep.sequence.split('.')[1]] = true
342
+ end
343
+ pieces = prot.reference.split(' ')
344
+ long_prot_name = pieces.shift
345
+ annotation = pieces.join(' ')
346
+ accession = prot.accession
347
+ if accession == '0' ; accession = long_prot_name end
348
+ rows << tr{ tds([protein_num, prot.protein_probability, ref_html(accession, long_prot_name), annotation, prot.coverage, peptide_cell(protein_num, uniq_peps.keys), prot.peps.size]) }
349
+ end
350
+ table_string = table do
351
+ tr{theaders} + rows
352
+ end
353
+ print_html_pieces(outfn, header, fppr_output_as_html, file_info(file), bioworks_script_info(spec_id), table_string, trailer)
354
+ end # bioworks_output
355
+
356
+ def num_prots_to_html(desired_cutoff, actual_cutoff, num_proteins)
357
+ actual_cutoff = sprintf("%.3f", actual_cutoff)
358
+ desired_cutoff = sprintf("%.3f", desired_cutoff)
359
+ "<div class=\"num_proteins\"><h3>False Positive Predictive Rate [ FP/(TP+FP) ]</h3>
360
+ Desired FPPR: #{desired_cutoff} %<br/>
361
+ Actual FPPR: #{actual_cutoff} %<br/>
362
+ Number of Proteins at Actual FPPR: #{num_proteins}
363
+ </div>"
364
+ end
365
+
366
+ # transforms the output string of file_as_decoy into html
367
+ def file_as_decoy_to_html(string)
368
+ lines = string.split("\n")
369
+ #puts lines ?? is this supposed to be commented out?
370
+ lines = lines.reject do |obj| obj =~ /\*{10}/ end
371
+ lines.map! do |line| "#{line}<br/>" end
372
+ "<div class=\"fppr\">
373
+ <h3>Classification Analysis</h3>
374
+ #{lines.join("\n")}
375
+ </div>"
376
+ end
377
+
378
+ # prepends a "Classification Analysis" html header to the (already html) precision output string
379
+ def prefix_as_decoy_to_html(string)
380
+ "<div class=\"fppr\">
381
+ <h3>Classification Analysis</h3>
382
+ </div>" +
383
+ string
384
+ end
385
+
386
+ def create_from_command_line_args(argv)
387
+ @orig_argv = argv.dup
388
+
389
+ opt = OpenStruct.new
390
+ opt.f = DEF_PREFIX
391
+ opts = OptionParser.new do |op|
392
+ op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
393
+ op.separator " where file = bioworks -or- <run>-prot (prophet output)"
394
+ op.separator " outputs: <file>.summary.html"
395
+ op.separator ""
396
+ op.on("-f", "--false <prefix>", "ignore proteins with prefix (def: #{DEF_PREFIX})") {|v| opt.f = v }
397
+ op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = v }
398
+ op.separator(" if --precision then -f is used to specify a file or prefix")
399
+ op.separator(" that indicates the false positives.")
400
+ op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
401
+ op.separator ""
402
+ op.separator "Options for #{PRECISION_PROGRAM_BASE}.rb :"
403
+ op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
404
+ op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
405
+ op.separator ""
406
+ op.separator "Specific to ProteinProphet (with no concatenated DB):"
407
+ op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
408
+ op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
409
+ end
410
+
411
+ opts.parse!(argv)
412
+
413
+ if argv.size < 1
414
+ puts opts
415
+ return
416
+ end
417
+
418
+ fppr_output_as_html = ''
419
+ files = argv.to_a
420
+ files.each do |file|
421
+ outfn = file.sub(/\.xml$/, '.summary.html')
422
+ outfn = outfn.sub(/\.srg$/, '.summary.html')
423
+ ## False Positive Rate Calculation:
424
+ if opt.precision
425
+ opt.o = outfn # won't actually be written over, but used
426
+ to_use_argv = create_precision_argv(file, opt)
427
+ (out_string, opt) = Prec.new.precision(to_use_argv)
428
+ fppr_output_as_html = prefix_as_decoy_to_html(out_string)
429
+ end
430
+
431
+ case SpecID.file_type(file)
432
+ when "protproph"
433
+ #spec_id = SpecID.new(file)
434
+ proph_output(file, outfn, opt, fppr_output_as_html)
435
+ when "bioworks"
436
+ spec_id = SpecID.new(file)
437
+ bioworks_output(spec_id, outfn, file, opt.f, fppr_output_as_html)
438
+ else
439
+ abort "filetype for #{file} not recognized!"
440
+ end
441
+ end
442
+
443
+ end # method create_from_command_line
444
+
445
+ def create_precision_argv(file, opt)
446
+ # include only those options specific
447
+ new_argv = [file]
448
+ if opt.f ; new_argv << '-f' << opt.f end
449
+ if opt.o ; new_argv << '-o' << opt.o end
450
+ new_argv
451
+ end
452
+
453
+ end # ProteinSummary
454
+
455
+ ##################################################################
456
+ # MAIN
457
+ ##################################################################
458
+
459
+