mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -1,455 +1,6 @@
1
1
  #!/usr/bin/ruby -w
2
2
 
3
- require 'axml'
4
- require 'hash_by'
5
- require 'optparse'
6
- require 'ostruct'
7
- require 'spec_id'
8
-
9
- #############################################################
10
- # GLOBALS:
11
- PRECISION_PROGRAM_BASE = 'precision'
12
- DEF_PREFIX = "INV_"
13
- DEF_PERCENT_FP = "5.0"
14
- #############################################################
15
-
16
-
17
- # @TODO: add group probability title (showing all group probabilities) for protein prob
18
-
19
- #class String
20
- # def margin
21
- # self.gsub(/^\s*\|/,'')
22
- # end
23
- #end
24
-
25
-
26
- class Runner
27
- module HTML
28
- def header
29
- %Q{<html>
30
- <head>
31
- #{style}
32
- </head>
33
- <body>
34
- <script type="text/javascript">
35
- <!--
36
- function toggle_vis(id) {
37
- var e = document.getElementById(id);
38
- if(e.style.display == 'none')
39
- e.style.display = 'block';
40
- else
41
- e.style.display = 'none';
42
- }
43
- //-->
44
- </script>
45
- }
46
- end
47
-
48
- def style
49
- '
50
- <style type="text/css">
51
- table {
52
- border-width:1px;
53
- border-color:#DDDDDD;
54
- border-collapse: collapse;
55
- }
56
- td,th {
57
- padding-top: 2px;
58
- padding-bottom: 2px;
59
- padding-left: 5;
60
- padding-right: 5;
61
- }
62
- td.redline {
63
- background-color: #FF0000;
64
- color: #FFFFFF
65
- }
66
- div.file_info, div.software, div.fppr, div.num_proteins{
67
- margin-left: 20px;
68
- margin-top: 20px;
69
- }
70
- div.main {
71
- margin-left: 10px;
72
- margin-right: 10px;
73
- margin-top: 50px;
74
- margin-bottom: 50px;
75
- }
76
- div#error {
77
- margin: 30px;
78
- text-align:center
79
- }
80
- hr {color: sienna}
81
- body { font-size: 8pt; font-family: Arial,Helvetica,Times}
82
- </style>
83
- '
84
- end
85
-
86
- # an anchor and a title
87
- def at(display, title)
88
- "<a title=\"#{title}\">#{display}</a>"
89
- end
90
-
91
- def trailer
92
- %q{
93
- </body>
94
- </html>
95
- }
96
- end
97
-
98
- def tr
99
- "|<tr>
100
- | #{yield}
101
- |</tr>\n".margin
102
- end
103
-
104
- def table
105
- "|<div class=\"main\"><table align=\"center\" border=\"1\" style=\"font-size:100%\" width=\"800px\">
106
- | #{yield}
107
- |</table></div>\n".margin
108
- end
109
-
110
- def tds(arr)
111
- arr.map {|v| "<td>#{v}</td>"}.join
112
- end
113
-
114
- def ths(arr)
115
- str = arr.map {|v| "<th>#{v}</th>"}.join
116
- str << "\n"
117
- end
118
- end
119
-
120
- end
121
-
122
-
123
- class Runner
124
-
125
- include Runner::HTML
126
-
127
- def ref_html(gi, name)
128
- "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=#{gi}\" title=\"#{name}\">#{gi}</a>"
129
- end
130
-
131
- # Takes the -prot.xml filename and grabs the png file (if available)
132
- def error_info(prot_file_name)
133
- img = prot_file_name.gsub('.xml', '.png')
134
- img_bn = File.basename(img)
135
- "<div id=\"error\"><img src=\"#{img_bn}\" alt=\"[ Optional: To view error/sensitivity image, put #{img_bn} in the same directory as #{File.basename(prot_file_name)} ]\"/>\n</div>"
136
- end
137
-
138
- # attempts to get the NCBI gi code
139
- def accession(name)
140
- if (name.include? '|') && (name[0,3] == 'gi|')
141
- name.split('|')[1]
142
- else
143
- name
144
- end
145
- end
146
-
147
- def prefix_to_regex(prefix)
148
- if prefix
149
- /^#{Regexp.escape(prefix)}/
150
- else
151
- nil
152
- end
153
- end
154
-
155
- # given a list of proteins, output a tab delimited textfile with protein
156
- # name and the total number of peptides found
157
- def output_peptide_counts_file(prots, filename)
158
- File.open(filename, "w") do |fh_out|
159
- prots.each do |prot|
160
- fh_out.puts [prot._protein_name, prot._total_number_peptides].join("\t")
161
- end
162
- end
163
- end
164
-
165
- # filters on the false positive regex and sorts by prot probability
166
- def filter_and_sort(uniq_prots, prefix=nil)
167
- prefix_re = prefix_to_regex(prefix)
168
- sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability]}.reverse
169
- ## filter on prefix
170
- if prefix
171
- sorted = sorted.reject {|prot| prot.reference =~ prefix_re }
172
- end
173
- sorted
174
- end
175
-
176
- # assumes that these are sorted on probability
177
- # desired_fppr is a float
178
- # returns [number_of_prots, actual_fppr]
179
- def num_prots_above_fppr(prots, desired_fppr)
180
- current_fppr_rate_percent = 0.0
181
- previous_fppr_rate_percent = 0.0
182
- current_sum_one_minus_prob = 0.0
183
- proteins_within_fppr = 0
184
- actual_fppr = nil
185
- already_found = false
186
- prot_cnt = 0
187
- prots.each do |prot|
188
- prot_cnt += 1
189
- # SUM(1-probX)/#prots
190
- current_sum_one_minus_prob += 1.0 - prot._probability.to_f
191
- current_fppr_rate_percent = (current_sum_one_minus_prob / prot_cnt) * 100
192
-
193
- if current_fppr_rate_percent > desired_fppr && !already_found
194
- actual_fppr = previous_fppr_rate_percent
195
- proteins_within_fppr = prot_cnt
196
- already_found = true
197
- end
198
- previous_fppr_rate_percent = current_fppr_rate_percent
199
- end
200
- [proteins_within_fppr, actual_fppr]
201
- end
202
-
203
- #### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
204
-
205
- # returns a string of the table rows
206
- # false_positive_rate (give as a %) is the cutoff mark
207
- # returns the number of proteins at the desired_fppr (if given)
208
- def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, peptide_count_filename=nil)
209
- prot_cnt = 0
210
- uniq_prots.map do |prot|
211
- tr do
212
- prot_cnt += 1
213
- gi = accession(prot._protein_name)
214
- tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), prot.annotation.first._protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids])
215
- end
216
- end.join
217
- end
218
-
219
- def print_html_pieces(file, *pieces)
220
- File.open(file, "w") do |out|
221
- pieces.each do |piece|
222
- out.print piece
223
- end
224
- end
225
- end
226
-
227
- def file_info(file)
228
- "<div class=\"file_info\"><h3>Source File Information</h3>File: #{File.expand_path(file)}
229
- <br/>Last Modified: #{File.mtime(file)}
230
- <br/>Size: #{File.size(file)/1000} KB
231
- </div>"
232
- end
233
-
234
- def bioworks_script_info(obj)
235
- version = "3.2??"
236
- if obj.version
237
- version = obj.version
238
- end
239
- script_info{"Bioworks version #{version}"}
240
- end
241
-
242
- def protproph_script_info
243
- begin
244
- where = `which xinteract`
245
- reply = `#{where}`
246
- rescue Exception
247
- reply = ""
248
- end
249
- prophet = "TPP (version unknown)" # put your version here if you can't get it dynamically
250
- if reply =~ /xinteract.*?\((TPP .*)\)/
251
- prophet = $1.dup
252
- end
253
- script_info { "ProteinProphet from: #{prophet}" }
254
- end
255
-
256
- def mspire_version
257
- string = "mspire"
258
- begin
259
- if `gem list --local mspire` =~ /mspire \((.*?)\)/
260
- string << (" v" + $1)
261
- end
262
- rescue Exception
263
- end
264
- string
265
- end
266
-
267
- def script_info
268
- "<div class=\"software\"><h3>Software Information</h3>#{yield}<br/>Ruby package: #{mspire_version}<br/>Command: #{[File.basename(__FILE__), *@orig_argv].join(" ")}</div>"
269
- end
270
-
271
- def proph_output(file, outfn, opt, fppr_output_as_html)
272
- header_anchors = [at('#', 'number'), at('prob','protein probability (for Prophet, higher is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (includes non-contributing peptides). Click number to show/hide'), at('#peps', 'total number of corresponding peptides that contributed to protein probability'), at('%ids', 'fraction of correct dataset peptide identifications corresponding to protein')]
273
- num_cols = header_anchors.size
274
- theaders = ths(header_anchors)
275
-
276
- root = AXML.parse_file(file)
277
- prots = []
278
- ## find the min_prob at a fppr of XX
279
- min_prob_redline = 1.01 # if no fppr is less than what they give, then all are redlined!
280
-
281
- if opt.c
282
- actual_percent_fp = opt.c.to_f
283
- elsif opt.cut_at
284
- actual_percent_fp = opt.cut_at.to_f
285
- else
286
- actual_percent_fp = nil
287
- end
288
- root.protein_group.each do |group|
289
- group.protein.each do |prt|
290
- prots << prt
291
- end
292
- end
293
- uniq_prots = prots.hash_by(:_protein_name).map{|name,prot_arr| prot_arr.first }
294
- filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f)
295
-
296
- ## num proteins above cutoff (if opt.c)
297
- num_prots_html = ''
298
- if opt.c || opt.cut_at
299
- (num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp)
300
- num_prots_html = num_prots_to_html(actual_percent_fp, actual_fppr, num_prots)
301
- end
302
- if opt.cut_at
303
- filtered_sorted_prots = filtered_sorted_prots[0,num_prots]
304
- end
305
-
306
- output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count
307
-
308
- table_string = table do
309
- tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, opt.peptide_count)
310
- end
311
- er_info = opt.precision ? error_info(file) : ""
312
- html_pieces = [outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer]
313
- print_html_pieces(*html_pieces)
314
- end # proph_output
315
-
316
- # given a list of peptide sequences creates javascript to hide/show them
317
- def peptide_cell(prot_num, peptide_sequences)
318
- "<a href=\"#prot#{prot_num}\" onclick=\"toggle_vis('#{prot_num}');\">#{peptide_sequences.size}</a><div id=\"#{prot_num}\" style=\"display:none;\">#{peptide_sequences.join(', ')}</div>"
319
- end
320
-
321
- def bioworks_output(file, outfn, opt, fppr_output_as_html)
322
- header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')]
323
- num_cols = header_anchors.size
324
- theaders = ths(header_anchors)
325
- bio_obj = SpecID.new(file)
326
- proteins = bio_obj.prots
327
- protein_num = 0
328
- rows = ""
329
- prefix_re = prefix_to_regex(opt.f)
330
- proteins.each do |prot|
331
- if opt.f && prot.reference =~ prefix_re
332
- next
333
- end
334
- uniq_peps = Hash.new {|h,k| h[k] = true; }
335
- protein_num += 1
336
- prot.peps.each do |pep|
337
- uniq_peps[pep.sequence.split('.')[1]] = true
338
- end
339
- pieces = prot.reference.split(' ')
340
- long_prot_name = pieces.shift
341
- annotation = pieces.join(' ')
342
- accession = prot.accession
343
- if accession == '0' ; accession = long_prot_name end
344
- rows << tr{ tds([protein_num, prot.protein_probability, ref_html(accession, long_prot_name), annotation, prot.coverage, peptide_cell(protein_num, uniq_peps.keys), prot.peps.size]) }
345
- end
346
- table_string = table do
347
- tr{theaders} + rows
348
- end
349
- print_html_pieces(outfn, header, fppr_output_as_html, file_info(file), bioworks_script_info(bio_obj), table_string, trailer)
350
- end # bioworks_output
351
-
352
- def num_prots_to_html(desired_cutoff, actual_cutoff, num_proteins)
353
- actual_cutoff = sprintf("%.3f", actual_cutoff)
354
- desired_cutoff = sprintf("%.3f", desired_cutoff)
355
- "<div class=\"num_proteins\"><h3>False Positive Rate Information</h3>
356
- Desired FPR: #{desired_cutoff} %<br/>
357
- Actual FPR: #{actual_cutoff} %<br/>
358
- Number of Proteins at Actual FPR: #{num_proteins}
359
- </div>"
360
- end
361
-
362
- # transforms the output string of file_as_decoy into html
363
- def file_as_decoy_to_html(string)
364
- lines = string.split("\n")
365
- #puts lines ?? is this supposed to be commented out?
366
- lines = lines.reject do |obj| obj =~ /\*{10}/ end
367
- lines.map! do |line| "#{line}<br/>" end
368
- "<div class=\"fppr\">
369
- <h3>Classification Analysis</h3>
370
- #{lines.join("\n")}
371
- </div>"
372
- end
373
-
374
- # prepends a 'Classification Analysis' html heading div to the given output string
375
- def prefix_as_decoy_to_html(string)
376
- "<div class=\"fppr\">
377
- <h3>Classification Analysis</h3>
378
- </div>" +
379
- string
380
- end
381
-
382
- def go(argv)
383
- @orig_argv = argv.dup
384
- dup_argv = argv.dup
385
-
386
- opt = OpenStruct.new
387
- opt.f = DEF_PREFIX
388
- opts = OptionParser.new do |op|
389
- op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
390
- op.separator " where file = bioworks -or- <run>-prot (prophet output)"
391
- op.separator " outputs: <file>.summary.html"
392
- op.separator ""
393
- op.on("-f", "--false <prefix>", "ignore proteins with prefix (def: #{DEF_PREFIX})") {|v| opt.f = v }
394
- op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = v }
395
- op.separator(" if --precision then -f is used to specify a file or prefix")
396
- op.separator(" that indicates the false positives.")
397
- op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
398
- op.separator ""
399
- op.separator "Options for #{PRECISION_PROGRAM_BASE}.rb :"
400
- op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
401
- op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
402
- op.separator ""
403
- op.separator "Specific to ProteinProphet (with no concatenated DB):"
404
- op.on("-c", "--cutoff percent", "includes FPR summary at given cutoff") {|v| opt.c = v }
405
- op.on("--cut_at percent", "only reports proteins within FPR percent") {|v| opt.cut_at = v }
406
- end
407
-
408
- opts.parse!
409
-
410
- if argv.size < 1
411
- puts opts
412
- exit
413
- end
414
-
415
- fppr_output_as_html = ''
416
- files = argv.to_a
417
- files.each do |file|
418
- outfn = file.gsub(/\.xml$/, '.summary.html')
419
- ## False Positive Rate Calculation:
420
- if opt.precision
421
- opt.o = outfn # won't actually be written over, but used
422
- to_use_argv = create_precision_argv(file, opt)
423
- (out_string, opt) = SpecID::Precision.new.precision(to_use_argv)
424
- fppr_output_as_html = prefix_as_decoy_to_html(out_string)
425
- end
426
-
427
- case SpecID.file_type(file)
428
- when "protproph"
429
- proph_output(file, outfn, opt, fppr_output_as_html)
430
- when "bioworks"
431
- bioworks_output(file, outfn, opt, fppr_output_as_html)
432
- else
433
- abort "filetype for #{file} not recognized!"
434
- end
435
- end
436
-
437
- end # method go
438
-
439
- def create_precision_argv(file, opt)
440
- # include only those options specific
441
- new_argv = [file]
442
- if opt.f ; new_argv << '-f' << opt.f end
443
- if opt.o ; new_argv << '-o' << opt.o end
444
- new_argv
445
- end
446
-
447
- end # Runner
448
-
449
- ##################################################################
450
- # MAIN
451
- ##################################################################
452
-
453
- Runner.new.go(ARGV)
3
+ require 'spec_id/protein_summary'
454
4
 
5
+ ProteinSummary.new.create_from_command_line_args(ARGV)
455
6
 
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'optparse'
4
+ require 'spec/mzxml'
5
+ require 'fileutils'
6
+
7
+ progname = File.basename(__FILE__)
8
+
9
+
10
+ opt = {}
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{progname} [OPTIONS] <file>.RAW ..."
13
+ op.separator ""
14
+ op.on("-p", "--profile", "uses profile output instead of centroid (default)") {|v| opt[:profile] = v}
15
+ end
16
+
17
+ opts.parse!
18
+
19
+ if ARGV.size == 0
20
+ puts opts
21
+ exit
22
+ end
23
+
24
+ converter = Spec::MzXML.find_mzxml_converter
25
+ if converter
26
+ $stderr.puts "using #{converter} to convert files"
27
+ else
28
+ puts "cannot find [#{Spec::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
29
+ puts ENV['PATH'].split(/[:;]/).join(", ")
30
+ abort
31
+ end
32
+
33
+ files = ARGV.to_a
34
+ files.each do |file|
35
+ puts "******************************************"
36
+ puts "Converting: #{file}"
37
+ if converter =~ /readw/
38
+ centroid_or_profile = 'c'
39
+ if opt[:profile]
40
+ centroid_or_profile = 'p'
41
+ end
42
+ outfile = file.sub(/\.RAW$/i, '.mzXML')
43
+ cmd = "#{converter} #{file} #{centroid_or_profile} #{outfile}"
44
+ puts "Performing: '#{cmd}'"
45
+ puts `#{cmd}`
46
+ else
47
+ ## t2x only outputs in cwd!
48
+ Dir.chdir(File.dirname(file)) do |dir|
49
+ puts "Performing: '#{cmd}' in #{dir}"
50
+ puts `#{cmd}`
51
+ system "#{converter} #{File.basename(file)}"
52
+ end
53
+ end
54
+ puts "******************************************"
55
+ end
data/bin/srf_group.rb ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/ruby
2
+
3
+
4
+ require 'optparse'
5
+ require 'spec_id/srf'
6
+
7
+ $OUTFILE = 'bioworks.srg'
8
+
9
+ opts = OptionParser.new do |op|
10
+ op.banner = "usage: #{File.basename(__FILE__)} <file1>.srf <file2>.srf ..."
11
+ op.separator "outputs: 'bioworks.srg'"
12
+ op.separator ""
13
+ op.separator " A '.srg' file is an ascii text file with a list"
14
+ op.separator " of the srf files (full path names) in that group."
15
+ op.separator ""
16
+ op.on('-o', '--output <filename>', 'a different output name') {|v| $OUTFILE }
17
+ end
18
+
19
+ if ARGV.size == 0
20
+ puts opts
21
+ end
22
+
23
+ obj = SRFGroup.new
24
+ obj.filenames = ARGV.to_a
25
+ obj.to_srg($OUTFILE)
26
+
data/changelog.txt CHANGED
@@ -1,4 +1,6 @@
1
1
 
2
+ ## version 0.1.7
3
+
2
4
  1. A couple of scripts and subroutines were hashing peptides but not on the file
3
5
  basename. This would result in slightly incorrect results (any time there
4
6
  were overlapping scan numbers in multiple datasets, only the top one would be
@@ -31,4 +33,9 @@ Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
31
33
  called the False Positive Predictive Rate (FPPR). I will probably implement
32
34
  this in a future release.
33
35
 
36
+ ## version 0.2.0
34
37
 
38
+ ** This is a definite code breaker **
39
+ Revamped the way SpecID works (it is now subclassed). Since I want to return
40
+ the specific object that the file specifies, I use 'create' now instead of
41
+ 'new' (which forces one to return *that* class).
data/lib/align.rb CHANGED
@@ -24,12 +24,12 @@ class Align
24
24
  scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num
25
25
  end
26
26
 
27
- dta_filenames = SpecID::Proph::Pep::Parser.new.dta_filenames_by_seq_charge(pep_proph_xml, "regex")
27
+ dta_filenames = Proph::Pep::Parser.new.dta_filenames_by_seq_charge(pep_proph_xml, "regex")
28
28
 
29
- parser = SpecID::Proph::Prot::Parser.new
29
+ parser = Proph::Prot::Parser.new
30
30
  parser.get_prots_and_peps(prot_xml, prot_prob, pep_init_prob, pep_nsp_prob, "regex")
31
31
  peptides = parser.peps
32
- peptides = SpecID::Proph::Pep.uniq_by_seqcharge(peptides)
32
+ peptides = Proph::Pep.uniq_by_seqcharge(peptides)
33
33
  ## we update each peptide with a list of dtafilenames
34
34
  ## then we update with a parallel list of scans (one for each dtafn...
35
35
  ## unless there are multiple scans associated with each filename
data/lib/fasta.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  require 'sample_enzyme'
2
+ require 'each_index'
2
3
 
4
+
5
+ tmp = $VERBOSE ; $VERBOSE = nil
3
6
  class String
4
7
 
5
8
  def each_index
@@ -21,6 +24,8 @@ class String
21
24
  end
22
25
 
23
26
  end
27
+ $VERBOSE = tmp
28
+
24
29
 
25
30
 
26
31
  class Fasta
@@ -259,9 +264,9 @@ class Fasta
259
264
  end
260
265
 
261
266
  class Fasta::Prot
262
- attr_accessor :header, :aaseq
263
267
  # header given as full line with starting '>' (but no newline chars!).
264
268
  # aaseq also given without any newline chars
269
+ attr_accessor :header, :aaseq
265
270
  def initialize(header=nil, aaseq=nil)
266
271
  @header = header || ''
267
272
  if aaseq
data/lib/gi.rb CHANGED
@@ -40,19 +40,24 @@ class GI
40
40
  BATCH_SIZE = 500
41
41
  # takes an array of gi numbers and returns an array of annotation
42
42
  # This allows use of the batch search mode on NCBI
43
+ # returns nil if no internet connection
43
44
  def self.gi2annot(list_of_gi_numbers)
45
+ annots = []
44
46
  loop do
45
47
  batch = list_of_gi_numbers.slice!(0..BATCH_SIZE)
46
48
  if batch.size == 0 then break end
47
49
  string = batch.join(",")
48
50
  url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=protein&retmode=xml&id=#{string}"
49
51
  #puts url
50
- annots = []
51
- open(url) do |handle|
52
- annots = parse_etool_output(handle)
52
+ begin
53
+ open(url) do |handle|
54
+ annots.push( *(parse_etool_output(handle)) )
55
+ end
56
+ rescue SocketError
57
+ return nil
53
58
  end
54
- annots
55
59
  end
60
+ annots
56
61
  end
57
62
 
58
63
  protected
data/lib/roc.rb CHANGED
@@ -90,6 +90,8 @@ end
90
90
  # For calculating precision given lists of hits and decoy hits. The hits are
91
91
  # assumed to have false positives within them that can be estimated from the
92
92
  # number of decoy hits at the same rate
93
+ # NOTE: this class assumes that lower scores are better. Negate your scores
94
+ # if this is not the case.
93
95
  class DecoyROC < ROC
94
96
 
95
97
  # returns the [num_hits, num_tps, precision] as a function of true
data/lib/sample_enzyme.rb CHANGED
@@ -1,6 +1,7 @@
1
+
1
2
  module SpecIDXML; end
2
3
 
3
- require 'spec_id'
4
+ require 'spec_id_xml'
4
5
  require 'strscan'
5
6
 
6
7
  class SampleEnzyme