mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -1,421 +0,0 @@
1
-
2
- require 'optparse'
3
- require 'ostruct'
4
- require 'generator'
5
- require 'roc'
6
-
7
- ## silence this bad boy
8
- tmp = $VERBOSE ; $VERBOSE = nil
9
- require 'gnuplot'
10
- $VERBOSE = tmp
11
-
12
- class String
13
- def margin
14
- self.gsub(/^\s*\|/,'')
15
- end
16
- end
17
-
18
- class Prec ; end
19
-
20
- module Prec::PlotHelper
21
-
22
- PLOT_TYPE = 'XYData'
23
- TITLE = 'Precision vs. Num Hits [ Precision = Positive Predictive Value = TP/(TP+FP) ]'
24
- XAXIS = 'Num Hits (excludes known false positives)'
25
- EXT = '.toplot'
26
- IMAGE_EXT = '.png'
27
-
28
- def create_to_plot_file(all_arrs, key, files, filename_noext)
29
- ## CREATE the PLOT IMAGE:
30
- to_plot = filename_noext + EXT
31
- png = filename_noext + IMAGE_EXT
32
-
33
-
34
- File.open(to_plot,'w') do |out|
35
- out.puts PLOT_TYPE
36
- out.puts filename_noext
37
- out.puts TITLE
38
- out.puts XAXIS
39
- out.puts escape_to_gnuplot(y_axis_label(key))
40
- files.each_with_index do |file,i|
41
- #p key[i]
42
- #p all_arrs[i]
43
-
44
- key[i].each_with_index do |k,j|
45
- out.puts(escape_to_gnuplot("#{file}: #{k[1][1]}"))
46
- out.puts all_arrs[i][j][0].join(' ')
47
- out.puts all_arrs[i][j][1].join(' ')
48
- end
49
- end
50
- end
51
- end
52
-
53
-
54
- ## outputs a .toplot file based on filename_noext, creates a png file, and
55
- ## writes html to fh that will load the png file up
56
- ## This is a self contained module that can be swapped out for a
57
- ## completely different plotting program if desired.
58
- def plot_figure(all_arrs, key, files, filename_noext)
59
-
60
- ## CREATE the PLOT IMAGE:
61
- to_plot = filename_noext+'.toplot'
62
- png = filename_noext+'.png'
63
-
64
- tmp = $VERBOSE ; $VERBOSE = nil
65
- Gnuplot.open do |gp|
66
- Gnuplot::Plot.new( gp ) do |plot|
67
- plot.terminal "png noenhanced"
68
- plot.output png
69
- plot.title TITLE
70
- plot.xlabel XAXIS
71
- plot.ylabel escape_to_gnuplot(y_axis_label(key))
72
- plot.style "line 1 lt 1"
73
- plot.style "line 2 lt 12"
74
- #plot.style "line 1 lt 1 lw #{opts.lw} pt 7 ps #{opts.ps}",
75
- plot.yrange "[-0.05:#{1.05 + 0.020*files.size}]"
76
- files.each_with_index do |file,i|
77
- key[i].each_with_index do |k,j|
78
- plot.data << Gnuplot::DataSet.new( [ all_arrs[i][j][0], all_arrs[i][j][1] ] ) do |ds|
79
- ds.with = "lines"
80
- ds.title = escape_to_gnuplot("#{file}: #{k[1][1]}")
81
- end
82
- end
83
- end
84
- end
85
- end
86
- $VERBOSE = tmp
87
-
88
- ## CREATE the HTML to load the plot:
89
- basename_filename_noext = File.basename(filename_noext)
90
- output = "<div id=\"plot\"><table class=\"image\" align=\"center\">\n"
91
- #output << "<caption align=\"bottom\">Additional views of this data may be obtained by using the <span class=\"code\">plot.rb</span> command on '#{to_plot}' (type <span class=\"code\">plot.rb</span> for more details). Plot generated with command: &nbsp;&nbsp; <span class=\"code\">#{plot_cmd}</span></caption>\n"
92
- output << "<tr><td><img src=\"#{basename_filename_noext}.png\" title=\"File #{basename_filename_noext} must be in the same directory as this html.\"/></td></tr>\n"
93
- output << "</table></div>\n"
94
- output
95
- end # plot_figure
96
-
97
- end
98
-
99
- module Prec::HTML
100
-
101
- # html and body tags
102
- def html
103
- "|<html>
104
- |#{yield}
105
- |</html>\n".margin
106
- end
107
-
108
- def body
109
- "|<body>
110
- | #{yield}
111
- |</body>\n".margin
112
- end
113
-
114
- def header
115
- "|<head>
116
- | #{style}
117
- |</head>\n".margin
118
- end
119
-
120
- def td
121
- "<td>#{yield}</td>"
122
- end
123
-
124
-
125
- def style
126
- '
127
- <style type="text/css">
128
- div#tp_table {
129
- text-align: center;
130
- margin-top: 50px;
131
- margin-bottom: 50px;
132
- }
133
- span.code {
134
- font-family: Courier,Monospace;
135
- font-size: 80%;
136
- }
137
- table {
138
- border-width:1px;
139
- border-color:#CCCCCC;
140
- border-collapse: collapse;
141
- }
142
- caption {
143
- font-size: 90%;
144
- }
145
- td,th {
146
- padding-top: 2px;
147
- padding-bottom: 2px;
148
- padding-left: 1;
149
- padding-right: 1;
150
- }
151
- th.small {
152
- font-size: 80%;
153
- font-weight: normal;
154
- padding: 1px;
155
- }
156
- td.redline {
157
- background-color: #FF0000;
158
- color: #FFFFFF
159
- }
160
- div#plot {
161
- margin: 30px;
162
- text-align:center
163
- }
164
- hr {color: sienna}
165
- body { font-size: 8pt; font-family: Arial,Helvetica,Times}
166
- </style>
167
- '
168
-
169
- end
170
-
171
- def table
172
- "|<table border=\"1\" align=\"center\" style=\"font-size:100%\">
173
- | #{yield}
174
- |</table>\n".margin
175
- end
176
-
177
- def tr
178
- "|<tr>
179
- | #{yield}
180
- |</tr>\n".margin
181
- end
182
- end # module HTML
183
-
184
- class Prec
185
- include Prec::PlotHelper
186
-
187
- ###########################################################
188
- # GLOBAL SETTINGS:
189
- DATA_PREC = 4 # decimal places of precision for ppv data
190
- STDOUT_JTPLOT_BASE = "ppv" # if there is no outfile
191
- ###########################################################
192
-
193
- include Prec::HTML
194
-
195
- ## returns an html string
196
- def precision(argv)
197
- opt = parse_args(argv)
198
- files = argv.to_a
199
- out_string = create_precision_data(files, opt)
200
- [out_string, opt]
201
- end
202
-
203
- def run_cmd_line(argv)
204
- output_string, opt, file_as_decoy = precision(argv)
205
- if file_as_decoy
206
- puts output_string
207
- else
208
- ## open file and write to it..
209
- if opt.o == 'STDOUT'
210
- print output_string
211
- else
212
- File.open(opt.o,'w') do |fh| fh.print output_string end
213
- end
214
- end
215
- end
216
-
217
- # returns the outfile with no extension
218
- def outfile_noext(opt)
219
- if opt == 'STDOUT'
220
- "#{STDOUT_JTPLOT_BASE}"
221
- else
222
- opt.sub(/#{Regexp.escape(File.extname(opt))}$/, '')
223
- end
224
- end
225
-
226
- def file_noext(file)
227
- file.sub(/#{Regexp.escape(File.extname(file))}$/, '')
228
- end
229
-
230
- def parse_args(argv)
231
-
232
- opt = OpenStruct.new
233
- opt.o = 'STDOUT'
234
- opts = OptionParser.new do |op|
235
- op.banner = "Usage: #{File.basename(__FILE__)} [options] bioworks.xml|proph-prot.xml ..."
236
- op.separator ""
237
- op.separator "Abbreviations and Definitions:"
238
- op.separator " TP = True Positives"
239
- op.separator " FP = False Positives"
240
- op.separator " Precision = Positive Predictive Value = [TP/(TP+FP)]"
241
- op.separator ""
242
- op.separator "Output: "
243
- op.separator " 1. Decoy as separate search: PPV to STDOUT"
244
- op.separator " 2. Decoy proteins from concatenated database: '.html'"
245
- op.separator ""
246
- op.separator "Options:"
247
-
248
- op.on("-f", "--fp_data <prefix_or_file>", "flag -or- decoy FILE") {|v| opt.f = v }
249
- op.separator ""
250
- op.separator " If searched with a concatenated DB, give a false flag to decoy proteins."
251
- op.separator " If files have different flags, separate with commas."
252
- op.separator " If searched with a separate decoy DB, give the FILE name of decoy data"
253
- op.on("--prefix", "false flag as prefix only") {|v| opt.prefix = v }
254
- op.separator ""
255
- ## NOT YET FUNCTIONAL: op.on("-e", "--peptides", "do peptides instead of proteins")
256
- op.separator ""
257
- op.on("-o", "--outfile <file>", "write output to file (def: #{opt.o})") {|v| opt.o = v}
258
- op.on("-a", "--area", "output area under the curve instead of the plot") {|v| opt.a = v}
259
- op.on("-j", "--plot_file", "output to_plot file") {|v| opt.j = v}
260
- op.on_tail("
261
- Example:
262
- For a search on a concatenated database where the decoy proteins have
263
- been flagged with the prefix 'INV_' for both Bioworks and ProteinProphet
264
- output:
265
-
266
- #{File.basename(__FILE__)} -f INV_ bioworks.xml proph-prot.xml
267
-
268
- ")
269
- end
270
- opts.parse!(argv)
271
-
272
- if argv.size < 1
273
- puts opts
274
- exit
275
- end
276
-
277
- opt
278
- end
279
-
280
-
281
- ## collapses arrays to one level deep so we can sync them up
282
- def arrays_to_one_level_deep(all_arrs)
283
- mostly_flat = []
284
- all_arrs.each do |per_file|
285
- per_file.each do |per_style|
286
- mostly_flat << per_style[0]
287
- mostly_flat << per_style[1]
288
- end
289
- end
290
- mostly_flat
291
- end
292
-
293
- # prints rows and th for the data
294
- def table_cells(all_arrs, key)
295
- ## columns specific headings:
296
- all_string = ""
297
- all_string << tr do
298
- line = ""
299
- key.each do |per_file|
300
- per_file.each do |per_ds|
301
- line << "<th class=\"small\">#{per_ds[1][0]}</th><th class=\"small\">#{per_ds[1][1]}</th>"
302
- end
303
- end
304
- line
305
- end
306
- mostly_flat = arrays_to_one_level_deep(all_arrs)
307
- SyncEnumerator.new(*mostly_flat).each do |row|
308
- all_string << tr do
309
- string = row.map {|it|
310
- sty="%d"
311
- if it.class == Float ; sty="%.#{DATA_PREC}f" end
312
- td{ sprintf(sty,it)}
313
- }.join
314
- end
315
- end
316
- all_string
317
- end
318
-
319
- def html_table_output(all_arrs, key, files, filename_noext)
320
- num_datasets_per_file = all_arrs.first.size
321
- num_cols_per_dataset = 2
322
- big_colspan = num_datasets_per_file * num_cols_per_dataset
323
- output = table do
324
- tr do
325
- files.map do |file|
326
- "<th colspan=\"#{big_colspan}\">#{file}</th>"
327
- end.join
328
- end +
329
- tr do
330
- key.map do |arr|
331
- arr.map do |ds|
332
- "<th colspan=\"2\">#{ds.first}</th>"
333
- end
334
- end
335
- end +
336
- table_cells(all_arrs, key)
337
- end
338
- "<div id=\"tp_table\">" + output + "</div>"
339
- end
340
-
341
-
342
- def y_axis_label(key)
343
- ## We only take the keys for the first file, as it's assumed that the major
344
- ## labels will be identical for all of them
345
- labels = key.first.map {|tp| tp.first }.uniq
346
- labels.join " | "
347
- end
348
-
349
- # escapes any ' chars
350
- def escape_to_gnuplot(string)
351
- # long way, but it works.
352
- new_string = ""
353
- string.split(//).each do |chr|
354
- if chr == "'" ; new_string << "\\" end
355
- new_string << chr
356
- end
357
- new_string
358
- end
359
-
360
- # if opt.f, then a prefix is assumed.
361
- # if a file =~ /-prot.xml$/ then a precision plot based on probability is
362
- # also created
363
- def create_precision_data(files, opt)
364
- #$stderr.puts "using prefix #{opt.f} ..."
365
-
366
- if opt.f
367
- prefix_arr = SpecID.extend_args(opt.f, files.size)
368
- end
369
- all_arrs = []
370
- key = []
371
- out_noext = outfile_noext(opt.o)
372
- files.each_with_index do |file,i|
373
- all_arrs[i] = []
374
- key[i] = []
375
- sp = SpecID.new(file)
376
- #headers = ["#{file_noext(file)} Precision [TP/(TP+FP)]", "#{file_noext(file)} FPR [FP/(FP+TP)]"]
377
- if opt.f
378
- (num_hits, ppv) = sp.num_hits_and_ppv_for_prob(prefix_arr[i], opt.prefix)
379
- all_arrs[i] << [num_hits,ppv]
380
- key[i] << ["Precision", ["# hits", "Prec (decoy)"]]
381
- end
382
- if file =~ /-prot\.xml$/
383
- ## These are just from protein prophet probabilities:
384
- (num_hits, ppv) = sp.num_hits_and_ppv_for_protein_prophet_probabilities
385
- all_arrs[i] << [num_hits,ppv]
386
- key[i] << ["Precision", ["# hits", "Prec (prob)"]]
387
- end
388
- end
389
-
390
- string = ''
391
- if opt.a
392
- roc = ROC.new
393
- #string << "***********************************************************\n"
394
- #string << "AREA UNDER CURVE:\n"
395
- key.each_with_index do |file,i|
396
- string << "#{files[i]} (area under curve)\n"
397
- key[i].each_index do |j|
398
- string << "#{key[i][j][0]} [#{ key[i][j][1]}]:\t"
399
- num_hits = all_arrs[i][j][0]
400
- oth = all_arrs[i][j][1]
401
- string << roc.area_under_curve(num_hits, oth).to_s << "\n"
402
- end
403
- end
404
- #string << "***********************************************************\n"
405
- else
406
- if opt.j
407
- create_to_plot_file(all_arrs, key, files, out_noext)
408
- end
409
- string = html do
410
- header +
411
- body do
412
- plot_figure(all_arrs, key, files, out_noext) +
413
- html_table_output(all_arrs, key, files, out_noext)
414
- end
415
- end
416
- end
417
- string
418
- end
419
-
420
- end # class SpecID
421
-
data/lib/toppred.rb DELETED
@@ -1,18 +0,0 @@
1
-
2
- # reader for the http://bioweb.pasteur.fr/seqanal/interfaces/toppred.html
3
- # output
4
- class TopPred
5
-
6
- attr_accessor :hmmmm
7
-
8
- def initialize(toppred_out_file=nil)
9
- if toppred_out_file
10
- from_file(toppred_out_file)
11
- end
12
- end
13
-
14
- def from_file(toppred_out_file)
15
- end
16
-
17
- end
18
-
@@ -1,164 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id'
4
- require 'optparse'
5
- require 'ostruct'
6
-
7
- DELIMITER = "\t"
8
-
9
- $opt = OpenStruct.new
10
- $opt.deltacn = 0.2
11
- $opt.charge1 = 1.5
12
- $opt.charge2 = 2.0
13
- $opt.charge3 = 2.5
14
-
15
- opts = OptionParser.new do |op|
16
- op.banner = "usage: #{File.basename(__FILE__)} [options] prefixlist bioworks.xml ..."
17
- op.on("-1", "--charge1 <cutoff>", "xcorr <= cutoff for charge (#{$opt.charge1})") { |v| $opt.charge1 = v.to_f }
18
- op.on("-2", "--charge2 <cutoff>", "xcorr <= cutoff for charge (#{$opt.charge2})") { |v| $opt.charge2 = v.to_f }
19
- op.on("-3", "--charge3 <cutoff>", "xcorr <= cutoff for charge (#{$opt.charge3})") { |v| $opt.charge3 = v.to_f }
20
- op.on("-d", "--deltacn <cutoff>", "deltacn >= cutoff (#{$opt.deltacn})") { |v| $opt.deltacn = v.to_f }
21
- end
22
-
23
- opts.parse!
24
-
25
- if ARGV.size < 2
26
- puts opts
27
- exit
28
- end
29
-
30
-
31
- prefix_list = ARGV.shift
32
- prefixes = prefix_list.split ","
33
- files = ARGV.to_a
34
-
35
- ## Fill in the prefix array with the last prefix given
36
- last_prefix = prefixes.first
37
- if files.size > prefixes.size
38
- files.each_with_index do |file,i|
39
- if prefixes[i]
40
- last_prefix = prefixes[i]
41
- else
42
- prefixes[i] = last_prefix
43
- end
44
- end
45
- end
46
-
47
- ###############################
48
- #CH1 = 1.0
49
- #CH2 = 2.0
50
- #CH3 = 3.0
51
- #DELTACN = 0.2
52
- ###############################
53
-
54
- def passes(pep)
55
- if pep.deltacn <= $opt.deltacn
56
- case pep.charge
57
- when 1
58
- pep.xcorr >= $opt.charge1
59
- when 2
60
- pep.xcorr >= $opt.charge2
61
- when 3
62
- pep.xcorr >= $opt.charge3
63
- end
64
- else
65
- false
66
- end
67
- end
68
-
69
-
70
- # adds two categories with results from the hash
71
- def analyze(pep_groups, category, hash)
72
- best = best_xcorr(pep_groups)
73
- top10 = top10_xcorr(pep_groups)
74
- hash[category+"Best"] = filter(best).size
75
- hash[category+"Top10"] = filter(top10).size
76
- end
77
-
78
- # returns a hash containing the number of peptides passing the thresholds
79
- def number_passing(peps)
80
- np = {}
81
- np["PepProts"] = filter(peps).size
82
-
83
- by_scan_charge = peps.hash_by(:base_name, :first_scan, :last_scan, :charge).values
84
- analyze(by_scan_charge, "ScanCharge", np)
85
-
86
- by_scan = peps.hash_by(:base_name, :first_scan, :last_scan).values
87
- analyze(by_scan, "Scan", np)
88
-
89
- by_seq_charge = peps.hash_by(:base_name, :sequence, :charge).values
90
- analyze(by_seq_charge, "SeqCharge", np)
91
-
92
- np
93
- end
94
-
95
-
96
- # key = :symbol, val = [:lt|:gt|:lte|:gte, val]
97
- def filter(peps)
98
- peps.select {|pep| passes(pep)}
99
- end
100
-
101
- def top10_xcorr(pep_groups)
102
- peptides_by_tens = []
103
- pep_groups.each do |group|
104
- arr = group.sort {|a,b| b.xcorr <=> a.xcorr }.slice(0,10)
105
- peptides_by_tens.push(*arr)
106
- end
107
- peptides_by_tens
108
- end
109
-
110
- def best_xcorr(pep_groups)
111
- min_peptides = pep_groups.collect do |group|
112
- group.max {|a,b| a.xcorr <=> b.xcorr }
113
- end
114
- end
115
-
116
- headers = %w(PepProts ScanChargeBest ScanChargeTop10 ScanBest ScanTop10 SeqChargeBest SeqChargeTop10)
117
- csv_headers = headers.dup
118
- csv_headers.unshift "FILENAME"
119
-
120
- puts csv_headers.join(DELIMITER)
121
-
122
-
123
- files.each_with_index do |file,i|
124
-
125
- obj = SpecID.new(file)
126
- obj.peps = obj.pep_prots
127
-
128
-
129
- obj.peps.each do |pep|
130
- pep.charge = pep.charge.to_i
131
- pep.xcorr = pep.xcorr.to_f
132
- pep.deltacn = pep.deltacn.to_f
133
- end
134
-
135
-
136
- re_prefix = /^#{Regexp.escape(prefixes[i])}/
137
- prc = proc {|it| it.prots.first.reference =~ re_prefix }
138
- #(match, nomatch) = obj.classify(:peps, prc)
139
- (fp, tp) = obj.classify(:peps, prc)
140
-
141
-
142
- (fp_pass, tp_pass) = [fp,tp].map {|v| number_passing(v) }
143
-
144
- # print to file out
145
-
146
- tp = headers.map do |head|
147
- tp_pass[head]
148
- end
149
- fp = headers.map do |head|
150
- fp_pass[head]
151
- end
152
- diffs = []
153
- tp.each_index do |i|
154
- diffs << (tp[i] - fp[i])
155
- end
156
- tp.unshift("TP: " + file)
157
- fp.unshift("FP: " + file)
158
- diffs.unshift("DIFF: " + file)
159
- puts tp.join(DELIMITER)
160
- puts fp.join(DELIMITER)
161
- puts diffs.join(DELIMITER)
162
-
163
- end
164
-
data/test/tc_aa_freqs.rb DELETED
@@ -1,59 +0,0 @@
1
-
2
-
3
- require 'test/unit'
4
- require 'spec_id/aa_freqs'
5
-
6
-
7
- class FastaTest < Test::Unit::TestCase
8
-
9
- def initialize(arg)
10
- super(arg)
11
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
12
- @sf = @tfiles + "small.fasta"
13
- end
14
-
15
- def test_basic
16
- obj = SpecID::AAFreqs.new(@sf)
17
- expect = {:I=>0.0628918621937819, :S=>0.0539719475147049, :D=>0.0526145691939758, :Z=>0.0, :L=>0.102772929998061, :T=>0.0491888048607071, :E=>0.0609527503070261, :O=>0.0, :C=>0.0157714433456144, :K=>0.0471850559110594, :U=>0.0, :Q=>0.0382651412319824, :W=>0.0137030573330748, :A=>0.101997285243359, :M=>0.0294745006786892, :J=>0.0, :G=>0.0811195139292871, :Y=>0.0254670027793937, :X=>0.0, :F=>0.0418201796910348, :R=>0.0546829552065154, :V=>0.0702604873634542, :H=>0.0213302307543145, :B=>0.0, :N=>0.03471010277293, :P=>0.0418201796910348}
18
- aaf = obj.aafreqs
19
- expect.each do |k,v|
20
- assert(aaf.key?(k))
21
- assert_in_delta(v, aaf[k], 0.00000001, "freqs match up")
22
- end
23
- sum = 0.0
24
- aaf.values.each do |v|
25
- sum += v
26
- end
27
- assert_in_delta(1.0, sum, 0.0000000000001, "all freqs add to 1")
28
- end
29
-
30
- def test_probability_of_length_table
31
- # p SpecID::AAFreqs.probability_of_length_table(0.01, 4)
32
- assert_equal_arrs_in_delta([0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001], SpecID::AAFreqs.probability_of_length_table(0.01, 4), 0.000000001)
33
-
34
- assert_equal_arrs_in_delta([0.0, 0.2, 0.36, 0.488, 0.5904], SpecID::AAFreqs.probability_of_length_table(0.2, 4), 0.000000001)
35
- end
36
-
37
- def test_actual_and_expected_number
38
- fobj = Fasta.new.read_file(@sf)
39
- obj = SpecID::AAFreqs.new
40
- obj.aafreqs = obj.calculate_frequencies(fobj)
41
-
42
- peptide_aaseqs = fobj.prots.map do |prot|
43
- prot.aaseq[0..12]
44
- end
45
- assert_equal(50, peptide_aaseqs.size, 'sanity check')
46
- (ac,ex) = obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
47
- assert_equal(9, ac)
48
- assert_in_delta( 9.33530631238985, ex, 0.0000000001)
49
- end
50
-
51
- private
52
- def assert_equal_arrs_in_delta(expect, actual, delta)
53
- expect.each_with_index do |v,i|
54
- assert_in_delta(v, actual[i], delta)
55
- end
56
- end
57
-
58
-
59
- end