mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,160 +0,0 @@
1
-
2
- require 'validator/cmdline'
3
- require 'spec_id'
4
-
5
- module SpecID
6
- module Precision
7
- class Prob
8
- class CmdlineParser
9
-
10
- DEFAULTS = SpecID::Precision::Prob::PN_DEFAULTS.merge( { :output => [[:csv, nil]], } )
11
-
12
-
13
- COMMAND_LINE = {
14
- :sort_by_init => ['--sort_by_init', "sort the proteins based on init probability"],
15
- :perc_qval => ['--perc_qval', "use percolator q-values to calculate precision"],
16
- :to_qvalues => ['--to_qvalues', "transform probabilities into q-values",
17
- "(includes pi_0 correction)",
18
- "uses PROB [TYPE] if given and supercedes",
19
- "the prob validation type",
20
- "*NOTE: include all PeptideProphet results",
21
- "(don't use any low prob cutoff) for",
22
- "accurate results!"],
23
- :prob => ['--prob [TYPE]', "use prophet probabilites to calculate precision",
24
- "TYPE = nsp [default] prophet nsp",
25
- " (nsp also should be used for PeptideProphet results)",
26
- " = init (for ProteinProphet results) use initial",
27
- "probability instead of nsp probability",
28
- ],
29
- # OUTPUT
30
- :proteins => ["--proteins", "includes proteins (and validation)"],
31
- :output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
32
- "can be used multiple times",
33
- ":FILENAME is the filename to use (defaults to STDOUT)",
34
- "valid formats are:",
35
- " csv (default)",
36
- " to_plot",
37
- " calc_bkg_to_plot",
38
- " yaml",
39
- #" protein_summary (need to implement)",
40
- #" html_table (need to implement)"
41
- ],
42
-
43
- # VALIDATION MODIFIERS:
44
- :pephits => ["--pephits <file>.srg", "an srg file pointing to the srf files for",
45
- "the given -prot.xml run",
46
- "[this or --digestion must be used for applicable]",
47
- "validators (validators depending on a",
48
- "false/total ratio)]"],
49
- }.merge( Validator::Cmdline::COMMAND_LINE )
50
-
51
-
52
- # returns (spec_id_obj, options, option_parser_obj)
53
- def parse(args)
54
- opts = {}
55
- opts[:output] = []
56
- @out_used = false
57
- opts[:sequest] = {}
58
- opts[:validators] = []
59
- # defaults
60
-
61
- option_parser = OptionParser.new do |op|
62
- def op.opt(arg, &block)
63
- on(*COMMAND_LINE[arg], &block)
64
- end
65
-
66
- def op.val_opt(arg, opts)
67
- on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
68
- end
69
-
70
- def op.exact_opt(opts, arg)
71
- on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
72
- end
73
-
74
- op.banner = "USAGE: #{File.basename($0)} [OPTS] <file>-prot.xml | <file>.sqg"
75
- op.separator ""
76
- op.separator " RETURNS: precision across the number of hits"
77
- op.separator " (based on probability or q-value)"
78
- op.separator " (optional) other validation of the results."
79
- op.separator ""
80
-
81
- op.separator "OUTPUT OPTIONS: "
82
- op.opt(:proteins) {|v| opts[:proteins] = true }
83
- op.opt(:output) do |output|
84
- # copied from rspec:
85
- # This funky regexp checks whether we have a FILE_NAME or not
86
- where = nil
87
- if (output =~ /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/) && ($2 != '')
88
- output = $1
89
- where = $2
90
- else
91
- raise "When using several --output options only one of them can be without a file" if @out_used
92
- @out_used = true
93
- end
94
- opts[:output] << [output, where]
95
- end
96
-
97
- op.separator "GENERAL OPTIONS:"
98
- op.separator ""
99
- op.opt(:sort_by_init) {|v| opts[:sort_by_init] = true }
100
- op.separator "VALIDATION OPTIONS: "
101
- op.separator " each option will calculate the precision"
102
- op.separator ""
103
-
104
- op.val_opt(:prob, opts)
105
- op.val_opt(:perc_qval, opts)
106
- op.val_opt(:to_qvalues, opts)
107
- op.val_opt(:decoy, opts)
108
- op.val_opt(:pephits, opts) # sets opts[:ties] = false
109
- op.val_opt(:digestion, opts)
110
- op.val_opt(:bias, opts)
111
- op.val_opt(:bad_aa, opts)
112
- op.val_opt(:bad_aa_est, opts)
113
-
114
- op.val_opt(:tmm, opts)
115
- op.val_opt(:fasta, opts)
116
- op.val_opt(:tps, opts)
117
-
118
- op.separator ""
119
- op.separator "VALIDATION MODIFIERS: "
120
- op.val_opt(:false_on_tie, opts) # sets opts[:ties] = false
121
-
122
- end
123
- option_parser.parse!(args)
124
-
125
- # prepare validators
126
-
127
- if args.size > 0
128
- spec_id_obj = ::SpecID.new(args[0])
129
- if opts[:ties] == nil # will be nil or false
130
- opts[:ties] = Validator::Cmdline::DEFAULTS[:ties]
131
- end
132
- postfilter =
133
- if spec_id_obj.class == SQTGroup or spec_id_obj.class == Proph::PepSummary
134
- #puts 'making background estimates with: top_per_scan'
135
- :top_per_scan
136
- else
137
- #puts 'making background estimates with: top_per_aaseq_charge'
138
- :top_per_aaseq_charge
139
- end
140
-
141
- opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], postfilter, spec_id_obj)
142
-
143
- if opts[:output].size == 0
144
- opts[:output] = DEFAULTS[:output]
145
- end
146
- else
147
- spec_id_obj = nil
148
- end
149
-
150
- [spec_id_obj, opts, option_parser]
151
- end # parse
152
- end # CmdlineParser
153
- end # Prob
154
- end # Precision
155
- end # SpecID
156
-
157
-
158
-
159
-
160
-
@@ -1,94 +0,0 @@
1
- require 'yaml'
2
- require 'spec_id/precision/output'
3
- require 'table'
4
- require 'matrix'
5
-
6
- module SpecID ; end
7
- module SpecID::Precision ; end
8
- class SpecID::Precision::Prob ; end
9
- class SpecID::Precision::Prob::Output
10
- include SpecID::Precision::Output
11
-
12
- # returns array of data arrays and parallel labels
13
- def to_cols_and_labels(answer_hash)
14
- col_labels = %w(count probability peptide)
15
- col_labels[1] = 'q_values' if answer_hash.key?(:q_values)
16
-
17
- cols = []
18
- cols << answer_hash[:count]
19
- if answer_hash.key?(:q_values)
20
- cols << answer_hash[:q_values]
21
- else
22
- cols << answer_hash[:probabilities]
23
- end
24
- cols << answer_hash[:aaseqs]
25
-
26
-
27
- # if there is a single modified peptide, we'll include the column
28
- if answer_hash.key?(:modified_peptides)
29
- cols << answer_hash[:modified_peptides]
30
- col_labels.push( 'modified_peptide' )
31
- end
32
-
33
- col_labels.push( 'charge' )
34
- cols << answer_hash[:charges]
35
-
36
- answer_hash[:pephits_precision].each do |ans|
37
- col_labels.push( "#{ans[:validator]} (prob)" )
38
- cols << ans[:values]
39
- end
40
-
41
- [cols, col_labels]
42
- end
43
-
44
- def csv(handle, answer_hash)
45
- (cols, col_labels) = to_cols_and_labels(answer_hash)
46
- table = Table.new(Matrix[*cols].transpose, nil, col_labels)
47
- handle.puts(table.to_s("\t"))
48
- end
49
-
50
- def to_plot(handle, answer_hash)
51
- tp = 'XYData'
52
- basename_noext =
53
- if handle.respond_to?(:path)
54
- out = File.basename(handle.path).sub(/\.(\w)+$/,'')
55
- else
56
- 'plot'
57
- end
58
- title = 'precision vs. num (aaseq+charge)'
59
- xlabel = 'num hits'
60
- ylabel = 'precision'
61
- [tp, basename_noext, title, xlabel, ylabel].each {|v| handle.puts v }
62
- answer_hash[:pephits_precision].each do |hash|
63
- handle.puts hash[:validator] # label
64
- handle.puts answer_hash[:count] # x vals
65
- handle.puts hash[:values] # y vals
66
- end
67
- end
68
-
69
- def calc_bkg_to_plot(handle, answer_hash)
70
- tp = 'XYData'
71
- basename_noext =
72
- if handle.respond_to?(:path)
73
- out = File.basename(handle.path).sub(/\.(\w)+$/,'')
74
- else
75
- 'calc_bkg_plot'
76
- end
77
- title = 'background vs. num (aaseq+charge)'
78
- xlabel = 'num hits'
79
- ylabel = 'background (false/total)'
80
- [tp, basename_noext, title, xlabel, ylabel].each {|v| handle.puts v }
81
- answer_hash[:params][:validators].each do |hash|
82
- handle.puts hash[:name] # label
83
- handle.puts answer_hash[:count] # x vals
84
- handle.puts hash[:calculated_backgrounds] # y vals
85
- end
86
- end
87
-
88
- def yaml(handle, answer_hash)
89
- handle.puts answer_hash.to_yaml
90
- end
91
-
92
- end
93
-
94
-
@@ -1,249 +0,0 @@
1
- # note that we require 'spec_id/precision/prob/cmdline' below!
2
-
3
- require 'spec_id/precision/prob/output'
4
-
5
- module SpecID ; end
6
- module SpecID::Precision ; end
7
-
8
-
9
- # for probability based spec identifications (true probabilities, not the
10
- # bioworks p-value (which they call probability)).
11
- class SpecID::Precision::Prob
12
-
13
- PN_DEFAULTS = {
14
- :proteins => false,
15
- :validators => [],
16
- :sort_by_init => false,
17
- }
18
-
19
- require 'spec_id/precision/prob/cmdline'
20
-
21
- def precision_vs_num_hits_cmdline(args)
22
- (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
23
- if spec_id_obj == nil
24
- puts option_parser
25
- return
26
- end
27
- final_answer = SpecID::Precision::Prob.new.precision_vs_num_hits(spec_id_obj, options)
28
- options[:output].each do |output|
29
- output[1] = $stdout unless output[1]
30
- SpecID::Precision::Prob::Output.new(*output).print(final_answer).close
31
- end
32
- end
33
-
34
- # this is the way I was doing it:
35
- # ajdusted = (1+R)*prec / (R*precision +1)
36
- # # where R is the decoy_to_target ratio
37
-
38
- # opts may include:
39
- # :proteins => true|*false
40
- # :validators => array of Validator objects
41
- #
42
- # This method will adjust the precision in the *probability* validators
43
- # used in the decoy validator (both terms with pi_0 in the denominator go
44
- # to zero if there is no decoy validator and the precision is not
45
- # adjusted)
46
- #
47
- # ajdusted = (1+(1/pi_0))*prec / ((precision/pi_0) +1)
48
- # # where pi_0 is the ratio incorrect target hits to total decoy hits
49
- #
50
- # NOTE: if you have decoy data, you MUST pass in a decoy validator for the
51
- # decoy pephits to be removed from other validator analyses!
52
- #
53
- # returns a hash of data
54
- # :pephits_precision => [{validator => <name>, values => [<precision>,...]},... ]
55
- # :params => :validators => [array of validators] (includes
56
- # :calculated_backgrounds)
57
- # :aaseqs => array of aaseqs
58
- # :charges => array of charge
59
- # :modified_peptides => array of modified sequence (only included if
60
- # applicable)
61
- #
62
- # NOTE: For protein prophet, the results are given on a peptide+charge
63
- # basis.
64
- #
65
- # TODO: implement tihs guy:
66
- # prothits_precision => {validator => <name>, values => {worst => ,
67
- # normal, normal_stdev } }
68
- def precision_vs_num_hits(spec_id, opts={})
69
-
70
- opt = PN_DEFAULTS.merge(opts)
71
-
72
- out = {}
73
- num_pephits = [] # NOTE!: these are aaseq/aaseq_mod + charge for Prophet
74
- val_hash = Hash.new {|hash,key| hash[key] = [] }
75
- val_calc_bkg_hash = Hash.new {|hash,key| hash[key] = [] }
76
- pepstrings = []
77
- modified_peptides = []
78
- pepcharges = []
79
- probabilities = []
80
- found_modified_peptide = false
81
-
82
- check_precisions = []
83
- check_precisions_decoy = []
84
-
85
- # do we need to deal with decoy peptides? (true/false)
86
- validators = opt[:validators].map
87
- decoy_vals = validators.select {|val| val.class == Validator::Decoy }
88
-
89
-
90
- if decoy_vals.size > 1
91
- raise(ArgumentError, "only one decoy validator allowed!")
92
- else
93
- decoy_val = decoy_vals.first
94
- if decoy_val
95
- pi_zero = decoy_val.pi_zero
96
- end
97
- end
98
-
99
- validators.delete(decoy_val)
100
- other_validators = validators
101
-
102
- (probability_validators, other_validators) = other_validators.partition {|val| val.class == Validator::Probability }
103
- if opt[:initial_probability]
104
- probability_validators.each do |pv|
105
- pv.prob_method = :initial_probability
106
- end
107
- end
108
-
109
- n_count = 0
110
- d_count = 0
111
-
112
-
113
- # this is a peptide prophet
114
- is_peptide_prophet =
115
- if spec_id.peps.first.respond_to?(:fval) ; true
116
- else ;false
117
- end
118
-
119
- use_q_value = other_validators.any? {|v| v.class == Validator::QValue }
120
-
121
- ## ORDER THE PEPTIDE HITS:
122
- ordered_peps =
123
- if use_q_value
124
- spec_id.peps.sort_by {|v| v.q_value }
125
- elsif is_peptide_prophet
126
- spec_id.peps.reject {|v| v.probability == -1.0}.sort_by {|v| v.probability }.reverse
127
- else
128
- if opt[:sort_by_init]
129
- spec_id.peps.sort_by{|v| [v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides] }.reverse
130
- else
131
- spec_id.peps.sort_by{|v| [v.nsp_adjusted_probability, v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides] }.reverse
132
- end
133
- end
134
-
135
- # for probability based precision with decoy database (not using prophet's
136
- # -d flag) we do this:
137
- # foreach peptide.sorted_by_probability
138
- # 1. update the running precision of the validator REGARDLESS of
139
- # decoy/target status of peptide. the internal hit counts are
140
- # incremented.
141
- # 2. only increment reported HIT COUNTS on a non-decoy hit and record
142
- # the precision as (1+R)*prec / (R*precision +1) where R is the ratio of
143
- # decoy hits to target hits. If it is 1:1 (R = 1) then this becomes:
144
- # 2*prec / (prec + 1)
145
-
146
- ## WORK THROUGH EACH PEPTIDE:
147
- ordered_peps.each_with_index do |pep,i|
148
- # probability validators must work on the entire set of normal and decoy
149
-
150
- last_prob_values = probability_validators.map do |val|
151
- reply = val.increment_pephits_precision(pep)
152
- check_precisions << reply
153
- reply
154
- end
155
-
156
- it_is_a_normal_pep =
157
- if decoy_val
158
- # get the decoy precision
159
- decoy_precision = decoy_val.increment_pephits_precision(pep)
160
-
161
- # continue with ONLY normal peptides
162
- is_normal = (decoy_val.normal_peps_just_submitted.size > 0)
163
- else
164
- true
165
- end
166
-
167
- if it_is_a_normal_pep
168
- check_precisions_decoy << false
169
- else
170
- check_precisions_decoy << true
171
- end
172
-
173
- if it_is_a_normal_pep
174
- n_count += 1
175
-
176
- # UPDATE validators:
177
- val_hash[decoy_val].push(decoy_precision) if decoy_val
178
- probability_validators.zip(last_prob_values) do |val,prec|
179
- if decoy_val
180
- raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
181
- val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
182
- else
183
- val_hash[val] << prec
184
- end
185
- end
186
- other_validators.each do |val|
187
- val_hash[val] << val.increment_pephits_precision(pep)
188
- if val.is_a? Validator::DigestionBased
189
- val_calc_bkg_hash[val] << val.calculated_background
190
- end
191
- end
192
-
193
- # UPDATE other basic useful information:
194
- if pep.respond_to?(:mod_info)
195
- modified_pep_string =
196
- if pep.mod_info
197
- found_modified_peptide = true
198
- pep.mod_info.modified_peptide
199
- else
200
- nil
201
- end
202
- modified_peptides << modified_pep_string
203
- else
204
- modified_pep_string =
205
- if pep.sequence =~ /[^A-Z\-\.]/
206
- found_modified_peptide = true
207
- pep.sequence
208
- else
209
- nil
210
- end
211
- modified_peptides << modified_pep_string
212
- end
213
- pepcharges << pep.charge
214
- pepstrings << pep.aaseq
215
- probabilities << pep.probability # this is the q_value if percolator
216
- num_pephits << (i+1)
217
- else
218
- d_count += 1
219
- end
220
- end
221
- if found_modified_peptide
222
- out[:modified_peptides] = modified_peptides
223
- end
224
- if use_q_value
225
- out[:q_values] = probabilities
226
- else
227
- out[:probabilities] = probabilities
228
- end
229
- # out[:pephits] = ordered_peps # just in case they want to see
230
- out[:count] = num_pephits
231
- out[:aaseqs] = pepstrings
232
- out[:charges] = pepcharges
233
- out[:pephits_precision] = opt[:validators].map do |val|
234
- hsh = {}
235
- hsh[:validator] = Validator::Validator_to_string[val.class.to_s]
236
- hsh[:values] = val_hash[val]
237
- hsh
238
- end
239
- out[:params] = {}
240
- out[:params][:validators] = Validator.sensible_validator_hashes(opt[:validators]).zip(opt[:validators]).map do |hash,val|
241
- hash.delete(:calculated_background)
242
- hash[:calculated_backgrounds] = val_calc_bkg_hash[val]
243
- hash
244
- end
245
- out
246
- end
247
- end
248
-
249
-
@@ -1,104 +0,0 @@
1
-
2
- require 'arrayclass'
3
- require 'spec_id/sequest/pepxml'
4
- require 'spec_id/parser/proph'
5
-
6
- module Sequest ; end
7
- class Sequest::PepXML ; end
8
- class Sequest::PepXML::MSMSRunSummary ; end
9
- class Sequest::PepXML::SearchHit ; end
10
-
11
- module SpecID ; end
12
- module SpecID::Prot ; end
13
- module SpecID::Pep ; end
14
-
15
-
16
-
17
- module Proph
18
-
19
- class PepSummary
20
- include SpecID
21
-
22
- Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /
23
-
24
- # inherits prots and peps
25
-
26
- # the protein groups
27
- # currently these are just xml nodes returned!
28
- attr_accessor :peptideprophet_summary
29
- attr_accessor :msms_run_summaries
30
- attr_accessor :version
31
-
32
- def hi_prob_best ; true end
33
-
34
- def get_version(file)
35
- answer = nil
36
- File.open(file) do |fh|
37
- 8.times do
38
- line = fh.gets
39
- answer =
40
- if line =~ Filetype_and_version_re_new
41
- $1.dup
42
- end
43
- break if answer
44
- end
45
- end
46
- raise(ArgumentError, "couldn't detect version in #{file}") unless answer
47
- answer
48
- end
49
-
50
- def search_hit_class
51
- PepSummary::Pep
52
- end
53
-
54
- def initialize(file=nil)
55
- if file
56
- @version = get_version(file)
57
- spec_id = SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
58
- end
59
- end
60
- end
61
-
62
- # this is a SpecID::Pep (by interface: not including stuff yet)
63
- class PepSummary::Pep < Sequest::PepXML::SearchHit
64
- # aaseq is defined in SearchHit
65
-
66
- %w(probability fval ntt nmc massd prots q_value).each do |guy|
67
- self.add_member(guy)
68
- end
69
-
70
- # returns self
71
- def from_pepxml_node(node)
72
- super(node)
73
-
74
- an_res = node.find_first('child::analysis_result')
75
- pp_n = an_res.find_first('child::peptideprophet_result')
76
- self.probability = pp_n['probability'].to_f
77
- pp_n.find('descendant::parameter').each do |par_n|
78
- case par_n['name']
79
- when 'fval'
80
- self.fval = par_n['value'].to_f
81
- when 'ntt'
82
- self.ntt = par_n['value'].to_i
83
- when 'nmc'
84
- self.nmc = par_n['value'].to_i
85
- when 'massd'
86
- self.massd = par_n['value'].to_f
87
- end
88
- end
89
- self
90
- end
91
- end
92
-
93
- ::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))
94
-
95
- class PepSummary::Prot
96
- def first_entry ; self[0] end ## name
97
- def reference ; self[0] + ' ' + self[1] end
98
- end
99
-
100
- end
101
-
102
-
103
-
104
-