mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,160 +0,0 @@
1
-
2
- require 'validator/cmdline'
3
- require 'spec_id'
4
-
5
module SpecID
  module Precision
    class Prob
      # Command line parser for the probability-based precision tool.
      #
      # Builds an OptionParser from COMMAND_LINE (this class's own switches
      # merged with Validator::Cmdline::COMMAND_LINE) and turns the parsed
      # switches into an options hash.
      class CmdlineParser

        # Prob's PN_DEFAULTS plus the default output spec: csv with no
        # filename.
        DEFAULTS = SpecID::Precision::Prob::PN_DEFAULTS.merge( { :output => [[:csv, nil]], } )

        # option key => argument list handed to OptionParser#on
        # (switch strings followed by help text lines).
        COMMAND_LINE = {
          :sort_by_init => ['--sort_by_init', "sort the proteins based on init probability"],
          :perc_qval => ['--perc_qval', "use percolator q-values to calculate precision"],
          :to_qvalues => ['--to_qvalues', "transform probabilities into q-values",
            "(includes pi_0 correction)",
            "uses PROB [TYPE] if given and supercedes",
            "the prob validation type",
            "*NOTE: include all PeptideProphet results",
            "(don't use any low prob cutoff) for",
            "accurate results!"],
          :prob => ['--prob [TYPE]', "use prophet probabilites to calculate precision",
            "TYPE = nsp [default] prophet nsp",
            " (nsp also should be used for PeptideProphet results)",
            " = init (for ProteinProphet results) use initial",
            "probability instead of nsp probability",
          ],
          # OUTPUT
          :proteins => ["--proteins", "includes proteins (and validation)"],
          :output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
            "can be used multiple times",
            ":FILENAME is the filename to use (defaults to STDOUT)",
            "valid formats are:",
            " csv (default)",
            " to_plot",
            " calc_bkg_to_plot",
            " yaml",
            #" protein_summary (need to implement)",
            #" html_table (need to implement)"
          ],

          # VALIDATION MODIFIERS:
          :pephits => ["--pephits <file>.srg", "an srg file pointing to the srf files for",
            "the given -prot.xml run",
            "[this or --digestion must be used for applicable]",
            "validators (validators depending on a",
            "false/total ratio)]"],
        }.merge( Validator::Cmdline::COMMAND_LINE )


        # Parses +args+ (OptionParser#parse! removes the recognized switches
        # from it, so +args+ is modified in place).
        #
        # returns (spec_id_obj, options, option_parser_obj)
        #
        # spec_id_obj is built from the first remaining positional argument,
        # or is nil when no positional argument is left (validators and the
        # default output are only prepared when one is present).
        #
        # raises RuntimeError if more than one --output is given without a
        # filename.
        def parse(args)
          opts = {}
          opts[:output] = []
          @out_used = false   # true once an --output without a filename was seen
          opts[:sequest] = {}
          opts[:validators] = []

          option_parser = OptionParser.new do |op|
            # register a switch with a caller-supplied handler
            def op.opt(arg, &block)
              on(*COMMAND_LINE[arg], &block)
            end

            # register a switch whose handler is the matching
            # Validator::Cmdline::PrepArgs entry
            def op.val_opt(arg, opts)
              on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
            end

            # register a switch that stores its value verbatim in opts[arg]
            def op.exact_opt(opts, arg)
              on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
            end

            op.banner = "USAGE: #{File.basename($0)} [OPTS] <file>-prot.xml | <file>.sqg"
            op.separator ""
            op.separator " RETURNS: precision across the number of hits"
            op.separator " (based on probability or q-value)"
            op.separator " (optional) other validation of the results."
            op.separator ""

            op.separator "OUTPUT OPTIONS: "
            op.opt(:proteins) {|v| opts[:proteins] = true }
            op.opt(:output) do |output|
              # copied from rspec:
              # This funky regexp checks whether we have a FILE_NAME or not
              where = nil
              if (output =~ /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/) && ($2 != '')
                output = $1
                where = $2
              else
                raise "When using several --output options only one of them can be without a file" if @out_used
                @out_used = true
              end
              opts[:output] << [output, where]
            end

            op.separator "GENERAL OPTIONS:"
            op.separator ""
            op.opt(:sort_by_init) {|v| opts[:sort_by_init] = true }
            op.separator "VALIDATION OPTIONS: "
            op.separator " each option will calculate the precision"
            op.separator ""

            op.val_opt(:prob, opts)
            op.val_opt(:perc_qval, opts)
            op.val_opt(:to_qvalues, opts)
            op.val_opt(:decoy, opts)
            # NOTE(review): the original comment here read "sets opts[:ties] =
            # false", identical to the one on :false_on_tie below -- it looks
            # copy-pasted; confirm against Validator::Cmdline::PrepArgs.
            op.val_opt(:pephits, opts)
            op.val_opt(:digestion, opts)
            op.val_opt(:bias, opts)
            op.val_opt(:bad_aa, opts)
            op.val_opt(:bad_aa_est, opts)

            op.val_opt(:tmm, opts)
            op.val_opt(:fasta, opts)
            op.val_opt(:tps, opts)

            op.separator ""
            op.separator "VALIDATION MODIFIERS: "
            op.val_opt(:false_on_tie, opts) # sets opts[:ties] = false

          end
          option_parser.parse!(args)

          # prepare validators

          if args.size > 0
            spec_id_obj = ::SpecID.new(args[0])
            if opts[:ties].nil? # will be nil or false
              opts[:ties] = Validator::Cmdline::DEFAULTS[:ties]
            end
            postfilter =
              if spec_id_obj.class == SQTGroup || spec_id_obj.class == Proph::PepSummary
                #puts 'making background estimates with: top_per_scan'
                :top_per_scan
              else
                #puts 'making background estimates with: top_per_aaseq_charge'
                :top_per_aaseq_charge
              end

            opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], postfilter, spec_id_obj)

            if opts[:output].size == 0
              # Hand out copies of the default [format, filename] pairs:
              # callers fill in the filename slot, which would otherwise
              # write into the shared DEFAULTS constant.
              opts[:output] = DEFAULTS[:output].map {|pair| pair.dup }
            end
          else
            spec_id_obj = nil
          end

          [spec_id_obj, opts, option_parser]
        end # parse
      end # CmdlineParser
    end # Prob
  end # Precision
end # SpecID
156
-
157
-
158
-
159
-
160
-
@@ -1,94 +0,0 @@
1
- require 'yaml'
2
- require 'spec_id/precision/output'
3
- require 'table'
4
- require 'matrix'
5
-
6
- module SpecID ; end
7
- module SpecID::Precision ; end
8
- class SpecID::Precision::Prob ; end
9
# Output formats for the answer hash produced by
# SpecID::Precision::Prob#precision_vs_num_hits. Each public format method
# takes an IO-like +handle+ (anything responding to #puts) and the answer
# hash.
class SpecID::Precision::Prob::Output
  include SpecID::Precision::Output

  # returns array of data arrays and parallel labels
  #
  # Column order: count, q_values (if present, otherwise probabilities),
  # peptide, modified_peptide (only if the hash has :modified_peptides),
  # charge, then one "<validator> (prob)" column per :pephits_precision
  # entry.
  def to_cols_and_labels(answer_hash)
    col_labels = %w(count probability peptide)
    col_labels[1] = 'q_values' if answer_hash.key?(:q_values)

    cols = []
    cols << answer_hash[:count]
    if answer_hash.key?(:q_values)
      cols << answer_hash[:q_values]
    else
      cols << answer_hash[:probabilities]
    end
    cols << answer_hash[:aaseqs]

    # the modified peptide column is only included when the answer hash
    # carries it
    if answer_hash.key?(:modified_peptides)
      cols << answer_hash[:modified_peptides]
      col_labels.push( 'modified_peptide' )
    end

    col_labels.push( 'charge' )
    cols << answer_hash[:charges]

    answer_hash[:pephits_precision].each do |ans|
      col_labels.push( "#{ans[:validator]} (prob)" )
      cols << ans[:values]
    end

    [cols, col_labels]
  end

  # Writes the columns as a tab-delimited table (one row per hit).
  def csv(handle, answer_hash)
    (cols, col_labels) = to_cols_and_labels(answer_hash)
    # cols are column arrays; transpose so each table row is one hit
    table = Table.new(Matrix[*cols].transpose, nil, col_labels)
    handle.puts(table.to_s("\t"))
  end

  # Writes an 'XYData' block: a five line header followed, for each
  # :pephits_precision entry, by its label, the x values (:count) and the
  # y values (the precision values).
  def to_plot(handle, answer_hash)
    write_xy_header(handle, 'plot', 'precision vs. num (aaseq+charge)', 'precision')
    answer_hash[:pephits_precision].each do |hash|
      handle.puts hash[:validator] # label
      handle.puts answer_hash[:count] # x vals
      handle.puts hash[:values] # y vals
    end
  end

  # Same layout as #to_plot, but the series come from
  # answer_hash[:params][:validators] and the y values are each validator's
  # :calculated_backgrounds.
  def calc_bkg_to_plot(handle, answer_hash)
    write_xy_header(handle, 'calc_bkg_plot', 'background vs. num (aaseq+charge)', 'background (false/total)')
    answer_hash[:params][:validators].each do |hash|
      handle.puts hash[:name] # label
      handle.puts answer_hash[:count] # x vals
      handle.puts hash[:calculated_backgrounds] # y vals
    end
  end

  # Dumps the whole answer hash as YAML.
  def yaml(handle, answer_hash)
    handle.puts answer_hash.to_yaml
  end

  private

  # Writes the five header lines shared by the plot formats: data type,
  # base name, title, x label, y label. The base name is the handle's file
  # name with its last extension removed when the handle has a #path,
  # otherwise +default_basename+.
  def write_xy_header(handle, default_basename, title, ylabel)
    basename_noext =
      if handle.respond_to?(:path)
        File.basename(handle.path).sub(/\.(\w)+$/,'')
      else
        default_basename
      end
    ['XYData', basename_noext, title, 'num hits', ylabel].each {|v| handle.puts v }
  end

end
93
-
94
-
@@ -1,249 +0,0 @@
1
- # note that we require 'spec_id/precision/prob/cmdline' below!
2
-
3
- require 'spec_id/precision/prob/output'
4
-
5
- module SpecID ; end
6
- module SpecID::Precision ; end
7
-
8
-
9
- # for probability based spec identifications (true probabilities, not the
10
- # bioworks p-value (which they call probability)).
11
class SpecID::Precision::Prob

  # default options for precision_vs_num_hits
  PN_DEFAULTS = {
    :proteins => false,
    :validators => [],
    :sort_by_init => false,
  }

  # required here (not at the top of the file) because the cmdline file
  # reads PN_DEFAULTS when it is loaded
  require 'spec_id/precision/prob/cmdline'

  # Command line entry point: parses +args+, computes the precision data
  # and prints it once per requested output. Prints the option parser
  # (usage) and returns nil when the parser yields no spec_id object.
  def precision_vs_num_hits_cmdline(args)
    (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
    if spec_id_obj.nil?
      puts option_parser
      return
    end
    final_answer = SpecID::Precision::Prob.new.precision_vs_num_hits(spec_id_obj, options)
    options[:output].each do |output|
      # work on a copy so the caller's (possibly shared default) output
      # spec is not modified when the missing destination is filled in
      output_args = output.dup
      output_args[1] = $stdout unless output_args[1]
      SpecID::Precision::Prob::Output.new(*output_args).print(final_answer).close
    end
  end

  # this is the way I was doing it:
  # adjusted = (1+R)*prec / (R*precision +1)
  # # where R is the decoy_to_target ratio

  # opts may include:
  #   :proteins => true|*false
  #   :validators => array of Validator objects
  #   :sort_by_init => true|*false (ProteinProphet ordering only: sort on
  #                    initial probability first instead of nsp adjusted
  #                    probability)
  #
  # This method will adjust the precision in the *probability* validators
  # used in the decoy validator (both terms with pi_0 in the denominator go
  # to zero if there is no decoy validator and the precision is not
  # adjusted)
  #
  # adjusted = (1+(1/pi_0))*prec / ((precision/pi_0) +1)
  # # where pi_0 is the ratio incorrect target hits to total decoy hits
  #
  # NOTE: if you have decoy data, you MUST pass in a decoy validator for the
  # decoy pephits to be removed from other validator analyses!
  #
  # returns a hash of data
  #   :pephits_precision => [{validator => <name>, values => [<precision>,...]},... ]
  #   :params => :validators => [array of validators] (includes
  #              :calculated_backgrounds)
  #   :count => array of (1-based) positions of the reported hits in the
  #             ordered hit list
  #   :probabilities (or :q_values when a QValue validator is used)
  #   :aaseqs => array of aaseqs
  #   :charges => array of charge
  #   :modified_peptides => array of modified sequence (only included if
  #                         applicable)
  #
  # raises ArgumentError if more than one decoy validator is given, or if
  # the decoy validator's pi_zero is 0 while probability validators are
  # being adjusted.
  #
  # NOTE: For protein prophet, the results are given on a peptide+charge
  # basis.
  #
  # TODO: implement this guy:
  # prothits_precision => {validator => <name>, values => {worst => ,
  # normal, normal_stdev } }
  def precision_vs_num_hits(spec_id, opts={})

    opt = PN_DEFAULTS.merge(opts)

    out = {}
    num_pephits = []  # NOTE!: these are aaseq/aaseq_mod + charge for Prophet
    val_hash = Hash.new {|hash,key| hash[key] = [] }
    val_calc_bkg_hash = Hash.new {|hash,key| hash[key] = [] }
    pepstrings = []
    modified_peptides = []
    pepcharges = []
    probabilities = []
    found_modified_peptide = false

    # do we need to deal with decoy peptides? (true/false)
    # Take a real Array copy: a block-less #map returns an Enumerator on
    # Ruby >= 1.8.7 (no #delete), and the copy keeps the caller's array and
    # the PN_DEFAULTS array untouched.
    validators = opt[:validators].dup
    decoy_vals = validators.select {|val| val.class == Validator::Decoy }

    if decoy_vals.size > 1
      raise(ArgumentError, "only one decoy validator allowed!")
    end
    decoy_val = decoy_vals.first
    pi_zero = decoy_val ? decoy_val.pi_zero : nil

    validators.delete(decoy_val)

    (probability_validators, other_validators) = validators.partition {|val| val.class == Validator::Probability }
    if opt[:initial_probability]
      probability_validators.each do |pv|
        pv.prob_method = :initial_probability
      end
    end

    # peptide prophet hits are recognized by responding to fval
    is_peptide_prophet = spec_id.peps.first.respond_to?(:fval) ? true : false

    use_q_value = other_validators.any? {|v| v.class == Validator::QValue }

    ## ORDER THE PEPTIDE HITS (best first):
    ordered_peps =
      if use_q_value
        spec_id.peps.sort_by {|v| v.q_value }
      elsif is_peptide_prophet
        spec_id.peps.reject {|v| v.probability == -1.0}.sort_by {|v| v.probability }.reverse
      else
        if opt[:sort_by_init]
          spec_id.peps.sort_by{|v| [v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides] }.reverse
        else
          spec_id.peps.sort_by{|v| [v.nsp_adjusted_probability, v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides] }.reverse
        end
      end

    # for probability based precision with decoy database (not using prophet's
    # -d flag) we do this:
    # foreach peptide.sorted_by_probability
    #   1. update the running precision of the validator REGARDLESS of
    #      decoy/target status of peptide. the internal hit counts are
    #      incremented.
    #   2. only increment reported HIT COUNTS on a non-decoy hit and record
    #      the precision as (1+R)*prec / (R*precision +1) where R is the ratio of
    #      decoy hits to target hits. If it is 1:1 (R = 1) then this becomes:
    #      2*prec / (prec + 1)

    ## WORK THROUGH EACH PEPTIDE:
    ordered_peps.each_with_index do |pep,i|
      # probability validators must work on the entire set of normal and decoy
      last_prob_values = probability_validators.map do |val|
        val.increment_pephits_precision(pep)
      end

      it_is_a_normal_pep =
        if decoy_val
          # get the decoy precision
          decoy_precision = decoy_val.increment_pephits_precision(pep)

          # continue with ONLY normal peptides
          decoy_val.normal_peps_just_submitted.size > 0
        else
          true
        end

      # decoy hits update the validators above but are not reported
      next unless it_is_a_normal_pep

      # UPDATE validators:
      val_hash[decoy_val].push(decoy_precision) if decoy_val
      probability_validators.zip(last_prob_values) do |val,prec|
        if decoy_val
          raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
          val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
        else
          val_hash[val] << prec
        end
      end
      other_validators.each do |val|
        val_hash[val] << val.increment_pephits_precision(pep)
        if val.is_a? Validator::DigestionBased
          val_calc_bkg_hash[val] << val.calculated_background
        end
      end

      # UPDATE other basic useful information:
      # the modified sequence comes from mod_info when the hit has that
      # accessor; otherwise a sequence containing anything besides A-Z, '-'
      # and '.' is taken to be modified. nil is recorded for unmodified hits.
      modified_pep_string =
        if pep.respond_to?(:mod_info)
          if pep.mod_info
            found_modified_peptide = true
            pep.mod_info.modified_peptide
          end
        elsif pep.sequence =~ /[^A-Z\-\.]/
          found_modified_peptide = true
          pep.sequence
        end
      modified_peptides << modified_pep_string

      pepcharges << pep.charge
      pepstrings << pep.aaseq
      probabilities << pep.probability # this is the q_value if percolator
      num_pephits << (i+1)
    end

    if found_modified_peptide
      out[:modified_peptides] = modified_peptides
    end
    if use_q_value
      out[:q_values] = probabilities
    else
      out[:probabilities] = probabilities
    end
    # out[:pephits] = ordered_peps # just in case they want to see
    out[:count] = num_pephits
    out[:aaseqs] = pepstrings
    out[:charges] = pepcharges
    out[:pephits_precision] = opt[:validators].map do |val|
      hsh = {}
      hsh[:validator] = Validator::Validator_to_string[val.class.to_s]
      hsh[:values] = val_hash[val]
      hsh
    end
    out[:params] = {}
    out[:params][:validators] = Validator.sensible_validator_hashes(opt[:validators]).zip(opt[:validators]).map do |hash,val|
      # replace the single background value with the per-hit series
      hash.delete(:calculated_background)
      hash[:calculated_backgrounds] = val_calc_bkg_hash[val]
      hash
    end
    out
  end
end
248
-
249
-
@@ -1,104 +0,0 @@
1
-
2
- require 'arrayclass'
3
- require 'spec_id/sequest/pepxml'
4
- require 'spec_id/parser/proph'
5
-
6
- module Sequest ; end
7
- class Sequest::PepXML ; end
8
- class Sequest::PepXML::MSMSRunSummary ; end
9
- class Sequest::PepXML::SearchHit ; end
10
-
11
- module SpecID ; end
12
- module SpecID::Prot ; end
13
- module SpecID::Pep ; end
14
-
15
-
16
-
17
module Proph

  # A PeptideProphet summary file exposed through the SpecID interface.
  class PepSummary
    include SpecID

    # captures the PeptideProphet version number from the file header
    Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /

    # inherits prots and peps

    # the protein groups
    # currently these are just xml nodes returned!
    attr_accessor :peptideprophet_summary, :msms_run_summaries, :version

    # higher probability is better for this kind of result
    def hi_prob_best
      true
    end

    # Scans the first 8 lines of +file+ for the PeptideProphet version
    # string and returns the captured version.
    # Raises ArgumentError when none of those lines matches.
    def get_version(file)
      version = nil
      File.open(file) do |io|
        8.times do
          if io.gets =~ Filetype_and_version_re_new
            version = $1.dup
            break
          end
        end
      end
      raise(ArgumentError, "couldn't detect version in #{file}") unless version
      version
    end

    # the class used for search hits of this summary
    def search_hit_class
      PepSummary::Pep
    end

    # With a +file+: records its version and has the PepProph parser
    # populate this object. Without one: leaves the object empty.
    def initialize(file=nil)
      return unless file
      @version = get_version(file)
      SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
    end
  end

  # this is a SpecID::Pep (by interface: not including stuff yet)
  class PepSummary::Pep < Sequest::PepXML::SearchHit
    # aaseq is defined in SearchHit

    %w(probability fval ntt nmc massd prots q_value).each {|member| self.add_member(member) }

    # Fills in the SearchHit fields (via super), then the probability
    # attribute and the fval/ntt/nmc/massd parameters found under the
    # node's peptideprophet_result element.
    # returns self
    def from_pepxml_node(node)
      super(node)

      analysis_node = node.find_first('child::analysis_result')
      prophet_node = analysis_node.find_first('child::peptideprophet_result')
      self.probability = prophet_node['probability'].to_f
      prophet_node.find('descendant::parameter').each do |param|
        case param['name']
        when 'fval' then self.fval = param['value'].to_f
        when 'ntt' then self.ntt = param['value'].to_i
        when 'nmc' then self.nmc = param['value'].to_i
        when 'massd' then self.massd = param['value'].to_f
        end
      end
      self
    end
  end

  ::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))

  class PepSummary::Prot
    # the protein name (first member)
    def first_entry
      self[0]
    end

    # name and description joined by a single space
    def reference
      self[0] + ' ' + self[1]
    end
  end

end
101
-
102
-
103
-
104
-