mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,431 +0,0 @@
1
- require 'validator'
2
-
3
- require 'validator/true_pos'
4
- require 'validator/aa'
5
- require 'validator/aa_est'
6
- require 'validator/bias'
7
- require 'validator/decoy'
8
- require 'validator/transmem'
9
- require 'validator/probability'
10
- require 'validator/q_value'
11
- require 'validator/prot_from_pep'
12
-
13
- ## these all for a stupid check...
14
- require 'spec_id/sqt'
15
- require 'spec_id/proph/prot_summary'
16
- require 'spec_id/proph/pep_summary'
17
-
18
- class Validator::Cmdline
19
-
20
# Lookup table from the validator symbol used in the option specs
# (the first element of each entry in opts[:validators]) to the class that
# prepare_validators instantiates for it.
Validator_symbols_to_classes = {
  :tmm        => Validator::Transmem::Protein,
  :decoy      => Validator::Decoy,
  :bad_aa     => Validator::AA,
  :bad_aa_est => Validator::AAEst,
  :tps        => Validator::TruePos,
  :bias       => Validator::Bias,
  :prob       => Validator::Probability,
  :qval       => Validator::QValue,
}
30
# was VAL_DEFAULTS
#
# Default values for the validator sub-options, grouped by validator symbol.
# Several of these values are interpolated into the COMMAND_LINE help text
# and serve as fallbacks in PrepArgs when a comma-separated argument is
# omitted on the command line.
DEFAULTS = {
  :tmm => {
    # file
    # NOTE(review): "file" presumably marks the required TM file argument,
    # which has no default -- confirm.
    :min_num_tmm_seqs   => 1,
    :expect_soluble     => true,
    :no_include_tm_peps => 0.8,
    :bkg                => 0.0,
  },
  :decoy => {
    :hits_together  => true,
    :decoy_on_match => true,
    :frit           => 1.0, # fraction incorrect targets (like PIT)
  },
  :bad_aa => {
    :false_if_found => true,
    :bkg            => 0.0,
  },
  :bad_aa_est => {
    :false_if_found => true,
    :bkg            => 0.0,
  },
  :bias => {
    :bkg               => 0.0,
    :proteins_expected => true,
  },
  :ties => true,
}
63
# Help-text specifications for the validator command-line options, keyed by
# option symbol (the same symbols are used as keys in PrepArgs).  Each value
# is an argument list: the switch string, optionally Array (to request
# comma-separated list coercion), then the description lines.
# NOTE(review): presumably each list is splatted into OptionParser#on; the
# consumer is not visible in this file -- confirm.
#
# Default values shown in the text are interpolated from DEFAULTS, so this
# constant must be defined after DEFAULTS.
COMMAND_LINE = {
  # The second sub-argument is parsed by PrepArgs[:decoy] as FRIT, so the
  # switch string names it FRIT (it used to say PIT while describing FRIT).
  :decoy => ["--decoy /REGEXP/|FILENAME[,FRIT,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
    "FILENAME of separate search on decoys.",
    "All regular expressions must be surrounded by '/'",
    "(no extended options [trailing modifiers]).",
    "e.g., a run using concatenated reversed proteins that",
    "includes 'REVERSE' in the fasta heading:",
    " --decoy /REVERSE/",
    "Anything fancier should be quoted:",
    " --decoy '/^\\s*REVERSE/'",
    "If decoys proteins were searched in a separate file,",
    "then give the FILENAME (e.g., --decoy decoy.srg)",
    "FRIT = Fraction Incorrect Targets (like",
    "the PIT as a fraction) (default: #{DEFAULTS[:decoy][:frit]})",
    "DOM = *true/false, decoy on match",],
  :tps => ["--tps <fasta>", "for a completely defined sample, this is the",
    "fasta file containing the true protein hits"],
  # may require digestion:
  :fasta => ["--fasta FASTA", "fasta file for phobius transmembrane",
    "(needed if PEPS options is not false)"],
  :digestion => ["--digestion ORIG_FASTA,PARAMS", Array, "[not recommended]",
    "Creates the 'false/total' ratio with in silico",
    "digestion. Otherwise, the 3rd-10th best hits (sorted by",
    "xcorr) are used.",
    "The following validators will use this",
    "information (shared between them) if option given",
    "ORIG_FASTA = the fasta file used to do the run",
    "PARAMS = the params file used to do the run",],
  :bias => ["--bias FASTA[,PE,BKG]", Array, "FASTA contains proteins expected to be in the sample",
    "PE = *true|false proteins in fasta file expected in sample",
    "BKG = Background frequency of fps (d: #{DEFAULTS[:bias][:bkg]})",],
  # BKG is optional for both amino-acid validators (PrepArgs falls back to
  # the default), so it is bracketed the same way as in --bias.
  :bad_aa => ["--bad_aa AA[,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
    "AA = The amino acid (e.g., 'C')",
    "BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa][:bkg]}):",],
  :bad_aa_est => ["--bad_aa_est AA[,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
    "AA = The amino acid (e.g., 'C')",
    "BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa_est][:bkg]}):",],

  :tmm => ["--tmm <TM[,MIN,SOL,PEPS,BKG]>", Array, "TM = phobius.small or toppred.out file",
    "phobius.small:",
    "http://phobius.cgb.ki.se/",
    "(select 'Short' output, and save output as file)",
    "toppred.out:",
    "http://bioweb.pasteur.fr/seqanal/interfaces/toppred.html",
    "(output 'toppred.out' in 'New' or 'Xml' format)",
    "MIN = Int, minimum number transmembrane seqs (def: #{DEFAULTS[:tmm][:min_num_tmm_seqs]})",
    "SOL = true|false, this is a soluble fraction( def: #{DEFAULTS[:tmm][:expect_soluble]})",
    "PEPS = Float | false, don't consider tm peps (>= fraction",
    " tm content) (false skips) (def: #{DEFAULTS[:tmm][:no_include_tm_peps]})",
    "BKG = Float , background contaminating insoluble (def: #{DEFAULTS[:tmm][:bkg]})"],


  # VALIDATION MODIFIERS
  # Giving this flag sets opts[:ties] = false, i.e. the incorrect assignment
  # wins a tie; without it (DEFAULTS[:ties] == true) the correct one wins.
  :false_on_tie => ["--false_on_tie", "if peptide belongs to correct AND incorrect proteins",
    "it will be counted as incorrect (default: counted as correct)"],

}
120
-
121
# Converts a command-line word into a boolean.
#
# arg::     the raw argument (usually a String, or nil when omitted)
# default:: value returned when arg is neither 'true' nor 'false'
#
# Returns true for the exact string 'true', false for the exact string
# 'false', and default for anything else (including nil and other spellings
# such as 'TRUE').
def self.boolean(arg, default)
  return true if 'true' == arg
  return false if 'false' == arg
  default
end
128
-
129
# Per-option argument processors, keyed by option symbol.  Every value is a
# lambda taking (parsed_argument, opts) that records the option in the opts
# hash: validator options append a spec of the form
# [validator_symbol, *positional_args, settings_hash] to opts[:validators];
# the remaining options store their value directly under a key of opts.
# Omitted sub-arguments fall back to the values in DEFAULTS.
PrepArgs = {
  # optional arg: 'nsp' (or nothing) selects :probability, 'init' selects
  # :initial_probability; anything else is rejected.
  :prob => lambda {|ar, opts|
    prob_method =
      if !ar || ar == 'nsp'
        :probability
      elsif ar == 'init'
        :initial_probability
      else
        raise ArgumentError, "--prob [arg], optional arg can only be 'nsp' or 'init'!"
      end
    opts[:validators].push([:prob, prob_method])
  },
  :perc_qval => lambda {|ar, opts| opts[:validators].push([:perc_qval]) },
  :to_qvalues => lambda {|ar, opts| opts[:validators].push([:to_qvalues]) },
  # ar = [/REGEXP/ or FILENAME, FRIT, DOM]
  :decoy => lambda {|ar, opts|
    spec = ar[0]
    constraint =
      if spec[0,1] == '/' && spec[-1,1] == '/'
        # surrounded by slashes: compile what is between them as a regexp
        Regexp.new(spec[1...-1])
      else
        # otherwise it has to be the name of an existing file
        raise ArgumentError, "File does not exist: #{spec}\n(was this supposed to be a regular expression? if so, should be given: /#{spec}/)" unless File.exist?(spec)
        spec
      end
    settings = {
      :constraint => constraint,
      :frit => (ar[1] || DEFAULTS[:decoy][:frit]).to_f,
      :decoy_on_match => self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match]),
    }
    opts[:validators].push([:decoy, settings])
  },
  :fasta => lambda {|arg, opts|
    opts[:fasta] = Fasta.new(arg)
  },
  # ar = [ORIG_FASTA, PARAMS]; keeps both the raw names and the loaded objects
  :digestion => lambda {|ar, opts|
    raise(ArgumentError, "need fasta and sequest params!") if ar.size != 2
    opts[:digestion] = ar.dup
    opts[:digestion_objects] = [Fasta.new(ar[0]), Sequest::Params.new(ar[1])]
  },
  # ar = [FASTA, PE, BKG, false_to_total_ratio]
  :bias => lambda {|ar, opts|
    expected_fasta = Fasta.new(ar[0])
    settings = {
      :proteins_expected => self.boolean(ar[1], DEFAULTS[:bias][:proteins_expected]),
      :background => (ar[2] ? ar[2].to_f : DEFAULTS[:bias][:bkg]),
    }
    settings[:false_to_total_ratio] = ar[3].to_f if ar[3]
    opts[:validators].push([:bias, expected_fasta, settings])
  },
  # ar = [AA, BKG, false_to_total_ratio]
  :bad_aa => lambda {|ar, opts|
    ## GET the FREQUENCY
    settings = {
      :background => (ar[1] ? ar[1].to_f : DEFAULTS[:bad_aa][:bkg]),
    }
    settings[:false_to_total_ratio] = ar[2].to_f if ar[2]
    opts[:validators].push([:bad_aa, ar[0], settings])
  },
  # ar = [AA, BKG, frequency]
  :bad_aa_est => lambda {|ar, opts|
    ## GET the FREQUENCY
    settings = {
      :background => (ar[1] ? ar[1].to_f : DEFAULTS[:bad_aa_est][:bkg]),
    }
    settings[:frequency] = ar[2].to_f if ar[2]
    opts[:validators].push([:bad_aa_est, ar[0], settings])
  },

  # ar = [TM file, MIN, SOL, PEPS, BKG, false_to_total_ratio]
  :tmm => lambda {|ar, opts|
    tm_peps_cutoff =
      if !ar[3]
        DEFAULTS[:tmm][:no_include_tm_peps]
      elsif ar[3] == 'false'
        # the literal word 'false' switches the cutoff off
        false
      else
        ar[3].to_f
      end
    settings = {
      :min_num_tms => (ar[1] ? ar[1].to_i : DEFAULTS[:tmm][:min_num_tmm_seqs]),
      :soluble_fraction => self.boolean(ar[2], DEFAULTS[:tmm][:expect_soluble]),
      :no_include_tm_peps => tm_peps_cutoff,
      :background => (ar[4] ? ar[4].to_f : DEFAULTS[:tmm][:bkg]),
    }
    settings[:false_to_total_ratio] = ar[5].to_f if ar[5]
    opts[:validators].push([:tmm, ar[0], settings])
  },
  :pephits => lambda {|v,opts| opts[:pephits] = SpecID.new(v) },
  :tps => lambda {|v,opts| opts[:validators].push([:tps, Fasta.new(v)]) },
  :false_on_tie => lambda {|v,opts| opts[:ties] = false },
}
256
-
257
# Tells whether background ratios for spec_id_obj can only be computed from
# an external set of peptide hits (--pephits) rather than from the object's
# own peptides.
#
# spec_id_obj:: the loaded identification object
#
# Returns true for Proph::ProtSummary and Proph::PepSummary objects, true for
# an SQTGroup whose first peptide responds to #q_value, and false otherwise
# (including an SQTGroup with no peptides).
def self.requires_pephits?(spec_id_obj)
  # `when X then y` replaces the `when X : y` form, which is a syntax error
  # since Ruby 1.9.
  case spec_id_obj
  when Proph::ProtSummary
    true
  when Proph::PepSummary
    # at least currently (subject to change)
    true
  when SQTGroup
    # a q_value on the peptides means it's percolator output and we don't
    # have other hits to use
    spec_id_obj.peps.first.respond_to?(:q_value)
  else
    false
  end
end
272
-
273
# Builds the validator objects described by opts[:validators] and runs the
# pre-processing some of them need before use.
#
# opts::         option hash as filled in by the PrepArgs lambdas.  Reads
#                :validators, :fasta, :digestion_objects and :pephits.
#                Side effects on opts: the opts[:validators] array is edited
#                in place (:prob specs are dropped when :to_qvalues is
#                requested; :to_qvalues / :perc_qval specs are replaced by
#                [:qval]), the settings hashes inside the specs receive
#                :correct_wins (and :fasta for :tmm), and :digestion_objects
#                is deleted once it has been used.  The individual spec
#                arrays themselves are no longer consumed, so the method can
#                be called again with the same opts.
# false_on_tie:: when true the validators are built with correct_wins = false
# interactive::  currently unused
# postfilter::   one of :top_per_scan, :top_per_aaseq,
#                :top_per_aaseq_charge (of which last two are subsets of
#                scan); only consulted when background ratios are derived
#                from peptide hits
# spec_id::      identification object; its #peps are used for q-values,
#                background ratios and the transmembrane status hash
#
# Returns the array of validator instances, in the order of
# opts[:validators].
# Raises ArgumentError when peptide hits are required but neither --pephits
# nor --digestion was given, or when postfilter is not a known method.
def self.prepare_validators(opts, false_on_tie, interactive, postfilter, spec_id)
  validator_args = opts[:validators]

  if validator_args.any? {|v| v.first == :to_qvalues }
    # the probability method of a :prob spec (if any) selects which
    # probability the q-values are computed from
    prob_val_args_ar = validator_args.select {|v| v.first == :prob }.first
    prob_method =
      if prob_val_args_ar && prob_val_args_ar[1]
        prob_val_args_ar[1]
      else
        :probability
      end
    validator_args.reject! {|v| v.first == :prob }

    # loaded lazily: only needed for the q-value conversion
    require 'vec'
    require 'qvalue'

    # get a list of p-values (an exact 0 is nudged to 1e-9)
    pvals = spec_id.peps.map do |pep|
      val = 1.0 - pep.send(prob_method)
      val = 1e-9 if val == 0
      val
    end
    # NOTE(review): this writes TMP_PVALUES.txt into the current directory on
    # every call; it looks like leftover debugging output -- confirm before
    # removing, something may read it.
    File.open("TMP_PVALUES.txt", 'w') {|v| v.puts pvals.sort.join(" ") }
    pvals = VecD.new(pvals)
    #qvals = pvals.qvalues(false, :lambda_vals => 0.30 )
    qvals = pvals.qvalues
    qvals.zip(spec_id.peps) do |qval,pep|
      pep.q_value = qval
    end
  end

  # both q-value flavours are handled by the :qval validator
  validator_args.map! do |v|
    if v.first == :to_qvalues || v.first == :perc_qval
      [:qval]
    else
      v
    end
  end

  correct_wins = !false_on_tie
  need_false_to_total_ratio = []
  need_frequency = []
  transmem_vals = []
  # classes whose instances may still need a ratio/frequency computed below
  potential_digestion_classes = /Transmem|AA|AAEst|Bias/

  validators = validator_args.map do |args|
    # Split without mutating the caller's spec array (the old
    # `args.shift` stripped the type symbol from opts[:validators], which
    # broke any second call with the same opts).
    tp, *val_args = args
    case tp
    when :tmm
      val_args[1][:correct_wins] = correct_wins
      val_args[1][:fasta] = opts[:fasta] if opts.key?(:fasta)
    when :bias
      val_args[1][:correct_wins] = correct_wins
    when :tps
      val_args = [val_args[0], correct_wins]
    when :decoy
      # don't delete the key here since we need the decoy = regexp key
      val_args[0][:correct_wins] = correct_wins
    end
    ## bad_aa, prob, and qval pass their arguments through unchanged

    val = Validator_symbols_to_classes[tp].new( *val_args )

    # make some lists of validators based on pre-processing needs:
    transmem_vals << val if tp == :tmm
    if val.class.to_s =~ potential_digestion_classes
      if val.class.to_s == 'Validator::AAEst'
        need_frequency.push(val) if val.frequency.nil?
      elsif !(val.false_to_total_ratio.nil?)
        $stderr.puts "using false_to_total_ratio: #{val.false_to_total_ratio}"
      else
        need_false_to_total_ratio << val
      end
    end
    val
  end

  if (need_false_to_total_ratio.size > 0) || (need_frequency.size > 0)
    if opts.key?(:digestion_objects)
      # in silico digestion supplies the background
      #raise ArgumentError, "requires --digestion fasta,params argument!" if !opts.key?(:digestion_objects)
      peps = Digestor.digest( *(opts[:digestion_objects]) )
      need_false_to_total_ratio.each do |val|
        val.set_false_to_total_ratio( peps )
      end
      need_frequency.each do |val|
        val.set_frequency( opts[:digestion_objects][0] )
      end
      opts.delete(:digestion_objects)
    else ## do the new and improved selection of non-top hits to get false_to_total_ratios and freqs
      $stderr.puts "...using pephits to calculate background ratios"
      pephits =
        if opts[:pephits] ## protein prophet (since it needs to get ratios somewhere
          $stderr.puts "using --pephits"
          opts[:pephits].peps
        elsif requires_pephits?(spec_id)
          raise ArgumentError, "with objects of class '#{spec_id.class}', one of your validators requires --pephits or --digestion"
        else
          $stderr.puts "using given spec_id.peps"
          spec_id.peps
        end

      # first_index, last_index
      not_first_or_second_peps = Sequest.other_hits_sorted_by_xcorr(pephits, 2, 9, [:base_name, :first_scan, :charge])
      pephits =
        case postfilter
        when :top_per_scan
          $stderr.puts "using top_per_scan"
          not_first_or_second_peps
        when :top_per_aaseq
          # it doesn't matter which one is given since validators are
          # based on amino acid sequence
          $stderr.puts 'using top_per_aaseq'
          not_first_or_second_peps.hash_by(:aaseq).values.map {|pep| pep.first }
        when :top_per_aaseq_charge
          $stderr.puts 'using top_per_aaseq_charge'
          not_first_or_second_peps.hash_by(:aaseq, :charge).values.map {|pep| pep.first }
        else
          raise ArgumentError, "must have a valid postfilter method, yours: '#{postfilter}'"
        end

      need_false_to_total_ratio.each do |val|
        val.set_false_to_total_ratio( pephits )
        $stderr.puts "false_to_total_ratio for #{val.class.to_s}: #{val.false_to_total_ratio}"
      end
      need_frequency.each do |val|
        $stderr.puts "Setting frequency!"
        val.set_frequency( pephits )
      end
    end
  end

  # and interactive ## we'd like to just run this for interactive
  # This is overkill if we are doing a single filtering job, but it
  # ensures that it works in all the ways I'm doing it. Should
  # refactor eventually !!
  transmem_vals.each do |val| ## but, prob uses it too!
    val.transmem_status_hash = val.create_transmem_status_hash(spec_id.peps)
  end

  validators
end
430
-
431
- end
@@ -1,107 +0,0 @@
1
- require 'validator'
2
-
3
# Validator that estimates precision from decoy hits.  Peptide hits are
# split into normal and decoy groups by matching the reference of each of
# their proteins against #constraint, and the two counts (plus #frit) are
# handed to calc_precision.
# NOTE(review): calc_precision is not defined in this file; presumably it
# comes from Precision::Calculator::Decoy -- confirm.
class Validator::Decoy < Validator
  include Precision::Calculator::Decoy

  # a Regexp (if concatenated) or a String (the filename of separate run)
  attr_accessor :constraint

  # which side a protein matching the constraint counts for, and how
  # peptides with mixed protein assignments are resolved (see #partition)
  attr_accessor :decoy_on_match
  attr_accessor :correct_wins

  # This is the number of incorrect target hits over the total decoy hits.
  # The percent incorrect targets (PIT) expressed as a fraction (== 1 - PI_0).
  # The rough, conservative ballpark estimate is the ratio of target hits to
  # decoy hits. This can be refined by removing the number of true target
  # hits from the targets used to calculate it.
  attr_accessor :frit

  attr_accessor :last_pep_was_decoy

  # running totals maintained by #increment_pephits_precision
  attr_accessor :increment_normal
  attr_accessor :increment_decoy
  attr_accessor :increment_total_submitted

  # the normal (non-decoy) peptides from the most recent call to
  # #increment_pephits_precision
  attr_reader :normal_peps_just_submitted

  DEFAULTS = {
    :decoy_on_match => true,
    :correct_wins => true,
    :frit => 1.0,
  }

  # opts may contain :constraint, :decoy_on_match, :correct_wins and :frit;
  # anything missing falls back to DEFAULTS (:constraint has no default and
  # stays nil).
  def initialize(opts={})
    merged = DEFAULTS.merge(opts)
    @constraint, @decoy_on_match, @correct_wins, @frit = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :frit)
  end

  # Splits peps into two arrays; the callers in this class treat the result
  # as [normal, decoy].  Reads the full protein reference of every protein
  # of each peptide and matches it against @constraint.
  #
  # With decoy_on_match a matching protein is a decoy: when correct_wins a
  # peptide is normal unless all of its proteins match, otherwise it is
  # normal only if none match.  Without decoy_on_match the roles are
  # reversed: when correct_wins a peptide is normal if any protein matches,
  # otherwise only if all match.
  def partition(peps)
    if @decoy_on_match
      if @correct_wins
        peps.partition do |pep|
          !(pep.prots.all? {|prot| prot.reference.match(@constraint) })
        end
      else # fp wins
        peps.partition do |pep|
          !(pep.prots.any? {|prot| prot.reference.match(@constraint) })
        end
      end
    else
      if @correct_wins
        peps.partition do |pep|
          pep.prots.any? {|prot| prot.reference.match(@constraint) }
        end
      else
        peps.partition do |pep|
          pep.prots.all? {|prot| prot.reference.match(@constraint) }
        end
      end
    end
  end

  # Resets the running totals used by #increment_pephits_precision.
  def initialize_increment
    @increment_normal = 0
    @increment_decoy = 0
    @increment_total_submitted = 0
    @increment_initialized = true
  end


  # Adds peps (an array, or a single SpecID::Pep) to the running totals and
  # returns calc_precision for the accumulated normal and decoy counts.
  # does not deal in separate_peps right now!!
  def increment_pephits_precision(peps)
    # `defined?` checks the flag without triggering an "instance variable
    # not initialized" warning, so the global $VERBOSE no longer has to be
    # switched off and back on around the check.
    initialize_increment unless defined?(@increment_initialized) && @increment_initialized

    to_submit =
      if peps.is_a? SpecID::Pep
        [peps]
      else
        peps
      end

    @increment_total_submitted += to_submit.size
    (normal, decoy) = partition(to_submit)
    @normal_peps_just_submitted = normal
    @increment_normal += normal.size
    @increment_decoy += decoy.size
    calc_precision(@increment_normal, @increment_decoy, @frit)
  end

  # Precision for a complete set of hits.  When separate_peps is given, peps
  # are counted as the normal hits and separate_peps as the decoy hits;
  # otherwise peps is split with #partition.
  def pephit_precision(peps, separate_peps=nil)
    if separate_peps
      calc_precision(peps.size, separate_peps.size, @frit)
    else
      (norm, decoy) = partition(peps)
      calc_precision(norm.size, decoy.size, @frit)
    end
  end

  # One-line summary of the settings (constraint, decoy_on_match and
  # correct_wins; frit is not included).
  def to_param_string
    "decoy="+ ["{constraint=#{(constraint ? constraint.inspect : '')}", "decoy_on_match=#{@decoy_on_match}", "correct_wins=#{@correct_wins}}"].join(", ")
  end
end
106
-
107
-
@@ -1,70 +0,0 @@
1
- require 'validator'
2
- require 'fasta'
3
- require 'spec_id/sequest/params'
4
-
5
# Base class for validators that estimate precision from a constraint whose
# background rate is known.  Given an array of SpecID::Pep objects,
# #pephit_precision partitions them and converts the counts into a
# precision.
#
# Subclasses are expected to supply #partition (returning [tps, fps]);
# #adjust_fps_for_background and #calc_precision are called here but not
# defined in this file.
class Validator::DigestionBased < Validator
  DEFAULTS = {
    #:false_to_total_ratio => 1.0, # disable because this needs to be set
    # explicitly
    :background => 0.0,
  }

  # running counts: true positives, false positives, and every peptide
  # submitted to the validator (regardless of tp, fp, or nil)
  attr_accessor :increment_tps, :increment_fps, :increment_total_submitted

  # the ratio of false hits to total peptides in the fasta file
  attr_accessor :false_to_total_ratio

  # the false_to_total_ratio calculated (but not applied)
  attr_reader :calculated_background

  # For a sample with no false hits in it, (under defaults) this is the
  # fraction of peptides with the constraint over the total number of peptides
  # from which these hits are derived.
  attr_accessor :background


  # Precision of the given peptide hits: partitions them, corrects the
  # counts with #calc_precision_prep and passes the result to
  # calc_precision.
  def pephit_precision(peps)
    true_hits, false_hits = partition(peps)
    num_tps, num_fps = calc_precision_prep(true_hits.size, false_hits.size)
    calc_precision(num_tps, num_fps)
  end

  # Turns the raw partition counts into estimated totals.
  # returns [num_tps, num_fps]
  def calc_precision_prep(num_tps, num_fps)
    # the partition may have dropped peptides, so the estimate is based on
    # the number that actually passed it
    passing_total = num_tps + num_fps
    corrected_fps = adjust_fps_for_background(num_tps, num_fps, background)
    # FALSE/TOTAL = FALSE(found)/TOTAL(found)
    #   => TOTAL(found) = FALSE(found) / (FALSE/TOTAL)
    estimated_false = corrected_fps / false_to_total_ratio
    [passing_total.to_f - estimated_false, estimated_false]
  end

  # Sets false_to_total_ratio to fps / (tps + fps) for the given peptides
  # (assumes partition returns (tps, fps)).  The result is NaN when the
  # partition yields no peptides at all (0.0 / 0).
  # returns self
  def set_false_to_total_ratio(peps)
    true_hits, false_hits = partition(peps)
    self.false_to_total_ratio = false_hits.size.to_f / (true_hits.size + false_hits.size)
    self
  end

end
69
-
70
-
@@ -1,51 +0,0 @@
1
-
2
# Precision from per-hit probabilities: 1 - mean(1 - p), where p is read
# from each object with the configured probability method.
# (The original author describes this as the Benjamini-Hochberg FDR method.)
# @TODO: class should probably be renamed to reflect method used!
# or options given to specify different methods (i.e., q-value)??
class Validator::Probability

  # name of the method sent to each object to obtain its probability
  attr_accessor :prob_method

  def initialize(prob_method=:probability)
    @prob_method = prob_method
  end

  # Precision of a complete set of objects; every object must respond to
  # the configured prob_method.  An empty set has precision 1.0.
  def precision(objs)
    return 1.0 if objs.size == 0

    # SUM(1-probX)/#objs
    summed_error = objs.inject(0.0) do |running, obj|
      running + (1.0 - obj.send(@prob_method))
    end
    1.0 - (summed_error / objs.size)
  end


  # Running version of #precision: adds objs (an array, or a single
  # SpecID::Pep / SpecID::Prot) to the totals kept on this instance and
  # returns the precision of everything submitted so far.
  # These should be added from high probability(1.0) to low (0.0)
  def increment_precision(objs)
    objs = [objs] if objs.is_a?(SpecID::Pep) || objs.is_a?(SpecID::Prot)

    @total_objs ||= 0
    @current_sum_one_minus_prob ||= 0.0

    @total_objs += objs.size
    objs.each {|obj| @current_sum_one_minus_prob += 1.0 - obj.send(@prob_method) }
    1.0 - (@current_sum_one_minus_prob / @total_objs)
  end


  alias_method :pephit_precision, :precision
  alias_method :prothit_precision, :precision
  alias_method :increment_pephits_precision, :increment_precision
end