mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,431 +0,0 @@
1
- require 'validator'
2
-
3
- require 'validator/true_pos'
4
- require 'validator/aa'
5
- require 'validator/aa_est'
6
- require 'validator/bias'
7
- require 'validator/decoy'
8
- require 'validator/transmem'
9
- require 'validator/probability'
10
- require 'validator/q_value'
11
- require 'validator/prot_from_pep'
12
-
13
- ## these are all for a stupid check...
14
- require 'spec_id/sqt'
15
- require 'spec_id/proph/prot_summary'
16
- require 'spec_id/proph/pep_summary'
17
-
18
- class Validator::Cmdline
19
-
20
- Validator_symbols_to_classes = {
21
- :tmm => Validator::Transmem::Protein,
22
- :decoy => Validator::Decoy,
23
- :bad_aa => Validator::AA,
24
- :bad_aa_est => Validator::AAEst,
25
- :tps => Validator::TruePos,
26
- :bias => Validator::Bias,
27
- :prob => Validator::Probability,
28
- :qval => Validator::QValue,
29
- }
30
- # was VAL_DEFAULTS
31
- DEFAULTS = {
32
- :tmm =>
33
- {
34
- # file
35
- :min_num_tmm_seqs => 1,
36
- :expect_soluble => true,
37
- :no_include_tm_peps => 0.8,
38
- :bkg => 0.0,
39
- },
40
- :decoy =>
41
- {
42
- :hits_together => true,
43
- :decoy_on_match => true,
44
- :frit => 1.0, # fraction incorrect targets (like PIT)
45
- },
46
- :bad_aa =>
47
- {
48
- :false_if_found => true,
49
- :bkg => 0.0,
50
- },
51
- :bad_aa_est =>
52
- {
53
- :false_if_found => true,
54
- :bkg => 0.0,
55
- },
56
- :bias =>
57
- {
58
- :bkg => 0.0,
59
- :proteins_expected => true,
60
- },
61
- :ties => true,
62
- }
63
- COMMAND_LINE = {
64
- :decoy => ["--decoy /REGEXP/|FILENAME[,PIT,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
65
- "FILENAME of separate search on decoys.",
66
- "All regular expressions must be surrounded by '/'",
67
- "(no extended options [trailing modifiers]).",
68
- "e.g., a run using concatenated reversed proteins that",
69
- "includes 'REVERSE' in the fasta heading:",
70
- " --decoy /REVERSE/",
71
- "Anything fancier should be quoted:",
72
- " --decoy '/^\\s*REVERSE/'",
73
- "If decoys proteins were searched in a separate file,",
74
- "then give the FILENAME (e.g., --decoy decoy.srg)",
75
- "FRIT = Fraction Incorrect Targets (like",
76
- "the PIT as a fraction) (default: #{DEFAULTS[:decoy][:frit]})",
77
- "DOM = *true/false, decoy on match",],
78
- :tps => ["--tps <fasta>", "for a completely defined sample, this is the",
79
- "fasta file containing the true protein hits"],
80
- # may require digestion:
81
- :fasta => ["--fasta FASTA", "fasta file for phobius transmembrane",
82
- "(needed if PEPS options is not false)"],
83
- :digestion => ["--digestion ORIG_FASTA,PARAMS", Array, "[not recommended]",
84
- "Creates the 'false/total' ratio with in silico",
85
- "digestion. Otherwise, the 3rd-10th best hits (sorted by",
86
- "xcorr) are used.",
87
- "The following validators will use this",
88
- "information (shared between them) if option given",
89
- "ORIG_FASTA = the fasta file used to do the run",
90
- "PARAMS = the params file used to do the run",],
91
- :bias => ["--bias FASTA[,PE,BKG]", Array, "FASTA contains proteins expected to be in the sample",
92
- "PE = *true|false proteins in fasta file expected in sample",
93
- "BKG = Background frequency of fps (d: #{DEFAULTS[:bias][:bkg]})",],
94
- :bad_aa => ["--bad_aa AA,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
95
- "AA = The amino acid (e.g., 'C')",
96
- "BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa][:bkg]}):",],
97
- :bad_aa_est => ["--bad_aa_est AA,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
98
- "AA = The amino acid (e.g., 'C')",
99
- "BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa_est][:bkg]}):",],
100
-
101
- :tmm => ["--tmm <TM[,MIN,SOL,PEPS,BKG]>", Array, "TM = phobius.small or toppred.out file",
102
- "phobius.small:",
103
- "http://phobius.cgb.ki.se/",
104
- "(select 'Short' output, and save output as file)",
105
- "toppred.out:",
106
- "http://bioweb.pasteur.fr/seqanal/interfaces/toppred.html",
107
- "(output 'toppred.out' in 'New' or 'Xml' format)",
108
- "MIN = Int, minimum number transmembrane seqs (def: #{DEFAULTS[:tmm][:min_num_tmm_seqs]})",
109
- "SOL = true|false, this is a soluble fraction( def: #{DEFAULTS[:tmm][:expect_soluble]})",
110
- "PEPS = Float | false, don't consider tm peps (>= fraction",
111
- " tm content) (false skips) (def: #{DEFAULTS[:tmm][:no_include_tm_peps]})",
112
- "BKG = Float , background contaminating insoluble (def: #{DEFAULTS[:tmm][:bkg]})"],
113
-
114
-
115
- # VALIDATION MODIFIERS
116
- :false_on_tie => ["--false_on_tie", "if peptide belongs to correct AND incorrect proteins",
117
- "it will be counted as correct"],
118
-
119
- }
120
-
121
- def self.boolean(arg, default)
122
- case arg
123
- when 'true' ; true
124
- when 'false' ; false
125
- else ; default
126
- end
127
- end
128
-
129
- PrepArgs = {
130
- :prob => lambda {|ar, opts|
131
- mthd =
132
- if ar
133
- if ar == 'nsp'
134
- :probability
135
- elsif ar == 'init'
136
- :initial_probability
137
- else
138
- raise ArgumentError, "--prob [arg], optional arg can only be 'nsp' or 'init'!"
139
- end
140
- else
141
- :probability
142
- end
143
- opts[:validators].push([:prob, mthd])
144
- },
145
- :perc_qval => lambda {|ar, opts| opts[:validators].push([:perc_qval]) },
146
- :to_qvalues => lambda {|ar, opts| opts[:validators].push([:to_qvalues]) },
147
- :decoy => lambda {|ar, opts|
148
- myargs = [:decoy]
149
- first_arg = ar[0]
150
- val_opts = {}
151
- val_opts[:constraint] =
152
- if first_arg[0,1] == '/' and first_arg[-1,1] == '/'
153
- # cast as a regular expression if it has '/ /'
154
- Regexp.new(first_arg[1...-1])
155
- else
156
- # assume that it is a filename
157
- raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
158
- first_arg
159
- end
160
- val_opts[:frit] = (ar[1] || DEFAULTS[:decoy][:frit]).to_f
161
- val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
162
- myargs.push(val_opts)
163
- opts[:validators].push(myargs)
164
- },
165
- :fasta => lambda {|arg, opts|
166
- opts[:fasta] = Fasta.new(arg)
167
- },
168
- :digestion => lambda {|ar, opts|
169
- raise(ArgumentError, "need fasta and sequest params!") if ar.size != 2
170
- opts[:digestion] = ar.dup
171
- opts[:digestion_objects] = [Fasta.new(ar[0]), Sequest::Params.new(ar[1])]
172
- },
173
- :bias => lambda {|ar, opts|
174
- myargs = [:bias]
175
- myargs.push( Fasta.new(ar[0]) )
176
- val_opts = {}
177
- val_opts[:proteins_expected] = self.boolean(ar[1], DEFAULTS[:bias][:proteins_expected])
178
- val_opts[:background] =
179
- if ar[2]
180
- ar[2].to_f
181
- else
182
- DEFAULTS[:bias][:bkg]
183
- end
184
- if ar[3]
185
- val_opts[:false_to_total_ratio] = ar[3].to_f
186
- end
187
- myargs.push(val_opts)
188
- opts[:validators].push(myargs)
189
- },
190
- :bad_aa => lambda {|ar, opts|
191
- ## GET the FREQUENCY
192
- myargs = [:bad_aa]
193
- myargs.push( ar[0] )
194
- val_opts = {}
195
- val_opts[:background] =
196
- if ar[1]
197
- ar[1].to_f
198
- else
199
- DEFAULTS[:bad_aa][:bkg]
200
- end
201
- if ar[2]
202
- val_opts[:false_to_total_ratio] = ar[2].to_f
203
- end
204
- myargs.push(val_opts)
205
- opts[:validators].push(myargs)
206
- },
207
- :bad_aa_est => lambda {|ar, opts|
208
- ## GET the FREQUENCY
209
- myargs = [:bad_aa_est]
210
- myargs.push( ar[0] )
211
- val_opts = {}
212
- val_opts[:background] =
213
- if ar[1]
214
- ar[1].to_f
215
- else
216
- DEFAULTS[:bad_aa_est][:bkg]
217
- end
218
- if ar[2]
219
- val_opts[:frequency] = ar[2].to_f
220
- end
221
- myargs.push(val_opts)
222
- opts[:validators].push(myargs)
223
- },
224
-
225
- :tmm => lambda {|ar, opts|
226
- myargs = [:tmm]
227
- myargs.push( ar[0] )
228
- val_opts = {}
229
- val_opts[:min_num_tms] =
230
- if ar[1] ; ar[1].to_i
231
- else ; DEFAULTS[:tmm][:min_num_tmm_seqs]
232
- end
233
- val_opts[:soluble_fraction] = self.boolean(ar[2], DEFAULTS[:tmm][:expect_soluble])
234
- val_opts[:no_include_tm_peps] =
235
- if ar[3]
236
- case ar[3]
237
- when 'false' ; false
238
- else ; ar[3].to_f
239
- end
240
- else ; DEFAULTS[:tmm][:no_include_tm_peps]
241
- end
242
- val_opts[:background] =
243
- if ar[4] ; ar[4].to_f
244
- else ; DEFAULTS[:tmm][:bkg]
245
- end
246
- if ar[5]
247
- val_opts[:false_to_total_ratio] = ar[5].to_f
248
- end
249
- myargs.push(val_opts)
250
- opts[:validators].push( myargs )
251
- },
252
- :pephits => lambda {|v,opts| opts[:pephits] = SpecID.new(v) },
253
- :tps => lambda {|v,opts| opts[:validators].push([:tps, Fasta.new(v)]) },
254
- :false_on_tie => lambda {|v,opts| opts[:ties] = false },
255
- }
256
-
257
- def self.requires_pephits?(spec_id_obj)
258
- case spec_id_obj
259
- when Proph::ProtSummary : true
260
- # at least currently (subject to change)
261
- when Proph::PepSummary : true
262
- when SQTGroup
263
- if spec_id_obj.peps.first.respond_to?(:q_value)
264
- # it's percolator output and we don't have other hits to use
265
- true
266
- else
267
- false
268
- end
269
- else ; false
270
- end
271
- end
272
-
273
- # remove the keys from opts involved in validators and return an array
274
- # of validators
275
- # postfilter is one of :top_per_scan, :top_per_aaseq,
276
- # :top_per_aaseq_charge (of which last two are subsets of scan)
277
- def self.prepare_validators(opts, false_on_tie, interactive, postfilter, spec_id)
278
-
279
- validator_args = opts[:validators]
280
- if validator_args.any? {|v| v.first == :to_qvalues }
281
- prob_val_args_ar = validator_args.select {|v| v.first == :prob }.first
282
- prob_method =
283
- if prob_val_args_ar && prob_val_args_ar[1]
284
- prob_val_args_ar[1]
285
- else
286
- :probability
287
- end
288
- validator_args.reject! {|v| v.first == :prob }
289
-
290
- require 'vec'
291
- require 'qvalue'
292
-
293
- # get a list of p-values
294
- pvals = spec_id.peps.map do |pep|
295
- val = 1.0 - pep.send(prob_method)
296
- val = 1e-9 if val == 0
297
- val
298
- end
299
- File.open("TMP_PVALUES.txt", 'w') {|v| v.puts pvals.sort.join(" ") }
300
- pvals = VecD.new(pvals)
301
- #qvals = pvals.qvalues(false, :lambda_vals => 0.30 )
302
- qvals = pvals.qvalues
303
- qvals.zip(spec_id.peps) do |qval,pep|
304
- pep.q_value = qval
305
- end
306
- end
307
-
308
- validator_args.map! do |v|
309
- if v.first == :to_qvalues || v.first == :perc_qval
310
- [:qval]
311
- else
312
- v
313
- end
314
- end
315
-
316
- correct_wins = !false_on_tie
317
- need_false_to_total_ratio = []
318
- need_frequency = []
319
- transmem_vals = []
320
- validators = validator_args.map do |args|
321
- tp = args.shift
322
- val_args = args.dup # protect the original keys
323
- val_args =
324
- case tp
325
- when :tmm
326
- val_args[1][:correct_wins] = correct_wins
327
- if opts.key?(:fasta)
328
- val_args[1][:fasta] = opts[:fasta]
329
- end
330
- val_args
331
- when :bias
332
- val_args[1][:correct_wins] = correct_wins
333
- val_args
334
- when :tps
335
- val_args = [val_args[0], correct_wins]
336
- val_args
337
- when :decoy
338
- val_args[0][:correct_wins] = correct_wins
339
- # don't delete the key here since we need the decoy = regexp key
340
- val_args
341
- else ## bad_aa, prob, and qval are represented here:
342
- val_args
343
- end
344
- val = Validator_symbols_to_classes[tp].new( *val_args )
345
- # make some lists of validators based on pre-processing needs:
346
- if tp == :tmm
347
- transmem_vals << val
348
- end
349
- potential_digestion_classes = /Transmem|AA|AAEst|Bias/
350
- if val.class.to_s =~ potential_digestion_classes
351
- if val.class.to_s == 'Validator::AAEst'
352
- need_frequency.push(val) if val.frequency.nil?
353
- elsif !(val.false_to_total_ratio.nil?)
354
- $stderr.puts "using false_to_total_ratio: #{val.false_to_total_ratio}"
355
- else
356
- need_false_to_total_ratio << val
357
- end
358
- end
359
- val
360
- end
361
-
362
- if ((need_false_to_total_ratio.size > 0) or (need_frequency.size > 0))
363
- if opts.key?(:digestion_objects)
364
- #raise ArgumentError, "requires --digestion fasta,params argument!" if !opts.key?(:digestion_objects)
365
- peps = Digestor.digest( *(opts[:digestion_objects]) )
366
- need_false_to_total_ratio.each do |val|
367
- val.set_false_to_total_ratio( peps )
368
- end
369
- if need_frequency.size > 0
370
- need_frequency.each do |val|
371
- val.set_frequency( opts[:digestion_objects][0] )
372
- end
373
- end
374
- opts.delete(:digestion_objects)
375
- else ## do the new and improved selection of non-top hits to get false_to_total_ratios and freqs
376
- $stderr.puts "...using pephits to calculate background ratios"
377
- # first_index, last_index
378
- pephits =
379
- if opts[:pephits] ## protein prophet (since it needs to get ratios somewhere
380
- $stderr.puts "using --pephits"
381
- opts[:pephits].peps
382
- elsif requires_pephits?(spec_id)
383
- raise ArgumentError, "with objects of class '#{spec_id.class}', one of your validators requires --pephits or --digestion"
384
- else
385
- $stderr.puts "using given spec_id.peps"
386
- spec_id.peps
387
- end
388
-
389
- not_first_or_second_peps = Sequest.other_hits_sorted_by_xcorr(pephits, 2, 9, [:base_name, :first_scan, :charge])
390
- pephits =
391
- case postfilter
392
- when :top_per_scan
393
- $stderr.puts "using top_per_scan" ; not_first_or_second_peps
394
- when :top_per_aaseq
395
- # it doesn't matter which one is given since validators are
396
- # based on amino acid sequence
397
- $stderr.puts 'using top_per_aaseq'
398
- not_first_or_second_peps.hash_by(:aaseq).values.map {|pep| pep.first }
399
- when :top_per_aaseq_charge
400
- $stderr.puts 'using top_per_aaseq_charge'
401
- not_first_or_second_peps.hash_by(:aaseq, :charge).values.map {|pep| pep.first }
402
- else
403
- raise ArgumentError, "must have a valid postfilter method, yours: '#{postfilter}'"
404
- end
405
-
406
- need_false_to_total_ratio.each do |val|
407
- val.set_false_to_total_ratio( pephits )
408
- $stderr.puts "false_to_total_ratio for #{val.class.to_s}: #{val.false_to_total_ratio}"
409
- end
410
- if need_frequency.size > 0
411
- need_frequency.each do |val|
412
- $stderr.puts "Setting frequency!"
413
- val.set_frequency( pephits )
414
- end
415
- end
416
- end
417
- end
418
-
419
- if (transmem_vals.size > 0) # and interactive ## we'd like to just run this for interactive
420
- # This is overkill if we are doing a single filtering job, but it
421
- # ensures that it works in all the ways I'm doing it. Should
422
- # refactor eventually !!
423
- transmem_vals.each do |val| ## but, prob uses it too!
424
- val.transmem_status_hash = val.create_transmem_status_hash(spec_id.peps)
425
- end
426
- end
427
- validators
428
-
429
- end
430
-
431
- end
@@ -1,107 +0,0 @@
1
- require 'validator'
2
-
3
- class Validator::Decoy < Validator
4
- include Precision::Calculator::Decoy
5
-
6
- # a Regexp (if concatenated) or a String (the filename of separate run)
7
- attr_accessor :constraint
8
-
9
- attr_accessor :decoy_on_match
10
- attr_accessor :correct_wins
11
- # This is the number of incorrect target hits over the total decoy hits
12
- # The percent incorrect targets (PIT) expressed as a fraction (== 1 - PI_0).
13
- # The rough, conservative ballpark estimate is the ratio of target hits to
14
- # decoy hits. This can be refined by removing the number of true target
15
- # hits from the targets used to calculate it.
16
- attr_accessor :frit
17
-
18
- attr_accessor :last_pep_was_decoy
19
-
20
- attr_accessor :increment_normal
21
- attr_accessor :increment_decoy
22
- attr_accessor :increment_total_submitted
23
-
24
- attr_reader :normal_peps_just_submitted
25
-
26
- DEFAULTS = {
27
- :decoy_on_match => true,
28
- :correct_wins => true,
29
- :frit => 1.0,
30
- }
31
-
32
- def initialize(opts={})
33
- merged = DEFAULTS.merge(opts)
34
- @constraint, @decoy_on_match, @correct_wins, @frit = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :frit)
35
- end
36
-
37
- # returns [normal, decoy] (?? I think ??)
38
- # reads the full protein reference
39
- def partition(peps)
40
- if @decoy_on_match
41
- if @correct_wins
42
- peps.partition do |pep|
43
- !(pep.prots.all? {|prot| prot.reference.match(@constraint) })
44
- end
45
- else # fp wins
46
- peps.partition do |pep|
47
- !(pep.prots.any? {|prot| prot.reference.match(@constraint) })
48
- end
49
- end
50
- else
51
- if @correct_wins
52
- peps.partition do |pep|
53
- pep.prots.any? {|prot| prot.reference.match(@constraint) }
54
- end
55
- else
56
- peps.partition do |pep|
57
- pep.prots.all? {|prot| prot.reference.match(@constraint) }
58
- end
59
- end
60
- end
61
- end
62
-
63
- def initialize_increment
64
- @increment_normal = 0
65
- @increment_decoy = 0
66
- @increment_total_submitted = 0
67
- @increment_initialized = true
68
- end
69
-
70
-
71
- # does not deal in separate_peps right now!!
72
- # will take an array or single peptide
73
- def increment_pephits_precision(peps)
74
- tmp = $VERBOSE; $VERBOSE = nil
75
- initialize_increment unless @increment_initialized
76
- $VERBOSE = tmp
77
-
78
- to_submit =
79
- if peps.is_a? SpecID::Pep
80
- [peps]
81
- else
82
- peps
83
- end
84
-
85
- @increment_total_submitted += to_submit.size
86
- (normal, decoy) = partition(to_submit)
87
- @normal_peps_just_submitted = normal
88
- @increment_normal += normal.size
89
- @increment_decoy += decoy.size
90
- calc_precision(@increment_normal, @increment_decoy, @frit)
91
- end
92
-
93
- def pephit_precision(peps, separate_peps=nil)
94
- if separate_peps
95
- calc_precision(peps.size, separate_peps.size, @frit)
96
- else
97
- (norm, decoy) = partition(peps)
98
- calc_precision(norm.size, decoy.size, @frit)
99
- end
100
- end
101
-
102
- def to_param_string
103
- "decoy="+ ["{constraint=#{(constraint ? constraint.inspect : '')}", "decoy_on_match=#{@decoy_on_match}", "correct_wins=#{@correct_wins}}"].join(", ")
104
- end
105
- end
106
-
107
-
@@ -1,70 +0,0 @@
1
- require 'validator'
2
- require 'fasta'
3
- require 'spec_id/sequest/params'
4
-
5
- # objects of this class can calculate pephit_precision given an array of
6
- # SpecID::Pep objects using the pephit_precision method.
7
- class Validator::DigestionBased < Validator
8
- DEFAULTS = {
9
- #:false_to_total_ratio => 1.0, # disable because this needs to be set
10
- # explicitly
11
- :background => 0.0,
12
- }
13
-
14
- # the number of tps
15
- attr_accessor :increment_tps
16
- # the number of fps
17
- attr_accessor :increment_fps
18
-
19
- # the total peptides submitted to the validator (regardless of tp, fp, or
20
- # nil)
21
- attr_accessor :increment_total_submitted
22
-
23
- # the ratio of false hits to total peptides in the fasta file
24
- attr_accessor :false_to_total_ratio
25
-
26
- # the false_to_total_ratio calculated (but not applied)
27
- attr_reader :calculated_background
28
-
29
- # For a sample with no false hits in it, (under defaults) this is the
30
- # fraction of peptides with the constraint over the total number of peptides
31
- # from which these hits are derived.
32
- attr_accessor :background
33
-
34
-
35
- # expects that classes define a partition method, and a @background
36
- def pephit_precision(peps)
37
- ## this gives us the fraction that are transmembrane (under defaults):
38
- (tps, fps) = partition(peps)
39
- (num_tps, num_fps) = calc_precision_prep(tps.size, fps.size)
40
- calc_precision(num_tps, num_fps)
41
- end
42
-
43
- # returns [num_tps, num_fps]
44
- def calc_precision_prep(num_tps, num_fps)
45
- total_peps_passing_partition = num_tps + num_fps
46
- num_fps = adjust_fps_for_background(num_tps, num_fps, background)
47
- ## we must use the false_to_total_ratio to estimate how many are really
48
- ## incorrect!
49
- # FALSE/TOTAL = FALSE(found)/TOTAL(found)
50
- # TOTAL(found) = FALSE(found) * TOTAL/FALSE
51
- # = FALSE(found) / (FALSE/TOTAL)
52
- total_false = num_fps / false_to_total_ratio
53
- # NOTE: the partition algorithm drops peptides that are transmembrane
54
- # under certain options. Thus, the total false estimate must be tempered
55
- # by this lower number of total peptides.
56
- adjusted_tps = total_peps_passing_partition.to_f - total_false
57
- [adjusted_tps, total_false]
58
- end
59
-
60
- # returns self
61
- # assumes partition returns (tps, fps)
62
- def set_false_to_total_ratio(peps)
63
- (tps, fps) = partition(peps)
64
- self.false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
65
- self
66
- end
67
-
68
- end
69
-
70
-
@@ -1,51 +0,0 @@
1
-
2
- # calculates precision based on the Benjamini-Hochberg FDR method.
3
- # @TODO: class should probably be renamed to reflect method used!
4
- # or options given to specify different methods (i.e., q-value)??
5
- class Validator::Probability
6
-
7
- attr_accessor :prob_method
8
-
9
- def initialize(prob_method=:probability)
10
- @prob_method = prob_method
11
- end
12
-
13
- # objs should respond_to probability
14
- def precision(objs)
15
- return 1.0 if objs.size == 0
16
-
17
- current_sum_one_minus_prob = 0.0
18
-
19
- # this should work!
20
- #objs.inject(0.0) {|sum,obj| sum + (1.0 - obj.probability) }
21
-
22
- objs.each do |obj|
23
- # SUM(1-probX)/#objs
24
- current_sum_one_minus_prob += 1.0 - obj.send(@prob_method)
25
- end
26
- prec = 1.0 - (current_sum_one_minus_prob / objs.size)
27
- end
28
-
29
-
30
- # objs should respond_to probability
31
- # These should be added from high probability(1.0) to low (0.0)
32
- def increment_precision(objs)
33
- if objs.is_a?(SpecID::Pep) or objs.is_a?(SpecID::Prot)
34
- objs = [objs]
35
- end
36
-
37
- @total_objs ||= 0
38
- @current_sum_one_minus_prob ||= 0.0
39
-
40
- @total_objs += objs.size
41
- objs.each do |obj|
42
- @current_sum_one_minus_prob += 1.0 - obj.send(@prob_method)
43
- end
44
- prec = 1.0 - (@current_sum_one_minus_prob / @total_objs)
45
- end
46
-
47
-
48
- alias_method :pephit_precision, :precision
49
- alias_method :prothit_precision, :precision
50
- alias_method :increment_pephits_precision, :increment_precision
51
- end