mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,209 @@
1
+ require 'validator/cmdline'
2
+ require 'spec_id'
3
+
4
+
5
module SpecID
  module Precision
    class Filter
      # Builds the OptionParser for the filtering command line and turns an
      # argument list into a SpecID object plus an options hash.
      class CmdlineParser

        # Filter defaults plus the default output: a text table with no
        # filename (the --output help below says a missing filename means
        # STDOUT).
        DEFAULTS = SpecID::Precision::Filter::FV_DEFAULTS.merge( { :output => [[:text_table,nil]], } )

        # Maps each option key to the argument list handed to
        # OptionParser#on.  The validator options are merged in from
        # Validator::Cmdline::COMMAND_LINE.
        COMMAND_LINE = {
          # SEQUEST
          :xcorr1 => ["-1", "--xcorr1 N", Float, "xcorr at +1 charge default: #{DEFAULTS[:sequest][:xcorr1]}"],
          :xcorr2 => ["-2", "--xcorr2 N", Float, "xcorr at +2 charge default: #{DEFAULTS[:sequest][:xcorr2]}"],
          :xcorr3 => ["-3", "--xcorr3 N", Float, "xcorr at +3 charge default: #{DEFAULTS[:sequest][:xcorr3]}"],

          :deltacn => ["-d", "--deltacn N", Float, ">= deltacn default: #{DEFAULTS[:sequest][:deltacn]}"],
          :ppm => ["-p", "--ppm N", Float, "<= ppm default: #{DEFAULTS[:sequest][:ppm]}",
            "if bioworks.xml, then ppm = deltamass*10^6/mass"],
          :no_deltacnstar => ["--no_deltacnstar", "Do not pass deltacn of top hit with no 2nd hit",
            "(these are set at 1.1 by bioworks)"],

          # OUTPUT
          :proteins => ["--proteins", "includes proteins (and validation)"],
          :output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
            "can be used multiple times",
            ":FILENAME is the filename to use (defaults to STDOUT)",
            "valid formats are:",
            " text_table (default)",
            " yaml",
            #" protein_summary (need to implement)",
            #" html_table (need to implement)"
          ],

          # VALIDATION MODIFIERS:
          :hits_separate => ["--hits_separate", "target/decoy hits are normally together when choosing",
            "the top hit per peptide (in prefilter and postfilter)",
            "in BOTH catenated and separate searches. This flag",
            "separates them when finding the top hit per scan.",
            "[This option modifies behavior of --decoy options]"],

          # OTHER:
          :prefilter => ["--prefilter", "finds the top hit per file+scan+charge and removes",
            "others. Speeds up filtering with '--interactive'."],
          :postfilter => ["--postfilter ARG", "ARG = top_per_scan | top_per_aaseq[_charge]",
            "'top_per_scan' hashes on filename+scan.",
            "'top_per_aaseq' hashes only on aaseq",
            "'top_per_aaseq_charge' hashes on aaseq+charge."],
          :top_hit_by => ["--top_hit_by ARG", "ARG = xcorr | probability (xcorr default)"],

          :interactive => ["-i", "--interactive [FILENAME]", "interactive filtering",
            "FILENAME given, then the interactive commands are",
            "read out of that file. NOTE: The flag without the",
            "filename must not be placed in front of an input",
            "file argument! e.g., -i bioworks.xml # -> bad!",
            "e.g., bioworks.xml -i # -> ok!"],

          :interactive_verbose => ["--interactive_verbose", "give help and hints in interactive mode"],

        }.merge( Validator::Cmdline::COMMAND_LINE )


        # Parses +args+ destructively (recognised options are removed by
        # OptionParser#parse!).
        #
        # When at least one argument is left over, a SpecID object is built
        # from it (all remaining arguments if the first ends in .srf,
        # otherwise only the first), the validators are prepared and the
        # default output is filled in if no --output was given.  With no
        # argument left, the SpecID object is nil and validators/output stay
        # as parsed.
        #
        # returns (spec_id_obj, options, option_parser_obj)
        def parse(args)
          opts = {}
          opts[:output] = []
          @out_used = false
          opts[:sequest] = {}
          opts[:validators] = []

          option_parser = OptionParser.new do |op|
            # registers COMMAND_LINE[arg] with a caller supplied handler
            def op.opt(arg, &block)
              on(*COMMAND_LINE[arg], &block)
            end

            # registers a validator option; the handler comes from
            # Validator::Cmdline::PrepArgs
            def op.val_opt(arg, opts)
              on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
            end

            # registers an option whose value is stored untouched under its
            # own key
            def op.exact_opt(opts, arg)
              on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
            end

            op.banner = "USAGE: #{File.basename($0)} [OPTS] <bioworks.xml | bioworks.srg | .srf ....srf>"
            op.separator ""
            op.separator " EXPECTS: the multiconsensus XML export of Bioworks 3.X (bioworks.xml) -or- *.srf files"
            op.separator " grouped together (bioworks.srg) [type 'srf_group.rb' at the cmd line]"

            op.separator " multiple .srf files may also be entered."
            op.separator " RETURNS: the number of peptides/proteins ID'd at given thresholds with"
            op.separator " (optional) validation of the results."
            op.separator ""

            #op.separator("** 'dcn*' is the number of peptides with deltacn == 1.1")
            #op.separator(" (these are peptides who are the only hit with xcorr > 0)")
            op.separator "SEQUEST OPTIONS: "
            [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm].each do |key|
              op.exact_opt(opts[:sequest], key)
            end
            op.opt(:no_deltacnstar) {|v| opts[:sequest][:include_deltacnstar] = false}
            op.separator ""
            op.separator "OUTPUT OPTIONS: "
            op.opt(:proteins) {|v| opts[:proteins] = true }
            op.opt(:output) do |output|
              # copied from rspec:
              # This funky regexp checks whether we have a FILE_NAME or not
              where = nil
              if (output =~ /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/) && ($2 != '')
                output = $1
                where = $2
              else
                raise "When using several --output options only one of them can be without a file" if @out_used
                @out_used = true
                # "-o yaml:" (empty filename) must not keep the colon as part
                # of the format name
                output = output.chomp(':')
              end
              opts[:output] << [output, where]
            end

            op.separator ""
            op.separator "VALIDATION OPTIONS: "
            op.separator " each option will calculate the precision"
            op.separator ""

            op.val_opt(:decoy, opts)
            op.val_opt(:digestion, opts)
            op.val_opt(:bias, opts)
            op.val_opt(:bad_aa, opts)

            op.val_opt(:tmm, opts)
            op.val_opt(:tps, opts)

            op.separator ""
            op.separator "VALIDATION MODIFIERS: "
            op.val_opt(:false_on_tie, opts) # sets opts[:ties] = false

            op.opt(:hits_separate) { opts[:hits_together] = false } # :top_hits_together

            op.separator ""
            op.separator "OTHER OPTIONS: "
            # The argument is optional, so OptionParser yields nil when the
            # flag is given without a filename; store true in that case so
            # that a bare -i still switches interactive mode on.
            op.opt(:interactive) {|v| opts[:interactive] = v || true }
            op.opt(:interactive_verbose) {|v| opts[:interactive_verbose] = v }

            op.opt(:top_hit_by) {|v| opts[:top_hit_by] = v.to_sym}
            op.opt(:postfilter) {|v| opts[:postfilter] = v.to_sym}
            op.opt(:prefilter) {|v| opts[:prefilter] = true }


            #op.on("--yaml", "spits out yaml-ized data") {|v| opts[:tabulate = v }
            #op.on("--combined_score", "shows the combined score") {|v| opts[:combined_score = v }
            #op.on("--marshal", "will write marshaled data or read existing") {|v| opts[:marshal = v }
            #op.on("--log <file>", "also writes all output to file") {|v| opts[:log = v }
            ### NEED TO IMPLEMENT THIS:
            ##op.on("--protein_summary", "writes passing proteins to .summary.html files") {|v| opts[:protein_summary = v }
            #op.on("-z", "--occams_razor", "will show minimal set of proteins") {|v| opts[:occams_razor = v }

          end
          option_parser.parse!(args)

          # prepare interactive object if necessary:
          if (requested = opts[:interactive])
            # a String is the command file; anything else means "prompt"
            command_file = requested.is_a?(String) ? requested : nil
            opts[:interactive] = SpecID::Precision::Filter::Interactive.new(command_file, opts[:interactive_verbose])
          end

          # command line values win over the sequest defaults
          opts[:sequest] = DEFAULTS[:sequest].merge(opts[:sequest])

          # prepare validators
          spec_id_obj = nil
          unless args.empty?
            spec_id_obj =
              if args[0] =~ /\.srf$/i
                ::SpecID.new(args)
              else
                ::SpecID.new(args[0])
              end
            if opts[:ties].nil? # will be nil or false
              opts[:ties] = Validator::Cmdline::DEFAULTS[:ties]
            end
            opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], spec_id_obj)

            if opts[:output].empty?
              # copy the pairs so that later in-place edits of opts[:output]
              # cannot leak into the DEFAULTS constant
              opts[:output] = DEFAULTS[:output].map {|pair| pair.dup }
            end
          end

          [spec_id_obj, opts, option_parser]
        end # parse
      end # CmdlineParser
    end # Filter
  end # Precision
end # SpecID
205
+
206
+
207
+
208
+
209
+
@@ -0,0 +1,134 @@
1
+
2
module SpecID ; end
module SpecID::Precision ; end

class SpecID::Precision::Filter
  # Collects replacement filter values from replies that are either read
  # with gets or taken, one line at a time, from a file of commands.
  class Interactive
    # path of the command file (only set when one was given)
    attr_accessor :file
    # when true, current values, help text and BAD ARG hints are printed
    attr_accessor :verbose

    # the file contains the interactive commands (one reply per line); with
    # no file the replies are read with gets
    def initialize(file=nil, verbose=false)
      @verbose = verbose
      if file
        @file = file
        @lines = IO.readlines(file)
      else
        @lines = nil
      end
    end

    # prints a separator line in verbose mode; both arguments are unused
    def passing(opts, answer)
      puts "****************************************************" if @verbose
    end

    # Reads replies until one can be turned into a full set of values.
    #
    # opts_to_change:: hash whose values under changing_keys get replaced
    # changing_keys:: the keys that may be changed, in positional order
    # shortcut_map:: maps each key to the shortcut used in "shortcut:value"
    # casting_map:: maps each key to an object responding to call(value)
    #
    # returns true after updating opts_to_change, or nil when the reply was
    # 'q' (also used when the command file or the input is exhausted)
    def filter_args(opts_to_change, changing_keys, shortcut_map, casting_map)
      shortcut_order = changing_keys.map {|k| shortcut_map[k] }
      casting_array = changing_keys.map {|k| casting_map[k] }
      base_args = opts_to_change.values_at( *changing_keys )
      if @verbose
        current_values = changing_keys.map {|k| "#{shortcut_map[k]}:#{opts_to_change[k]}" }
        out(current_values.join(" "))
      end
      loop do
        answer = prep_reply(next_reply, base_args, shortcut_order, casting_array)
        if answer == false
          # unusable reply: show the help (if verbose) and ask again
          out(interactive_help(changing_keys, shortcut_map)) if @verbose
        elsif answer.nil?
          return nil
        else
          answer.zip(changing_keys) do |newval, changing_key|
            opts_to_change[changing_key] = newval
          end
          return true
        end
      end
    end

    def out(string)
      puts string
    end

    # returns the help text (a single string) describing the reply formats
    # for the shortcuts of changing_keys
    def interactive_help(changing_keys, shortcut_map)
      shortcuts = changing_keys.map {|v| shortcut_map[v] }
      as_array = shortcuts.map {|v| "<#{v}>" }
      as_hash = shortcuts.map {|v| "#{v}:<#{v}>" }
      string = []
      string << "******************************************************************************"
      string << "INTERACTIVE FILTERING HELP:"
      string << "enter as an array of values : #{as_array.join(' ')}"
      string << "or as keys and values : #{as_hash.join(' ')}"
      string << "or some of the keys and values : #{as_hash.last}"
      if changing_keys.size >= 3
        string << "or mix array and keys/values : #{as_array[0]} #{as_array[1]} #{as_hash.last}"
      end
      string << "etc..."
      string << "<enter> to (re)run current values"
      string << "'q' to quit"
      string << "******************************************************************************"
      string.join("\n")
    end

    # Turns one (already chomped) reply into the list of cast values.
    #
    # reply:: the reply text
    # base:: current values, in shortcut_order; never modified
    # shortcut_order:: shortcut for each position of base
    # casting_array:: object responding to call(value) for each position
    #
    # returns nil if 'q', base itself for a blank reply, false for a reply
    # that cannot be used (nil reply, unknown shortcut, value that cannot be
    # cast), otherwise a new array of cast values
    def prep_reply(reply, base, shortcut_order, casting_array)
      return nil if reply == 'q'
      return false unless reply
      return base if reply =~ /^\s*$/

      # work on a copy so that a rejected reply cannot leave half-applied
      # string values behind for the next "<enter>" re-run
      values = base.dup
      positional = []
      keyed = {}
      # strip first: a leading blank would otherwise yield an empty first
      # token and shift every positional value by one
      reply.strip.split(/\s+/).each do |token|
        if token.include? ':'
          (k,v) = token.split(':')
          keyed[k] = v
        else
          positional << token
        end
      end
      positional.each_with_index do |val, i|
        values[i] = val
      end
      keyed.each do |k,v|
        index = shortcut_order.index(k)
        if index.nil?
          out("BAD ARG: #{k}:#{v}") if @verbose
          return false
        end
        values[index] = v
      end
      values.zip(casting_array).map do |val, cast_proc|
        begin
          cast_proc.call(val)
        rescue NoMethodError, ArgumentError, TypeError
          # NoMethodError also covers surplus positional values (no caster)
          out "BAD ARG: #{val}" if @verbose
          return false
        end
      end
    end

    private

    # next reply: a line of the command file ('q' once it is used up) or a
    # line read with gets ('q' at end of input)
    def next_reply
      if @lines
        @lines.empty? ? 'q' : @lines.shift.chomp
      else
        line = gets
        line ? line.chomp : 'q'
      end
    end

  end
end
134
+
@@ -0,0 +1,147 @@
1
+ require 'table'
2
+ require 'spec_id/precision/output'
3
+
4
module SpecID ; end
module SpecID::Precision ; end
class SpecID::Precision::Filter ; end

# Writes the result of a filtering run (the +answer+ hash) either as a text
# table or as YAML.
class SpecID::Precision::Filter::Output
  include SpecID::Precision::Output

  # abbreviations used in the protein row headings of the text table
  ProtPrecAbbr = {
    :normal => 'nrm',
    :normal_stdev => 'nrm_std',
    :worst => 'worst',
  }

  GTE = '>='
  LTE = '<='
  # comparison operator belonging to each sequest threshold
  MSial_operator = {
    'xcorr1' => GTE,
    'xcorr2' => GTE,
    'xcorr3' => GTE,
    'deltacn' => GTE,
    'ppm' => LTE,
  }

  # takes a hash {:normal => x, :normal_stdev => y :worst => z }
  # and returns a string
  def protein_precision_to_s(hash)
    "#{hash[:worst]}--#{hash[:normal]}+/-#{hash[:normal_stdev]}"
  end

  # returns the parameters as a string: keys are converted with
  # symbol_keys_to_string, cleaned with cleanup_params_hash and rendered
  # with hash_as_string
  def params_as_string(params_hash)
    hash = SpecID::Precision::Output.symbol_keys_to_string(params_hash)
    cleanup_params_hash(hash)
    hash_as_string(hash)
  end

  # Writes the parameters followed by a table to fh.  Layout:
  #
  #          num  <one column per validator>
  #   peps
  #   prots(worst)      (the three protein rows appear only when
  #   prots(nrm)         answer[:prothits] is set)
  #   prots(nrm_std)
  def text_table(fh, answer)
    validators = answer[:params][:validators]

    col_headings = ['num']
    if validators
      val_strings = validators.map do |val|
        Validator::Validator_to_string[val.class.to_s]
      end
      col_headings.push( *val_strings )
    end

    # the peptide row
    row_headings = ['peps']
    pep_row = [answer[:pephits].size]
    if validators
      # one precision per validator (nil padded if there are fewer)
      validators.zip( answer[:pephits_precision] ) do |_val, precision|
        pep_row << precision
      end
    end
    data_rows = [pep_row]

    # the protein rows
    if answer[:prothits]
      [:worst, :normal, :normal_stdev].each do |guy|
        row_headings << "prots(#{ProtPrecAbbr[guy]})"
        # the count is shown once; the other rows carry a ditto mark
        prot_row = [guy == :worst ? answer[:prothits].size : '"']
        answer[:prothits_precision].each do |precision|
          prot_row.push(precision[guy])
        end
        data_rows << prot_row
      end
    end

    params_string = params_as_string(answer[:params])
    table = Table.new( data_rows, row_headings, col_headings )
    fh.puts params_string
    fh.puts ""
    fh.puts( table.to_formatted_string )
    fh.puts ""
  end

  # Prints the answer as YAML to fh: the (cleaned) parameters, the number of
  # peptide hits, the peptide precision per validator and, when
  # answer[:prothits] is set, the number of protein hits and the protein
  # precision per validator.
  def yaml(fh, answer)
    # text_table tolerates missing validators; do the same here
    validators = answer[:params][:validators] || []

    final_output = { :params => answer[:params].dup }
    final_output[:pephits] = answer[:pephits].size
    if answer[:prothits]
      final_output[:prothits_precision] = validators.zip( answer[:prothits_precision] || [] ).map do |val, precision|
        {'validator' => Validator::Validator_to_string[val.class.to_s], 'values' => precision }
      end
      final_output[:prothits] = answer[:prothits].size
    end
    final_output[:pephits_precision] = validators.zip( answer[:pephits_precision] || [] ).map do |val, precision|
      { 'validator' => Validator::Validator_to_string[val.class.to_s], 'value' => precision }
    end
    final_output_as_strings = SpecID::Precision::Output.symbol_keys_to_string(final_output)
    cleanup_params_hash(final_output_as_strings['params'])
    fh.print(final_output_as_strings.to_yaml )
  end


  # Rewrites, in place, the entries of a string-keyed params hash that do
  # not print well: output pairs, postfilter/top_hit_by, the interactive
  # object, the decoy regexp and the validators.
  #
  # returns nil
  def cleanup_params_hash(hash)
    ################################
    # OUTPUT
    ################################
    if hash['output']
      hash['output'] = hash['output'].map do |output|
        # already rendered by an earlier call
        next output if output.is_a?(String)
        # render from a copy: the pairs may be shared with the caller's
        # options, and writing 'STDOUT' into them would turn "no file" into
        # a file name
        pair = output.dup
        pair[1] = 'STDOUT' if pair[1].nil?
        pair.join(" => ")
      end
    end
    %w(postfilter top_hit_by).each do |st|
      hash[st] = hash[st].to_s
    end
    interactive = hash['interactive']
    # a value without #file (e.g. true or a file name from an earlier call)
    # is already printable
    if interactive && interactive.respond_to?(:file)
      hash['interactive'] = interactive.file || true
    end
    if hash['decoy']
      if hash['decoy']['regexp']
        hash['decoy']['regexp'] = hash['decoy']['regexp'].inspect
      end
    end
    if x = hash['validators']
      hash['validators'] = Validator.sensible_validator_hashes(x)
    end
    nil
  end

end