mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,209 @@
1
+ require 'validator/cmdline'
2
+ require 'spec_id'
3
+
4
+
5
module SpecID
  module Precision
    class Filter
      # Command-line front end for the precision filter: declares every
      # switch (COMMAND_LINE), the fallback values (DEFAULTS) and turns an
      # argument list into the options hash consumed by the filter.
      class CmdlineParser

        # Filter/validation defaults plus the default output destination
        # (a text table written to STDOUT, signalled by a nil filename).
        DEFAULTS = SpecID::Precision::Filter::FV_DEFAULTS.merge( { :output => [[:text_table,nil]], } )

        # Maps an option key to the argument list handed to OptionParser#on.
        # The validator switches are merged in from Validator::Cmdline.
        COMMAND_LINE = {
          # SEQUEST
          :xcorr1 => ["-1", "--xcorr1 N", Float, "xcorr at +1 charge default: #{DEFAULTS[:sequest][:xcorr1]}"],
          :xcorr2 => ["-2", "--xcorr2 N", Float, "xcorr at +2 charge default: #{DEFAULTS[:sequest][:xcorr2]}"],
          :xcorr3 => ["-3", "--xcorr3 N", Float, "xcorr at +3 charge default: #{DEFAULTS[:sequest][:xcorr3]}"],

          :deltacn => ["-d", "--deltacn N", Float, ">= deltacn default: #{DEFAULTS[:sequest][:deltacn]}"],
          :ppm => ["-p", "--ppm N", Float, "<= ppm default: #{DEFAULTS[:sequest][:ppm]}",
            "if bioworks.xml, then ppm = deltamass*10^6/mass"],
          :no_deltacnstar => ["--no_deltacnstar", "Do not pass deltacn of top hit with no 2nd hit",
            "(these are set at 1.1 by bioworks)"],

          # OUTPUT
          :proteins => ["--proteins", "includes proteins (and validation)"],
          :output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
            "can be used multiple times",
            ":FILENAME is the filename to use (defaults to STDOUT)",
            "valid formats are:",
            " text_table (default)",
            " yaml",
            #" protein_summary (need to implement)",
            #" html_table (need to implement)"
          ],

          # VALIDATION MODIFIERS:
          :hits_separate => ["--hits_separate", "target/decoy hits are normally together when choosing",
            "the top hit per peptide (in prefilter and postfilter)",
            "in BOTH catenated and separate searches. This flag",
            "separates them when finding the top hit per scan.",
            "[This option modifies behavior of --decoy options]"],

          # OTHER:
          :prefilter => ["--prefilter", "finds the top hit per file+scan+charge and removes",
            "others. Speeds up filtering with '--interactive'."],
          :postfilter => ["--postfilter ARG", "ARG = top_per_scan | top_per_aaseq[_charge]",
            "'top_per_scan' hashes on filename+scan.",
            "'top_per_aaseq' hashes only on aaseq",
            "'top_per_aaseq_charge' hashes on aaseq+charge."],
          :top_hit_by => ["--top_hit_by ARG", "ARG = xcorr | probability (xcorr default)"],

          :interactive => ["-i", "--interactive [FILENAME]", "interactive filtering",
            "FILENAME given, then the interactive commands are",
            "read out of that file. NOTE: The flag without the",
            "filename must not be placed in front of an input",
            "file argument! e.g., -i bioworks.xml # -> bad!",
            "e.g., bioworks.xml -i # -> ok!"],

          :interactive_verbose => ["--interactive_verbose", "give help and hints in interactive mode"],

        }.merge( Validator::Cmdline::COMMAND_LINE )

        # Parses +args+ destructively (recognised switches are removed by
        # OptionParser#parse!).
        #
        # Returns [spec_id_obj, options, option_parser_obj].  spec_id_obj is
        # nil when no input file remains after option parsing; in that case
        # validators and the default output are not prepared either.
        #
        # Raises RuntimeError when more than one --output switch is given
        # without a filename.
        def parse(args)
          opts = {}
          opts[:output] = []
          @out_used = false
          opts[:sequest] = {}
          opts[:validators] = []

          option_parser = OptionParser.new do |op|
            # registers a switch from COMMAND_LINE with a custom handler
            def op.opt(arg, &block)
              on(*COMMAND_LINE[arg], &block)
            end

            # registers a validator switch; the handler comes from
            # Validator::Cmdline::PrepArgs
            def op.val_opt(arg, opts)
              on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
            end

            # registers a switch whose value is stored verbatim under +arg+
            def op.exact_opt(opts, arg)
              on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
            end

            op.banner = "USAGE: #{File.basename($0)} [OPTS] <bioworks.xml | bioworks.srg | .srf ....srf>"
            op.separator ""
            op.separator " EXPECTS: the multiconsensus XML export of Bioworks 3.X (bioworks.xml) -or- *.srf files"
            op.separator " grouped together (bioworks.srg) [type 'srf_group.rb' at the cmd line]"

            op.separator " multiple .srf files may also be entered."
            op.separator " RETURNS: the number of peptides/proteins ID'd at given thresholds with"
            op.separator " (optional) validation of the results."
            op.separator ""

            #op.separator("** 'dcn*' is the number of peptides with deltacn == 1.1")
            #op.separator(" (these are peptides who are the only hit with xcorr > 0)")
            op.separator "SEQUEST OPTIONS: "
            [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm].each do |key|
              op.exact_opt(opts[:sequest], key)
            end
            op.opt(:no_deltacnstar) {|v| opts[:sequest][:include_deltacnstar] = false}
            op.separator ""
            op.separator "OUTPUT OPTIONS: "
            op.opt(:proteins) {|v| opts[:proteins] = true }
            op.opt(:output) do |output|
              # copied from rspec:
              # This funky regexp checks whether we have a FILE_NAME or not
              where = nil
              if (output =~ /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/) && ($2 != '')
                output = $1
                where = $2
              else
                # only one destination may fall back to STDOUT
                raise "When using several --output options only one of them can be without a file" if @out_used
                @out_used = true
              end
              opts[:output] << [output, where]
            end

            op.separator ""
            op.separator "VALIDATION OPTIONS: "
            op.separator " each option will calculate the precision"
            op.separator ""

            op.val_opt(:decoy, opts)
            op.val_opt(:digestion, opts)
            op.val_opt(:bias, opts)
            op.val_opt(:bad_aa, opts)

            op.val_opt(:tmm, opts)
            op.val_opt(:tps, opts)

            op.separator ""
            op.separator "VALIDATION MODIFIERS: "
            op.val_opt(:false_on_tie, opts) # sets opts[:ties] = false

            op.opt(:hits_separate) { opts[:hits_together] = false } # :top_hits_together

            op.separator ""
            op.separator "OTHER OPTIONS: "
            # OptionParser yields nil for an optional-argument switch given
            # without its argument; record `true` so that a bare -i still
            # switches interactive mode on.
            op.opt(:interactive) {|v| opts[:interactive] = v || true }
            op.opt(:interactive_verbose) {|v| opts[:interactive_verbose] = v }

            op.opt(:top_hit_by) {|v| opts[:top_hit_by] = v.to_sym}
            op.opt(:postfilter) {|v| opts[:postfilter] = v.to_sym}
            op.opt(:prefilter) {|v| opts[:prefilter] = true }

            #op.on("--yaml", "spits out yaml-ized data") {|v| opts[:tabulate = v }
            #op.on("--combined_score", "shows the combined score") {|v| opts[:combined_score = v }
            #op.on("--marshal", "will write marshaled data or read existing") {|v| opts[:marshal = v }
            #op.on("--log <file>", "also writes all output to file") {|v| opts[:log = v }
            ### NEED TO IMPLEMENT THIS:
            ##op.on("--protein_summary", "writes passing proteins to .summary.html files") {|v| opts[:protein_summary = v }
            #op.on("-z", "--occams_razor", "will show minimal set of proteins") {|v| opts[:occams_razor = v }
          end
          option_parser.parse!(args)

          # prepare interactive object if necessary: a String is the file of
          # scripted commands, anything else means "read replies live"
          if v = opts[:interactive]
            command_file = v.is_a?(String) ? v : nil
            opts[:interactive] = SpecID::Precision::Filter::Interactive.new(command_file, opts[:interactive_verbose])
          end

          # explicit sequest switches win over the defaults
          opts[:sequest] = DEFAULTS[:sequest].merge(opts[:sequest])

          spec_id_obj = nil
          if args.size > 0
            # several .srf files are read together; otherwise only the first
            # argument is used
            spec_id_obj =
              if args[0] =~ /\.srf$/i
                ::SpecID.new(args)
              else
                ::SpecID.new(args[0])
              end
            if opts[:ties].nil? # will be nil or false
              opts[:ties] = Validator::Cmdline::DEFAULTS[:ties]
            end
            opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], spec_id_obj)

            if opts[:output].size == 0
              # copy the nested pairs so later in-place edits of the options
              # can never alter the DEFAULTS constant
              opts[:output] = DEFAULTS[:output].map {|pair| pair.dup }
            end
          end

          [spec_id_obj, opts, option_parser]
        end # parse
      end # CmdlineParser
    end # Filter
  end # Precision
end # SpecID
205
+
206
+
207
+
208
+
209
+
@@ -0,0 +1,134 @@
1
+
2
module SpecID ; end
module SpecID::Precision ; end

class SpecID::Precision::Filter
  # Drives interactive re-filtering: replies are read either from a file of
  # scripted commands or from Kernel#gets, parsed, cast and written back
  # into an options hash.
  class Interactive
    attr_accessor :file
    attr_accessor :verbose

    # file::    optional path whose lines are used as the interactive
    #           replies (read eagerly with IO.readlines)
    # verbose:: when true, prompts, help and error hints are printed
    def initialize(file=nil, verbose=false)
      @verbose = verbose
      if file
        @file = file
        @lines = IO.readlines(file)
      else
        @lines = nil
      end
    end

    # Prints a separator line when verbose; both arguments are unused.
    def passing(opts, answer)
      puts "****************************************************" if @verbose
    end

    # Reads replies until one can be applied to +opts_to_change+.
    #
    # changing_keys:: keys of opts_to_change that may be edited (in order)
    # shortcut_map::  key => shortcut string used in "shortcut:value" replies
    # casting_map::   key => object responding to #call that casts a reply
    #
    # Returns true after opts_to_change was updated in place, or nil when
    # the user quit ('q', scripted commands exhausted, or end of input).
    def filter_args(opts_to_change, changing_keys, shortcut_map, casting_map)
      shortcut_order = changing_keys.map {|k| shortcut_map[k] }
      casting_array = changing_keys.map {|k| casting_map[k] }
      base_args = opts_to_change.values_at( *changing_keys )
      current_values = changing_keys.map {|k| "#{shortcut_map[k]}:#{opts_to_change[k]}" }
      out(current_values.join(" ")) if @verbose
      loop do
        answer = prep_reply(next_reply, base_args, shortcut_order, casting_array)
        if answer == false
          # unusable reply: show the help (if verbose) and ask again
          out(interactive_help(changing_keys, shortcut_map)) if @verbose
        elsif answer.nil?
          return nil
        else
          answer.zip(changing_keys) do |newval, changing_key|
            opts_to_change[changing_key] = newval
          end
          return true
        end
      end
    end

    # Single choke point for everything this object prints while prompting.
    def out(string)
      puts string
    end

    # Builds the multi-line help text describing the accepted reply formats.
    def interactive_help(changing_keys, shortcut_map)
      shortcuts = changing_keys.map {|k| shortcut_map[k] }
      as_array = shortcuts.map {|s| "<#{s}>" }
      as_hash = shortcuts.map {|s| "#{s}:<#{s}>" }
      border = "******************************************************************************"
      help = [border]
      help << "INTERACTIVE FILTERING HELP:"
      help << "enter as an array of values : #{as_array.join(' ')}"
      help << "or as keys and values : #{as_hash.join(' ')}"
      help << "or some of the keys and values : #{as_hash.last}"
      if changing_keys.size >= 3
        help << "or mix array and keys/values : #{as_array[0]} #{as_array[1]} #{as_hash.last}"
      end
      help << "etc..."
      help << "<enter> to (re)run current values"
      help << "'q' to quit"
      help << border
      help.join("\n")
    end

    # Turns one (already chomped) reply into the list of cast values.
    #
    # reply::          the raw reply string (or nil)
    # base::           current values, positionally matching shortcut_order;
    #                  never modified
    # shortcut_order:: shortcut strings accepted in "shortcut:value" tokens
    # casting_array::  callables applied to each resulting value
    #
    # Returns nil for 'q', +base+ itself for a blank reply, false for a
    # reply that cannot be used (nil reply, unknown shortcut, too many
    # values, or a value the cast rejects), otherwise a new array of cast
    # values.
    def prep_reply(reply, base, shortcut_order, casting_array)
      return nil if reply == 'q'
      return false if reply.nil?
      return base if reply =~ /^\s*$/

      # work on a copy: a rejected reply must not leave uncast strings
      # behind in the caller's array
      values = base.dup
      positional = []
      keyed = {}
      # String#split without a pattern ignores leading whitespace, so an
      # indented reply does not yield an empty first token
      reply.split.each do |token|
        if token.include? ':'
          (k,v) = token.split(':')
          keyed[k] = v
        else
          positional << token
        end
      end
      positional.each_with_index do |val, i|
        values[i] = val
      end
      keyed.each do |k, v|
        index = shortcut_order.index(k)
        if index.nil?
          out("BAD ARG: #{k}:#{v}") if @verbose
          return false
        end
        values[index] = v
      end
      values.zip(casting_array).map do |v, cast_proc|
        begin
          # cast_proc is nil when more values than keys were given, which
          # surfaces here as NoMethodError
          cast_proc.call(v)
        rescue NoMethodError, ArgumentError, TypeError
          out "BAD ARG: #{v}" if @verbose
          return false
        end
      end
    end

    private

    # Next reply with its line ending removed; 'q' once the scripted
    # commands are used up or the input reaches end-of-file.
    # NOTE(review): Kernel#gets reads from ARGF, so file names still left in
    # ARGV would be read instead of the terminal -- confirm callers clear it.
    def next_reply
      line = @lines ? @lines.shift : gets
      line ? line.chomp : 'q'
    end

  end
end
134
+
@@ -0,0 +1,147 @@
1
+ require 'table'
2
+ require 'spec_id/precision/output'
3
+
4
+ module SpecID ; end
5
+ module SpecID::Precision ; end
6
+ class SpecID::Precision::Filter ; end
7
+
8
# Renders the result of a filtering run (the +answer+ hash) either as a
# plain-text table or as YAML.
class SpecID::Precision::Filter::Output
  include SpecID::Precision::Output

  # abbreviations used in the protein row headings of the text table
  ProtPrecAbbr = {
    :normal => 'nrm',
    :normal_stdev => 'nrm_std',
    :worst => 'worst',
  }

  GTE = '>='
  LTE = '<='
  # comparison operator associated with each sequest filter
  # NOTE(review): not referenced inside this class; name kept unchanged in
  # case other files use it.
  MSial_operator = {
    'xcorr1' => GTE,
    'xcorr2' => GTE,
    'xcorr3' => GTE,
    'deltacn' => GTE,
    'ppm' => LTE,
  }

  # takes a hash {:normal => x, :normal_stdev => y :worst => z }
  # and returns a string
  def protein_precision_to_s(hash)
    "#{hash[:worst]}--#{hash[:normal]}+/-#{hash[:normal_stdev]}"
  end

  # Returns the run parameters as a printable string: keys are stringified,
  # values are tidied by cleanup_params_hash, and the result is formatted
  # with hash_as_string.
  def params_as_string(params_hash)
    hash = SpecID::Precision::Output.symbol_keys_to_string(params_hash)
    cleanup_params_hash(hash)
    hash_as_string(hash)
  end

  # Writes the parameters followed by a table of this layout to +fh+:
  #
  #        num tps tmm badAA decoy
  #   pep
  #   prot
  #
  # One column per validator (none when answer[:params][:validators] is
  # nil); the three protein rows are present only when answer[:prothits] is.
  def text_table(fh, answer)
    validators = answer[:params][:validators]

    col_headings = ['num']
    if validators
      col_headings.push( *validators.map {|val| Validator::Validator_to_string[val.class.to_s] } )
    end

    # the peptide row
    row_headings = ['peps']
    pep_row = [answer[:pephits].size]
    if validators
      # zip keeps exactly one precision cell per validator
      validators.zip( answer[:pephits_precision] ) do |val, precision|
        pep_row << precision
      end
    end
    data_rows = [pep_row]

    # the protein rows: one per precision flavour
    if answer[:prothits]
      [:worst, :normal, :normal_stdev].each do |guy|
        row_headings << "prots(#{ProtPrecAbbr[guy]})"
        # the hit count is shown once; later rows carry a ditto mark
        prot_row = [ (guy == :worst) ? answer[:prothits].size : '"' ]
        answer[:prothits_precision].each do |precision|
          prot_row.push(precision[guy])
        end
        data_rows << prot_row
      end
    end

    params_string = params_as_string(answer[:params])
    table = Table.new( data_rows, row_headings, col_headings )
    fh.puts params_string
    fh.puts ""
    fh.puts( table.to_formatted_string )
    fh.puts ""
  end

  # Prints a YAML document to +fh+ holding the parameters, the number of
  # peptide (and, if present, protein) hits and the precision reported by
  # each validator.  A missing validator or precision list is treated as
  # empty, mirroring the guard in text_table.
  def yaml(fh, answer)
    validators = answer[:params][:validators] || []
    final_output = { :params => answer[:params].dup }
    final_output[:pephits] = answer[:pephits].size
    if answer[:prothits]
      final_output[:prothits_precision] = validators.zip( answer[:prothits_precision] || [] ).map do |val, precision|
        {'validator' => Validator::Validator_to_string[val.class.to_s], 'values' => precision }
      end
      final_output[:prothits] = answer[:prothits].size
    end
    final_output[:pephits_precision] = validators.zip( answer[:pephits_precision] || [] ).map do |val, precision|
      { 'validator' => Validator::Validator_to_string[val.class.to_s], 'value' => precision }
    end
    final_output_as_strings = SpecID::Precision::Output.symbol_keys_to_string(final_output)
    cleanup_params_hash(final_output_as_strings['params'])
    fh.print(final_output_as_strings.to_yaml )
  end

  # Rewrites the values of a string-keyed params hash into printable form
  # (the top-level entries of +hash+ are replaced in place).  Returns nil.
  def cleanup_params_hash(hash)
    ################################
    # OUTPUT
    ################################
    if hash['output']
      hash['output'] = hash['output'].map do |output|
        # work on a copy: the [format, filename] pairs may be shared with
        # the caller's option hash (or the parser's defaults)
        pair = output.dup
        pair[1] = 'STDOUT' if pair[1].nil?
        pair.join(" => ")
      end
    end
    %w(postfilter top_hit_by).each do |st|
      hash[st] = hash[st].to_s
    end
    if hash['interactive']
      # show the command file when there is one, otherwise just `true`
      hash['interactive'] = hash['interactive'].file || true
    end
    if hash['decoy'] && hash['decoy']['regexp']
      hash['decoy']['regexp'] = hash['decoy']['regexp'].inspect
    end
    if x = hash['validators']
      hash['validators'] = Validator.sensible_validator_hashes(x)
    end
    nil
  end

end