mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,225 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- tmp = $VERBOSE ; $VERBOSE = nil
4
- require 'fox16'
5
- $VERBOSE = tmp
6
-
7
- include Fox
8
-
9
-
10
- class Opt
11
- attr_accessor :flag, :value
12
- def initialize(flag, value=nil)
13
- @flag = flag
14
- @value = value
15
- end
16
-
17
- def self.[](flag, value=nil)
18
- self.new(flag, value)
19
- end
20
-
21
- def to_s
22
- st = @flag
23
- if @value
24
- st << " " << @value
25
- end
26
- st
27
- end
28
-
29
- end
30
-
31
-
32
- NCOLS = 40
33
-
34
- srf_dir = nil
35
- output_dir = '.'
36
- msdata_dir = '.'
37
- $progname = 'bioworks_to_pepxml.rb'
38
- $sequest_folder = '/project/marcotte/marcotte/ms/john/sequest'
39
- $data_folder = '/project/marcotte/marcotte/ms/john/data'
40
- $isb_folder = '/var/www/tpp'
41
-
42
- # This is a directory selector consisting of: Label | FieldText | BrowseButton
43
- # if you pass in patterns, then you can select multiple files!
44
- class DirSelector
45
- attr_writer :directory
46
-
47
- def directory
48
- @directory_data.to_s
49
- end
50
-
51
- # You should pass in the frame that you want filled up!
52
- def initialize(parent, label='select directory', init_dir='.', text_field_width=30)
53
- @directory_data = FXDataTarget.new(init_dir)
54
-
55
- FXLabel.new(parent, label , nil, LAYOUT_CENTER_Y|LAYOUT_RIGHT|JUSTIFY_RIGHT)
56
- srf_field = FXTextField.new(parent, text_field_width, @directory_data) do |tf|
57
- tf.text = @directory_data.to_s
58
- end
59
- srf_field.connect(SEL_COMMAND) do |sender, sel, message|
60
- @directory_data.value = message
61
- end
62
- but = FXButton.new(parent, "Browse")
63
- but.connect(SEL_COMMAND) do |sender, sel, message|
64
- @directory_data.value = FXFileDialog.getOpenDirectory(parent, "Open directory_data", @directory_data.to_s)
65
- srf_field.text = @directory_data.value
66
- end
67
- end
68
- end
69
-
70
- # This is a directory selector consisting of: Label | FieldText | BrowseButton
71
- # if you pass in patterns, then you can select multiple files!
72
- class MultipleFilesSelector
73
- # an array of filenames
74
- attr_writer :files
75
-
76
- # You should pass in the frame that you want filled up!
77
- def initialize(parent, label='select multiple files', init_dir='.', text_field_width=30, patterns=["All Files (*)"])
78
- @directory_data = FXDataTarget.new(init_dir)
79
-
80
- FXLabel.new(parent, label , nil, LAYOUT_CENTER_Y|LAYOUT_RIGHT|JUSTIFY_RIGHT)
81
- srf_field = FXTextField.new(parent, text_field_width, @directory_data) do |tf|
82
- tf.text = @directory_data.to_s
83
- end
84
- srf_field.connect(SEL_COMMAND) do |sender, sel, message|
85
- @directory_data.value = message
86
- end
87
- but = FXButton.new(parent, "Browse")
88
- if patterns.is_a?(Array)
89
- pattern_string = patterns.join("\n")
90
- else
91
- pattern_string = patterns
92
- end
93
- but.connect(SEL_COMMAND) do |sender, sel, message|
94
- reply = FXFileDialog.getOpenFilenames(parent, "Open directory_data", @directory_data.to_s, pattern_string)
95
- p reply
96
- abort
97
- srf_field.text = @directory_data.value
98
- end
99
- end
100
- end
101
-
102
-
103
- class MainWindow < FXMainWindow
104
-
105
- def action(*args)
106
- p args
107
-
108
- cmd = []
109
- cmd << $progname
110
- #cmd << args
111
- #cmd << Opt['-o', output_dir]
112
-
113
- puts cmd.join(" ")
114
- end
115
-
116
- def initialize(anApp)
117
- labels = ["&SRF files (select multiple)", "&Output Directory (ISB)", "&Directory with RAW files"]
118
- super(anApp, "bioworks_to_pepxml", nil, nil, DECOR_ALL)
119
-
120
- gb = FXGroupBox.new(self, "Specify input/output", FRAME_RIDGE)
121
- mat = FXMatrix.new(gb, 3, MATRIX_BY_COLUMNS|LAYOUT_SIDE_TOP)
122
-
123
- srf_files_selector = MultipleFilesSelector.new(mat, labels[0], $sequest_folder, NCOLS, ["SRF files (*.srf)"])
124
-
125
- isb_files_selector = DirSelector.new(mat, labels[1], $isb_folder, NCOLS)
126
-
127
- hf = FXHorizontalFrame.new(self)
128
- create_mzxml = FXCheckButton.new(hf, 'create mzXML files')
129
- copy_mzxml = FXCheckButton.new(hf, 'copy mzXML files to ISB dir') {|v| v.checkState = TRUE }
130
- copy_mzxml.hide
131
-
132
- @mat2 = FXMatrix.new(self, 3, MATRIX_BY_COLUMNS|LAYOUT_SIDE_TOP)
133
- srf_dir_selector = DirSelector.new(@mat2, labels[2], $data_folder, NCOLS)
134
-
135
- submit = FXButton.new(self, "Submit")
136
- submit.connect(SEL_COMMAND) do |sender, sel, message|
137
- action(srf_dir_selector.files)
138
- end
139
-
140
- create_mzxml.connect(SEL_COMMAND) do |button,b,checked|
141
- if checked
142
- copy_mzxml.show
143
- @mat2.show
144
- self.resize(self.width, @large_height)
145
- else
146
- copy_mzxml.hide
147
- @mat2.hide
148
- self.resize(self.width, @small_height)
149
- end
150
- end
151
-
152
- end
153
-
154
- def create
155
- super
156
- show(PLACEMENT_SCREEN)
157
- @large_height = self.height
158
-
159
- # setup hidden state
160
- @mat2.hide
161
- @small_height = @large_height - @mat2.height
162
- self.resize(self.width, @small_height)
163
- end
164
-
165
- end
166
-
167
-
168
- application = FXApp.new("Hello", "FXRuby") do |theApp|
169
- MainWindow.new(theApp)
170
- theApp.create
171
- theApp.run
172
- end
173
-
174
-
175
- application.run()
176
-
177
-
178
-
179
-
180
- =begin
181
-
182
-
183
- ---------------------------------
184
- class MyMainWindow < FXMainWindow
185
-
186
- attr :advancedFrame
187
-
188
- def initialize(app)
189
- super(app, "MyMainWindow")
190
-
191
- contents = FXVerticalFrame.new(self,
192
- LAYOUT_FILL_X|LAYOUT_FILL_Y)
193
-
194
- advancedButton = FXButton.new(contents, "Advanced >>",
195
- nil, self, 0, FRAME_RAISED|FRAME_THICK)
196
-
197
- advancedButton.connect(SEL_COMMAND) do
198
- if @advancedFrame.shown?
199
- self.height -= @advancedFrame.height
200
- @advancedFrame.hide
201
- advancedButton.text = "Advanced >>"
202
- else
203
- self.height += @advancedFrame.height
204
- @advancedFrame.show
205
- advancedButton.text = "<< Basic"
206
- end
207
-
208
- self.recalc
209
- end
210
- end
211
- end
212
- #---------------------------
213
- app = FXApp.new
214
-
215
- mainWindow = MyMainWindow.new(app)
216
-
217
- app.create
218
- mainWindow.advancedFrame.hide
219
- mainWindow.height -= mainWindow.advancedFrame.height
220
-
221
- mainWindow.show(PLACEMENT_SCREEN)
222
-
223
- app.run
224
-
225
- =end
data/bin/fasta_shaker.rb DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
-
4
- require 'fasta'
5
- FastaShaker.shake_from_argv(ARGV)
@@ -1,5 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'spec_id/precision/filter'
4
-
5
- SpecID::Precision::Filter.new.filter_and_validate_cmdline(ARGV)
data/bin/gi2annot.rb DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'gi'
4
-
5
- if ARGV.size < 1
6
- puts "usage: #{File.basename(__FILE__)} <gi> ..."
7
- puts "calls NCBI for the annotation of the gi"
8
- end
9
-
10
-
11
- gis = ARGV.to_a.dup
12
-
13
- puts( GI.gi2annot(gis).join("\n") )
14
-
data/bin/id_class_anal.rb DELETED
@@ -1,112 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id'
4
- require 'generator'
5
- require 'optparse'
6
- require 'ostruct'
7
- require 'roc'
8
-
9
- def file_noext(file)
10
- file.sub(/#{Regexp.escape(File.extname(file))}$/, '')
11
- end
12
-
13
- delimiter = "\t"
14
- def_pre = "SHUFF_"
15
-
16
- opt = OpenStruct.new
17
- opt.p = def_pre
18
-
19
- jtplot_base = 'class_anal'
20
- jtplot_file = jtplot_base + '.toplot'
21
-
22
- OptionParser.new do |op|
23
- op.on("-p", "--prefix PREFIX", "prefix for false positive proteins") {|v| opt.p = v.split(',') }
24
- op.on("-j", "--jtplot", "output file '#{jtplot_file}' for jtp plotting program") {|v| opt.j = v }
25
- # op.on("-e", "--peptides", "runs a full analysis on peptides") {|v| opt.e = v }
26
- op.on("-a", "--area", "outputs area under the curve") {|v| opt.a = v }
27
- end.parse!
28
-
29
- if ARGV.size < 1
30
- puts "
31
- usage: #{File.basename(__FILE__)} [options] protein_file.xml ...
32
-
33
- Protein ID classification analysis. Takes either a bioworks.xml (v3.2 with
34
- probabilities) or protein_prophet-prot.xml file which has been run with
35
- decoy proteins.
36
-
37
- Outputs tp's and precision.
38
- [The false positive predictive rate (FPPR) is 1 - precision]
39
- The two columns will be labeled at the top.
40
- (delimited by '\\t') to STDOUT. To capture to file:
41
- #{File.basename(__FILE__)} protein_file.xml > out.csv
42
-
43
- OPTIONS:
44
- <s> = string
45
- -p --prefix <s[,s...]> Prefix(s) by which to determine decoy proteins (default #{def_pre})
46
- -j --jtplot outputs #{jtplot_file} for plotting by plot.rb
47
- [% plot.rb -w lp --yrange n0.1:1.1 --noenhanced <file> ]
48
- -a --area outputs area under the curve instead of tps/precision
49
-
50
- NOTE: protein prophet files not yet functional!!!
51
- ABBR:
52
- TP = True Positives
53
- FP = False Positives
54
- Prec = Precision = TP/(TP+FP)
55
- "
56
- exit
57
- end
58
-
59
- ###########################################################
60
- # I DON"T think option -e is functional yet...
61
- ###########################################################
62
-
63
- files = ARGV.to_a
64
-
65
- out = nil
66
- if opt.j
67
- out = File.open(jtplot_file, "w")
68
- lines = ['XYData', jtplot_base, "Classification Analysis", "Num Hits", "Precision"]
69
- lines.each {|l| out.puts l}
70
- end
71
-
72
- headings = files.collect do |file|
73
- %w(TP Precision).collect {|v| v + " (#{file_noext(file)})" }
74
- end
75
-
76
- all_arrs = []
77
- files.each_with_index do |file,i|
78
- sp = SpecID.new(file)
79
- headers = [file_noext(file)]
80
- arrs = sp.num_hits_and_ppv_for_prob(opt.p[i])
81
-
82
- if opt.a
83
- (num_hits, prec) = arrs
84
- roc = ROC.new
85
- prec_area = roc.area_under_curve(num_hits, prec)
86
- puts "#{file} (area under curve [num_hits, precision])"
87
- puts "Prec [#TPPrec = TP/(TP+FP)]:\t#{prec_area}"
88
- end
89
-
90
- all_arrs.push(*arrs)
91
-
92
- lns = []
93
- if opt.j
94
- xs = arrs.shift
95
- arrs.zip(headers).each do |ar|
96
- lns << ar[1] << xs.join(" ") << ar[0].join(" ")
97
- end
98
- lns.each do |line|
99
- out.puts line
100
- end
101
- end
102
- end
103
-
104
-
105
- unless opt.a
106
- puts headings.flatten.join(delimiter)
107
- SyncEnumerator.new(*all_arrs).each do |row|
108
- puts row.join(delimiter)
109
- end
110
- end
111
-
112
- out.close if opt.j
data/bin/id_precision.rb DELETED
@@ -1,172 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id'
4
- require 'roc'
5
- require 'generator'
6
- require 'optparse'
7
-
8
- ################################################
9
- $AREAS_ONLY = false
10
- ################################################
11
-
12
- opts = OptionParser.new do |op|
13
- op.banner = "usage: #{File.basename(__FILE__)} prefix bioworks.xml"
14
- op.separator ""
15
- op.separator "takes Bioworks 3.2 xml output files (with probabilities)"
16
- op.separator "rank orders the probabilities and outputs num hits and precision"
17
- op.separator "Also takes gzipped (xml.gz) files labeled as such"
18
- op.separator ""
19
- op.separator "Outputs a comma separated value to STDOUT (.csv)"
20
- op.separator ""
21
- op.separator "To capture:"
22
- op.separator " #{File.basename(__FILE__)} bioworks.xml > out.csv"
23
- op.on("-a", "--area", "outputs the area under the curve instead") do |v| $AREAS_ONLY = true end
24
- end
25
-
26
- opts.parse!
27
-
28
- if ARGV.size < 2
29
- puts opts
30
- exit
31
- end
32
-
33
- fp_prefix = ARGV[0]
34
- file = ARGV[1]
35
-
36
- obj = SpecID.new(file)
37
- re_prefix = /^#{Regexp.escape(fp_prefix)}/o
38
- prc = proc {|it| it.prots.first.reference =~ re_prefix }
39
- #(match, nomatch) = obj.classify(:peps, prc)
40
- obj.peps = obj.pep_prots
41
- (fp, tp) = obj.classify(:peps, prc)
42
-
43
-
44
- #puts fp.size.to_s
45
- #puts tp.size.to_s
46
- fp_obj = SpecID.new
47
- fp_obj.peps = fp
48
- tp_obj = SpecID.new
49
- tp_obj.peps = tp
50
-
51
- two_lists = [tp_obj, fp_obj].map do |obj|
52
- list = []
53
- list.push( obj.pep_probs_by_pep_prots )
54
-
55
- list.push( obj.pep_probs_by_bn_seq_charge )
56
- # These each have a by_min and a by_top10
57
- list.push(*( obj.pep_probs_by_bn_scan ) )
58
- list.push(*( obj.pep_probs_by_bn_scan_charge ) )
59
- list
60
- end
61
-
62
-
63
- headings = ["PepProts", "SeqCharge", "Scan(TopHit)", "Scan(Top10)", "ScanCharge(TopHit)", "ScanCharge(Top10)"]
64
- csv_headings = []
65
- headings.each do |head|
66
- csv_headings << head + ": NH"
67
- csv_headings << head + ": PR"
68
- end
69
-
70
- pairs = two_lists[0].zip two_lists[1]
71
-
72
- roc = DecoyROC.new
73
- x_y= []
74
- area_under_curve = []
75
- #start_x = []
76
- #end_x = []
77
- pairs.each do |pair|
78
- #x,y = roc.pred_and_tps_and_ppv(pair[0], pair[1])
79
- (num_hits, tps, ppv) = roc.pred_and_tps_and_ppv(pair[0], pair[1])
80
- x = num_hits
81
- y = ppv
82
- if $AREAS_ONLY
83
- x.unshift 0
84
- y.unshift 1.0
85
- area_under_curve << roc.area_under_curve(x,y)
86
- #start_x << x.first
87
- #end_x << x.last
88
- else
89
- x_y.push(x, y) # <- normal output
90
- end
91
- end
92
-
93
- if $AREAS_ONLY
94
- headings.unshift "Filename"
95
- puts headings.join(" ")
96
- area_under_curve.unshift file
97
- puts area_under_curve.join(" ")
98
- #puts start_x.join(" ")
99
- #puts end_x.join(" ")
100
- exit ### <-------------- ABORT HERE
101
- end
102
-
103
-
104
- # X axis is the number of peptides id# (i.e., # of peps in TP db)
105
- # Y axis is the precision = TP/(TP+FP)
106
-
107
- puts "# NH = number of hits"
108
- puts "# TP = true positives"
109
- puts "# FP = false positives"
110
- puts "# PR = precision = TP/(TP+FP)"
111
- puts csv_headings.join(",")
112
-
113
- SyncEnumerator.new(*x_y).each do |row|
114
- #items_as_string = row.collect do |item|
115
- # sprintf("%.18f", item)
116
- #end
117
-
118
- ## THIS IS THE NORMAL OUTPUT:
119
- puts row.join(", ")
120
-
121
-
122
- #puts items_as_string.join(", ")
123
- end
124
-
125
- =begin
126
-
127
- files = ARGV.to_a
128
-
129
- two_lists = files.collect do |file|
130
- obj = Bioworks.new(file)
131
- list = []
132
- list.push( obj.pep_probs_by_pep_prots )
133
- list.push( obj.pep_probs_by_seq_charge )
134
- # These each have a by_min and a by_top10
135
- list.push(*( obj.pep_probs_by_scan ) )
136
- list.push(*( obj.pep_probs_by_scan_charge ) )
137
- list
138
- end
139
-
140
-
141
- headings = ["PepProts: TP", "PepProts: PR", "SeqCharge: TP", "SeqCharge: PR",
142
- "Scan(TopHit): TP", "Scan(TopHit): PR", "Scan(Top10): TP", "Scan(Top10): PR",
143
- "ScanCharge(TopHit): TP", "ScanCharge(TopHit): PR",
144
- "ScanCharge(Top10): TP", "ScanCharge(Top10): PR"]
145
-
146
- pairs = two_lists[0].zip two_lists[1]
147
-
148
- roc = ROC.new
149
- x_y= []
150
- pairs.each do |pair|
151
- x,y = roc.tps_and_precision(pair[0], pair[1])
152
- x_y.push(x, y)
153
- end
154
-
155
- # X axis is the number of peptides id# (i.e., # of peps in TP db)
156
- # Y axis is the precision = TP/(TP+FP)
157
-
158
- puts "# TP = true positives"
159
- puts "# FP = false positives"
160
- puts "# PR = precision = TP/(TP+FP)"
161
- puts headings.join(",")
162
-
163
- SyncEnumerator.new(*x_y).each do |row|
164
- #items_as_string = row.collect do |item|
165
- # sprintf("%.18f", item)
166
- #end
167
- puts row.join(", ")
168
- #puts items_as_string.join(", ")
169
- end
170
-
171
- =end
172
-
data/bin/ms_to_lmat.rb DELETED
@@ -1,67 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'ms/msrun'
4
- require 'optparse'
5
- require 'ostruct'
6
- require 'lmat'
7
-
8
-
9
- # defaults:
10
- opt = {}
11
- opt[:baseline] = 0.0
12
- opt[:newext] = ".lmat"
13
- opt[:inc_mz] = 1.0
14
-
15
- # get options:
16
- opts = OptionParser.new do |op|
17
- op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
18
- op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
19
- op.separator ""
20
- op.separator "(sums m/z values that round to the same bin)"
21
- op.separator ""
22
- op.on("--mz_inc N", Float, "m/z increment (def: 1.0)") {|n| opt[:mz_inc] = n.to_f}
23
- op.on("--mz_start N", Float, "m/z start (def: start of 1st full scan)") {|n| opt[:start_mz] = n.to_f}
24
- op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
25
- op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
26
- op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
27
- op.on("-v", "--verbose") {$VERBOSE = true}
28
- end
29
- opts.parse!
30
-
31
- if ARGV.size < 1
32
- puts opts
33
- end
34
-
35
- ARGV.each do |file|
36
- msrun = MS::MSRun.new(file)
37
- mslevel = 1
38
- (start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
39
- (times, spectra) = msrun.times_and_spectra(mslevel)
40
- args = {
41
- :start_mz => start_mz,
42
- :end_mz => end_mz,
43
-
44
- :start_tm => times.first,
45
- :end_tm => times.last,
46
- :inc_tm => nil,
47
- }
48
- args.merge!(opt)
49
- lmat = LMat.new.from_times_and_spectra(times, spectra, args)
50
- ext = File.extname(file)
51
- outfile = file.sub(/#{Regexp.escape(ext)}$/, opt[:newext])
52
- if args[:ascii]
53
- outfile << "a"
54
- lmat.print(outfile)
55
- else
56
- lmat.write(outfile)
57
- end
58
- puts("OUTPUT: #{outfile}") if $VERBOSE
59
- end
60
-
61
-
62
-
63
-
64
-
65
-
66
-
67
-
@@ -1,16 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id/proph'
4
-
5
- if ARGV.size < 1
6
- puts "usage: #{File.basename(__FILE__)} <prob_cutoff> <pepproph.xml> ..."
7
- puts " For each file outputs 'pepproph_min<prob_cutoff>.xml'"
8
- puts " deleting all search_hits with peptides less than prob_cutoff"
9
- end
10
-
11
- files = ARGV.to_a
12
- cutoff = files.shift
13
- files.each do |file|
14
- outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
15
- Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
16
- end
data/bin/prob_validate.rb DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'spec_id/precision/prob'
4
-
5
- SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ARGV)
6
-
@@ -1,6 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id/protein_summary'
4
-
5
- ProteinSummary.new.create_from_command_line_args(ARGV)
6
-
@@ -1,32 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'prot'
4
- require 'pep'
5
-
6
- if ARGV.size < 4
7
- usage = <<HERE
8
- usage: protxml2prots_peps.rb run-prot.xml prot_prob nsp_pep_prob init_pep_prob
9
- takes all proteins and peptides passing prob cutoffs and
10
- outputs 'run-prot.xml.<prot_prob>_<nsp_prob>_<init_prob>.protpep'
11
- which is a marshalled array of proteins (containing peptides)
12
- HERE
13
- puts usage
14
- exit(1);
15
- end
16
-
17
- file = ARGV[0]
18
- outfile = file + '.' + ARGV[1] +'_'+ ARGV[2] +'_'+ ARGV[3] + ".protpep"
19
-
20
- proteins = Protein.get_prots_and_peps_fast(*ARGV)
21
- #puts "proteins"
22
- #proteins.each do |pr|
23
- # puts pr
24
- # pr.peptides.each do |pep|
25
- # puts "\n\t" + pep.to_s
26
- # end
27
- #end
28
- #proteins = Protein.get_prots_and_peps(*ARGV)
29
- File.open(outfile, "w") do |f|
30
- Marshal.dump(proteins, f)
31
- end
32
-