mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,225 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- tmp = $VERBOSE ; $VERBOSE = nil
4
- require 'fox16'
5
- $VERBOSE = tmp
6
-
7
- include Fox
8
-
9
-
10
# A single command-line option: a flag (e.g. '-o') plus an optional value.
class Opt
  attr_accessor :flag, :value

  # flag::  the option switch
  # value:: the switch's argument, or nil when it takes none
  def initialize(flag, value=nil)
    @flag = flag
    @value = value
  end

  # Shorthand constructor: Opt['-o', 'dir'] is the same as Opt.new('-o', 'dir').
  def self.[](flag, value=nil)
    new(flag, value)
  end

  # Renders the option as "flag" or "flag value".
  #
  # A new string is built on every call.  Appending onto @flag with <<
  # would modify the stored flag, so repeated calls would keep growing it.
  def to_s
    return @flag.to_s unless @value
    "#{@flag} #{@value}"
  end
end
30
-
31
-
32
# Width (in columns) of the path text fields in the form.
NCOLS = 40

# Name of the command-line program this GUI is a front end for.
$progname = 'bioworks_to_pepxml.rb'

# Initial locations offered by the selectors (site-specific defaults).
$sequest_folder = '/project/marcotte/marcotte/ms/john/sequest'
$data_folder = '/project/marcotte/marcotte/ms/john/data'
$isb_folder = '/var/www/tpp'
41
-
42
# A directory selector consisting of: Label | TextField | Browse button.
# The widgets are created inside the parent that is passed in.
class DirSelector
  # The currently selected directory, as a String.
  def directory
    @directory_data.to_s
  end

  # Sets the selected directory.  Writes to the same data target that
  # #directory reads, so the new value is visible through the reader.
  def directory=(dir)
    @directory_data.value = dir
  end

  # You should pass in the frame that you want filled up!
  #
  # parent::           container the three widgets are added to
  # label::            text shown to the left of the field
  # init_dir::         directory shown initially and used to open the dialog
  # text_field_width:: width of the text field in columns
  def initialize(parent, label='select directory', init_dir='.', text_field_width=30)
    @directory_data = FXDataTarget.new(init_dir)

    FXLabel.new(parent, label, nil, LAYOUT_CENTER_Y|LAYOUT_RIGHT|JUSTIFY_RIGHT)
    dir_field = FXTextField.new(parent, text_field_width, @directory_data) do |tf|
      tf.text = @directory_data.to_s
    end
    # Text typed into the field becomes the selected directory.
    dir_field.connect(SEL_COMMAND) do |sender, sel, message|
      @directory_data.value = message
    end

    browse = FXButton.new(parent, "Browse")
    browse.connect(SEL_COMMAND) do |sender, sel, message|
      chosen = FXFileDialog.getOpenDirectory(parent, "Open directory_data", @directory_data.to_s)
      # NOTE(review): a cancelled dialog presumably yields nil or "" -- confirm
      # against the FXRuby docs.  Either way the previous selection is kept.
      unless chosen.nil? || chosen.empty?
        @directory_data.value = chosen
        dir_field.text = chosen
      end
    end
  end
end
69
-
70
# A multiple-file selector consisting of: Label | TextField | Browse button.
# The Browse button opens a file dialog in which several files can be picked;
# +patterns+ is handed to that dialog as its file-type filter list.
class MultipleFilesSelector
  # an array of filenames (the most recent selection made via Browse)
  attr_accessor :files

  # You should pass in the frame that you want filled up!
  #
  # parent::           container the three widgets are added to
  # label::            text shown to the left of the field
  # init_dir::         directory the dialog is opened in
  # text_field_width:: width of the text field in columns
  # patterns::         an Array of filter strings (joined with newlines) or a
  #                    single ready-made filter string
  def initialize(parent, label='select multiple files', init_dir='.', text_field_width=30, patterns=["All Files (*)"])
    @directory_data = FXDataTarget.new(init_dir)
    @files = []

    FXLabel.new(parent, label, nil, LAYOUT_CENTER_Y|LAYOUT_RIGHT|JUSTIFY_RIGHT)
    files_field = FXTextField.new(parent, text_field_width, @directory_data) do |tf|
      tf.text = @directory_data.to_s
    end
    files_field.connect(SEL_COMMAND) do |sender, sel, message|
      @directory_data.value = message
    end

    browse = FXButton.new(parent, "Browse")
    pattern_string = patterns.is_a?(Array) ? patterns.join("\n") : patterns
    browse.connect(SEL_COMMAND) do |sender, sel, message|
      reply = FXFileDialog.getOpenFilenames(parent, "Open directory_data", @directory_data.to_s, pattern_string)
      # Keep the selection instead of dumping it and aborting the program.
      # NOTE(review): a cancelled dialog presumably returns an empty array --
      # confirm; in that case the previous selection is left untouched.
      @files = reply unless reply.nil? || reply.empty?
      files_field.text = @directory_data.value
    end
  end
end
101
-
102
-
103
# Main window of the bioworks_to_pepxml front end: an input/output group
# (SRF files, ISB output directory), a "create mzXML files" check box that
# reveals an extra row for the RAW-file directory, and a Submit button.
class MainWindow < FXMainWindow

  # Submit handler.  Currently a stub: it echoes the arguments it was given
  # and prints the command line built so far (only $progname).
  def action(*args)
    p args

    cmd = []
    cmd << $progname
    #cmd << args
    #cmd << Opt['-o', output_dir]

    puts cmd.join(" ")
  end

  # Builds all widgets.  anApp is the application object the window belongs to.
  def initialize(anApp)
    labels = ["&SRF files (select multiple)", "&Output Directory (ISB)", "&Directory with RAW files"]
    super(anApp, "bioworks_to_pepxml", nil, nil, DECOR_ALL)

    gb = FXGroupBox.new(self, "Specify input/output", FRAME_RIDGE)
    mat = FXMatrix.new(gb, 3, MATRIX_BY_COLUMNS|LAYOUT_SIDE_TOP)

    srf_files_selector = MultipleFilesSelector.new(mat, labels[0], $sequest_folder, NCOLS, ["SRF files (*.srf)"])

    # Constructed for the widgets it adds to the matrix; not read yet.
    DirSelector.new(mat, labels[1], $isb_folder, NCOLS)

    hf = FXHorizontalFrame.new(self)
    create_mzxml = FXCheckButton.new(hf, 'create mzXML files')
    copy_mzxml = FXCheckButton.new(hf, 'copy mzXML files to ISB dir') {|v| v.checkState = TRUE }
    copy_mzxml.hide

    # Extra row that is only shown while "create mzXML files" is checked.
    @mat2 = FXMatrix.new(self, 3, MATRIX_BY_COLUMNS|LAYOUT_SIDE_TOP)
    DirSelector.new(@mat2, labels[2], $data_folder, NCOLS)

    submit = FXButton.new(self, "Submit")
    submit.connect(SEL_COMMAND) do |sender, sel, message|
      # The selected SRF files come from the files selector; the directory
      # selectors have no #files method.
      action(srf_files_selector.files)
    end

    create_mzxml.connect(SEL_COMMAND) do |button,b,checked|
      if checked
        copy_mzxml.show
        @mat2.show
        self.resize(self.width, @large_height)
      else
        copy_mzxml.hide
        @mat2.hide
        self.resize(self.width, @small_height)
      end
    end

  end

  # Creates and shows the window, then records the window height with and
  # without the optional row so the check box can toggle between them.
  def create
    super
    show(PLACEMENT_SCREEN)
    @large_height = self.height

    # setup hidden state
    @mat2.hide
    @small_height = @large_height - @mat2.height
    self.resize(self.width, @small_height)
  end

end
166
-
167
-
168
# Build the application, create its window and enter the event loop.
# The loop is started exactly once, inside the block; a second run call
# after the block would re-enter the loop once the first one had returned.
application = FXApp.new("Hello", "FXRuby") do |theApp|
  MainWindow.new(theApp)
  theApp.create
  theApp.run
end
176
-
177
-
178
-
179
-
180
- =begin
181
-
182
-
183
- ---------------------------------
184
- class MyMainWindow < FXMainWindow
185
-
186
- attr :advancedFrame
187
-
188
- def initialize(app)
189
- super(app, "MyMainWindow")
190
-
191
- contents = FXVerticalFrame.new(self,
192
- LAYOUT_FILL_X|LAYOUT_FILL_Y)
193
-
194
- advancedButton = FXButton.new(contents, "Advanced >>",
195
- nil, self, 0, FRAME_RAISED|FRAME_THICK)
196
-
197
- advancedButton.connect(SEL_COMMAND) do
198
- if @advancedFrame.shown?
199
- self.height -= @advancedFrame.height
200
- @advancedFrame.hide
201
- advancedButton.text = "Advanced >>"
202
- else
203
- self.height += @advancedFrame.height
204
- @advancedFrame.show
205
- advancedButton.text = "<< Basic"
206
- end
207
-
208
- self.recalc
209
- end
210
- end
211
- end
212
- #---------------------------
213
- app = FXApp.new
214
-
215
- mainWindow = MyMainWindow.new(app)
216
-
217
- app.create
218
- mainWindow.advancedFrame.hide
219
- mainWindow.height -= mainWindow.advancedFrame.height
220
-
221
- mainWindow.show(PLACEMENT_SCREEN)
222
-
223
- app.run
224
-
225
- =end
data/bin/fasta_shaker.rb DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
-
4
- require 'fasta'
5
- FastaShaker.shake_from_argv(ARGV)
@@ -1,5 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'spec_id/precision/filter'
4
-
5
- SpecID::Precision::Filter.new.filter_and_validate_cmdline(ARGV)
data/bin/gi2annot.rb DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'gi'
4
-
5
# With no gi numbers there is nothing to look up: show usage and stop
# instead of going on to query with an empty list.
if ARGV.empty?
  puts "usage: #{File.basename(__FILE__)} <gi> ..."
  puts "calls NCBI for the annotation of the gi"
  exit
end

gis = ARGV.to_a.dup

# One annotation per line, in the order returned by GI.gi2annot.
puts( GI.gi2annot(gis).join("\n") )
14
-
data/bin/id_class_anal.rb DELETED
@@ -1,112 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id'
4
- require 'generator'
5
- require 'optparse'
6
- require 'ostruct'
7
- require 'roc'
8
-
9
# Returns +file+ with its extension (as reported by File.extname) removed.
def file_noext(file)
  extension = File.extname(file)
  trailing_extension = Regexp.new(Regexp.escape(extension) + '$')
  file.sub(trailing_extension, '')
end
12
-
13
delimiter = "\t"
def_pre = "SHUFF_"

opt = OpenStruct.new
# Decoy prefixes are looked up per input file (by index) further down, so
# the default has to be a list too -- indexing a bare String would yield a
# single character rather than the prefix.
opt.p = [def_pre]

jtplot_base = 'class_anal'
jtplot_file = jtplot_base + '.toplot'

OptionParser.new do |op|
  op.on("-p", "--prefix PREFIX", "prefix for false positive proteins") {|v| opt.p = v.split(',') }
  op.on("-j", "--jtplot", "output file '#{jtplot_file}' for jtp plotting program") {|v| opt.j = v }
  # op.on("-e", "--peptides", "runs a full analysis on peptides") {|v| opt.e = v }
  op.on("-a", "--area", "outputs area under the curve") {|v| opt.a = v }
end.parse!

if ARGV.size < 1
  puts "
usage: #{File.basename(__FILE__)} [options] protein_file.xml ...

Protein ID classification analysis. Takes either a bioworks.xml (v3.2 with
probabilities) or protein_prophet-prot.xml file which has been run with
decoy proteins.

Outputs tp's and precision.
[The false positive predictive rate (FPPR) is 1 - precision]
The two columns will be labeled at the top.
(delimited by '\\t') to STDOUT. To capture to file:
#{File.basename(__FILE__)} protein_file.xml > out.csv

OPTIONS:
<s> = string
-p --prefix <s[,s...]> Prefix(s) by which to determine decoy proteins (default #{def_pre})
-j --jtplot outputs #{jtplot_file} for plotting by plot.rb
[% plot.rb -w lp --yrange n0.1:1.1 --noenhanced <file> ]
-a --area outputs area under the curve instead of tps/precision

NOTE: protein prophet files not yet functional!!!
ABBR:
TP = True Positives
FP = False Positives
Prec = Precision = TP/(TP+FP)
"
  exit
end

###########################################################
# I DON'T think option -e is functional yet...
###########################################################

files = ARGV.to_a

# Optional plot file: a five-line header followed by one series per input.
out = nil
if opt.j
  out = File.open(jtplot_file, "w")
  lines = ['XYData', jtplot_base, "Classification Analysis", "Num Hits", "Precision"]
  lines.each {|l| out.puts l}
end

# Two column titles per input file, used for the tabular output.
headings = files.collect do |file|
  %w(TP Precision).collect {|v| v + " (#{file_noext(file)})" }
end

all_arrs = []
files.each_with_index do |file,i|
  sp = SpecID.new(file)
  headers = [file_noext(file)]
  # Files beyond the number of prefixes given reuse the last prefix, so a
  # single (or default) prefix applies to every input file.
  prefix = opt.p[i] || opt.p.last
  arrs = sp.num_hits_and_ppv_for_prob(prefix)

  if opt.a
    (num_hits, prec) = arrs
    roc = ROC.new
    prec_area = roc.area_under_curve(num_hits, prec)
    puts "#{file} (area under curve [num_hits, precision])"
    puts "Prec [#TPPrec = TP/(TP+FP)]:\t#{prec_area}"
  end

  all_arrs.push(*arrs)

  lns = []
  if opt.j
    # The first array holds the x values; each remaining array is a series.
    xs = arrs.shift
    arrs.zip(headers).each do |ar|
      lns << ar[1] << xs.join(" ") << ar[0].join(" ")
    end
    lns.each do |line|
      out.puts line
    end
  end
end


unless opt.a
  puts headings.flatten.join(delimiter)
  SyncEnumerator.new(*all_arrs).each do |row|
    puts row.join(delimiter)
  end
end

out.close if opt.j
data/bin/id_precision.rb DELETED
@@ -1,172 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id'
4
- require 'roc'
5
- require 'generator'
6
- require 'optparse'
7
-
8
- ################################################
9
# Set by -a/--area: print only the areas under the curves.
$AREAS_ONLY = false
################################################

opts = OptionParser.new do |op|
  script = File.basename(__FILE__)
  op.banner = "usage: #{script} prefix bioworks.xml"
  [
    "",
    "takes Bioworks 3.2 xml output files (with probabilities)",
    "rank orders the probabilities and outputs num hits and precision",
    "Also takes gzipped (xml.gz) files labeled as such",
    "",
    "Outputs a comma separated value to STDOUT (.csv)",
    "",
    "To capture:",
    " #{script} bioworks.xml > out.csv"
  ].each {|text| op.separator text }
  op.on("-a", "--area", "outputs the area under the curve instead") { $AREAS_ONLY = true }
end

opts.parse!

unless ARGV.size >= 2
  puts opts
  exit
end

fp_prefix, file = ARGV[0], ARGV[1]

# Split the peptide hits by whether the first protein's reference starts
# with the given prefix; classify hands back the matching set first.
spec_id = SpecID.new(file)
re_prefix = /^#{Regexp.escape(fp_prefix)}/o
has_prefix = proc {|hit| hit.prots.first.reference =~ re_prefix }
spec_id.peps = spec_id.pep_prots
fp, tp = spec_id.classify(:peps, has_prefix)

fp_obj = SpecID.new
fp_obj.peps = fp
tp_obj = SpecID.new
tp_obj.peps = tp

# For each of the two sets, collect the probability lists in the same order
# as the headings below.
two_lists = [tp_obj, fp_obj].map do |subset|
  probs = [subset.pep_probs_by_pep_prots, subset.pep_probs_by_bn_seq_charge]
  # These each have a by_min and a by_top10
  probs.push(*subset.pep_probs_by_bn_scan)
  probs.push(*subset.pep_probs_by_bn_scan_charge)
  probs
end

headings = ["PepProts", "SeqCharge", "Scan(TopHit)", "Scan(Top10)", "ScanCharge(TopHit)", "ScanCharge(Top10)"]
csv_headings = headings.collect {|head| [head + ": NH", head + ": PR"] }.flatten

tp_lists, fp_lists = two_lists[0], two_lists[1]
pairs = tp_lists.zip(fp_lists)

roc = DecoyROC.new
x_y = []
area_under_curve = []
pairs.each do |pair|
  num_hits, _tps, ppv = roc.pred_and_tps_and_ppv(pair[0], pair[1])
  if $AREAS_ONLY
    # The curve gets a leading point at (0, 1.0) before the area is taken.
    num_hits.unshift 0
    ppv.unshift 1.0
    area_under_curve << roc.area_under_curve(num_hits, ppv)
  else
    x_y.push(num_hits, ppv) # <- normal output
  end
end

if $AREAS_ONLY
  puts((["Filename"] + headings).join(" "))
  puts(([file] + area_under_curve).join(" "))
  exit ### <-------------- ABORT HERE
end

# X axis is the number of peptides id# (i.e., # of peps in TP db)
# Y axis is the precision = TP/(TP+FP)

[
  "# NH = number of hits",
  "# TP = true positives",
  "# FP = false positives",
  "# PR = precision = TP/(TP+FP)",
  csv_headings.join(",")
].each {|line| puts line }

## THIS IS THE NORMAL OUTPUT:
SyncEnumerator.new(*x_y).each {|row| puts row.join(", ") }
124
-
125
- =begin
126
-
127
- files = ARGV.to_a
128
-
129
- two_lists = files.collect do |file|
130
- obj = Bioworks.new(file)
131
- list = []
132
- list.push( obj.pep_probs_by_pep_prots )
133
- list.push( obj.pep_probs_by_seq_charge )
134
- # These each have a by_min and a by_top10
135
- list.push(*( obj.pep_probs_by_scan ) )
136
- list.push(*( obj.pep_probs_by_scan_charge ) )
137
- list
138
- end
139
-
140
-
141
- headings = ["PepProts: TP", "PepProts: PR", "SeqCharge: TP", "SeqCharge: PR",
142
- "Scan(TopHit): TP", "Scan(TopHit): PR", "Scan(Top10): TP", "Scan(Top10): PR",
143
- "ScanCharge(TopHit): TP", "ScanCharge(TopHit): PR",
144
- "ScanCharge(Top10): TP", "ScanCharge(Top10): PR"]
145
-
146
- pairs = two_lists[0].zip two_lists[1]
147
-
148
- roc = ROC.new
149
- x_y= []
150
- pairs.each do |pair|
151
- x,y = roc.tps_and_precision(pair[0], pair[1])
152
- x_y.push(x, y)
153
- end
154
-
155
- # X axis is the number of peptides id# (i.e., # of peps in TP db)
156
- # Y axis is the precision = TP/(TP+FP)
157
-
158
- puts "# TP = true positives"
159
- puts "# FP = false positives"
160
- puts "# PR = precision = TP/(TP+FP)"
161
- puts headings.join(",")
162
-
163
- SyncEnumerator.new(*x_y).each do |row|
164
- #items_as_string = row.collect do |item|
165
- # sprintf("%.18f", item)
166
- #end
167
- puts row.join(", ")
168
- #puts items_as_string.join(", ")
169
- end
170
-
171
- =end
172
-
data/bin/ms_to_lmat.rb DELETED
@@ -1,67 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'ms/msrun'
4
- require 'optparse'
5
- require 'ostruct'
6
- require 'lmat'
7
-
8
-
9
# defaults:
opt = {}
opt[:baseline] = 0.0
opt[:newext] = ".lmat"
opt[:inc_mz] = 1.0

# get options:
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
  op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
  op.separator ""
  op.separator "(sums m/z values that round to the same bin)"
  op.separator ""
  # Stored under :inc_mz -- the key the default above uses and the naming
  # pattern of the other keys (:start_mz, :end_mz, :inc_tm) -- so that the
  # option actually replaces the default.
  op.on("--mz_inc N", Float, "m/z increment (def: 1.0)") {|n| opt[:inc_mz] = n.to_f}
  op.on("--mz_start N", Float, "m/z start (def: start of 1st full scan)") {|n| opt[:start_mz] = n.to_f}
  op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
  op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
  op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
  op.on("-v", "--verbose") {$VERBOSE = true}
end
opts.parse!

if ARGV.size < 1
  puts opts
  exit
end

ARGV.each do |file|
  msrun = MS::MSRun.new(file)
  mslevel = 1
  (start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
  (times, spectra) = msrun.times_and_spectra(mslevel)
  # Values taken from the run itself; anything given on the command line
  # overrides them in the merge below.
  args = {
    :start_mz => start_mz,
    :end_mz => end_mz,

    :start_tm => times.first,
    :end_tm => times.last,
    :inc_tm => nil,
  }
  args.merge!(opt)
  lmat = LMat.new.from_times_and_spectra(times, spectra, args)
  # Output name: the input name with its extension replaced by .lmat
  # (.lmata for the ascii form).
  ext = File.extname(file)
  outfile = file.sub(/#{Regexp.escape(ext)}$/, opt[:newext])
  if args[:ascii]
    outfile << "a"
    lmat.print(outfile)
  else
    lmat.write(outfile)
  end
  puts("OUTPUT: #{outfile}") if $VERBOSE
end
60
-
61
-
62
-
63
-
64
-
65
-
66
-
67
-
@@ -1,16 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id/proph'
4
-
5
# Both a cutoff and at least one file are needed; otherwise show usage and stop.
if ARGV.size < 2
  puts "usage: #{File.basename(__FILE__)} <prob_cutoff> <pepproph.xml> ..."
  puts " For each file outputs 'pepproph_min<prob_cutoff>.xml'"
  puts " deleting all search_hits with peptides less than prob_cutoff"
  exit
end

files = ARGV.to_a
cutoff = files.shift
files.each do |file|
  # Only a trailing ".xml" is replaced, so a ".xml" elsewhere in the path is
  # left alone; the suffix is always appended, so the output name can never
  # equal the input name.
  outfile = file.sub(/\.xml\z/, '') + "_min#{cutoff}.xml"
  Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
end
data/bin/prob_validate.rb DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'spec_id/precision/prob'
4
-
5
- SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ARGV)
6
-
@@ -1,6 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'spec_id/protein_summary'
4
-
5
- ProteinSummary.new.create_from_command_line_args(ARGV)
6
-
@@ -1,32 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'prot'
4
- require 'pep'
5
-
6
if ARGV.size < 4
  usage = <<HERE
usage: protxml2prots_peps.rb run-prot.xml prot_prob nsp_pep_prob init_pep_prob
takes all proteins and peptides passing prob cutoffs and
outputs 'run-prot.xml.<prot_prob>_<nsp_prob>_<init_prob>.protpep'
which is a marshalled array of proteins (containing peptides)
HERE
  puts usage
  exit(1);
end

file = ARGV[0]
# Output name: <input>.<prot_prob>_<nsp_prob>_<init_prob>.protpep
outfile = file + '.' + ARGV[1] +'_'+ ARGV[2] +'_'+ ARGV[3] + ".protpep"

proteins = Protein.get_prots_and_peps_fast(*ARGV)
#puts "proteins"
#proteins.each do |pr|
# puts pr
# pr.peptides.each do |pep|
# puts "\n\t" + pep.to_s
# end
#end
#proteins = Protein.get_prots_and_peps(*ARGV)

# Marshal produces binary data, so the file is opened in binary mode to keep
# the bytes intact on platforms that translate line endings in text mode.
File.open(outfile, "wb") do |f|
  Marshal.dump(proteins, f)
end
32
-