mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/spec_id/srf.rb DELETED
@@ -1,973 +0,0 @@
1
- require 'fileutils'
2
-
3
- require 'spec_id'
4
- require 'spec_id/sequest'
5
- require 'fasta'
6
- require 'mspire'
7
- require 'set'
8
-
9
- require 'core_extensions'
10
-
11
- module BinaryReader
12
- Null_char = "\0"[0] ## TODO: change for ruby 1.9 or 2.0
13
- # extracts a string with all empty chars at the end stripped
14
- # expects the filehandle to be at the proper location
15
- def get_null_padded_string(fh,bytes)
16
- st = fh.read(bytes)
17
- # for empty declarations
18
- if st[0] == Null_char
19
- return ''
20
- end
21
- st.rstrip!
22
- st
23
- end
24
- end
25
-
26
- # class to extract information from <file>_dta.log files
27
-
28
- class SRFGroup
29
- include SpecID
30
-
31
- ## the srf objects themselves
32
- attr_accessor :srfs, :filenames
33
- ## also inherits :peps and :prots accessor
34
-
35
- # takes an array of filenames
36
- # or a single .srg filename
37
- # see from_srg to load a single .srg file
38
- # by default, the hits will be returned filtered by sequest params values.
39
- # [The raw SRF data is unfiltered!]
40
- def initialize(filenames=nil, filter_hits_by_params=true)
41
- @filenames = filenames
42
- @peps = []
43
- @prots = []
44
- @srfs = []
45
-
46
- # This is essentially duplicated in SQTGroup (should refactor eventually)
47
- global_ref_hash = {}
48
- if filenames
49
- if filenames.is_a?(String) && filenames =~ /\.srg$/
50
- srg_filename = filenames.dup
51
- @filename = srg_filename
52
- filenames = SRFGroup.srg_to_paths(filenames)
53
- filenames.each do |file|
54
- if !File.exist? file
55
- puts "File: #{file} in #{srg_filename} does not exist!"
56
- puts "Please modify #{srg_filename} to point to existing files."
57
- abort
58
- end
59
- end
60
- end
61
- filenames.each do |file|
62
- @srfs << SRF.new(file, @peps, global_ref_hash)
63
- end
64
- @prots = global_ref_hash.values
65
- if filter_hits_by_params
66
- filter_by_peptide_mass_tolerance
67
- end
68
- end
69
- end
70
-
71
- # reads a srg file and delivers the path names
72
- def self.srg_to_paths(file)
73
- IO.readlines(file).grep(/\w/).map {|v| v.chomp }
74
- end
75
-
76
-
77
- # if srfs were read in separately, then the proteins will need to be merged
78
- # by their reference
79
- def merge_different_sets(srfs)
80
- raise NotImplementedError, "need to implement?"
81
- end
82
-
83
- # 1. sets @prots and returns it: a new list of proteins based on which
84
- # peptides passed.
85
- # 2. updates the out_file's list of hits based on passing peptides (but not
86
- # the original hit id; rank is implicit in array ordering)
87
- # 3. updates each protein to only include peptides passing thresholds.
88
- # [Note, this process is how .out files are generated!]
89
- # 4. recalculates deltacn values completely if number of hits changed (does
90
- # not touch deltacn orig)
91
- # ASSUMES:
92
- # A. all srfs have identical params objects and each has a
93
- # peptide_mass_tolerance filter attribute.
94
- # B. proteins are already unique (peptides referencing the same protein
95
- # reference the same object already) In practice, this means all srfs were
96
- # read in together.
97
- def filter_by_peptide_mass_tolerance
98
- prots_in_set = Set.new
99
- params = @srfs.first.params
100
- pmt = params.peptide_mass_tolerance.to_f
101
- methd = nil # the method to
102
-
103
- case params.peptide_mass_units
104
- when '0'
105
- amu_based = true
106
- milli_amu = false
107
- when '1'
108
- amu_based = true
109
- milli_amu = true
110
- when '2'
111
- amu_based = false
112
- end
113
-
114
- @srfs.each do |srf|
115
- srf.filtered_by_precursor_mass_tolerance = true
116
- srf.out_files.each do |out_file|
117
- hits = out_file.hits
118
- before = hits.size
119
- hits.reject! do |pep|
120
- do_not_keep =
121
- if amu_based
122
- if milli_amu
123
- (pep.deltamass.abs > (pmt/1000))
124
- else
125
- (pep.deltamass.abs > pmt)
126
- end
127
- else
128
- (pep.ppm.abs > pmt)
129
- end
130
- unless do_not_keep
131
- pep.prots.each do |prot|
132
- if prots_in_set.include?(prot)
133
- prot.peps << pep
134
- else
135
- prots_in_set.add(prot)
136
- prot.peps = [pep]
137
- end
138
- end
139
- end
140
- do_not_keep
141
- end
142
- if hits.size != before
143
- SRF::OUT::Pep.update_deltacns_from_xcorr(hits)
144
- out_file.num_hits = hits.size
145
- end
146
- end
147
- end
148
- @prots = prots_in_set.to_a
149
-
150
- end
151
-
152
- # returns the filename used
153
- # if the file exists, the name will be expanded to full path, otherwise just
154
- # what is given
155
- def to_srg(srg_filename='bioworks.srg')
156
- File.open(srg_filename, 'w') do |v|
157
- @filenames.each do |srf_file|
158
- if File.exist? srf_file
159
- v.puts File.expand_path(srf_file)
160
- else
161
- v.puts srf_file
162
- end
163
- end
164
- end
165
- srg_filename
166
- end
167
- end
168
-
169
- class SRF
170
-
171
- # a string 3.5, 3.3 or 3.2
172
- attr_accessor :version
173
-
174
- attr_accessor :header
175
- attr_accessor :dta_files
176
- attr_accessor :out_files
177
- attr_accessor :params
178
- # a parallel array to dta_files and out_files where each entry is:
179
- # [first_scan, last_scan, charge]
180
- attr_accessor :index
181
- attr_accessor :base_name
182
- # this is the global peptides array
183
- attr_accessor :peps
184
- MASCOT_HYDROGEN_MASS = 1.007276
185
-
186
- attr_accessor :filtered_by_precursor_mass_tolerance
187
-
188
- # returns a Sequest::Params object
189
- def self.get_sequest_params(filename)
190
- # split the file in half and only read the second half (since we can be
191
- # confident that the params file will be there!)
192
- File.open(filename) do |handle|
193
- halfway = handle.stat.size / 2
194
- handle.seek halfway
195
- last_half = handle.read
196
- params_start_index = last_half.rindex('[SEQUEST]') + halfway
197
- handle.seek(params_start_index)
198
- Sequest::Params.new.parse_handle(handle)
199
- end
200
- end
201
-
202
- def dta_start_byte
203
- case @version
204
- when '3.2' ; 3260
205
- when '3.3' ; 3644
206
- when '3.5' ; 3644
207
- end
208
- end
209
-
210
- # peps and global_ref_hash are created as the srf files is read. If the
211
- # file is read as part of a group, then these should be passed in.
212
- # NOTE: if you want the hits filtered by precursor tolerance (the way they
213
- # might be displayed in .out files) you should probably use SRFGroup (which
214
- # does this by default)
215
- # SRF is meant to be a low level read of the file.
216
- def initialize(filename=nil, peps=[], global_ref_hash={})
217
- @dta_files = []
218
- @out_files = []
219
- if filename
220
- from_file(filename, peps, global_ref_hash)
221
- end
222
- end
223
-
224
- def round(float, decimal_places)
225
- sprintf("%.#{decimal_places}f", float)
226
- end
227
-
228
- # this mimicks the output of merge.pl from mascot
229
- # The only difference is that this does not include the "\r\n"
230
- # that is found after the peak lists, instead, it uses "\n" throughout the
231
- # file (thinking that this is preferable to mixing newline styles!)
232
- # note that Mass
233
- # if no filename is given, will use base_name + '.mgf'
234
- def to_mgf_file(filename=nil)
235
- filename =
236
- if filename ; filename
237
- else
238
- base_name + '.mgf'
239
- end
240
- h_plus = SpecID::MONO[:h_plus]
241
- File.open(filename, 'wb') do |out|
242
- dta_files.zip(index) do |dta, i_ar|
243
- chrg = dta.charge
244
- out.puts 'BEGIN IONS'
245
- out.puts "TITLE=#{[base_name, *i_ar].push('dta').join('.')}"
246
- out.puts "CHARGE=#{chrg}+"
247
- out.puts "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}"
248
- peak_ar = dta.peaks.unpack('e*')
249
- (0...(peak_ar.size)).step(2) do |i|
250
- out.puts( peak_ar[i,2].join(' ') )
251
- end
252
- out.puts ''
253
- out.puts 'END IONS'
254
- out.puts ''
255
- end
256
- end
257
- end
258
-
259
- # not given an out_folder, will make one with the basename
260
- # compress may be: :zip, :tgz, or nil (no compression)
261
- # :zip requires gem rubyzip to be installed and is *very* bloated
262
- # as it writes out all the files first!
263
- # :tgz requires gem archive-tar-minitar to be installed
264
- def to_dta_files(out_folder=nil, compress=nil)
265
- outdir =
266
- if out_folder ; out_folder
267
- else base_name
268
- end
269
-
270
- case compress
271
- when :tgz
272
- begin
273
- require 'archive/tar/minitar'
274
- rescue LoadError
275
- abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
276
- end
277
- require 'archive/targz' # my own simplified interface!
278
- require 'zlib'
279
- names = index.map do |i_ar|
280
- [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
281
- end
282
- #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
283
-
284
- tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
285
-
286
- Archive::Tar::Minitar::Output.open(tgz) do |outp|
287
- dta_files.each_with_index do |dta_file, i|
288
- Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
289
- end
290
- end
291
- when :zip
292
- begin
293
- require 'zip/zipfilesystem'
294
- rescue LoadError
295
- abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
296
- end
297
- #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
298
- Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
299
- dta_files.zip(index) do |dta,i_ar|
300
- #zfs.mkdir(outdir)
301
- zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
302
- dta.write_dta_file(out)
303
- #zfs.commit
304
- end
305
- end
306
- end
307
- else # no compression
308
- FileUtils.mkpath(outdir)
309
- Dir.chdir(outdir) do
310
- dta_files.zip(index) do |dta,i_ar|
311
- File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
312
- dta.write_dta_file(out)
313
- end
314
- end
315
- end
316
- end
317
- end
318
-
319
- # the out_filename will be the base_name + .sqt unless 'out_filename' is
320
- # defined
321
- # :round => round floating point numbers
322
- # etc...
323
- def to_sqt(out_filename=nil, opts={})
324
- tic_dp = 2
325
- mh_dp = 7
326
- xcorr_dp = 5
327
- sp_dp = 2
328
- dcn_dp = 5
329
-
330
- defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
331
- opt = defaults.merge(opts)
332
-
333
- outfile =
334
- if out_filename
335
- out_filename
336
- else
337
- base_name + '.sqt'
338
- end
339
- invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
340
- fmt =
341
- if params.fragment_mass_type == 'average' ; 'AVG'
342
- else ; 'MONO'
343
- end
344
- pmt =
345
- if params.precursor_mass_type == 'average' ; 'AVG'
346
- else ; 'MONO'
347
- end
348
-
349
- mass_table = params.mass_table
350
- static_mods = params.static_mods.map do |k,v|
351
- key = k.split(/_/)[1]
352
- if key.size == 1
353
- key + '=' + (mass_table[key.to_sym] + v.to_f).to_s
354
- else
355
- key + '=' + v
356
- end
357
- end
358
-
359
- dynamic_mods = []
360
- header.modifications.scan(/\((.*?)\)/) do |match|
361
- dynamic_mods << match.first.sub(/ /,'=')
362
- end
363
- plural = {
364
- 'StaticMod' => static_mods,
365
- 'DynamicMod' => dynamic_mods, # example as diff mod
366
- 'Comment' => ['Created from Bioworks .srf file']
367
- }
368
-
369
-
370
- db_filename = header.db_filename
371
- db_filename_in_sqt = db_filename
372
- if opt[:new_db_path]
373
- db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
374
- if opt[:update_db_path]
375
- db_filename_in_sqt = File.expand_path(db_filename)
376
- warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
377
- end
378
- end
379
-
380
- apmu =
381
- case params.peptide_mass_units
382
- when '0' : 'amu'
383
- when '1' : 'mmu'
384
- when '2' : 'ppm'
385
- end
386
-
387
- hh = {
388
- 'SQTGenerator' => 'mspire',
389
- 'SQTGeneratorVersion' => Mspire::Version,
390
- 'Database' => db_filename_in_sqt,
391
- 'FragmentMasses' => fmt,
392
- 'PrecursorMasses' => pmt,
393
- 'StartTime' => '', # Bioworks 3.2 also leaves this blank...
394
- 'Alg-PreMassTol' => params.peptide_mass_tolerance,
395
- 'Alg-FragMassTol' => params.fragment_ion_tolerance,
396
- 'Alg-PreMassUnits' => apmu, ## mine
397
- 'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
398
- 'Alg-Enzyme' => header.enzyme.split(':').last,
399
- 'Alg-MSModel' => header.model,
400
- }
401
-
402
- if opt[:db_info]
403
- if File.exist?(db_filename)
404
- reply = get_db_info_for_sqt(db_filename)
405
- %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
406
- hh[label] = val
407
- end
408
- else
409
- warn "file #{db_filename} does not exist, no extra db info in header!"
410
- end
411
- end
412
-
413
- has_hits = (self.out_files.size > 0)
414
- if has_hits
415
- # somewhat redundant with above, but we can get this without a db present!
416
- hh['DBLocusCount'] = self.out_files.first.db_locus_count
417
- end
418
-
419
- File.open(outfile, 'w') do |out|
420
- # print the header:
421
- invariant_ordering.each do |iv|
422
- out.puts ['H', iv, hh.delete(iv)].join("\t")
423
- end
424
- hh.each do |k,v|
425
- out.puts ['H', k, v].join("\t")
426
- end
427
- plural.each do |k,vals|
428
- vals.each do |val|
429
- out.puts ['H', k, val].join("\t")
430
- end
431
- end
432
-
433
- ##### SPECTRA
434
- time_to_process = '0.0'
435
- #########################################
436
- # NEED TO FIGURE OUT: (in spectra guy)
437
- # * Lowest Sp value for top 500 spectra
438
- # * Number of sequences matching this precursor ion
439
- #########################################
440
-
441
- manual_validation_status = 'U'
442
- self.out_files.zip(dta_files) do |out_file, dta_file|
443
- # don't have the time to process (using 0.0 like bioworks 3.2)
444
- dta_file_mh = dta_file.mh
445
- out_file_total_inten = out_file.total_inten
446
- out_file_lowest_sp = out_file.lowest_sp
447
- if opt[:round]
448
- dta_file_mh = round(dta_file_mh, mh_dp)
449
- out_file_total_inten = round(out_file_total_inten, tic_dp)
450
- out_file_lowest_sp = round(out_file_lowest_sp, sp_dp)
451
- end
452
-
453
- out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
454
- out_file.hits.each_with_index do |hit,index|
455
- hit_mh = hit.mh
456
- hit_deltacn_orig_updated = hit.deltacn_orig_updated
457
- hit_xcorr = hit.xcorr
458
- hit_sp = hit.sp
459
- if opt[:round]
460
- hit_mh = round(hit_mh, mh_dp)
461
- hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
462
- hit_xcorr = round(hit_xcorr, xcorr_dp)
463
- hit_sp = round(hit_sp, sp_dp)
464
- end
465
- # note that the rank is determined by the order..
466
- out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
467
- hit.prots.each do |prot|
468
- out.puts ['L', prot.first_entry].join("\t")
469
- end
470
- end
471
- end
472
- end # close the filehandle
473
-
474
- end
475
-
476
- # assumes the file exists and is readable
477
- # returns [DBSeqLength, DBLocusCount, DBMD5Sum] or nil if no file
478
- def get_db_info_for_sqt(dbfile)
479
- fasta = Fasta.new(dbfile)
480
- [fasta.aa_seq_length, fasta.size, fasta.md5_sum]
481
- end
482
-
483
-
484
- # returns self
485
- def from_file(filename, peps, global_ref_hash)
486
- dups = SRF.get_sequest_params(filename).print_duplicate_references
487
- if dups == '0'
488
- raise RuntimeError, <<END
489
-
490
- ***************************************************************************
491
- Sorry, but the SRF reader cannot read this file!
492
- .srf files must currently be created with print_duplicate_references > 0
493
- (This is how the srf object can link peptides with proteins!)
494
- To capture all duplicate references, set the sequest parameter
495
- 'print_duplicate_references' to 100 or greater.
496
- ***************************************************************************
497
- END
498
- end
499
-
500
- File.open(filename, "rb") do |fh|
501
- @header = SRF::Header.new.from_handle(fh)
502
- @version = @header.version
503
-
504
- unpack_35 = case @version
505
- when '3.2'
506
- false
507
- when '3.3'
508
- false
509
- when '3.5'
510
- true
511
- end
512
- @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
513
-
514
- @out_files = read_out_files(fh,@header.num_dta_files, global_ref_hash, measured_mhs, unpack_35)
515
- if fh.eof?
516
- warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
517
- @params = nil
518
- @index = []
519
- else
520
- @params = Sequest::Params.new.parse_handle(fh)
521
- # This is very sensitive to the grab_params method in sequest params
522
- fh.read(12) ## gap between last params entry and index
523
- @index = read_scan_index(fh,@header.num_dta_files)
524
- end
525
- end
526
-
527
- ### UPDATE SOME THINGS ON SINGLE PASS:
528
- @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
529
- # give each hit a base_name, first_scan, last_scan
530
- @index.each_with_index do |ind,i|
531
- mass_measured = @dta_files[i][0]
532
- #puts @out_files[i].join(", ")
533
- @out_files[i][0,3] = *ind
534
- pep_hits = @out_files[i][6]
535
- peps.push( *pep_hits )
536
- pep_hits.each do |pep_hit|
537
- pep_hit[14,4] = @base_name, *ind
538
- # add the deltamass
539
- pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
540
- pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
541
- pep_hit[18] = self ## link with the srf object
542
- end
543
- end
544
- self
545
- end
546
-
547
- # returns an index where each entry is [first_scan, last_scan, charge]
548
- def read_scan_index(fh, num)
549
- ind_len = 24
550
- index = Array.new(num)
551
- unpack_string = 'III'
552
- st = ''
553
- ind_len.times do st << '0' end ## create a 24 byte string to receive data
554
- num.times do |i|
555
- fh.read(ind_len, st)
556
- index[i] = st.unpack(unpack_string)
557
- end
558
- index
559
- end
560
-
561
- # returns an array of dta_files
562
- def read_dta_files(fh, num_files, unpack_35)
563
- measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
564
- dta_files = Array.new(num_files)
565
- start = dta_start_byte
566
- unless fh.pos == start
567
- fh.pos = start
568
- end
569
-
570
- header.num_dta_files.times do |i|
571
- dta_file = SRF::DTA.new.from_handle(fh, unpack_35)
572
- measured_mhs[i] = dta_file[0]
573
- dta_files[i] = dta_file
574
- end
575
- [dta_files, measured_mhs]
576
- end
577
-
578
- # filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
579
- # will put the fh there.
580
- def read_out_files(fh,number_files, global_ref_hash, measured_mhs, unpack_35)
581
- out_files = Array.new(number_files)
582
- header.num_dta_files.times do |i|
583
- out_files[i] = SRF::OUT.new.from_handle(fh, global_ref_hash, unpack_35)
584
- end
585
- out_files
586
- end
587
-
588
- end
589
-
590
# Header section of an SRF file: a version string, the DTA generation
# parameters and a run of null-padded string fields.
class SRF::Header
  include BinaryReader

  # Byte offsets of the string fields.  from_handle only consults :enzyme;
  # every later field is read back-to-back from that point.
  Start_byte = {
    :enzyme => 438,
    :ion_series => 694,
    :model => 950,
    :modifications => 982,
    :raw_filename => 1822,
    :db_filename => 2082,
    :dta_log_filename => 2602,
    :params_filename => 3122,
    :sequest_log_filename => 3382,
  }

  # Width in bytes of each null-padded string field.
  Byte_length = {
    :enzyme => 256,
    :ion_series => 256,
    :model => 32,
    :modifications => 840,
    :raw_filename => 260,
    :db_filename => 520,
    :dta_log_filename => 520,
    :params_filename => 260,
    :sequest_log_filename => 262, ## is this really 262?? or should be 260??
  }

  # Widths that replace the ones above when the version is '3.2'.
  Byte_length_v32 = {
    :modifications => 456,
  }

  # String of the form '3.N', assembled in from_handle
  attr_accessor :version
  # a SRF::DTAGen object
  attr_accessor :dta_gen
  # null-padded string fields, filled in by from_handle
  attr_accessor :enzyme, :ion_series, :model, :modifications
  attr_accessor :raw_filename, :db_filename, :dta_log_filename
  attr_accessor :params_filename, :sequest_log_filename

  # Number of DTA files, as recorded in the DTA generation parameters.
  def num_dta_files
    @dta_gen.num_dta_files
  end

  # Populates this header from +fh+ and returns self.
  #
  # The 4-byte version word is read from the handle's current position
  # (presumably the start of the file -- confirm against callers);
  # SRF::DTAGen#from_handle then rewinds the handle itself.  Afterwards the
  # handle is moved to Start_byte[:enzyme] and the string fields are read one
  # after another.
  def from_handle(fh)
    version_word = fh.read(4)
    @version = '3.' + version_word.unpack('I').first.to_s
    @dta_gen = SRF::DTAGen.new.from_handle(fh)

    ## get the rest of the info
    field_widths =
      if @version == '3.2'
        Byte_length.merge(Byte_length_v32)
      else
        Byte_length.dup
      end

    fields_in_file_order = [
      :enzyme, :ion_series, :model, :modifications, :raw_filename,
      :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename
    ]

    fh.pos = Start_byte[:enzyme]
    fields_in_file_order.each do |field|
      value = get_null_padded_string(fh, field_widths[field])
      send("#{field}=".to_sym, value)
    end
    self
  end

end
654
-
655
# the DTA Generation Params
#
# Field types as noted by the original author:
#   start_time        Float   (not sure if this is correct)
#   start_mass        Float
#   end_mass          Float
#   num_dta_files     Integer
#   group_scan        Integer
#   min_group_count   Integer (not sure if this is correct)
#   min_ion_threshold Integer
#   start_scan        Integer
#   end_scan          Integer
#
# intensity_threshold and precursor_tolerance have not been located in the
# file yet, so they have no accessors.
class SRF::DTAGen

  attr_accessor :start_time, :start_mass, :end_mass
  attr_accessor :num_dta_files, :group_scan
  attr_accessor :min_group_count, :min_ion_threshold
  #attr_accessor :intensity_threshold # can't find yet
  #attr_accessor :precursor_tolerance # can't find yet
  attr_accessor :start_scan, :end_scan

  # Rewinds +fh+ to byte 0 if needed, reads the first 148 bytes and decodes
  # the nine parameters from them.  Returns self.
  def from_handle(fh)
    fh.pos = 0 unless fh.pos == 0
    raw = fh.read(148)

    # three little-endian floats, then six native unsigned ints, with
    # skipped padding between them
    values = raw.unpack('x36ex12ex4ex48Ix12IIIII')

    @start_time, @start_mass, @end_mass,
      @num_dta_files, @group_scan, @min_group_count,
      @min_ion_threshold, @start_scan, @end_scan = values
    self
  end
end
689
-
690
# One DTA record of an SRF file.
#
# total_num_possible_charge_states is not correct under 3.5 (Bioworks 3.3.1)
# unknown is, well unknown...
SRF::DTA = Arrayclass.new(%w(mh dta_tic num_peaks charge ms_level unknown total_num_possible_charge_states peaks))

class SRF::DTA
  # Record header layouts (the original, single layout was "EeIvvvv").
  Unpack_32 = "EeIvvvv"
  Unpack_35 = "Ex8eVx2vvvv"

  # note on peaks (self[7])
  # this is a byte string of packed floats, you can get the peaks out with
  # unpack("e*")

  undef_method :inspect
  # Compact description; the peak data is summarized by its byte count.
  def inspect
    peaks_st = self[7] ? "[#{self[7].size} bytes]" : 'nil'
    "<SRF::DTA @mh=#{mh} @dta_tic=#{dta_tic} @num_peaks=#{num_peaks} @charge=#{charge} @ms_level=#{ms_level} @total_num_possible_charge_states=#{total_num_possible_charge_states} @peaks=#{peaks_st} >"
  end

  # Fills this record from +fh+ at its current position and returns self.
  #
  # unpack_35 - truthy selects the 34-byte header / 22-byte spacer layout,
  #             otherwise the 24-byte header / 24-byte spacer layout is used.
  def from_handle(fh, unpack_35)
    @unpack, @read_header, @read_spacer =
      if unpack_35
        [Unpack_35, 34, 22]
      else
        [Unpack_32, 24, 24]
      end

    # get the bulk of the data in single unpack
    self[0,7] = fh.read(@read_header).unpack(@unpack)

    # Scan numbers are given at the end in an index!
    # The spacer bytes are consumed and discarded.
    fh.read(@read_spacer)

    # 8 bytes are read per peak and stored undecoded
    self[7] = fh.read(num_peaks * 8)
    self
  end

  # Renders the record as the text of a .dta file: a "mh charge" line
  # followed by one "m/z intensity" line per peak, each terminated by CRLF.
  def to_dta_file_data
    text = "#{mh.round_to(6)} #{charge}\r\n"
    values = peaks.unpack('e*')
    0.step(values.size - 1, 2) do |i|
      mz = values[i]
      intensity = values[i+1]
      # %d is equivalent to floor, so we round by adding 0.5!
      text << "#{mz.round_to(4)} #{(intensity + 0.5).floor}\r\n"
    end
    text
  end

  # write a class dta file to the io object
  def write_dta_file(io)
    io.print(to_dta_file_data)
  end

end
752
-
753
# One OUT record of an SRF file together with its peptide hits.
SRF::OUT = Arrayclass.new( %w(first_scan last_scan charge num_hits computer date_time hits total_inten lowest_sp num_matched_peptides db_locus_count) )
# 0=first_scan, 1=last_scan, 2=charge, 3=num_hits, 4=computer, 5=date_time, 6=hits, 7=total_inten, 8=lowest_sp, 9=num_matched_peptides, 10=db_locus_count

class SRF::OUT
  # Layouts yielding num_hits, computer and date_time from the record header.
  Unpack_32 = '@36vx2Z*@60Z*'
  Unpack_35 = '@36vx4Z*@62Z*'

  undef_method :inspect
  # Compact description; hits are summarized by their count when present.
  def inspect
    hits_s = self[6] ? ", @hits(#)=#{hits.size}" : ''
    "<SRF::OUT first_scan=#{first_scan}, last_scan=#{last_scan}, charge=#{charge}, num_hits=#{num_hits}, computer=#{computer}, date_time=#{date_time}#{hits_s}>"
  end

  # Fills this record from +fh+ at its current position and returns self.
  #
  # fh              - IO-like handle.
  # global_ref_hash - passed on to SRF::OUT::Pep for protein bookkeeping.
  # unpack_35       - truthy selects the Unpack_35 layout, else Unpack_32.
  def from_handle(fh, global_ref_hash, unpack_35)
    ## EMPTY out file is 96 bytes
    ## each hit is 320 bytes
    header_bytes = fh.read(96)

    ## num_hits, computer and date_time:
    layout = unpack_35 ? Unpack_35 : Unpack_32
    self[3,3] = header_bytes.unpack(layout)
    ## total_inten, lowest_sp, num_matched_peptides, db_locus_count:
    self[7,4] = header_bytes.unpack('@8eex4Ix4I')

    hit_count = self[3]
    pep_hits = Array.new(hit_count) do
      SRF::OUT::Pep.new.from_handle(fh, global_ref_hash, unpack_35)
    end

    unless pep_hits.empty?
      extra_reference_count = pep_hits.inject(0) do |sum, pep_hit|
        sum + pep_hit.num_other_loci
      end
      SRF::OUT::Pep.read_extra_references(fh, extra_reference_count, pep_hits, global_ref_hash)
      ## The xcorrs are already ordered by best to worst hit
      ## ADJUST the deltacn's to be meaningful for the top hit:
      ## (the same as bioworks and prophet)
      SRF::OUT::Pep.set_deltacn_from_deltacn_orig(pep_hits)
    end

    self[6] = pep_hits
    self
  end

end
802
-
803
-
804
# A single peptide hit inside an SRF::OUT record.
#
# deltacn_orig - the one that sequest originally reports (top hit gets 0.0)
# deltacn - modified to be that of the next best hit (by xcorr) and the last
# hit takes 1.1.  This is what is called deltacn by bioworks and pepprophet
# (at least for the first few years).  If filtering occurs, it will be
# updated.
# deltacn_orig_updated - the latest updated value of deltacn.
# Originally, this will be equal to deltacn_orig.  After filtering, this will
# be recalculated.  To know if this will be different from deltacn_orig, query
# match.srf.filtered_by_precursor_mass_tolerance.  If this is changed, then
# deltacn should also be changed to reflect it.
# mh - the theoretical mass + h
# prots are created as SRF prot objects with a reference and linked to their
# peptides (from global hash by reference)
# ppm = 10^6 * |deltamass| / mass_measured  [ deltamass = mass_real - mass_measured ]
# This is calculated for the M+H mass!
# num_other_loci is the number of other loci that the peptide matches beyond
# the first one listed
# srf = the srf object this scan came from

SRF::OUT::Pep = Arrayclass.new(%w( mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )

# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated

class SRF::OUT::Pep
  include SpecID::Pep

  # creates the deltacn that is meaningful for the top hit (the deltacn_orig
  # of the second best hit and so on); the last hit gets 1.1.
  # assumes sorted.  An empty array is left alone (it used to raise a
  # NoMethodError on nil).
  def self.set_deltacn_from_deltacn_orig(ar)
    return if ar.empty?
    (1...ar.size).each {|i| ar[i-1].deltacn = ar[i].deltacn_orig }
    ar[-1].deltacn = 1.1
  end

  # (assumes sorted)
  # recalculates deltacn from xcorrs and sets deltacn_orig_updated and deltacn
  def self.update_deltacns_from_xcorr(ar)
    if ar.size > 0
      top_score = ar.first[3]
      other_scores = (1...(ar.size)).to_a.map do |i|
        1.0 - (ar[i][3]/top_score)
      end
      ar.first[20] = 0.0
      (0...(ar.size-1)).each do |i|
        ar[i][19] = other_scores[i] # deltacn
        ar[i+1][20] = other_scores[i] # deltacn_orig_updated
      end
      ar.last[19] = 1.1
    end
  end

  # Reads +num_extra_references+ additional protein references from +fh+ and
  # appends each one to the prots list of the hit it belongs to.
  #
  # Every entry is an 8-byte prefix whose second native unsigned int is the
  # 1-based position of the hit in +pep_hits+, followed by an 80-byte
  # reference string.  Only the first 38 characters of the reference are used.
  def self.read_extra_references(fh, num_extra_references, pep_hits, global_ref_hash)
    num_extra_references.times do
      pep = pep_hits[fh.read(8).unpack('x4I').first - 1]

      ref = fh.read(80).unpack('A*').first
      pep[10] << pep.new_protein(ref[0,38], pep, global_ref_hash)
    end
    # fh.read(6) if unpack_35
  end

  # x2=???
  #Unpack_35 = '@64Ex8ex12eeIx22vx2vvx8Z*@246Z*'
  ### NOTE:
  # I need to verify that this is correct (I mean the 'I' after x18)
  Unpack_35 = '@64Ex8ex12eeIx18Ivx2vvx8Z*@246Z*'
  # translation: @64=(64 bytes in to the record), E=mH, x8=8 unknown bytes,
  # e=deltacn, x12=12 unknown bytes, e=sp, e=xcorr, I=ID#, x18=18 unknown
  # bytes, I=num_other_loci (see NOTE above), v=rsp, x2=2 unknown bytes,
  # v=ions_matched, v=ions_total, x8=8 unknown bytes, Z*=sequence,
  # @246Z*=at byte 246 grab the string (which is proteins).
  # Unpack_32 follows the same scheme with x14 before the second I, no x2
  # after rsp, and the protein string at byte 240.
  #Unpack_32 = '@64Ex8ex12eeIx18vvvx8Z*@240Z*'
  Unpack_32 = '@64Ex8ex12eeIx14Ivvvx8Z*@240Z*'
  Unpack_four_null_bytes = 'a*'
  Unpack_Zstar = 'Z*'
  # Number of bytes read per hit for each layout.
  Read_35 = 426
  Read_32 = 320

  FourNullBytes_as_string = "\0\0\0\0"
  #NewRecordStart = "\0\0" + 0x3a.chr + 0x1a.chr + "\0\0"
  NewRecordStart = 0x01.chr + 0x00.chr
  Sequest_record_start = "[SEQUEST]"

  undef_method :inspect
  # Space-separated "name=value" description.  prots is summarized by its
  # count and the owning srf (when set) by its base_name.
  def inspect
    st = %w(aaseq sequence mh deltacn_orig sp xcorr id rsp ions_matched ions_total prots deltamass ppm base_name first_scan last_scan charge deltacn).map do |v|
      # v is always a field name (String), so the only special case is prots
      if v == 'prots'
        "#{v}(#)=#{send(v.to_sym).size}"
      else
        "#{v}=#{send(v.to_sym).inspect}"
      end
    end
    st.unshift("<#{self.class}")
    if srf
      st.push("srf(base_name)=#{srf.base_name.inspect}")
    end
    st.push('>')
    st.join(' ')
  end

  # Fills this hit from +fh+ at its current position and returns self.
  #
  # fh              - IO-like handle.
  # global_ref_hash - Hash of reference => SRF::OUT::Prot shared by all hits.
  # unpack_35       - truthy selects Unpack_35/Read_35 and skips 6 trailing
  #                   bytes, otherwise Unpack_32/Read_32 is used.
  def from_handle(fh, global_ref_hash, unpack_35)
    unpack =
      if unpack_35 ; Unpack_35
      else ; Unpack_32
      end

    ## get the first part of the info
    st = fh.read(( unpack_35 ? Read_35 : Read_32) ) ## read all the hit data

    # Both layouts yield 11 values (mh .. sequence plus the protein
    # reference), so they fill slots 0..10 exactly.  Assigning them to a
    # 10-slot slice, as before, grew the record by one stray element.
    self[0,11] = st.unpack(unpack)

    # set deltacn_orig_updated
    self[20] = self[1]

    # we are slicing the reference to 38 chars to be the same length as
    # duplicate references
    self[10] = [new_protein(self[10][0,38], self, global_ref_hash)]

    self[13] = SpecID::Pep.sequence_to_aaseq(self[9])

    fh.read(6) if unpack_35

    self
  end

  # Returns the SRF::OUT::Prot registered under +reference+ in
  # +global_ref_hash+, creating it if necessary, and links +peptide+ to it.
  def new_protein(reference, peptide, global_ref_hash)
    if global_ref_hash.key? reference
      global_ref_hash[reference].peps << peptide
    else
      global_ref_hash[reference] = SRF::OUT::Prot.new(reference, [peptide])
    end
    global_ref_hash[reference]
  end

end
943
-
944
# A protein reference together with the peptide hits that point at it.
SRF::OUT::Prot = Arrayclass.new( %w(reference peps) )

class SRF::OUT::Prot
  include SpecID::Prot
  # we shouldn't have to do this because this is included in SpecID::Prot, but
  # under some circumstances it won't work without explicitly calling it.
  include ProteinReferenceable

  # $VERBOSE is switched off while initialize is defined and restored
  # straight afterwards.
  saved_verbose = $VERBOSE
  $VERBOSE = nil
  # reference - the protein reference (slot 0)
  # peps      - Array of peptide hits (slot 1)
  def initialize(reference=nil, peps=[])
    super(self.class.size)
    self[0,2] = [reference, peps]
  end
  $VERBOSE = saved_verbose

  undef_method :inspect
  # Compact description; peps is summarized by its count.
  def inspect
    "<SRF::OUT::Prot @reference=#{reference}, @peps(#)=#{peps.size}>"
  end
end
969
-
970
-
971
-
972
-
973
-