mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,33 +0,0 @@
1
- require 'spec_id/sequest/params'
2
- require 'hash_by'
3
- require 'sort_by_attributes.rb'
4
-
5
- module Sequest
6
-
7
- # returns one array of peptide hits: indexes hits based on index_by, takes
8
- # the uniq ones and then sorts the group by sort_by (compatible with
9
- # sort_by_attributes) then slices from first_index to last_index
10
- # (inclusive).
11
- def self.other_hits(peps, first_index=1, last_index=9, index_by=[:base_name, :first_scan, :charge], sort_by=[:xcorr, {:down => :xcorr}])
12
- all_hits = []
13
- peps.hash_by(*index_by).each do |scan_key, peps_per_scan|
14
- if peps_per_scan.size >= (first_index + 1)
15
- all_hits.push( *(peps_per_scan.uniq.sort_by_attributes(*sort_by)[first_index..last_index]) )
16
- end
17
- end
18
- all_hits.compact
19
- end
20
-
21
- def self.other_hits_sorted_by_xcorr(peps, first_index, last_index, index_by=[:base_name, :first_scan, :charge])
22
- all_hits = []
23
- peps.hash_by(*index_by).each do |scan_key, peps_per_scan|
24
- if peps_per_scan.size >= (first_index + 1)
25
- all_hits.push( *(peps_per_scan.uniq.sort_by {|x| x.xcorr }.reverse[first_index..last_index]) )
26
- end
27
- end
28
- all_hits.compact
29
-
30
- end
31
-
32
- end
33
-
data/lib/spec_id/sqt.rb DELETED
@@ -1,349 +0,0 @@
1
- require 'spec_id'
2
- require 'arrayclass'
3
- require 'set'
4
-
5
# A collection of SQT files whose peptide hits and protein references are
# pooled together.
class SQTGroup
  include SpecID # inherits prots and peps accessors

  attr_accessor :sqts, :filenames

  # filenames may be:
  # * a String ending in '.sqg' (meta text file listing .sqt files, one per
  #   line; lines without any word character are ignored)
  # * any other String, taken as the name of a single sqt file
  # * an array of sqt filenames
  # * nil, in which case nothing is read
  #
  # Every file is parsed into an SQT object (see @sqts); all of them share
  # @peps and one protein-reference hash whose values become @prots.
  def initialize(filenames=nil)
    @filenames = filenames
    @prots = []
    @peps = []
    @sqts = []
    return unless filenames

    ## This is duplicated in SRFGroup (should refactor eventually)
    if filenames.is_a?(String)
      if filenames =~ /\.sqg$/
        srg_filename = filenames.dup
        @filename = srg_filename
        @filenames = IO.readlines(filenames).grep(/\w/).map {|v| v.chomp }
        missing = @filenames.find {|file| !File.exist?(file) }
        if missing
          puts "File: #{missing} in #{srg_filename} does not exist!"
          puts "Please modify #{srg_filename} to point to existing files."
          abort
        end
      else
        # a bare filename: String#each is gone since ruby 1.9, so wrap it
        @filenames = [filenames]
      end
    end

    global_ref_hash = {}
    @filenames.each do |file|
      @sqts << SQT.new(file, @peps, global_ref_hash)
    end
    @prots = global_ref_hash.values
  end

  # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  #
  # Writes one line per entry of @filenames to sqg_filename: the expanded
  # full path when the file exists, otherwise the name exactly as given.
  # Returns sqg_filename.
  def to_sqg(sqg_filename='bioworks.sqg')
    File.open(sqg_filename, 'w') do |out|
      @filenames.each do |sqt_file|
        out.puts(File.exist?(sqt_file) ? File.expand_path(sqt_file) : sqt_file)
      end
    end
    sqg_filename
  end

end
60
-
61
# One parsed .sqt file: its header plus the spectra that follow it.
class SQT
  PercolatorHeaderMatch = /^Percolator v/
  Delimiter = "\t"

  # header::             what SQT::Header#from_handle returned
  # spectra::            what SQT::Spectrum.spectra_from_handle returned
  # base_name::          file name without directory or trailing extension
  # percolator_results:: boolean
  attr_accessor :header, :spectra, :base_name, :percolator_results

  # Parses filename right away when one is given (see from_file).
  def initialize(filename=nil, peps=[], global_ref_hash={})
    from_file(filename, peps, global_ref_hash) if filename
  end

  # Reads the header and then the spectra from filename.  The file is treated
  # as percolator output when percolator_results is passed as true or when
  # any header key matches PercolatorHeaderMatch.
  def from_file(filename, peps=[], global_ref_hash={}, percolator_results=false)
    @percolator_results = percolator_results
    # backslashes become slashes so File.basename also splits windows paths
    slashed = filename.gsub('\\','/')
    @base_name = File.basename(slashed).sub(/\.\w+$/, '')
    File.open(filename) do |fh|
      @header = SQT::Header.new.from_handle(fh)
      percolator_header = @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
      @percolator_results = true if percolator_header
      @spectra = SQT::Spectrum.spectra_from_handle(fh, @base_name, peps, global_ref_hash, @percolator_results)
    end
  end

end
91
-
92
# Inherits from hash, so all header stuff can be accessed by key.  Multiline
# values will be pushed into an array.
# All header values are stored as (newline-removed) strings!
class SQT
  class Header < Hash
    Leader = 'H'

    # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
    # Any other keys repeated will be shoved into an array; otherwise a string
    Arrayed = %w(DynamicMod StaticMod Comment).to_set

    # accessor name => header key as it appears in the file
    HeaderKeys = {
      :sqt_generator => 'SQTGenerator',
      :sqt_generator_version => 'SQTGeneratorVersion',
      :database => 'Database',
      :fragment_masses => 'FragmentMasses',
      :precursor_masses => 'PrecursorMasses',
      :start_time => 'StartTime',
      :db_seq_length => 'DBSeqLength',
      :db_locus_count => 'DBLocusCount',
      :db_md5sum => 'DBMD5Sum',
      :peptide_mass_tolerance => 'Alg-PreMassTol',
      :fragment_ion_tolerance => 'Alg-FragMassTol',
      # nonstandard (mine)
      :peptide_mass_units => 'Alg-PreMassUnits',
      :ion_series => 'Alg-IonSeries',
      :enzyme => 'Alg-Enzyme',
      # nonstandard (mine)
      :ms_model => 'Alg-MSModel',
      :static_mods => 'StaticMod',
      :dynamic_mods => 'DynamicMod',
      :comments => 'Comment'
    }

    KeysToAtts = HeaderKeys.invert

    HeaderKeys.keys.each do |ky|
      attr_accessor ky
    end

    # Reads consecutive lines starting with Leader from fh and parses them
    # (see from_lines).  fh is left positioned at the first line that is not
    # a header line (or at the end of the file).  Returns self.
    def from_handle(fh)
      Arrayed.each do |ky|
        self[ky] = []
      end
      lines = []
      pos = fh.pos
      while (line = fh.gets) && (line[0,1] == Leader)
        lines << line
        pos = fh.pos
      end
      # rewind past the non-header line we just consumed (no-op at eof)
      fh.pos = pos
      from_lines(lines)
    end

    # Parses an array of header lines ("H<tab>Key<tab>value") into self and
    # mirrors every key listed in HeaderKeys into its accessor.  The given
    # strings are not modified.  Returns self.
    def from_lines(array_of_header_lines)
      # guarantee the always-array keys even when called without from_handle
      Arrayed.each do |ky|
        self[ky] ||= []
      end
      array_of_header_lines.each do |line|
        (ky, *rest) = line.chomp.split(SQT::Delimiter)[1..-1]
        next if ky.nil? # a bare leader with no key carries no information
        # just in case they have any tabs in their field
        value = rest.join(SQT::Delimiter)
        if Arrayed.include?(ky)
          self[ky] << value
        elsif key?(ky) # already exists
          if self[ky].is_a?(Array)
            self[ky] << value
          else
            self[ky] = [self[ky], value]
          end
        else # normal
          self[ky] = value
        end
      end
      KeysToAtts.each do |ky, methd|
        send("#{methd}=".to_sym, self[ky])
      end
      self
    end

  end
end
176
-
177
# all are cast as expected (total_intensity is a float)
# mh = observed mh
# NOTE(review): Arrayclass comes from the 'arrayclass' gem; presumably it
# builds an array-backed class with one named accessor per position - confirm.
SQT::Spectrum = Arrayclass.new(%w[first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches])

# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches

class SQT::Spectrum
  Leader = 'S'

  # Reads fh to the end and returns an array of spectra.
  #
  # Each 'S' line starts a new spectrum, each 'M' line adds one match to the
  # current spectrum (and to peps), and each 'L' line adds one locus to the
  # current match.  Loci are shared through global_ref_hash, keyed by the
  # second field of the 'L' line.  Matches are SQT::Match::Percolator objects
  # when percolator_results is true, SQT::Match objects otherwise.
  #
  # assumes the first line starts with an 'S'
  def self.spectra_from_handle(fh, base_name, peps=[], global_ref_hash={}, percolator_results=false)
    spectra = []
    spectrum = nil
    matches = nil
    match = nil
    loci = nil

    while line = fh.gets
      case line[0,1]
      when SQT::Spectrum::Leader
        spectrum = SQT::Spectrum.new.from_line( line )
        spectra << spectrum
        matches = []
        spectrum.matches = matches
      when SQT::Match::Leader
        match_klass = percolator_results ? SQT::Match::Percolator : SQT::Match
        match = match_klass.new.from_line( line )
        # copy first_scan, last_scan and charge from the owning spectrum
        match[10,3] = spectrum[0,3]
        match[15] = base_name
        loci = []
        match.loci = loci
        # each match is recorded exactly once per collection
        matches << match
        peps << match
      when SQT::Locus::Leader
        key = line.chomp.split(SQT::Delimiter)[1]
        unless global_ref_hash.key?(key)
          new_locus = SQT::Locus.new.from_line( line )
          new_locus.peps = []
          global_ref_hash[key] = new_locus
        end
        locus = global_ref_hash[key]
        locus.peps << match
        loci << locus
      end
    end
    # set the deltacn:
    set_deltacn(spectra)
    spectra
  end

  # Shifts the deltacn values up by one position within every spectrum:
  # each match gets the deltacn_orig of the match after it and the last
  # match gets 1.1.  Spectra without matches are left alone.
  # Returns spectra.
  def self.set_deltacn(spectra)
    spectra.each do |spec|
      matches = spec.matches
      next if matches.empty?
      (0...(matches.size-1)).each do |i|
        matches[i].deltacn = matches[i+1].deltacn_orig
      end
      matches[-1].deltacn = 1.1
    end
    spectra
  end

  # Fills positions 0-8 from the tab-delimited fields of an 'S' line (the
  # leading 'S' field is skipped) and resets matches (position 9) to an
  # empty array.  The given string is not modified.  Returns self.
  def from_line(line)
    ar = line.chomp.split(SQT::Delimiter)
    self[0] = ar[1].to_i
    self[1] = ar[2].to_i
    self[2] = ar[3].to_i
    self[3] = ar[4].to_f
    self[4] = ar[5]
    self[5] = ar[6].to_f
    self[6] = ar[7].to_f
    self[7] = ar[8].to_f
    self[8] = ar[9].to_i
    self[9] = []
    self
  end
end
263
-
264
# Only positions 0 - 9 come from the SQT file itself; from_line below also
# derives position 14 (aaseq) from the sequence.
SQT::Match = Arrayclass.new(%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci])

# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci

# rxcorr = rank by xcorr
# rsp = rank by sp
# NOTE:
# deltacn_orig is the value read from the file
# deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
# give the last one 1.1)
class SQT::Match
  include SpecID::Pep
  Leader = 'M'

  # the proteins of a match are its loci (position 16)
  def prots
    self[16]
  end

  # Chomps line in place, splits it on SQT::Delimiter and stores the fields
  # after the leading 'M' in positions 0-9, cast to integer or float where
  # the layout above calls for it.  Position 14 is filled from
  # SpecID::Pep.sequence_to_aaseq.  Returns self.
  def from_line(line)
    line.chomp!
    _leader, rank_xcorr, rank_sp, mass, dcn, xc, prelim, matched, total, seq, status = line.split(SQT::Delimiter)
    self[0] = rank_xcorr.to_i
    self[1] = rank_sp.to_i
    self[2] = mass.to_f
    self[3] = dcn.to_f
    self[4] = xc.to_f
    self[5] = prelim.to_f
    self[6] = matched.to_i
    self[7] = total.to_i
    self[8] = seq
    self[9] = status
    self[14] = SpecID::Pep.sequence_to_aaseq(self[8])
    self
  end
end
301
-
302
-
303
# A match read from percolator output.  Positions 4 and 5 (xcorr and sp in a
# plain SQT::Match) are exposed here as the percolator score and the negative
# q-value.  The xcorr/sp accessors are deliberately left in place so routines
# that sort on xcorr keep working.
class SQT::Match::Percolator < SQT::Match

  # position 4
  def percolator_score ; self[4] end
  def percolator_score=(score) ; self[4] = score end

  # position 5, stored with its sign flipped
  def negative_q_value ; self[5] end
  def negative_q_value=(arg) ; self[5] = arg end

  # the stored value with its sign flipped back
  def q_value
    -self[5]
  end

  # for compatibility with scripts that want this guy
  alias_method :probability, :q_value
end
331
-
332
# 0=locus 1=description 2=peps
SQT::Locus = Arrayclass.new(%w[locus description peps])

# One 'L' line of an SQT file.
class SQT::Locus
  include SpecID::Prot
  Leader = 'L'

  # both names answer with position 0 (the locus)
  def first_entry
    self[0]
  end

  def reference
    self[0]
  end

  # Chomps line in place, splits it on SQT::Delimiter and stores the second
  # and third fields as locus and description.  Returns self.
  def from_line(line)
    line.chomp!
    _leader, name, desc = line.split(SQT::Delimiter)
    self[0] = name
    self[1] = desc
    self
  end

end