mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/spec_id/sequest.rb
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
require 'spec_id/sequest/params'
|
|
2
|
-
require 'hash_by'
|
|
3
|
-
require 'sort_by_attributes.rb'
|
|
4
|
-
|
|
5
|
-
module Sequest
|
|
6
|
-
|
|
7
|
-
# returns one array of peptide hits: indexes hits based on index_by, takes
|
|
8
|
-
# the uniq ones and then sorts the group by sort_by (compatible with
|
|
9
|
-
# sort_by_attributes) then slices from first_index to last_index
|
|
10
|
-
# (inclusive).
|
|
11
|
-
def self.other_hits(peps, first_index=1, last_index=9, index_by=[:base_name, :first_scan, :charge], sort_by=[:xcorr, {:down => :xcorr}])
|
|
12
|
-
all_hits = []
|
|
13
|
-
peps.hash_by(*index_by).each do |scan_key, peps_per_scan|
|
|
14
|
-
if peps_per_scan.size >= (first_index + 1)
|
|
15
|
-
all_hits.push( *(peps_per_scan.uniq.sort_by_attributes(*sort_by)[first_index..last_index]) )
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
all_hits.compact
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def self.other_hits_sorted_by_xcorr(peps, first_index, last_index, index_by=[:base_name, :first_scan, :charge])
|
|
22
|
-
all_hits = []
|
|
23
|
-
peps.hash_by(*index_by).each do |scan_key, peps_per_scan|
|
|
24
|
-
if peps_per_scan.size >= (first_index + 1)
|
|
25
|
-
all_hits.push( *(peps_per_scan.uniq.sort_by {|x| x.xcorr }.reverse[first_index..last_index]) )
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
all_hits.compact
|
|
29
|
-
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
end
|
|
33
|
-
|
data/lib/spec_id/sqt.rb
DELETED
|
@@ -1,349 +0,0 @@
|
|
|
1
|
-
require 'spec_id'
|
|
2
|
-
require 'arrayclass'
|
|
3
|
-
require 'set'
|
|
4
|
-
|
|
5
|
-
class SQTGroup
|
|
6
|
-
include SpecID # inherits prots and peps accessors
|
|
7
|
-
|
|
8
|
-
attr_accessor :sqts, :filenames
|
|
9
|
-
|
|
10
|
-
# if filenames is a String, then it should be a filename to a file ending in
|
|
11
|
-
# '.sqg' (meta text file with list of .sqt files) else it should be an array
|
|
12
|
-
# of sqt filenames
|
|
13
|
-
def initialize(filenames=nil)
|
|
14
|
-
@filenames = filenames
|
|
15
|
-
@prots = []
|
|
16
|
-
@peps = []
|
|
17
|
-
@sqts = []
|
|
18
|
-
|
|
19
|
-
global_ref_hash = {}
|
|
20
|
-
## This is duplicated in SRFGroup (should refactor eventually)
|
|
21
|
-
if filenames
|
|
22
|
-
if filenames.is_a?(String) && filenames =~ /\.sqg$/
|
|
23
|
-
srg_filename = filenames.dup
|
|
24
|
-
@filename = srg_filename
|
|
25
|
-
@filenames = IO.readlines(filenames).grep(/\w/).map {|v| v.chomp }
|
|
26
|
-
@filenames.each do |file|
|
|
27
|
-
if !File.exist? file
|
|
28
|
-
puts "File: #{file} in #{srg_filename} does not exist!"
|
|
29
|
-
puts "Please modify #{srg_filename} to point to existing files."
|
|
30
|
-
abort
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
@filenames.each do |file|
|
|
35
|
-
@sqts << SQT.new(file, @peps, global_ref_hash)
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
@prots = global_ref_hash.values
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# NOTE THAT this is copy/paste from srf.rb, should be refactored...
|
|
43
|
-
# returns the filename used
|
|
44
|
-
# if the file exists, the name will be expanded to full path, otherwise just
|
|
45
|
-
# what is given
|
|
46
|
-
def to_sqg(sqg_filename='bioworks.sqg')
|
|
47
|
-
File.open(sqg_filename, 'w') do |v|
|
|
48
|
-
@filenames.each do |sqt_file|
|
|
49
|
-
if File.exist? sqt_file
|
|
50
|
-
v.puts File.expand_path(sqt_file)
|
|
51
|
-
else
|
|
52
|
-
v.puts sqt_file
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
sqg_filename
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
class SQT
|
|
62
|
-
PercolatorHeaderMatch = /^Percolator v/
|
|
63
|
-
Delimiter = "\t"
|
|
64
|
-
attr_accessor :header
|
|
65
|
-
attr_accessor :spectra
|
|
66
|
-
attr_accessor :base_name
|
|
67
|
-
# boolean
|
|
68
|
-
attr_accessor :percolator_results
|
|
69
|
-
|
|
70
|
-
def initialize(filename=nil, peps=[], global_ref_hash={})
|
|
71
|
-
if filename
|
|
72
|
-
from_file(filename, peps, global_ref_hash)
|
|
73
|
-
end
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
# if the file contains the header key '/$Percolator v/' then the results
|
|
77
|
-
# will be interpreted as percolator results
|
|
78
|
-
def from_file(filename, peps=[], global_ref_hash={}, percolator_results=false)
|
|
79
|
-
@percolator_results = percolator_results
|
|
80
|
-
@base_name = File.basename( filename.gsub('\\','/') ).sub(/\.\w+$/, '')
|
|
81
|
-
File.open(filename) do |fh|
|
|
82
|
-
@header = SQT::Header.new.from_handle(fh)
|
|
83
|
-
if @header.keys.any? {|v| v =~ PercolatorHeaderMatch }
|
|
84
|
-
@percolator_results = true
|
|
85
|
-
end
|
|
86
|
-
@spectra = SQT::Spectrum.spectra_from_handle(fh, @base_name, peps, global_ref_hash, @percolator_results)
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
# Inherits from hash, so all header stuff can be accessed by key. Multiline
|
|
93
|
-
# values will be pushed into an array.
|
|
94
|
-
# All header values are stored as (newline-removed) strings!
|
|
95
|
-
class SQT::Header < Hash
|
|
96
|
-
Leader = 'H'
|
|
97
|
-
|
|
98
|
-
# These will be in arrays no matter what: StaticMod, DynamicMod, Comment
|
|
99
|
-
# Any other keys repeated will be shoved into an array; otherwise a string
|
|
100
|
-
Arrayed = %w(DyanmicMod StaticMod Comment).to_set
|
|
101
|
-
|
|
102
|
-
HeaderKeys = {
|
|
103
|
-
:sqt_generator => 'SQTGenerator',
|
|
104
|
-
:sqt_generator_version => 'SQTGeneratorVersion',
|
|
105
|
-
:database => 'Database',
|
|
106
|
-
:fragment_masses => 'FragmentMasses',
|
|
107
|
-
:precursor_masses => 'PrecursorMasses',
|
|
108
|
-
:start_time => 'StartTime',
|
|
109
|
-
:db_seq_length => 'DBSeqLength',
|
|
110
|
-
:db_locus_count => 'DBLocusCount',
|
|
111
|
-
:db_md5sum => 'DBMD5Sum',
|
|
112
|
-
:peptide_mass_tolerance => 'Alg-PreMassTol',
|
|
113
|
-
:fragment_ion_tolerance => 'Alg-FragMassTol',
|
|
114
|
-
# nonstandard (mine)
|
|
115
|
-
:peptide_mass_units => 'Alg-PreMassUnits',
|
|
116
|
-
:ion_series => 'Alg-IonSeries',
|
|
117
|
-
:enzyme => 'Alg-Enzyme',
|
|
118
|
-
# nonstandard (mine)
|
|
119
|
-
:ms_model => 'Alg-MSModel',
|
|
120
|
-
:static_mods => 'StaticMod',
|
|
121
|
-
:dynamic_mods => 'DynamicMod',
|
|
122
|
-
:comments => 'Comment'
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
KeysToAtts = HeaderKeys.invert
|
|
127
|
-
|
|
128
|
-
HeaderKeys.keys.each do |ky|
|
|
129
|
-
attr_accessor ky
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
def from_handle(fh)
|
|
133
|
-
Arrayed.each do |ky|
|
|
134
|
-
self[ky] = []
|
|
135
|
-
end
|
|
136
|
-
pos = fh.pos
|
|
137
|
-
lines = []
|
|
138
|
-
loop do
|
|
139
|
-
line = fh.gets
|
|
140
|
-
if line && (line[0,1] == SQT::Header::Leader )
|
|
141
|
-
lines << line
|
|
142
|
-
else # reset the fh.pos and we're done
|
|
143
|
-
fh.pos = pos
|
|
144
|
-
break
|
|
145
|
-
end
|
|
146
|
-
pos = fh.pos
|
|
147
|
-
end
|
|
148
|
-
from_lines(lines)
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
def from_lines(array_of_header_lines)
|
|
152
|
-
array_of_header_lines.each do |line|
|
|
153
|
-
line.chomp!
|
|
154
|
-
(ky, *rest) = line.split(SQT::Delimiter)[1..-1]
|
|
155
|
-
# just in case they have any tabs in their field
|
|
156
|
-
value = rest.join(SQT::Delimiter)
|
|
157
|
-
if Arrayed.include?(ky)
|
|
158
|
-
self[ky] << value
|
|
159
|
-
elsif self.key? ky # already exists
|
|
160
|
-
if self[ky].is_a? Array
|
|
161
|
-
self[ky] << value
|
|
162
|
-
else
|
|
163
|
-
self[ky] = [self[ky], value]
|
|
164
|
-
end
|
|
165
|
-
else # normal
|
|
166
|
-
self[ky] = value
|
|
167
|
-
end
|
|
168
|
-
end
|
|
169
|
-
KeysToAtts.each do |ky,methd|
|
|
170
|
-
self.send("#{methd}=".to_sym, self[ky])
|
|
171
|
-
end
|
|
172
|
-
self
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
end
|
|
176
|
-
|
|
177
|
-
# all are cast as expected (total_intensity is a float)
|
|
178
|
-
# mh = observed mh
|
|
179
|
-
SQT::Spectrum = Arrayclass.new(%w[first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches])
|
|
180
|
-
|
|
181
|
-
# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches
|
|
182
|
-
|
|
183
|
-
class SQT::Spectrum
|
|
184
|
-
Leader = 'S'
|
|
185
|
-
|
|
186
|
-
# assumes the first line starts with an 'S'
|
|
187
|
-
def self.spectra_from_handle(fh, base_name, peps=[], global_ref_hash={}, percolator_results=false)
|
|
188
|
-
spectra = []
|
|
189
|
-
|
|
190
|
-
while line = fh.gets
|
|
191
|
-
case line[0,1]
|
|
192
|
-
when SQT::Spectrum::Leader
|
|
193
|
-
spectrum = SQT::Spectrum.new.from_line( line )
|
|
194
|
-
spectra << spectrum
|
|
195
|
-
matches = []
|
|
196
|
-
spectrum.matches = matches
|
|
197
|
-
when SQT::Match::Leader
|
|
198
|
-
match_klass = if percolator_results
|
|
199
|
-
SQT::Match::Percolator
|
|
200
|
-
else
|
|
201
|
-
SQT::Match
|
|
202
|
-
end
|
|
203
|
-
match = match_klass.new.from_line( line )
|
|
204
|
-
match[10,3] = spectrum[0,3]
|
|
205
|
-
match[15] = base_name
|
|
206
|
-
matches << match
|
|
207
|
-
peps << match
|
|
208
|
-
loci = []
|
|
209
|
-
match.loci = loci
|
|
210
|
-
matches << match
|
|
211
|
-
when SQT::Locus::Leader
|
|
212
|
-
line.chomp!
|
|
213
|
-
key = line.split(SQT::Delimiter)[1]
|
|
214
|
-
locus =
|
|
215
|
-
if global_ref_hash.key?(key)
|
|
216
|
-
global_ref_hash[key]
|
|
217
|
-
else
|
|
218
|
-
locus = SQT::Locus.new.from_line( line )
|
|
219
|
-
locus.peps = []
|
|
220
|
-
global_ref_hash[key] = locus
|
|
221
|
-
end
|
|
222
|
-
locus.peps << match
|
|
223
|
-
loci << locus
|
|
224
|
-
end
|
|
225
|
-
end
|
|
226
|
-
# set the deltacn:
|
|
227
|
-
set_deltacn(spectra)
|
|
228
|
-
spectra
|
|
229
|
-
end
|
|
230
|
-
|
|
231
|
-
def self.set_deltacn(spectra)
|
|
232
|
-
spectra.each do |spec|
|
|
233
|
-
matches = spec.matches
|
|
234
|
-
if matches.size > 0
|
|
235
|
-
|
|
236
|
-
(0...(matches.size-1)).each do |i|
|
|
237
|
-
matches[i].deltacn = matches[i+1].deltacn_orig
|
|
238
|
-
end
|
|
239
|
-
matches[-1].deltacn = 1.1
|
|
240
|
-
end
|
|
241
|
-
end
|
|
242
|
-
spectra
|
|
243
|
-
end
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
# returns an array -> [the next spectra line (or nil if eof), spectrum]
|
|
247
|
-
def from_line(line)
|
|
248
|
-
line.chomp!
|
|
249
|
-
ar = line.split(SQT::Delimiter)
|
|
250
|
-
self[0] = ar[1].to_i
|
|
251
|
-
self[1] = ar[2].to_i
|
|
252
|
-
self[2] = ar[3].to_i
|
|
253
|
-
self[3] = ar[4].to_f
|
|
254
|
-
self[4] = ar[5]
|
|
255
|
-
self[5] = ar[6].to_f
|
|
256
|
-
self[6] = ar[7].to_f
|
|
257
|
-
self[7] = ar[8].to_f
|
|
258
|
-
self[8] = ar[9].to_i
|
|
259
|
-
self[9] = []
|
|
260
|
-
self
|
|
261
|
-
end
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
# SQT format uses only indices 0 - 9
|
|
265
|
-
SQT::Match = Arrayclass.new(%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci])
|
|
266
|
-
|
|
267
|
-
# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci
|
|
268
|
-
|
|
269
|
-
# rxcorr = rank by xcorr
|
|
270
|
-
# rsp = rank by sp
|
|
271
|
-
# NOTE:
|
|
272
|
-
# deltacn_orig
|
|
273
|
-
# deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
|
|
274
|
-
# give the last one 1.1)
|
|
275
|
-
class SQT::Match
|
|
276
|
-
include SpecID::Pep
|
|
277
|
-
Leader = 'M'
|
|
278
|
-
|
|
279
|
-
# same as 'loci'
|
|
280
|
-
def prots
|
|
281
|
-
self[16]
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
def from_line(line)
|
|
285
|
-
line.chomp!
|
|
286
|
-
ar = line.split(SQT::Delimiter)
|
|
287
|
-
self[0] = ar[1].to_i
|
|
288
|
-
self[1] = ar[2].to_i
|
|
289
|
-
self[2] = ar[3].to_f
|
|
290
|
-
self[3] = ar[4].to_f
|
|
291
|
-
self[4] = ar[5].to_f
|
|
292
|
-
self[5] = ar[6].to_f
|
|
293
|
-
self[6] = ar[7].to_i
|
|
294
|
-
self[7] = ar[8].to_i
|
|
295
|
-
self[8] = ar[9]
|
|
296
|
-
self[9] = ar[10]
|
|
297
|
-
self[14] = SpecID::Pep.sequence_to_aaseq(self[8])
|
|
298
|
-
self
|
|
299
|
-
end
|
|
300
|
-
end
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
class SQT::Match::Percolator < SQT::Match
|
|
304
|
-
# we will keep access to these old terms since we can then access routines
|
|
305
|
-
# that sort on xcorr...
|
|
306
|
-
#undef_method :xcorr
|
|
307
|
-
#undef_method :xcorr=
|
|
308
|
-
#undef_method :sp
|
|
309
|
-
#undef_method :sp=
|
|
310
|
-
|
|
311
|
-
def percolator_score
|
|
312
|
-
self[4]
|
|
313
|
-
end
|
|
314
|
-
def percolator_score=(score)
|
|
315
|
-
self[4] = score
|
|
316
|
-
end
|
|
317
|
-
def negative_q_value
|
|
318
|
-
self[5]
|
|
319
|
-
end
|
|
320
|
-
def negative_q_value=(arg)
|
|
321
|
-
self[5] = arg
|
|
322
|
-
end
|
|
323
|
-
def q_value
|
|
324
|
-
-self[5]
|
|
325
|
-
end
|
|
326
|
-
# for compatibility with scripts that want this guy
|
|
327
|
-
def probability
|
|
328
|
-
-self[5]
|
|
329
|
-
end
|
|
330
|
-
end
|
|
331
|
-
|
|
332
|
-
SQT::Locus = Arrayclass.new(%w[locus description peps])
|
|
333
|
-
|
|
334
|
-
class SQT::Locus
|
|
335
|
-
include SpecID::Prot
|
|
336
|
-
Leader = 'L'
|
|
337
|
-
|
|
338
|
-
def first_entry ; self[0] end
|
|
339
|
-
def reference ; self[0] end
|
|
340
|
-
|
|
341
|
-
def from_line(line)
|
|
342
|
-
line.chomp!
|
|
343
|
-
ar = line.split(SQT::Delimiter)
|
|
344
|
-
self[0] = ar[1]
|
|
345
|
-
self[1] = ar[2]
|
|
346
|
-
self
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
end
|