mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/validator.rb
DELETED
@@ -1,197 +0,0 @@
|
|
1
|
-
|
2
|
-
# Base class for the peptide-hit validators.  It provides a running
# ("incremental") precision tally, background adjustment of false positive
# counts, simple sample statistics, and a helper that summarizes validator
# objects as plain hashes.
#
# The incremental tally relies on +partition+ and +calc_precision+ (and,
# optionally, +calc_precision_prep+), none of which are defined in this class.
class Validator

  # In the absence of digestion, does the spec_id type require pephits for
  # validation?
  #
  # Returns true for Proph::ProtSummary, Proph::PepSummary and SQTGroup
  # objects and false for anything else.
  def self.requires_pephits?(spec_id_obj)
    case spec_id_obj
    # NOTE: the former `when X : true` form only parses on Ruby 1.8; a
    # comma-separated `when` list is equivalent and parses everywhere.
    when Proph::ProtSummary, Proph::PepSummary, SQTGroup
      true
    else
      false
    end
  end

  # Short labels for validators, keyed both by class name (String) and by
  # shorthand Symbol.
  Validator_to_string = {
    'Validator::AA' => 'badAA',
    'Validator::AAEst' => 'badAAEst',
    'Validator::Decoy' => 'decoy',
    'Validator::Transmem::Protein' => 'tmm',
    'Validator::TruePos' => 'tps',
    'Validator::Bias' => 'bias',
    'Validator::Probability' => 'prob',
    'Validator::QValue' => 'qval',
    :bad_aa => 'badAA',
    :bad_aa_est => 'badAAEst',
    :decoy => 'decoy',
    :tmm => 'tmm',
    :tps => 'tps',
    :bias => 'bias',
    :prob => 'prob',
    :qval => 'qval',
  }

  # Resets the running tally used by increment_pephits_precision.
  def initialize_increment
    @increment_tps = 0
    @increment_fps = 0
    @increment_total_submitted = 0
    @increment_initialized = true
  end

  # If adding pephits in groups at a time, the entire group does not need to
  # be queried, just the individual hit.  Use this OR pephits_precision (NOT
  # both).  The initial query to this method begins a running tally that is
  # saved by the validator.
  #
  # peps:: either an array of pephits or a single pephit (a single hit is
  #        recognized by being a SpecID::Pep)
  #
  # Returns whatever calc_precision returns for the running totals.
  def increment_pephits_precision(peps)
    # defined? sidesteps the "instance variable not initialized" warning, so
    # there is no need to temporarily silence $VERBOSE.
    initialize_increment unless defined?(@increment_initialized) && @increment_initialized

    to_submit = peps.is_a?(SpecID::Pep) ? [peps] : peps
    @increment_total_submitted += to_submit.size

    (tps, fps) = partition(to_submit)
    @increment_tps += tps.size
    @increment_fps += fps.size

    (num_tps, num_fps) =
      if respond_to?(:calc_precision_prep) # for digestion based validators
        calc_precision_prep(@increment_tps, @increment_fps)
      else
        [@increment_tps, @increment_fps]
      end
    calc_precision(num_tps, num_fps)
  end

  # Returns an adjusted number of false positives (a float that never drops
  # below 0.0) based on a background of 'false'-false positive hits to total
  # hits.  Also sets @calculated_background to num_fps / (num_tps + num_fps).
  # Accepts floats or ints.
  def adjust_fps_for_background(num_tps, num_fps, background)
    num_fps = num_fps.to_f
    total_peps = num_tps + num_fps
    @calculated_background = num_fps / total_peps
    num_fps -= (total_peps.to_f * background)
    num_fps = 0.0 if num_fps < 0.0
    num_fps
  end

  # copied from libjtp: vec
  # Returns [mean, std_dev] where std_dev is the sample standard deviation
  # (n - 1 denominator; a denominator of 1 is used for a single value).
  def sample_stats(array)
    _len = array.size
    _sum = 0.0
    _sum_sq = 0.0
    array.each do |val|
      _sum += val
      _sum_sq += val * val
    end
    std_dev = _sum_sq - ((_sum * _sum)/_len)
    std_dev /= ( (_len > 1) ? (_len-1) : 1 )
    # on occasion, a very small negative number occurs (rounding error)
    if std_dev < 0.0
      std_dev = 0.0
    else
      std_dev = Math.sqrt(std_dev)
    end
    mean = _sum.to_f/_len
    [mean, std_dev]
  end

  # Takes an array of validators and returns a fresh array where each has
  # been turned into a sensible hash (with symbols as the keys!).  Every hash
  # carries :type (the short label from Validator_to_string) and :class (the
  # class name) in addition to the validator-specific entries.
  #
  # Raises ArgumentError for a validator of an unrecognized class.
  def self.sensible_validator_hashes(validators)
    validators.map do |val|
      hash = {}
      # copies the named reader values from the validator into the hash
      copy_attrs = lambda do |names|
        names.each { |name| hash[name.to_sym] = val.send(name.to_sym) }
      end

      case val
      when Validator::TruePos
        # merge! (not merge): the non-destructive form returned a new hash
        # that was thrown away, so these two entries were silently lost.
        hash.merge!( {:correct_wins => val.correct_wins, :file => val.fasta.filename } )
      when Validator::AAEst
        copy_attrs.call(%w(frequency background calculated_background))
      when Validator::AA
        copy_attrs.call(%w(false_to_total_ratio background calculated_background))
      when Validator::Decoy
        copy_attrs.call(%w(pi_zero correct_wins decoy_on_match))
        hash[:constraint] = val.constraint.inspect if val.constraint
      when Validator::Bias
        copy_attrs.call(%w(correct_wins proteins_expected background calculated_background false_to_total_ratio))
        hash[:file] = val.fasta.filename
      when Validator::Transmem::Protein
        copy_attrs.call(%w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file))
      when Validator::Probability
        copy_attrs.call(%w(prob_method))
      when Validator::QValue
        # no params to add
      else
        raise ArgumentError, "Don't know the validator class #{val}"
      end

      klass_as_s = val.class.to_s
      hash[:type] = Validator_to_string[klass_as_s]
      hash[:class] = klass_as_s
      hash
    end
  end
end
|
152
|
-
|
153
|
-
module Precision::Calculator
  # Precision under the assumption that every hit in the first group is a
  # true hit and every hit in the second group is a false hit:
  #
  #   true / (true + false)
  #
  # Both counts are converted with to_f.  When both are zero the precision
  # is defined to be 1.0.
  def calc_precision(num_true_hits, num_false_hits)
    trues = num_true_hits.to_f
    falses = num_false_hits.to_f
    return 1.0 if trues == 0.0 && falses == 0.0
    trues / (trues + falses)
  end
end
|
165
|
-
|
166
|
-
# will calculate precision for groups of proteins where the first group are
|
167
|
-
# normal hits (which may be true or false) and the second are decoy hits.
|
168
|
-
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
|
169
|
-
module Precision::Calculator::Decoy
  # Precision for a group of normal hits (true or false) versus a group of
  # decoy hits.  The estimated number of true positives is
  #
  #   num_normal - (num_decoy * frit)
  #
  # and the precision is that estimate divided by num_normal.  All counts
  # are handled as floats in case fractional amounts are passed in.
  #
  # Edge case: with zero normal hits the result is 0.0 if there are any
  # decoy hits and 1.0 otherwise.
  def calc_precision(num_normal, num_decoy, frit=1.0)
    normal = num_normal.to_f
    decoys = num_decoy.to_f
    estimated_true = normal - (decoys * frit)
    return (decoys > 0.0 ? 0.0 : 1.0) if normal == 0.0
    estimated_true / normal
  end
end
|
187
|
-
|
188
|
-
#require 'validator/true_pos'
|
189
|
-
#require 'validator/aa'
|
190
|
-
#require 'validator/aa_est'
|
191
|
-
#require 'validator/bias'
|
192
|
-
#require 'validator/decoy'
|
193
|
-
#require 'validator/transmem'
|
194
|
-
#require 'validator/probability'
|
195
|
-
#require 'validator/q_value'
|
196
|
-
#require 'validator/prot_from_pep'
|
197
|
-
|
data/lib/xml.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
|
2
|
-
module XML
  # matches durations that carry an hour and/or minute component
  HourMinuteMatch = /[MH]/o

  # Converts an XML duration string of the form 'PT...' (for example
  # 'PT1.223434S' or 'PT1H2M3.5S') into seconds.
  #
  # string:: the duration; only time-of-day durations (prefix 'PT') are
  #          supported -- year, month, etc. are not supported yet
  #
  # Returns a Float number of seconds.  Durations without a seconds part
  # (e.g. 'PT5M') now also yield a Float; previously they returned nil
  # because the total was only converted when an 'S' was encountered.
  #
  # Aborts the program for any string that does not start with 'PT'.
  def self.duration_to_seconds(string)
    case string[0,2]
    when 'PT'
      rest = string[2..-1]
      # usually it will be this 'PT1.223434S':
      if rest !~ HourMinuteMatch
        rest[0...-1].to_f
      else
        digits = String.new   # characters collected since the last designator
        whole_secs = 0        # hours and minutes, in seconds
        frac_secs = 0.0       # the (possibly fractional) seconds component
        rest.split('').each do |let|
          case let
          when 'H'
            whole_secs += digits.to_i * 3600
            digits = String.new
          when 'M'
            whole_secs += digits.to_i * 60
            digits = String.new
          when 'S'
            frac_secs = digits.to_f
          else
            digits << let
          end
        end
        whole_secs.to_f + frac_secs
      end
    else
      abort 'need to include support for other durations'
    end
  end
end
|
data/lib/xml_style_parser.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
|
2
|
-
# Mixin and module-level helpers for picking an XML parser backend from
# whatever parsing libraries can be loaded.
module XMLStyleParser
  @done_once = nil

  # backends in order of preference
  Parser_precedence = %w(AXML LibXML XMLParser Regexp REXML)
  # currently AXML requires 'xmlparser' to be installed.... (may not always be
  # the case...)
  # Maps a backend name to a pattern identifying the library file it needs.
  File_required = {'AXML' => /^axml/, 'LibXML' => /^xml\/libxml/, 'XMLParser' => /^xmlparser/}

  # the method that the parser will call on the given file at parse!
  # NOTE(review): this accessor replaces Object#method on including classes.
  attr_accessor :method

  # Parses the given file by sending file and opts to @method.
  #
  # Raises NoMethodError when the object does not respond to @method.
  def parse(file, opts={})
    if respond_to? @method
      send(@method, file, opts)
    else
      raise NoMethodError, "Parser of class #{self.class} can't parse #{@method} yet"
    end
  end

  # Tries (once) to require the optional parsing libraries; load failures
  # are deliberately ignored since every library is optional.
  #
  # XMLParser and xml/libxml are incompatible, so if xmlparser is available,
  # libxml will not be loaded (XMLParser#parse is clobbered by
  # XML::Parser#parse [don't ask me why])
  def self.require_parsers
    if !@done_once
      have_xmlparser = false
      begin
        require 'xmlparser'
        puts "Loaded XMLParser" if $VERBOSE
        have_xmlparser = true
      rescue LoadError
      end

      begin
        require 'axml'
        puts "Loaded AXML" if $VERBOSE
      rescue LoadError
      end

      begin
        unless have_xmlparser
          require 'xml/libxml'
          puts "Loaded xml/libxml" if $VERBOSE
          ################################################################
          # IMPORTANT!
          # This magic line makes the parser behave like it ought to!!
          XML::Parser.default_keep_blanks = false
          ################################################################
        end
      rescue LoadError
      end
    end
    @done_once = true
  end

  # True if some loaded feature matches +pattern+.  The patterns in
  # File_required are anchored at the start of a *relative* feature name
  # (e.g. /^axml/), but $" holds absolute paths on Ruby 1.9 and later, so
  # every trailing path suffix of each entry is tried as well.
  def self.feature_loaded?(pattern)
    $".any? do |feature|
      parts = feature.split('/')
      (0...parts.size).any? { |i| parts[i..-1].join('/') =~ pattern }
    end
  end
  private_class_method :feature_loaded?

  # Returns an array of backend names (in the order of Parser_precedence)
  # minus those whose required library (per File_required) is not loaded.
  def self.available_xml_parsers
    require_parsers
    parser_precedence = Parser_precedence.dup
    File_required.each do |name, pattern|
      parser_precedence.delete(name) unless feature_loaded?(pattern)
    end
    parser_precedence
  end

  ## appends downcase to each parser type here and tries to require it
  # returns all those that were required without a load error
  def self.require_parse_files(base_dir)
    XMLStyleParser.available_xml_parsers.select do |v|
      to_require = base_dir + '/' + v.downcase
      begin
        require to_require
        true
      rescue LoadError
        false
      end
    end
  end

  # Seeks a subclass (a constant nested directly in +const+ and named after
  # an available backend) that has the public instance method +method+.
  # When +special_subclass+ is given, the result is instead the constant of
  # that name nested directly inside the first suitable backend class.
  #
  # Raises NoMethodError when nothing suitable is found.
  def self.choose_parser(const, method, special_subclass=nil)
    ## First update parser precedence to ensure we should get these guys
    parser_precedence = available_xml_parsers

    # inherit=false: only constants nested directly in +const+ count.
    # Otherwise names such as 'Regexp' resolve to top-level ::Regexp.
    available_subclasses = parser_precedence.select do |v|
      const.const_defined?(v, false)
    end.map do |v|
      const.const_get(v, false)
    end

    available = available_subclasses.select do |subclass|
      subclass.public_method_defined? method
    end

    if special_subclass
      available = available.select do |subclass|
        subclass.const_defined?(special_subclass, false)
      end.map do |subclass|
        subclass.const_get(special_subclass, false)
      end
    end

    if available.size > 0
      available.first
    else
      warning = String.new
      if special_subclass
        warning << "** while looking for special subclass: #{special_subclass} **\n"
      end
      warning << "No parser of class #{const} can parse :#{method}\n** Is 'axml' (or another xml parser) installed and working? **"
      raise NoMethodError, warning
    end
  end

end
|
data/lib/xmlparser_wrapper.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# Mixin that instantiates a parser class nested in the including class, feeds
# it some input, and returns the result of calling a report method on it.
module XMLParserWrapper
  # Reads +file+ fully and hands the contents to parse_and_report_string.
  def parse_and_report(file, const, report_method=:report)
    contents = IO.read(file)
    parse_and_report_string(contents, const, report_method)
  end

  # Builds a new parser from the constant +const+ (looked up on the
  # receiver's class), parses +string+ with it, and returns the value of
  # sending +report_method+ to that parser.
  def parse_and_report_string(string, const, report_method=:report)
    self.class.const_get(const).new.tap { |worker| worker.parse(string) }.send(report_method)
  end

  # Same as parse_and_report_string, but the parser is given the +io+ object.
  def parse_and_report_io(io, const, report_method=:report)
    self.class.const_get(const).new.tap { |worker| worker.parse(io) }.send(report_method)
  end
end
|
data/release_notes.txt
DELETED
data/script/compile_and_plot_smriti_final.rb
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
|
3
|
-
|
4
|
-
require 'roc'
|
5
|
-
require 'optparse'
|
6
|
-
require 'generator'
|
7
|
-
|
8
|
-
# Reads one or more tab-delimited files of (prob, file:seq:charge, T/F)
# triplets and writes precision vs. number of hits to "<base>.to_plot" and
# "<base>.csv".

$decoy = false
$base = "precision_vs_numhits"

opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} smriti.csv ..."
  op.separator ""
  op.separator "smriti.csv = (tab delimited) prob, file:seq:charge, T/F"
  op.separator ""
  op.on("--decoy", "'F' indicates this is a decoy") {|v| $decoy = true }
  op.on("-o", "--outfile <filename>", "base outfile name (#{$base})") {|v| $base = v}
end

opts.parse!

if ARGV.size <= 0
  puts opts
  exit
end

files = ARGV.to_a

# Series label for a file: the file name without its extension.  The earlier
# pattern /\.[^\.]$/ only removed one-character extensions, so "smriti.csv"
# kept its ".csv".
strip_extension = lambda {|file| file.sub(/\.[^\.]+$/,'') }

xys = files.map do |file|
  triplets = IO.readlines(file).reject{|v| v =~ /^#/}.map do |line|
    line.chomp.split("\t")
  end

  # an empty file would otherwise fail below with NoMethodError on nil
  abort "no data rows in #{file}" if triplets.empty?

  # check that they're all OK:
  triplets.each do |trip|
    abort "bad triplet" if trip.size != 3
  end

  # figure out the ordering (and correct if necessary): values are negated
  # when the first row scores higher than the last one
  higher_better = triplets[0][0].to_f > triplets.last[0].to_f

  doublets = triplets.map do |trip|
    value = trip[0].to_f
    value *= -1 if higher_better
    [value, trip[2] == 'T']
  end

  roc = ROC.new

  (tps, fps) = roc.doublets_to_separate(doublets)

  (numhits, precision) =
    if $decoy
      DecoyROC.new.pred_and_ppv(tps, fps)
    else
      roc.numhits_and_ppv(doublets)
    end
  [numhits, precision]
end

## PLOT TO to_plot
File.open( $base + ".to_plot", 'w') do |fh|
  fh.puts "XYData"
  fh.puts $base
  fh.puts "precision vs. num hits"
  fh.puts "num hits"
  fh.puts "precision"
  files.zip(xys) do |file,xy|
    (x,y) = xy
    # prepend the (0 hits, precision 1) point; this mutates the arrays, so
    # the point also appears in the csv output written below
    x.unshift(0)
    y.unshift(1)
    fh.puts strip_extension.call(file)
    fh.puts x.join(" ")
    fh.puts y.join(" ")
  end
end

File.open( $base + ".csv", 'w') do |fh|
  columns = []
  files.zip(xys) do |file,xy|
    f = strip_extension.call(file)
    (x,y) = xy
    x.unshift("#Hits: #{f}")
    y.unshift("Precision: #{f}")
    columns << x << y
  end
  # Write the columns side by side; shorter columns are padded with empty
  # cells (nil joins as an empty string).
  num_rows = columns.map {|col| col.size }.max || 0
  (0...num_rows).each do |i|
    fh.puts columns.map {|col| col[i] }.join("\t")
  end
end
|
95
|
-
|
96
|
-
|
97
|
-
|
data/script/create_little_pepxml.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
|
3
|
-
|
4
|
-
if ARGV.size < 2
  puts "usage: #{File.basename(__FILE__)} protxml pepxml"
  puts "Based on some kind of truncated prot xml file, takes a pepxml file"
  puts "and deletes all search hits/peptides that aren't in the prot xml file!"
  exit
end

protxml = ARGV[0]
pepxml = ARGV[1]

# [peptide sequence, charge] pairs seen in the prot xml file (both Strings)
hash = {}
File.open(protxml) do |fh|
  while line = fh.gets
    if line =~ /peptide_sequence="(.*?)" charge="(\d)" /
      hash[[$1.dup,$2.dup]] = 1
    end
  end
end

p hash

# Writes <pepxml>.small.  Lines outside a search_result element are copied
# straight through; the lines of each search_result are buffered and written
# only if its search_hit peptide/charge pair is in the prot xml file.
File.open(pepxml + ".small", "w") do |out|
  in_hit = false
  cur_charge = nil
  stored_lines = ""
  print_it = false
  File.open(pepxml) do |fh|
    while line = fh.gets
      if line =~ /<search_result .*? assumed_charge="(\d)".*?>/
        cur_charge = $1.dup
        in_hit = true
        # start each result undecided so that the decision made for the
        # previous result cannot leak into one that has no search_hit
        print_it = false
      end
      if line =~ /<search_hit .*? peptide="(.*?)"/
        print_it = hash.key?([$1.dup,cur_charge])
      end
      if line =~ /<\/search_result>/
        if print_it
          stored_lines << line
          out.print stored_lines
        end
        stored_lines = ""
        # This was `in_hit == false` (a comparison with no effect), so after
        # the first search_result every later line was buffered, and lines
        # following the last result were never written.
        in_hit = false
      elsif !in_hit
        out.print line
      else
        stored_lines << line
      end
    end
  end
end
|
data/script/degenerate_peptides.rb
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
|
3
|
-
require 'fasta'
|
4
|
-
require 'sample_enzyme'
|
5
|
-
|
6
|
-
if ARGV.size < 3
  puts "usage: #{File.basename(__FILE__)} min_peptide_length missed_cleavages <file>.fasta ..."
  puts "  returns <file>.min_pep_length_<#>.missed_cleavages_<#>.degenerate_peptides.csv"
  abort
end

min_peptide_length = ARGV.shift.to_i
missed_cleavages = ARGV.shift.to_i

# For every fasta file: digest each protein with SampleEnzyme.tryptic and
# write one tab-delimited line per peptide (peptide, then the headers of all
# proteins that produced it), peptides with the most proteins first.
ARGV.each do |file|
  # peptide sequence => headers (without the leading '>') of its proteins
  hash = {}

  # Anchored so the check agrees with the substitution below; the unanchored
  # form accepted names such as "x.fasta.bak", whose output name then kept
  # the ".fasta.bak" suffix.
  if file !~ /\.fasta$/
    abort "must be a fasta file with extension fasta"
  end
  new_filename = file.sub(/\.fasta$/, '')
  new_filename << ".min_pep_length_#{min_peptide_length}.missed_cleavages_#{missed_cleavages}.degenerate_peptides.csv"

  Fasta.new.read_file(file).prots.each do |prot|
    SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).each do |aaseq|
      if aaseq.size >= min_peptide_length
        hash[aaseq] ||= []
        hash[aaseq].push( prot.header.sub(/^>/,'') )
      end
    end
  end

  File.open(new_filename, "w") do |fh|
    hash.keys.sort_by {|pep| hash[pep].size }.reverse.each do |pep|
      fh.puts( [pep, *(hash[pep])].join("\t") )
    end
  end
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|