mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/validator.rb
DELETED
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
class Validator
|
|
3
|
-
|
|
4
|
-
# in the absence of digestion, does the spec_id type requires pephits for
|
|
5
|
-
# validation?
|
|
6
|
-
def self.requires_pephits?(spec_id_obj)
|
|
7
|
-
case spec_id_obj
|
|
8
|
-
when Proph::ProtSummary : true
|
|
9
|
-
when Proph::PepSummary : true
|
|
10
|
-
when SQTGroup : true
|
|
11
|
-
else ; false
|
|
12
|
-
end
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
Validator_to_string = {
|
|
16
|
-
'Validator::AA' => 'badAA',
|
|
17
|
-
'Validator::AAEst' => 'badAAEst',
|
|
18
|
-
'Validator::Decoy' => 'decoy',
|
|
19
|
-
'Validator::Transmem::Protein' => 'tmm',
|
|
20
|
-
'Validator::TruePos' => 'tps',
|
|
21
|
-
'Validator::Bias' => 'bias',
|
|
22
|
-
'Validator::Probability' => 'prob',
|
|
23
|
-
'Validator::QValue' => 'qval',
|
|
24
|
-
:bad_aa => 'badAA',
|
|
25
|
-
:bad_aa_est => 'badAAEst',
|
|
26
|
-
:decoy => 'decoy',
|
|
27
|
-
:tmm => 'tmm',
|
|
28
|
-
:tps => 'tps',
|
|
29
|
-
:bias => 'bias',
|
|
30
|
-
:prob => 'prob',
|
|
31
|
-
:qval => 'qval',
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
def initialize_increment
|
|
35
|
-
@increment_tps = 0
|
|
36
|
-
@increment_fps = 0
|
|
37
|
-
@increment_total_submitted = 0
|
|
38
|
-
@increment_initialized = true
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
# if adding pephits in groups at a time, the entire group does not need to be
|
|
42
|
-
# queried, just the individual hit. Use this OR pephits_precision (NOT
|
|
43
|
-
# both). The initial query to this method will begin a running tally that
|
|
44
|
-
# is saved by the validator.
|
|
45
|
-
# takes either an array or a single pephit (determined by if it is a
|
|
46
|
-
# SpecID::Pep)
|
|
47
|
-
def increment_pephits_precision(peps)
|
|
48
|
-
tmp = $VERBOSE; $VERBOSE = nil
|
|
49
|
-
initialize_increment unless @increment_initialized
|
|
50
|
-
$VERBOSE = tmp
|
|
51
|
-
|
|
52
|
-
to_submit =
|
|
53
|
-
if peps.is_a? SpecID::Pep
|
|
54
|
-
[peps]
|
|
55
|
-
else
|
|
56
|
-
peps
|
|
57
|
-
end
|
|
58
|
-
@increment_total_submitted += to_submit.size
|
|
59
|
-
(tps, fps) = partition(to_submit)
|
|
60
|
-
@increment_tps += tps.size
|
|
61
|
-
@increment_fps += fps.size
|
|
62
|
-
(num_tps, num_fps) =
|
|
63
|
-
if self.respond_to?(:calc_precision_prep) # for digestion based validators
|
|
64
|
-
(num_tps, num_fps) = calc_precision_prep(@increment_tps, @increment_fps)
|
|
65
|
-
[num_tps, num_fps]
|
|
66
|
-
else
|
|
67
|
-
[@increment_tps, @increment_fps]
|
|
68
|
-
end
|
|
69
|
-
calc_precision(num_tps, num_fps)
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
# returns an adjusted false positive rate (a float not to drop below 0.0)
|
|
74
|
-
# based on a background of 'false'-false positive hits to total hits. Also
|
|
75
|
-
# sets the @calculated_background attribute. Accepts floats or ints
|
|
76
|
-
def adjust_fps_for_background(num_tps, num_fps, background)
|
|
77
|
-
num_fps = num_fps.to_f
|
|
78
|
-
total_peps = num_tps + num_fps
|
|
79
|
-
@calculated_background = num_fps / total_peps
|
|
80
|
-
num_fps -= (total_peps.to_f * background)
|
|
81
|
-
num_fps = 0.0 if num_fps < 0.0
|
|
82
|
-
num_fps
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# copied from libjtp: vec
|
|
86
|
-
# returns the mean and std_dev
|
|
87
|
-
def sample_stats(array)
|
|
88
|
-
_len = array.size
|
|
89
|
-
_sum = 0.0
|
|
90
|
-
_sum_sq = 0.0
|
|
91
|
-
array.each do |val|
|
|
92
|
-
_sum += val
|
|
93
|
-
_sum_sq += val * val
|
|
94
|
-
end
|
|
95
|
-
std_dev = _sum_sq - ((_sum * _sum)/_len)
|
|
96
|
-
std_dev /= ( (_len > 1) ? (_len-1) : 1 )
|
|
97
|
-
# on occasion, a very small negative number occurs
|
|
98
|
-
if std_dev < 0.0
|
|
99
|
-
std_dev = 0.0
|
|
100
|
-
else
|
|
101
|
-
std_dev = Math.sqrt(std_dev)
|
|
102
|
-
end
|
|
103
|
-
mean = _sum.to_f/_len
|
|
104
|
-
[mean, std_dev]
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# takes an array of validators and returns a fresh array where each has been
|
|
108
|
-
# turned into a sensible hash (with symbols as the keys!)
|
|
109
|
-
def self.sensible_validator_hashes(validators)
|
|
110
|
-
validators.map do |val|
|
|
111
|
-
hash = {}
|
|
112
|
-
case val
|
|
113
|
-
when Validator::TruePos
|
|
114
|
-
hash.merge( {:correct_wins => val.correct_wins, :file => val.fasta.filename } )
|
|
115
|
-
when Validator::AAEst
|
|
116
|
-
%w(frequency background calculated_background).each do |cat|
|
|
117
|
-
hash[cat.to_sym] = val.send(cat.to_sym)
|
|
118
|
-
end
|
|
119
|
-
when Validator::AA
|
|
120
|
-
%w(false_to_total_ratio background calculated_background).each do |cat|
|
|
121
|
-
hash[cat.to_sym] = val.send(cat.to_sym)
|
|
122
|
-
end
|
|
123
|
-
when Validator::Decoy
|
|
124
|
-
%w(pi_zero correct_wins decoy_on_match).each do |cat|
|
|
125
|
-
hash[cat.to_sym] = val.send(cat.to_sym)
|
|
126
|
-
end
|
|
127
|
-
hash[:constraint] = val.constraint.inspect if val.constraint
|
|
128
|
-
when Validator::Bias
|
|
129
|
-
%w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
|
|
130
|
-
hash[cat.to_sym] = val.send(cat.to_sym)
|
|
131
|
-
end
|
|
132
|
-
hash[:file] = val.fasta.filename
|
|
133
|
-
when Validator::Transmem::Protein
|
|
134
|
-
%w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
|
|
135
|
-
hash[cat.to_sym] = val.send(cat.to_sym)
|
|
136
|
-
end
|
|
137
|
-
when Validator::Probability
|
|
138
|
-
%w(prob_method).each do |cat|
|
|
139
|
-
hash[cat.to_sym] = val.send(cat.to_sym)
|
|
140
|
-
end
|
|
141
|
-
when Validator::QValue
|
|
142
|
-
# no params to add
|
|
143
|
-
else ; raise ArgumentError, "Don't know the validator class #{val}"
|
|
144
|
-
end
|
|
145
|
-
klass_as_s = val.class.to_s
|
|
146
|
-
hash[:type] = Validator_to_string[klass_as_s]
|
|
147
|
-
hash[:class] = klass_as_s
|
|
148
|
-
hash
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
module Precision::Calculator
|
|
154
|
-
# calculates precision by the assumption that the first group are all true
|
|
155
|
-
# hits and the second are all false hits
|
|
156
|
-
# (0,0) is returned as 1.0
|
|
157
|
-
def calc_precision(num_true_hits, num_false_hits)
|
|
158
|
-
if ((num_true_hits.to_f == 0.0) && (num_false_hits.to_f == 0.0))
|
|
159
|
-
1.0
|
|
160
|
-
else
|
|
161
|
-
num_true_hits.to_f / (num_true_hits.to_f + num_false_hits.to_f)
|
|
162
|
-
end
|
|
163
|
-
end
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
# will calculate precision for groups of proteins where the first group are
|
|
167
|
-
# normal hits (which may be true or false) and the second are decoy hits.
|
|
168
|
-
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
|
|
169
|
-
module Precision::Calculator::Decoy
|
|
170
|
-
def calc_precision(num_normal, num_decoy, frit=1.0)
|
|
171
|
-
# will calculate as floats in case fractional amounts passed in for
|
|
172
|
-
# whatever reason
|
|
173
|
-
num_normal_f = num_normal.to_f
|
|
174
|
-
num_true_pos = num_normal_f - (num_decoy.to_f * frit)
|
|
175
|
-
precision =
|
|
176
|
-
if num_normal_f == 0.0
|
|
177
|
-
if num_decoy.to_f > 0.0
|
|
178
|
-
0.0
|
|
179
|
-
else
|
|
180
|
-
1.0
|
|
181
|
-
end
|
|
182
|
-
else
|
|
183
|
-
num_true_pos/num_normal_f
|
|
184
|
-
end
|
|
185
|
-
end
|
|
186
|
-
end
|
|
187
|
-
|
|
188
|
-
#require 'validator/true_pos'
|
|
189
|
-
#require 'validator/aa'
|
|
190
|
-
#require 'validator/aa_est'
|
|
191
|
-
#require 'validator/bias'
|
|
192
|
-
#require 'validator/decoy'
|
|
193
|
-
#require 'validator/transmem'
|
|
194
|
-
#require 'validator/probability'
|
|
195
|
-
#require 'validator/q_value'
|
|
196
|
-
#require 'validator/prot_from_pep'
|
|
197
|
-
|
data/lib/xml.rb
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
module XML
|
|
3
|
-
HourMinuteMatch = /[MH]/o
|
|
4
|
-
# returns a float object of seconds
|
|
5
|
-
# doesn't support year month, etc, yet
|
|
6
|
-
def self.duration_to_seconds(string)
|
|
7
|
-
case x = string[0,2]
|
|
8
|
-
when 'PT'
|
|
9
|
-
rest = string[2..-1]
|
|
10
|
-
# usually it will be this 'PT1.223434S':
|
|
11
|
-
if rest !~ HourMinuteMatch
|
|
12
|
-
rest[0...-1].to_f
|
|
13
|
-
else
|
|
14
|
-
addit = ''
|
|
15
|
-
total_secs = 0
|
|
16
|
-
total_secs_as_float = nil
|
|
17
|
-
rest.split('').each do |let|
|
|
18
|
-
case let
|
|
19
|
-
when 'H'
|
|
20
|
-
total_secs += addit.to_i * 3600
|
|
21
|
-
addit = ''
|
|
22
|
-
when 'M'
|
|
23
|
-
total_secs += addit.to_i * 60
|
|
24
|
-
addit = ''
|
|
25
|
-
when 'S'
|
|
26
|
-
total_secs_as_float = total_secs.to_f
|
|
27
|
-
total_secs_as_float += addit.to_f
|
|
28
|
-
else
|
|
29
|
-
addit << let
|
|
30
|
-
end
|
|
31
|
-
end
|
|
32
|
-
total_secs_as_float
|
|
33
|
-
end
|
|
34
|
-
else
|
|
35
|
-
abort 'need to include support for other durations'
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
data/lib/xml_style_parser.rb
DELETED
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
module XMLStyleParser
|
|
3
|
-
@done_once = nil
|
|
4
|
-
|
|
5
|
-
Parser_precedence = %w(AXML LibXML XMLParser Regexp REXML)
|
|
6
|
-
# currently AXML requires 'xmlparser' to be installed.... (may not always be
|
|
7
|
-
# the case...)
|
|
8
|
-
File_required = {'AXML' => /^axml/, 'LibXML' => /^xml\/libxml/, 'XMLParser' => /^xmlparser/}
|
|
9
|
-
|
|
10
|
-
# the method that the parser will call on the given file at parse!
|
|
11
|
-
attr_accessor :method
|
|
12
|
-
|
|
13
|
-
# parses the given file by sending to @method
|
|
14
|
-
def parse(file, opts={})
|
|
15
|
-
if respond_to? @method
|
|
16
|
-
send(@method, file, opts)
|
|
17
|
-
else
|
|
18
|
-
raise NoMethodError, "Parser of class #{self.class} can't parse #{@method} yet"
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
# XMLParser and xml/libxml are incompatible, so if xmlparser is available,
|
|
23
|
-
# libxml will not be loaded (XMLParser#parse is clobbered by
|
|
24
|
-
# XML::Parser#parse [don't ask me why])
|
|
25
|
-
def self.require_parsers
|
|
26
|
-
if !@done_once
|
|
27
|
-
have_xmlparser = false
|
|
28
|
-
begin
|
|
29
|
-
require 'xmlparser'
|
|
30
|
-
puts "Loaded XMLParser" if $VERBOSE
|
|
31
|
-
have_xmlparser = true
|
|
32
|
-
rescue LoadError
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
begin
|
|
36
|
-
require 'axml'
|
|
37
|
-
puts "Loaded AXML" if $VERBOSE
|
|
38
|
-
rescue LoadError
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
begin
|
|
42
|
-
unless have_xmlparser
|
|
43
|
-
require 'xml/libxml'
|
|
44
|
-
puts "Loaded xml/libxml" if $VERBOSE
|
|
45
|
-
################################################################
|
|
46
|
-
# IMPORTANT!
|
|
47
|
-
# This magic line makes the parser behave like it ought to!!
|
|
48
|
-
XML::Parser.default_keep_blanks = false
|
|
49
|
-
################################################################
|
|
50
|
-
end
|
|
51
|
-
rescue LoadError
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
@done_once = true
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# returns an array of strings depending on File_required (in the order of
|
|
58
|
-
# Parser_precedence)
|
|
59
|
-
def self.available_xml_parsers
|
|
60
|
-
require_parsers
|
|
61
|
-
parser_precedence = Parser_precedence.dup
|
|
62
|
-
File_required.map do |k,v|
|
|
63
|
-
unless $".any? {|req_file| req_file.match(v) }
|
|
64
|
-
parser_precedence.delete(k)
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
parser_precedence
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
## appends downcase to each parser type here and tries to require it
|
|
71
|
-
# returns all those that were required without a load error
|
|
72
|
-
def self.require_parse_files(base_dir)
|
|
73
|
-
XMLStyleParser.available_xml_parsers.select do |v|
|
|
74
|
-
to_require = base_dir + '/' + v.downcase
|
|
75
|
-
begin
|
|
76
|
-
require to_require
|
|
77
|
-
true
|
|
78
|
-
rescue LoadError
|
|
79
|
-
false
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
# seeks a subclass that has the public_method @method
|
|
85
|
-
def self.choose_parser(const, method, special_subclass=nil)
|
|
86
|
-
## First update @@parser_precedence to ensure we should get these guys
|
|
87
|
-
parser_precedence = available_xml_parsers
|
|
88
|
-
|
|
89
|
-
available_constants = parser_precedence.select do |v|
|
|
90
|
-
const.const_defined?(v)
|
|
91
|
-
end
|
|
92
|
-
available_subclasses = available_constants.map do |v|
|
|
93
|
-
const.const_get(v)
|
|
94
|
-
end
|
|
95
|
-
available = available_subclasses.select do |subclass|
|
|
96
|
-
subclass.public_method_defined? method
|
|
97
|
-
end
|
|
98
|
-
if special_subclass
|
|
99
|
-
available_special_subclasses = []
|
|
100
|
-
available.each do |subclass|
|
|
101
|
-
if subclass.const_defined?(special_subclass)
|
|
102
|
-
available_special_subclasses << subclass.const_get(special_subclass)
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
available = available_special_subclasses
|
|
106
|
-
end
|
|
107
|
-
if available.size > 0
|
|
108
|
-
available.first
|
|
109
|
-
else
|
|
110
|
-
warning = ""
|
|
111
|
-
if special_subclass
|
|
112
|
-
warning << "** while looking for special subclass: #{special_subclass} **\n"
|
|
113
|
-
end
|
|
114
|
-
warning << "No parser of class #{const} can parse :#{method}\n** Is 'axml' (or another xml parser) installed and working? **"
|
|
115
|
-
raise NoMethodError, warning
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
end
|
data/lib/xmlparser_wrapper.rb
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
module XMLParserWrapper
|
|
4
|
-
def parse_and_report(file, const, report_method=:report)
|
|
5
|
-
parse_and_report_string(IO.read(file), const, report_method)
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
def parse_and_report_string(string, const, report_method=:report)
|
|
9
|
-
parser = self.class.const_get(const).new
|
|
10
|
-
parser.parse(string)
|
|
11
|
-
parser.send(report_method)
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def parse_and_report_io(io, const, report_method=:report)
|
|
15
|
-
parser = self.class.const_get(const).new
|
|
16
|
-
parser.parse(io)
|
|
17
|
-
parser.send(report_method)
|
|
18
|
-
end
|
|
19
|
-
end
|
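data/release_notes.txt
DELETED
|
[2 removed lines not captured in this listing]
data/script/compile_and_plot_smriti_final.rb
DELETED
|
@@ -1,97 +0,0 @@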
|
|
|
1
|
-
#!/usr/bin/ruby -w
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
require 'roc'
|
|
5
|
-
require 'optparse'
|
|
6
|
-
require 'generator'
|
|
7
|
-
|
|
8
|
-
$decoy = false
|
|
9
|
-
$base = "precision_vs_numhits"
|
|
10
|
-
|
|
11
|
-
opts = OptionParser.new do |op|
|
|
12
|
-
op.banner = "usage: #{File.basename(__FILE__)} smriti.csv ..."
|
|
13
|
-
op.separator ""
|
|
14
|
-
op.separator "smriti.csv = (tab delimited) prob, file:seq:charge, T/F"
|
|
15
|
-
op.separator ""
|
|
16
|
-
op.on("--decoy", "'F' indicates this is a decoy") {|v| $decoy = true }
|
|
17
|
-
op.on("-o", "--outfile <filename>", "base outfile name (#{$base})") {|v| $base = v}
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
opts.parse!
|
|
21
|
-
|
|
22
|
-
if ARGV.size <= 0
|
|
23
|
-
puts opts
|
|
24
|
-
exit
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
files = ARGV.to_a
|
|
28
|
-
|
|
29
|
-
xys = files.map do |file|
|
|
30
|
-
triplets = IO.readlines(file).reject{|v| v =~ /^#/}.map do |line|
|
|
31
|
-
line.chomp.split("\t")
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# check that they're all OK:
|
|
35
|
-
triplets.each do |trip|
|
|
36
|
-
if trip.size != 3 ; abort "bad triplet" end
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# figure out the ordering (and correct if necessary):
|
|
40
|
-
higher_better = triplets[0][0].to_f > triplets.last[0].to_f
|
|
41
|
-
|
|
42
|
-
doublets = triplets.map do |trip|
|
|
43
|
-
value = trip[0].to_f
|
|
44
|
-
value *= -1 if higher_better
|
|
45
|
-
[value, ((trip[2] == 'T') ? true : false)]
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
roc = ROC.new
|
|
49
|
-
|
|
50
|
-
(tps, fps) = roc.doublets_to_separate(doublets)
|
|
51
|
-
|
|
52
|
-
(x, y) =
|
|
53
|
-
if $decoy
|
|
54
|
-
(numhits, precision) = DecoyROC.new.pred_and_ppv(tps, fps)
|
|
55
|
-
[numhits, precision]
|
|
56
|
-
else
|
|
57
|
-
(numhits, precision) = roc.numhits_and_ppv(doublets)
|
|
58
|
-
[numhits, precision]
|
|
59
|
-
end
|
|
60
|
-
[x,y]
|
|
61
|
-
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
## PLOT TO to_plot
|
|
66
|
-
File.open( $base + ".to_plot", 'w') do |fh|
|
|
67
|
-
fh.puts "XYData"
|
|
68
|
-
fh.puts $base
|
|
69
|
-
fh.puts "precision vs. num hits"
|
|
70
|
-
fh.puts "num hits"
|
|
71
|
-
fh.puts "precision"
|
|
72
|
-
files.zip(xys) do |file,xy|
|
|
73
|
-
(x,y) = xy
|
|
74
|
-
x.unshift(0)
|
|
75
|
-
y.unshift(1)
|
|
76
|
-
fh.puts file.sub(/\.[^\.]$/,'')
|
|
77
|
-
fh.puts x.join(" ")
|
|
78
|
-
fh.puts y.join(" ")
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
File.open( $base + ".csv", 'w') do |fh|
|
|
83
|
-
columns = []
|
|
84
|
-
files.zip(xys) do |file,xy|
|
|
85
|
-
f = file.sub(/\.[^\.]$/,'')
|
|
86
|
-
(x,y) = xy
|
|
87
|
-
x.unshift("#Hits: #{f}")
|
|
88
|
-
y.unshift("Precision: #{f}")
|
|
89
|
-
columns << x << y
|
|
90
|
-
end
|
|
91
|
-
SyncEnumerator.new(*columns).each do |row|
|
|
92
|
-
fh.puts row.join("\t")
|
|
93
|
-
end
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
data/script/create_little_pepxml.rb
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/ruby -w
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
if ARGV.size < 2
|
|
5
|
-
puts "usage: #{File.basename(__FILE__)} protxml pepxml"
|
|
6
|
-
puts "Based on some kind of truncated prot xml file, takes a pepxml file"
|
|
7
|
-
puts "and deletes all search hits/peptides that aren't in the prot xml file!"
|
|
8
|
-
exit
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
protxml = ARGV[0]
|
|
12
|
-
pepxml = ARGV[1]
|
|
13
|
-
|
|
14
|
-
hash = {}
|
|
15
|
-
File.open(protxml) do |fh|
|
|
16
|
-
while line = fh.gets
|
|
17
|
-
if line =~ /peptide_sequence="(.*?)" charge="(\d)" /
|
|
18
|
-
hash[[$1.dup,$2.dup]] = 1
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
p hash
|
|
24
|
-
|
|
25
|
-
out = File.open(pepxml + ".small", "w")
|
|
26
|
-
|
|
27
|
-
in_hit = false
|
|
28
|
-
cur_charge = nil
|
|
29
|
-
stored_lines = ""
|
|
30
|
-
print_it = false
|
|
31
|
-
File.open(pepxml) do |fh|
|
|
32
|
-
while line = fh.gets
|
|
33
|
-
if line =~ /<search_result .*? assumed_charge="(\d)".*?>/
|
|
34
|
-
cur_charge = $1.dup
|
|
35
|
-
in_hit = true
|
|
36
|
-
end
|
|
37
|
-
if line =~ /<search_hit .*? peptide="(.*?)"/
|
|
38
|
-
if hash.key?([$1.dup,cur_charge])
|
|
39
|
-
print_it = true
|
|
40
|
-
else
|
|
41
|
-
print_it = false
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
if line =~ /<\/search_result>/
|
|
45
|
-
if print_it == true
|
|
46
|
-
stored_lines << line
|
|
47
|
-
out.print stored_lines
|
|
48
|
-
end
|
|
49
|
-
stored_lines = ""
|
|
50
|
-
in_hit == false
|
|
51
|
-
elsif !in_hit
|
|
52
|
-
out.print line
|
|
53
|
-
else
|
|
54
|
-
stored_lines << line
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
out.close
|
|
data/script/degenerate_peptides.rb
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/ruby -w
|
|
2
|
-
|
|
3
|
-
require 'fasta'
|
|
4
|
-
require 'sample_enzyme'
|
|
5
|
-
|
|
6
|
-
if ARGV.size < 3
|
|
7
|
-
puts "usage: #{File.basename(__FILE__)} min_peptide_length missed_cleavages <file>.fasta ..."
|
|
8
|
-
puts " returns <file>.min_pep_length_<#>.missed_cleavages_<#>.degenerate_peptides.csv"
|
|
9
|
-
abort
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
min_peptide_length = ARGV.shift.to_i
|
|
15
|
-
missed_cleavages = ARGV.shift.to_i
|
|
16
|
-
|
|
17
|
-
ARGV.each do |file|
|
|
18
|
-
hash = {}
|
|
19
|
-
|
|
20
|
-
if file !~ /\.fasta/
|
|
21
|
-
abort "must be a fasta file with extension fasta"
|
|
22
|
-
end
|
|
23
|
-
new_filename = file.sub(/\.fasta$/, '')
|
|
24
|
-
new_filename << ".min_pep_length_#{min_peptide_length}.missed_cleavages_#{missed_cleavages}.degenerate_peptides.csv"
|
|
25
|
-
peptides = []
|
|
26
|
-
Fasta.new.read_file(file).prots.each do |prot|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).each do |aaseq|
|
|
30
|
-
if aaseq.size >= min_peptide_length
|
|
31
|
-
hash[aaseq] ||= []
|
|
32
|
-
hash[aaseq].push( prot.header.sub(/^>/,'') )
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
#fh.puts( prot.header.split(/\s+/).first.sub(/^>/,'') + "\t" + SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).join(" ") )
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
File.open(new_filename, "w") do |fh|
|
|
39
|
-
hash.keys.sort_by {|pep| hash[pep].size }.reverse.each do |pep|
|
|
40
|
-
fh.puts( [pep, *(hash[pep])].join("\t") )
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|