mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/README
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
= {Mspire}[http://mspire.rubyforge.org]
|
2
|
+
|
3
|
+
A library for working with mass spectrometry proteomics data.
|
4
|
+
|
5
|
+
<em> Mspire is going through a re-write as of version 0.5.0 to support a new
|
6
|
+
development model. Many modules are absent but will gradually be added back.
|
7
|
+
Use the 0.4 releases as necessary. </em>
|
8
|
+
|
9
|
+
== Description
|
1
10
|
|
2
11
|
mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
|
3
12
|
working with MS proteomics data in ruby. It seeks to provide support for open
|
@@ -5,19 +14,22 @@ standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
|
|
5
14
|
TPP) and contribute other useful functionality for working with mass
|
6
15
|
spectrometry data in ruby.
|
7
16
|
|
8
|
-
|
9
|
-
|
17
|
+
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
|
18
|
+
* Github[http://github.com/bahuvrihi/mspire/tree/master]
|
19
|
+
* {Google Group}[http://groups.google.com/group/mspire-forum]
|
20
|
+
|
21
|
+
--
|
22
|
+
=== Current Focus
|
10
23
|
|
11
24
|
The project is currently focusing on the following:
|
12
25
|
|
13
|
-
* SEQUEST data (particularly the output of Bioworks 3.2-3.3)
|
26
|
+
* SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
|
14
27
|
* mzXML
|
15
28
|
* mzData
|
16
29
|
* ProteinProphet
|
17
30
|
* Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
|
18
31
|
|
19
|
-
Features
|
20
|
-
--------
|
32
|
+
=== Features
|
21
33
|
|
22
34
|
* mzXML (version 1, 2, and 3) parsing
|
23
35
|
* mzData parsing
|
@@ -37,31 +49,29 @@ Validation by:
|
|
37
49
|
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
38
50
|
* Defined sample
|
39
51
|
|
40
|
-
Spectra and Spectra Identification
|
41
|
-
----------------------------------
|
52
|
+
=== Spectra and Spectra Identification
|
42
53
|
|
43
54
|
The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
|
44
55
|
|
45
56
|
The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
|
46
57
|
|
47
|
-
Tutorials
|
48
|
-
---------
|
58
|
+
=== Tutorials
|
49
59
|
|
50
60
|
* [Database Searching Tutorial](tutorial/database_searching/index.html) -
|
51
61
|
Demonstrates two methods for running and analysing Bioworks output to obtain
|
52
62
|
false positive rates using mspire executables.
|
63
|
+
++
|
64
|
+
|
65
|
+
== Installation
|
53
66
|
|
54
|
-
|
55
|
-
|
67
|
+
Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
|
68
|
+
|
69
|
+
% gem install mspire
|
70
|
+
|
71
|
+
= Warning
|
56
72
|
|
57
73
|
This is an experimental package. As such, all versions prior to version 1.0
|
58
74
|
may contain interface changes on minor revisions (major.minor.build) (e.g.,
|
59
75
|
0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
|
60
76
|
versioning scheme will be strictly adhered to (no interface changes except on
|
61
77
|
major revisions).
|
62
|
-
|
63
|
-
Installation
|
64
|
-
------------
|
65
|
-
|
66
|
-
see [Install](install/index.html)
|
67
|
-
|
data/changelog.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
|
2
|
-
|
2
|
+
== version 0.1.7
|
3
3
|
|
4
4
|
1. A couple of scripts and subroutines were hashing peptides but not on the file
|
5
5
|
basename. This would result in slightly incorrect results (any time there
|
@@ -33,33 +33,33 @@ Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
|
|
33
33
|
called the False Positive Predictive Rate (FPPR). I will probably implement
|
34
34
|
this in a future release.
|
35
35
|
|
36
|
-
|
36
|
+
== version 0.2.0
|
37
37
|
|
38
38
|
Revamped the way SpecID works (it is now mixed-in).
|
39
39
|
Added support for modifications to bioworks_to_pepxml.rb
|
40
40
|
Can read .srf files (nearly interchangeable with bioworks files)
|
41
41
|
Redid filter.rb
|
42
42
|
|
43
|
-
|
43
|
+
== version 0.2.1
|
44
44
|
|
45
45
|
minor bugfix
|
46
46
|
|
47
|
-
|
47
|
+
== version 0.2.2
|
48
48
|
|
49
49
|
made compatible with Bioworks fasta file reverser and updated tutorial.
|
50
50
|
Killed classify_by_prefix routine in favor of classify_by_false_flag which has
|
51
51
|
a prefix option
|
52
52
|
|
53
|
-
|
53
|
+
== version 0.2.3
|
54
54
|
|
55
55
|
in protein_summary.rb added handling for proteins with no annotation. (either
|
56
56
|
display NA or use gi2annot to grab them from NCBI)
|
57
57
|
|
58
|
-
|
58
|
+
== version 0.2.5
|
59
59
|
|
60
60
|
renamed prep_list in roc (potential breaks in code)
|
61
61
|
|
62
|
-
|
62
|
+
== version 0.2.6
|
63
63
|
|
64
64
|
1. Massive refactorization of filtering and validation. Validation objects are
|
65
65
|
created and then can be used to validate just about anything.
|
@@ -75,105 +75,105 @@ appear to influence our analyses, however). Fixed.
|
|
75
75
|
2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
|
76
76
|
missed cleavages if the last amino acid was a cut point. Fixed.
|
77
77
|
|
78
|
-
|
78
|
+
== version 0.2.7
|
79
79
|
|
80
80
|
1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
|
81
81
|
Now, the sample enzyme is set explicitly from the params file and the option
|
82
82
|
is not available. This can give more accurate pepxml files than from
|
83
83
|
previous depending on your enzyme.
|
84
84
|
|
85
|
-
|
85
|
+
== version 0.2.9
|
86
86
|
|
87
87
|
1. Added support for phobius transmembrane predictions
|
88
88
|
2. have filter_and_validate.rb working well (multiple validators allowed).
|
89
89
|
3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
|
90
90
|
4. Added a bias validator
|
91
91
|
|
92
|
-
|
92
|
+
== version 0.2.10
|
93
93
|
|
94
94
|
1. Fixed --hits_separate flag in spec_id/filter
|
95
95
|
|
96
|
-
|
96
|
+
== version 0.2.11
|
97
97
|
|
98
98
|
1. Added prob precision support and reorganized filter_and_validate libs
|
99
99
|
|
100
|
-
|
100
|
+
== version 0.2.12
|
101
101
|
|
102
102
|
1. Fixed bug in transmem for prob and others.
|
103
103
|
2. Can use axml (XMLParser based) or libxml depending on availability
|
104
104
|
|
105
|
-
|
105
|
+
== version 0.2.13
|
106
106
|
|
107
107
|
1. Fixed issue with --hits_separate
|
108
108
|
2. filter_and_validate.rb requires decoy validator if decoy proteins
|
109
109
|
(refactored code)
|
110
110
|
|
111
|
-
|
111
|
+
== version 0.2.14
|
112
112
|
|
113
113
|
1. Can read PeptideProphet files (should be able to read pepxml files, too)
|
114
114
|
2. API change: Some slight modifications to the Sequest::PepXML object
|
115
115
|
interfaces and implementations (using ArrayClass)
|
116
116
|
|
117
|
-
|
117
|
+
== version 0.2.15
|
118
118
|
|
119
119
|
1. can convert srf files to sqt files
|
120
120
|
|
121
|
-
|
121
|
+
== version 0.3.0
|
122
122
|
|
123
123
|
1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
|
124
124
|
2. SQT export is correct and works at least on 3.2 and 3.3.1.
|
125
125
|
|
126
|
-
|
126
|
+
== version 0.3.1
|
127
127
|
|
128
128
|
1. Bug fix in srf filtering (num_hits adjusted)
|
129
129
|
|
130
|
-
|
130
|
+
== version 0.3.2
|
131
131
|
|
132
132
|
1. Uses sequest peptide_mass_tolerance filter on srf group files by default
|
133
133
|
now.
|
134
134
|
|
135
|
-
|
135
|
+
== version 0.3.3
|
136
136
|
|
137
137
|
1. Worked out minor kinks in prob_precision.rb
|
138
138
|
|
139
|
-
|
139
|
+
== version 0.3.4
|
140
140
|
|
141
141
|
1. filters >= +3 charged ions now.
|
142
142
|
|
143
|
-
|
143
|
+
== version 0.3.5
|
144
144
|
|
145
145
|
1. fixed creation of background distribution in validators (hash_by base_name,
|
146
146
|
first_scan, charge now)
|
147
147
|
|
148
|
-
|
148
|
+
== version 0.3.6
|
149
149
|
|
150
150
|
1. split off bad_aa_est from bad_aa
|
151
151
|
|
152
|
-
|
152
|
+
== version 0.3.7
|
153
153
|
|
154
154
|
1. can deal with No_Enzyme searches now (while still capable of setting
|
155
155
|
sample_enzyme)
|
156
156
|
|
157
|
-
|
157
|
+
== version 0.3.8
|
158
158
|
|
159
159
|
1. can set a decoy to target ratio for decoy validation
|
160
160
|
2. added mass calculator in Mass::Calculator
|
161
161
|
|
162
|
-
|
162
|
+
== version 0.3.9
|
163
163
|
|
164
164
|
1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
|
165
165
|
|
166
|
-
|
166
|
+
== version 0.3.10
|
167
167
|
|
168
168
|
1. added run_percolator.rb script which makes running multiple files easy
|
169
169
|
|
170
|
-
|
170
|
+
== version 0.3.11
|
171
171
|
|
172
172
|
1. faster sensing of bad scan tags in mzXML v. 2.0 files
|
173
173
|
2. implemented lazy evaluation of spectrum in 2 different ways allowing much
|
174
174
|
larger files to be parsed
|
175
175
|
|
176
|
-
|
176
|
+
== version 0.4.0
|
177
177
|
|
178
178
|
1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
|
179
179
|
2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
|
@@ -182,46 +182,15 @@ larger files to be parsed
|
|
182
182
|
eval methods (however, the method intensity_at_mz will still work (causing
|
183
183
|
evaluation))
|
184
184
|
|
185
|
-
|
185
|
+
== version 0.4.1
|
186
186
|
|
187
187
|
1. added support for reading mzXML version 3.0 (may fail in some cases)
|
188
188
|
|
189
|
-
|
189
|
+
== version 0.4.2
|
190
190
|
|
191
191
|
1. added MS::MSRun.open method
|
192
192
|
2. added method to write dta files from SRF
|
193
193
|
|
194
|
-
|
194
|
+
== version 0.4.3
|
195
195
|
|
196
196
|
1. added to_mfg_file from SRF
|
197
|
-
2. added to_dta_files from SRF complete with streaming .tar.gz output (and
|
198
|
-
supporting .zip output but it has to make tmp files)
|
199
|
-
|
200
|
-
## version 0.4.4
|
201
|
-
1. implemented q-value and pi_0 methods of Storey
|
202
|
-
2. can do complete q-value calculations given p-values
|
203
|
-
3. can determine a pi_0 given a list of target and decoy values (as booleans)
|
204
|
-
4. can determine a pi_0 given a list containing numbers of decoy and target
|
205
|
-
values as is often encountered with filtering
|
206
|
-
5. prob_validate.rb implements a q-value option for turning PeptideProphet
|
207
|
-
probabilities into q-values
|
208
|
-
6. filter_validate.rb implements a p value method using xcorr values, however,
|
209
|
-
this is not very effective since xcorr values underrepresent the
|
210
|
-
difference between good hits and bad hits
|
211
|
-
|
212
|
-
## version 0.4.5
|
213
|
-
1. using pi_zero instead of decoy_to_target_ratio. While all tests are
|
214
|
-
passing, this release should be considered experimental with the use of any
|
215
|
-
target-decoy validation.
|
216
|
-
|
217
|
-
## version 0.4.6
|
218
|
-
1. added NOTE to --to_qvalues option to include all results (no low prob
|
219
|
-
filter)
|
220
|
-
|
221
|
-
## version 0.4.7
|
222
|
-
1. Added ability to quickly grab sequest params out of a .SRF file
|
223
|
-
2. Added helpful runtime error if print_duplicate_references is 0.
|
224
|
-
|
225
|
-
## version 0.4.9
|
226
|
-
1. quiet some unneeded output and fixed truncation of filenames with '.'
|
227
|
-
inside them.
|
data/lib/ms/calc.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Ms
  # Small numeric helpers for parts-per-million (ppm) tolerances on m/z
  # values. Every method is exposed as a module function, e.g.
  # Ms::Calc.ppm_tol_at(mz, ppm).
  #
  # (Original note: ppm calculations... maybe use RUnit)
  module Calc
    module_function

    # Returns the absolute tolerance that +ppm+ parts-per-million
    # represents at +mz+, as a Float.
    def ppm_tol_at(mz, ppm)
      (1.0 * mz * ppm) / 1_000_000
    end

    # Returns a two-element array [low, high] spanning +mz+ plus or minus
    # the ppm tolerance at +mz+.
    def ppm_span_at(mz, ppm)
      delta = ppm_tol_at(mz, ppm)
      [mz - delta, mz + delta]
    end

    # Returns an end-exclusive Range (low...high) around +mz+, which is
    # first converted with to_f.
    def ppm_range_at(mz, ppm)
      center = mz.to_f
      delta = ppm_tol_at(center, ppm)
      Range.new(center - delta, center + delta, true)
    end

    # Rounds n to the specified precision (ie number of decimal places)
    # def round(n, precision)
    #   factor = 10**precision.to_i
    #   (n * factor).round.to_f / factor
    # end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'ms/data/simple'
|
2
|
+
|
3
|
+
module Ms
  module Data
    module_function

    # Initializes a new interleaved data array whose unresolved data holds
    # n interleaved members (n defaults to 2).
    def new_interleaved(unresolved_data, n=2)
      # Pass n through as given; writing "n=2" here would be an assignment
      # expression that always evaluates to 2.
      Interleaved.new(unresolved_data, n)
    end

    # An Interleaved data array lazily evaluates its unresolved data as
    # an interleaved array of n members. The unresolved data is evaluated
    # into an array using to_a.
    #
    #   i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
    #   i.unresolved_data      # => [1,4,2,5,3,6]
    #   i.data                 # => []
    #   i[0]                   # => [1,2,3]
    #   i[1]                   # => [4,5,6]
    #   i.data                 # => [[1,2,3], [4,5,6]]
    #
    class Interleaved < Simple
      # The number of interleaved members in the unresolved data.
      attr_reader :n

      # unresolved_data:: any object responding to to_a
      # n:: the number of interleaved members (default 2)
      def initialize(unresolved_data, n=2)
        @n = n
        super(unresolved_data)
      end

      # Resolves the data (if necessary) and returns the member at index.
      def [](index)
        resolve.data[index]
      end

      # True once resolve has populated data.
      def resolved?
        !@data.empty?
      end

      # De-interleaves the unresolved data into n member arrays stored in
      # data, and returns self. Does nothing if already resolved.
      #
      # Raises ArgumentError if the number of unresolved elements is not
      # evenly divisible by n.
      def resolve
        return(self) if resolved?

        unresolved_data = @unresolved_data.to_a

        unless unresolved_data.length % n == 0
          raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
        end

        # Fill data in place so references to data obtained earlier see the
        # resolved members.
        n.times { @data << [] }

        # Element i belongs to member (i % n).
        unresolved_data.each_with_index do |item, i|
          @data[i % n] << item
        end

        self
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Ms
  module Data

    # LazyIO describes a stretch of data that is read from an IO only when
    # asked for. Call string to read the raw bytes, or to_a to read them and
    # unpack them into an array (using decode_format, then unpack_format).
    #
    # LazyIO is a suitable unresolved_data source for Ms::Data formats.
    class LazyIO
      NETWORK_FLOAT = 'g*'
      NETWORK_DOUBLE = 'G*'
      LITTLE_ENDIAN_FLOAT = 'e*'
      LITTLE_ENDIAN_DOUBLE = 'E*'
      BASE_64 = 'm'

      # Returns the unpacking code for the given precision (32 or 64-bit)
      # and network order (true for big-endian). Raises ArgumentError for
      # any other precision.
      def self.unpack_code(precision, network_order)
        big_endian, little_endian =
          case precision
          when 32 then [NETWORK_FLOAT, LITTLE_ENDIAN_FLOAT]
          when 64 then [NETWORK_DOUBLE, LITTLE_ENDIAN_DOUBLE]
          else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
          end

        network_order ? big_endian : little_endian
      end

      # io::            the IO from which string is read
      # start_index::   the start index for reading string
      # num_bytes::     the number of bytes to be read from io when
      #                 evaluating string
      # unpack_format:: the unpacking format
      # decode_format:: a decoding format; may be false to unpack string
      #                 without decoding
      attr_reader :io, :start_index, :num_bytes, :unpack_format, :decode_format

      def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
        @io = io
        @start_index = start_index
        @num_bytes = num_bytes
        @unpack_format = unpack_format
        @decode_format = decode_format
      end

      # Moves io to start_index (when it is not already there) and reads
      # num_bytes from it. Nothing is cached: every call reads io again.
      def string
        io.pos = start_index if io.pos != start_index
        io.read(num_bytes)
      end

      # Drops the array cached by to_a so the next to_a re-reads io.
      def reset
        @array = nil
      end

      # Reads string, decodes it with decode_format (unless decode_format
      # is false/nil) and unpacks the result with unpack_format. The array
      # is cached internally; use reset to force a re-read.
      def to_a
        return @array if @array

        raw = string
        payload = decode_format ? raw.unpack(decode_format)[0] : raw
        @array = payload.unpack(unpack_format)
      end

    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'ms/data/lazy_io'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
module Ms
  module Data

    # LazyString is a LazyIO initialized from a string, which is converted into
    # a StringIO.
    class LazyString < LazyIO
      # string::        the packed (and, by default, base64-encoded) data
      # unpack_format:: the unpacking format (see LazyIO)
      # decode_format:: the decoding format, or false to skip decoding
      def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
        # The third argument is the number of bytes read back from the
        # StringIO, so it must be the byte size; the character length is
        # smaller for strings holding multibyte characters and would
        # truncate the data.
        super(StringIO.new(string), 0, string.bytesize, unpack_format, decode_format)
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Ms
  module Data
    module_function

    # Initializes a new simple data array.
    def new_simple(unresolved_data)
      Simple.new(unresolved_data)
    end

    # A Simple data array that lazily evaluates unresolved_data, and
    # each member of unresolved_data using to_a:
    #
    #   class LazyObject
    #     attr_reader :to_a
    #     def initialize(array)
    #       @to_a = array
    #     end
    #   end
    #
    #   a = LazyObject.new([1,2,3])
    #   b = LazyObject.new([4,5,6])
    #   s = Ms::Data::Simple.new([a, b])
    #
    #   s.unresolved_data      # => [a, b]
    #   s.data                 # => []
    #   s[0]                   # => [1,2,3]
    #   s[1]                   # => [4,5,6]
    #   s.data                 # => [[1,2,3], [4,5,6]]
    #
    class Simple
      # The underlying resolved data store.
      attr_reader :data

      # The underlying unresolved data store.
      attr_reader :unresolved_data

      # unresolved_data:: any object responding to to_a, whose members
      #                   also respond to to_a
      def initialize(unresolved_data)
        @data = []
        @unresolved_data = unresolved_data
      end

      # Returns the member at index, resolving (and caching) it with to_a
      # the first time it is requested.
      def [](index)
        @data[index] ||= @unresolved_data.to_a[index].to_a
      end

      # Resolves every member of unresolved_data and returns self.
      def resolve
        # Enumerate through to_a rather than asking unresolved_data for its
        # length: to_a is the only method unresolved_data must provide.
        @unresolved_data.to_a.each_index { |index| self[index] } unless resolved?

        self
      end

      # True when every member of unresolved_data has been resolved.
      def resolved?
        # Check each in-range slot instead of counting non-nil entries, so
        # entries cached by out-of-range lookups cannot make this true
        # while a real member is still unresolved.
        @unresolved_data.to_a.each_index.all? { |index| @data[index] }
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'ms/data/simple'
|
2
|
+
|
3
|
+
module Ms
  module Data
    module_function

    # Builds a Transposed data array around unresolved_data.
    def new_transposed(unresolved_data)
      Transposed.new(unresolved_data)
    end

    # A Transposed data array holds unresolved data laid out as rows
    # (one row per data point) and lazily turns it into columns. The
    # unresolved data is converted with to_a and then transposed.
    #
    #   t = Ms::Data::Transposed.new([[1,4],[2,5],[3,6]])
    #
    #   t.unresolved_data      # => [[1,4],[2,5],[3,6]]
    #   t.data                 # => []
    #   t[0]                   # => [1,2,3]
    #   t[1]                   # => [4,5,6]
    #   t.data                 # => [[1,2,3], [4,5,6]]
    #
    class Transposed < Simple

      # Resolves the data if needed, then returns the member at index.
      def [](index)
        resolved = resolve
        resolved.data[index]
      end

      # True once data holds at least one member.
      def resolved?
        @data.size > 0
      end

      # Transposes the unresolved data into data (only when not yet
      # resolved) and returns self.
      def resolve
        return self if resolved?

        @data = @unresolved_data.to_a.transpose
        self
      end

    end
  end
end
|
data/lib/ms/data.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'ms/data/interleaved'
|
2
|
+
require 'ms/data/transposed'
|
3
|
+
|
4
|
+
module Ms

  # The Data module contains a number of classes providing a standard way to
  # resolve various data storage formats into a 'simple' data array.
  #
  #   type          format
  #   simple        [[mzs,...], [intensities...]]
  #   transposed    [[mz,intensity], [mz,intensity], ...]
  #   interleaved   [mz,intensity,mz,intensity,...]
  #
  # For instance:
  #
  #   s = Data.new([[1,2,3], [4,5,6]], :simple)
  #   s.resolve.data         # => [[1,2,3], [4,5,6]]
  #
  #   t = Data.new([[1,4],[2,5],[3,6]], :transposed)
  #   t.resolve.data         # => [[1,2,3], [4,5,6]]
  #
  #   i = Data.new([1,4,2,5,3,6], :interleaved)
  #   i.resolve.data         # => [[1,2,3], [4,5,6]]
  #
  # Data is always resolved by calling to_a on the unresolved data object
  # and then rearranging as needed (in the case of simple data, to_a is
  # also called on each member of the unresolved data array). This lazy
  # resolution allows the use of non-array unresolved_data objects such
  # as Data::LazyString:
  #
  #   str = [[1,4,2,5,3,6].pack("g*")].pack("m")
  #   unresolved_data = Data::LazyString.new(str)
  #
  #   i = Data.new(unresolved_data, :interleaved)
  #   i.resolve.data         # => [[1,2,3], [4,5,6]]
  #
  # Obviously the big advantage of lazy data resolution is that Data objects
  # may be instantiated cheaply while expensive operations like unpacking and
  # rearrangement may be put off or not executed at all.
  #
  module Data
    module_function

    # Initializes a new data array of the specified type by forwarding
    # data to the "new_<type>" method.
    #
    #   simple = Ms::Data.new([[1,2,3], [4,5,6]], :simple)
    #   simple.class            # => Ms::Data::Simple
    #
    #   interleaved = Ms::Data.new([1,4,2,5,3,6], :interleaved)
    #   interleaved.class       # => Ms::Data::Interleaved
    #
    # Any additional arguments are forwarded unchanged to the "new_<type>"
    # method, so type-specific options (for instance the n parameter
    # accepted by new_interleaved) can be supplied through this factory.
    # An unknown type raises NoMethodError from the failed dispatch.
    def new(data, type=:simple, *args)
      send("new_#{type}", data, *args)
    end
  end
end
|