mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/README
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
= {Mspire}[http://mspire.rubyforge.org]
|
|
2
|
+
|
|
3
|
+
A library for working with mass spectrometry proteomics data.
|
|
4
|
+
|
|
5
|
+
<em> Mspire is going through a re-write as of version 0.5.0 to support a new
|
|
6
|
+
development model. Many modules are absent but will gradually be added back.
|
|
7
|
+
Use the 0.4 releases as necessary. </em>
|
|
8
|
+
|
|
9
|
+
== Description
|
|
1
10
|
|
|
2
11
|
mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
|
|
3
12
|
working with MS proteomics data in ruby. It seeks to provide support for open
|
|
@@ -5,19 +14,22 @@ standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
|
|
|
5
14
|
TPP) and contribute other useful functionality for working with mass
|
|
6
15
|
spectrometry data in ruby.
|
|
7
16
|
|
|
8
|
-
|
|
9
|
-
|
|
17
|
+
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
|
|
18
|
+
* Github[http://github.com/bahuvrihi/mspire/tree/master]
|
|
19
|
+
* {Google Group}[http://groups.google.com/group/mspire-forum]
|
|
20
|
+
|
|
21
|
+
--
|
|
22
|
+
=== Current Focus
|
|
10
23
|
|
|
11
24
|
The project is currently focusing on the following:
|
|
12
25
|
|
|
13
|
-
* SEQUEST data (particularly the output of Bioworks 3.2-3.3)
|
|
26
|
+
* SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
|
|
14
27
|
* mzXML
|
|
15
28
|
* mzData
|
|
16
29
|
* ProteinProphet
|
|
17
30
|
* Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
|
|
18
31
|
|
|
19
|
-
Features
|
|
20
|
-
--------
|
|
32
|
+
=== Features
|
|
21
33
|
|
|
22
34
|
* mzXML (version 1, 2, and 3) parsing
|
|
23
35
|
* mzData parsing
|
|
@@ -37,31 +49,29 @@ Validation by:
|
|
|
37
49
|
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
|
38
50
|
* Defined sample
|
|
39
51
|
|
|
40
|
-
Spectra and Spectra Identification
|
|
41
|
-
----------------------------------
|
|
52
|
+
=== Spectra and Spectra Identification
|
|
42
53
|
|
|
43
54
|
The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
|
|
44
55
|
|
|
45
56
|
The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
|
|
46
57
|
|
|
47
|
-
Tutorials
|
|
48
|
-
---------
|
|
58
|
+
=== Tutorials
|
|
49
59
|
|
|
50
60
|
* [Database Searching Tutorial](tutorial/database_searching/index.html) -
|
|
51
61
|
Demonstrates two methods for running and analysing Bioworks output to obtain
|
|
52
62
|
false positive rates using mspire executables.
|
|
63
|
+
++
|
|
64
|
+
|
|
65
|
+
== Installation
|
|
53
66
|
|
|
54
|
-
|
|
55
|
-
|
|
67
|
+
Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
|
|
68
|
+
|
|
69
|
+
% gem install mspire
|
|
70
|
+
|
|
71
|
+
= Warning
|
|
56
72
|
|
|
57
73
|
This is an experimental package. As such, all versions prior to version 1.0
|
|
58
74
|
may contain interface changes on minor revisions (major.minor.build) (e.g.,
|
|
59
75
|
0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
|
|
60
76
|
versioning scheme will be strictly adhered to (no interface changes except on
|
|
61
77
|
major revisions).
|
|
62
|
-
|
|
63
|
-
Installation
|
|
64
|
-
------------
|
|
65
|
-
|
|
66
|
-
see [Install](install/index.html)
|
|
67
|
-
|
data/changelog.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
|
|
2
|
+
== version 0.1.7
|
|
3
3
|
|
|
4
4
|
1. A couple of scripts and subroutines were hashing peptides but not on the file
|
|
5
5
|
basename. This would result in slightly incorrect results (any time there
|
|
@@ -33,33 +33,33 @@ Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
|
|
|
33
33
|
called the False Positive Predictive Rate (FPPR). I will probably implement
|
|
34
34
|
this in a future release.
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
== version 0.2.0
|
|
37
37
|
|
|
38
38
|
Revamped the way SpecID works (it is now mixed-in).
|
|
39
39
|
Added support for modifications to bioworks_to_pepxml.rb
|
|
40
40
|
Can read .srf files (nearly interchangeable with bioworks files)
|
|
41
41
|
Redid filter.rb
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
== version 0.2.1
|
|
44
44
|
|
|
45
45
|
minor bugfix
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
== version 0.2.2
|
|
48
48
|
|
|
49
49
|
made compatible with Bioworks fasta file reverser and updated tutorial.
|
|
50
50
|
Killed classify_by_prefix routine in favor of classify_by_false_flag which has
|
|
51
51
|
a prefix option
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
== version 0.2.3
|
|
54
54
|
|
|
55
55
|
in protein_summary.rb added handling for proteins with no annotation. (either
|
|
56
56
|
display NA or use gi2annot to grab them from NCBI)
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
== version 0.2.5
|
|
59
59
|
|
|
60
60
|
renamed prep_list in roc (potential breaks in code)
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
== version 0.2.6
|
|
63
63
|
|
|
64
64
|
1. Massive refactorization of filtering and validation. Validation objects are
|
|
65
65
|
created and then can be used to validate just about anything.
|
|
@@ -75,105 +75,105 @@ appear to influence our analyses, however). Fixed.
|
|
|
75
75
|
2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
|
|
76
76
|
missed cleavages if the last amino acid was a cut point. Fixed.
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
== version 0.2.7
|
|
79
79
|
|
|
80
80
|
1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
|
|
81
81
|
Now, the sample enzyme is set explicitly from the params file and the option
|
|
82
82
|
is not available. This can give more accurate pepxml files than from
|
|
83
83
|
previous depending on your enzyme.
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
== version 0.2.9
|
|
86
86
|
|
|
87
87
|
1. Added support for phobius transmembrane predictions
|
|
88
88
|
2. have filter_and_validate.rb working well (multiple validators allowed).
|
|
89
89
|
3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
|
|
90
90
|
4. Added a bias validator
|
|
91
91
|
|
|
92
|
-
|
|
92
|
+
== version 0.2.10
|
|
93
93
|
|
|
94
94
|
1. Fixed --hits_separate flag in spec_id/filter
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
== version 0.2.11
|
|
97
97
|
|
|
98
98
|
1. Added prob precision support and reorganized filter_and_validate libs
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
== version 0.2.12
|
|
101
101
|
|
|
102
102
|
1. Fixed bug in transmem for prob and others.
|
|
103
103
|
2. Can use axml (XMLParser based) or libxml depending on availability
|
|
104
104
|
|
|
105
|
-
|
|
105
|
+
== version 0.2.13
|
|
106
106
|
|
|
107
107
|
1. Fixed issue with --hits_separate
|
|
108
108
|
2. filter_and_validate.rb requires decoy validator if decoy proteins
|
|
109
109
|
(refactored code)
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
== version 0.2.14
|
|
112
112
|
|
|
113
113
|
1. Can read PeptideProphet files (should be able to read pepxml files, too)
|
|
114
114
|
2. API change: Some slight modifications to the Sequest::PepXML object
|
|
115
115
|
interfaces and implementations (using ArrayClass)
|
|
116
116
|
|
|
117
|
-
|
|
117
|
+
== version 0.2.15
|
|
118
118
|
|
|
119
119
|
1. can convert srf files to sqt files
|
|
120
120
|
|
|
121
|
-
|
|
121
|
+
== version 0.3.0
|
|
122
122
|
|
|
123
123
|
1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
|
|
124
124
|
2. SQT export is correct and works at least on 3.2 and 3.3.1.
|
|
125
125
|
|
|
126
|
-
|
|
126
|
+
== version 0.3.1
|
|
127
127
|
|
|
128
128
|
1. Bug fix in srf filtering (num_hits adjusted)
|
|
129
129
|
|
|
130
|
-
|
|
130
|
+
== version 0.3.2
|
|
131
131
|
|
|
132
132
|
1. Uses sequest peptide_mass_tolerance filter on srf group files by default
|
|
133
133
|
now.
|
|
134
134
|
|
|
135
|
-
|
|
135
|
+
== version 0.3.3
|
|
136
136
|
|
|
137
137
|
1. Worked out minor kinks in prob_precision.rb
|
|
138
138
|
|
|
139
|
-
|
|
139
|
+
== version 0.3.4
|
|
140
140
|
|
|
141
141
|
1. filters >= +3 charged ions now.
|
|
142
142
|
|
|
143
|
-
|
|
143
|
+
== version 0.3.5
|
|
144
144
|
|
|
145
145
|
1. fixed creation of background distribution in validators (hash_by base_name,
|
|
146
146
|
first_scan, charge now)
|
|
147
147
|
|
|
148
|
-
|
|
148
|
+
== version 0.3.6
|
|
149
149
|
|
|
150
150
|
1. split off bad_aa_est from bad_aa
|
|
151
151
|
|
|
152
|
-
|
|
152
|
+
== version 0.3.7
|
|
153
153
|
|
|
154
154
|
1. can deal with No_Enzyme searches now (while still capable of setting
|
|
155
155
|
sample_enzyme)
|
|
156
156
|
|
|
157
|
-
|
|
157
|
+
== version 0.3.8
|
|
158
158
|
|
|
159
159
|
1. can set a decoy to target ratio for decoy validation
|
|
160
160
|
2. added mass calculator in Mass::Calculator
|
|
161
161
|
|
|
162
|
-
|
|
162
|
+
== version 0.3.9
|
|
163
163
|
|
|
164
164
|
1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
|
|
165
165
|
|
|
166
|
-
|
|
166
|
+
== version 0.3.10
|
|
167
167
|
|
|
168
168
|
1. added run_percolator.rb script which makes running multiple files easy
|
|
169
169
|
|
|
170
|
-
|
|
170
|
+
== version 0.3.11
|
|
171
171
|
|
|
172
172
|
1. faster sensing of bad scan tags in mzXML v. 2.0 files
|
|
173
173
|
2. implemented lazy evaluation of spectrum in 2 different ways allowing much
|
|
174
174
|
larger files to be parsed
|
|
175
175
|
|
|
176
|
-
|
|
176
|
+
== version 0.4.0
|
|
177
177
|
|
|
178
178
|
1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
|
|
179
179
|
2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
|
|
@@ -182,46 +182,15 @@ larger files to be parsed
|
|
|
182
182
|
eval methods (however, the method intensity_at_mz will still work (causing
|
|
183
183
|
evaluation))
|
|
184
184
|
|
|
185
|
-
|
|
185
|
+
== version 0.4.1
|
|
186
186
|
|
|
187
187
|
1. added support for reading mzXML version 3.0 (may fail in some cases)
|
|
188
188
|
|
|
189
|
-
|
|
189
|
+
== version 0.4.2
|
|
190
190
|
|
|
191
191
|
1. added MS::MSRun.open method
|
|
192
192
|
2. added method to write dta files from SRF
|
|
193
193
|
|
|
194
|
-
|
|
194
|
+
== version 0.4.3
|
|
195
195
|
|
|
196
196
|
1. added to_mfg_file from SRF
|
|
197
|
-
2. added to_dta_files from SRF complete with streaming .tar.gz output (and
|
|
198
|
-
supporting .zip output but it has to make tmp files)
|
|
199
|
-
|
|
200
|
-
## version 0.4.4
|
|
201
|
-
1. implemented q-value and pi_0 methods of Storey
|
|
202
|
-
2. can do complete q-value calculations given p-values
|
|
203
|
-
3. can determine a pi_0 given a list of target and decoy values (as booleans)
|
|
204
|
-
4. can determine a pi_0 given a list containing numbers of decoy and target
|
|
205
|
-
values as is often encountered with filtering
|
|
206
|
-
5. prob_validate.rb implements a q-value option for turning PeptideProphet
|
|
207
|
-
probabilities into q-values
|
|
208
|
-
6. filter_validate.rb implements a p value method using xcorr values, however,
|
|
209
|
-
this is not very effective since xcorr values underrepresent the the
|
|
210
|
-
difference between good hits and bad hits
|
|
211
|
-
|
|
212
|
-
## version 0.4.5
|
|
213
|
-
1. using pi_zero instead of decoy_to_target_ratio. While all tests are
|
|
214
|
-
passing, this release should be considered experimental with the use of any
|
|
215
|
-
target-decoy validation.
|
|
216
|
-
|
|
217
|
-
## version 0.4.6
|
|
218
|
-
1. added NOTE to --to_qvalues option to include all results (no low prob
|
|
219
|
-
filter)
|
|
220
|
-
|
|
221
|
-
## version 0.4.7
|
|
222
|
-
1. Added ability to quickly grab sequest params out of a .SRF file
|
|
223
|
-
2. Added helpful runtime error if print_duplicate_references is 0.
|
|
224
|
-
|
|
225
|
-
## version 0.4.9
|
|
226
|
-
1. quiet some unneeded output and fixed truncation of filenames with '.'
|
|
227
|
-
inside them.
|
data/lib/ms/calc.rb
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Ms
  # Helpers for parts-per-million (ppm) tolerance arithmetic on m/z values.
  # All methods are module functions (callable as Ms::Calc.ppm_tol_at, etc).
  module Calc
    module_function

    #
    # ppm calculations... maybe use RUnit
    #

    # Returns the absolute tolerance corresponding to ppm parts-per-million
    # at the given mz. The leading 1.0 forces floating point arithmetic so
    # integer inputs are not truncated.
    def ppm_tol_at(mz, ppm)
      1.0 * mz * ppm / 10**6
    end

    # Returns a two element array [low, high] bounding mz by +/- the ppm
    # tolerance. mz is coerced with to_f, matching ppm_range_at, so the two
    # methods accept the same inputs (e.g. a numeric string).
    def ppm_span_at(mz, ppm)
      mz = mz.to_f
      tol = ppm_tol_at(mz, ppm)
      [mz - tol, mz + tol]
    end

    # Returns a Range from (mz - tol) up to but excluding (mz + tol), where
    # tol is the ppm tolerance at mz. mz is coerced with to_f.
    def ppm_range_at(mz, ppm)
      mz = mz.to_f
      tol = ppm_tol_at(mz, ppm)
      (mz - tol)...(mz + tol)
    end


    # Rounds n to the specified precision (ie number of decimal places)
    # def round(n, precision)
    #   factor = 10**precision.to_i
    #   (n * factor).round.to_f / factor
    # end

  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
require 'ms/data/simple'
|
|
2
|
+
|
|
3
|
+
module Ms
|
|
4
|
+
module Data
|
|
5
|
+
module_function
|
|
6
|
+
|
|
7
|
+
# Initializes a new interleaved data array.
|
|
8
|
+
def new_interleaved(unresolved_data, n=2)
  # Forward the caller's n. The previous form passed the expression `n=2`,
  # which reassigned n and always produced a 2-way interleave.
  Interleaved.new(unresolved_data, n)
end
|
|
11
|
+
|
|
12
|
+
# An Interleaved data array lazily evaluates its unresolved data as
|
|
13
|
+
# an interleaved array of n members. The unresolved data is evaluated
|
|
14
|
+
# into an array using to_a.
|
|
15
|
+
#
|
|
16
|
+
# i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
|
|
17
|
+
# i.unresolved_data # => [1,4,2,5,3,6]
|
|
18
|
+
# i.data # => []
|
|
19
|
+
# i[0] # => [1,2,3]
|
|
20
|
+
# i[1] # => [4,5,6]
|
|
21
|
+
# i.data # => [[1,2,3], [4,5,6]]
|
|
22
|
+
#
|
|
23
|
+
class Interleaved < Simple
  # The number of series interleaved in the unresolved data.
  attr_reader :n

  # unresolved_data:: any object whose to_a returns the flat, interleaved
  #                   values
  # n::               the number of interleaved series (default 2)
  #
  # Raises ArgumentError if n is not a positive integer; a zero or negative
  # n cannot be de-interleaved.
  def initialize(unresolved_data, n=2)
    unless n.is_a?(Integer) && n > 0
      raise ArgumentError, "n must be a positive integer (#{n.inspect})"
    end

    @n = n
    super(unresolved_data)
  end

  # Returns the resolved series at index, resolving all data on first access.
  def [](index)
    resolve.data[index]
  end

  # True once the data has been de-interleaved into series.
  def resolved?
    !@data.empty?
  end

  # De-interleaves the unresolved data into n series stored in data, and
  # returns self. Does nothing if already resolved.
  #
  # Raises ArgumentError if the number of values is not a multiple of n.
  def resolve
    return(self) if resolved?

    values = @unresolved_data.to_a

    unless values.length % n == 0
      raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
    end

    n.times { @data << [] }

    # value i belongs to series (i mod n)
    values.each_with_index do |item, i|
      @data[i % n] << item
    end

    self
  end

end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
module Ms
  module Data

    # LazyIO represents data to be lazily read from an IO.  To read the data
    # from the IO, either string or to_a may be called (to_a unpacks the
    # string into an array using the decode_format and unpack_format).
    #
    # LazyIO is a suitable unresolved_data source for Ms::Data formats.
    class LazyIO
      NETWORK_FLOAT = 'g*'
      NETWORK_DOUBLE = 'G*'
      LITTLE_ENDIAN_FLOAT = 'e*'
      LITTLE_ENDIAN_DOUBLE = 'E*'
      BASE_64 = 'm'

      class << self
        # Returns the unpacking code for the given precision (32 or 64-bit)
        # and network order (true for big-endian).
        #
        # Raises ArgumentError for any other precision.
        def unpack_code(precision, network_order)
          case precision
          when 32 then network_order ? NETWORK_FLOAT : LITTLE_ENDIAN_FLOAT
          when 64 then network_order ? NETWORK_DOUBLE : LITTLE_ENDIAN_DOUBLE
          else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
          end
        end
      end

      # The IO from which string is read
      attr_reader :io

      # The start index for reading string
      attr_reader :start_index

      # The number of bytes to be read from io when evaluating string
      attr_reader :num_bytes

      # Indicates the unpacking format
      attr_reader :unpack_format

      # Indicates a decoding format, may be false to unpack string
      # without decoding.
      attr_reader :decode_format

      # io::            the IO to read from
      # start_index::   position in io at which reading starts (defaults to
      #                 the current position of io)
      # num_bytes::     number of bytes to read (passed to io.read)
      # unpack_format:: String#unpack directive for the numeric values
      # decode_format:: String#unpack directive applied first to decode the
      #                 raw string, or false to skip decoding
      def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
        @io = io
        @start_index = start_index
        @num_bytes = num_bytes
        @unpack_format = unpack_format
        @decode_format = decode_format
      end

      # Positions io at start_index and reads a string of num_bytes length.
      # The string is newly read from io each time string is called.
      def string
        io.pos = start_index unless io.pos == start_index
        io.read(num_bytes)
      end

      # Resets the cached array (returned by to_a) so that the array will
      # be re-read from io.
      def reset
        @array = nil
      end

      # Reads string and unpacks using decode_format and unpack_code.  The
      # array is cached internally; to re-read the array, use reset.
      #
      # If nothing could be read (io.read returns nil at end of file when a
      # length is given) an empty array is returned rather than failing on nil.
      def to_a
        @array ||= begin
          raw = string || ''
          raw = raw.unpack(decode_format)[0] if decode_format
          raw.unpack(unpack_format)
        end
      end

    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require 'ms/data/lazy_io'
|
|
2
|
+
require 'stringio'
|
|
3
|
+
|
|
4
|
+
module Ms
  module Data

    # LazyString is a LazyIO initialized from a string, which is converted into
    # a StringIO.
    class LazyString < LazyIO
      # string::        the (optionally encoded) packed data
      # unpack_format:: String#unpack directive for the numeric values
      # decode_format:: String#unpack directive used to decode string first,
      #                 or false to skip decoding
      def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
        # num_bytes is handed to io.read, which counts bytes, so use bytesize;
        # length counts characters and would truncate multibyte strings.
        super(StringIO.new(string), 0, string.bytesize, unpack_format, decode_format)
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
module Ms
  module Data
    module_function

    # Initializes a new simple data array.
    def new_simple(unresolved_data)
      Simple.new(unresolved_data)
    end

    # A Simple data array that lazily evaluates unresolved_data, and
    # each member of unresolved_data using to_a:
    #
    #   class LazyObject
    #     attr_reader :to_a
    #     def initialize(array)
    #       @to_a = array
    #     end
    #   end
    #
    #   a = LazyObject.new([1,2,3])
    #   b = LazyObject.new([4,5,6])
    #   s = Ms::Data::Simple.new([a, b])
    #
    #   s.unresolved_data       # => [a, b]
    #   s.data                  # => []
    #   s[0]                    # => [1,2,3]
    #   s[1]                    # => [4,5,6]
    #   s.data                  # => [[1,2,3], [4,5,6]]
    #
    class Simple
      # The underlying resolved data store.
      attr_reader :data

      # The underlying unresolved data store.
      attr_reader :unresolved_data

      # unresolved_data:: any object whose to_a returns the members; each
      #                   member is itself resolved with to_a on access
      def initialize(unresolved_data)
        @data = []
        @unresolved_data = unresolved_data
      end

      # Returns the resolved member at index, resolving and caching it on
      # first access.
      def [](index)
        @data[index] ||= @unresolved_data.to_a[index].to_a
      end

      # Resolves every member and returns self.  The members are enumerated
      # through to_a (as in []), so unresolved_data need only respond to to_a.
      def resolve
        unless resolved?
          @unresolved_data.to_a.each_index do |index|
            self[index]
          end
        end

        self
      end

      # True when every member of unresolved_data has been resolved.  The
      # member count is taken from to_a, consistent with [] and resolve.
      def resolved?
        @data.compact.length == @unresolved_data.to_a.length
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
require 'ms/data/simple'
|
|
2
|
+
|
|
3
|
+
module Ms
  module Data
    module_function

    # Builds a Transposed data array wrapping unresolved_data.
    def new_transposed(unresolved_data)
      Transposed.new(unresolved_data)
    end

    # A Transposed data array lazily evaluates its unresolved data as
    # a transposed array.  The unresolved data is converted to an array
    # with to_a and then transposed.
    #
    #   t = Ms::Data::Transposed.new([[1,4],[2,5],[3,6]])
    #
    #   t.unresolved_data       # => [[1,4],[2,5],[3,6]]
    #   t.data                  # => []
    #   t[0]                    # => [1,2,3]
    #   t[1]                    # => [4,5,6]
    #   t.data                  # => [[1,2,3], [4,5,6]]
    #
    class Transposed < Simple

      # Returns the resolved series at index; the whole data set is
      # transposed the first time any index is requested.
      def [](index)
        resolve
        data[index]
      end

      # True once the transposed data has been stored (i.e. data is
      # no longer empty).
      def resolved?
        @data.empty? ? false : true
      end

      # Transposes the unresolved data into data unless that has already
      # been done.  Returns self.
      def resolve
        return self if resolved?

        @data = @unresolved_data.to_a.transpose
        self
      end

    end
  end
end
|
data/lib/ms/data.rb
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'ms/data/interleaved'
|
|
2
|
+
require 'ms/data/transposed'
|
|
3
|
+
|
|
4
|
+
module Ms

  # The Data module contains a number of classes providing a standard way to
  # resolve various data storage formats into a 'simple' data array.
  #
  #   type         format
  #   simple       [[mzs,...], [intensities...]]
  #   transposed   [[mz,intensity], [mz,intensity], ...]
  #   interleaved  [mz,intensity,mz,intensity,...]
  #
  # For instance:
  #
  #   s = Data.new([[1,2,3], [4,5,6]], :simple)
  #   s.resolve.data          # => [[1,2,3], [4,5,6]]
  #
  #   t = Data.new([[1,4],[2,5],[3,6]], :transposed)
  #   t.resolve.data          # => [[1,2,3], [4,5,6]]
  #
  #   i = Data.new([1,4,2,5,3,6], :interleaved)
  #   i.resolve.data          # => [[1,2,3], [4,5,6]]
  #
  # Data is always resolved by calling to_a on the unresolved data object
  # and then rearranging as needed (in the case of simple data, to_a is
  # also called on each member of the unresolved data array).  This lazy
  # resolution allows the use of non-array unresolved_data objects such
  # as Data::LazyString:
  #
  #   str = [[1,4,2,5,3,6].pack("g*")].pack("m")
  #   unresolved_data = Data::LazyString.new(str)
  #
  #   i = Data.new(unresolved_data, :interleaved)
  #   i.resolve.data          # => [[1,2,3], [4,5,6]]
  #
  # Obviously the big advantage of lazy data resolution is that Data objects
  # may be instantiated cheaply while expensive operations like unpacking and
  # rearrangement may be put off or not executed at all.
  #
  module Data
    module_function

    # Initializes a new data array of the specified type by forwarding
    # data to the "new_<type>" method.
    #
    #   simple = Ms::Data.new([[1,2,3], [4,5,6]], :simple)
    #   simple.class        # => Ms::Data::Simple
    #
    #   interleaved = Ms::Data.new([1,4,2,5,3,6], :interleaved)
    #   interleaved.class   # => Ms::Data::Interleaved
    #
    # Any additional arguments are passed through to the "new_<type>"
    # method after data, so type-specific options can be supplied:
    #
    #   Ms::Data.new([1,4,7,2,5,8], :interleaved, 3)
    #
    # An unknown type results in a NoMethodError from send.
    def new(data, type=:simple, *args)
      send("new_#{type}", data, *args)
    end
  end
end
|