mspire 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
metadata
CHANGED
@@ -1,88 +1,205 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease:
|
5
|
+
version: 0.6.1
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
|
-
- John Prince
|
8
|
+
- John T. Prince
|
8
9
|
- Simon Chiang
|
9
10
|
autorequire:
|
10
11
|
bindir: bin
|
11
12
|
cert_chain: []
|
12
13
|
|
13
|
-
date:
|
14
|
-
default_executable:
|
14
|
+
date: 2012-01-25 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
17
|
+
name: nokogiri
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "1.5"
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: "2.6"
|
18
36
|
type: :development
|
19
|
-
|
20
|
-
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: jeweler
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
21
43
|
requirements:
|
22
|
-
- -
|
44
|
+
- - ~>
|
23
45
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
25
|
-
|
46
|
+
version: 1.5.2
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id003
|
26
49
|
- !ruby/object:Gem::Dependency
|
27
|
-
name:
|
50
|
+
name: rcov
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
28
58
|
type: :development
|
29
|
-
|
30
|
-
|
59
|
+
version_requirements: *id004
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: obo
|
62
|
+
prerelease: false
|
63
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
31
65
|
requirements:
|
32
|
-
- - "
|
66
|
+
- - ">="
|
33
67
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.
|
35
|
-
|
36
|
-
|
37
|
-
|
68
|
+
version: 0.1.0
|
69
|
+
type: :development
|
70
|
+
version_requirements: *id005
|
71
|
+
description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems
|
72
|
+
email: jtprince@gmail.com
|
38
73
|
executables: []
|
39
74
|
|
40
75
|
extensions: []
|
41
76
|
|
42
77
|
extra_rdoc_files:
|
43
|
-
- changelog.txt
|
44
78
|
- LICENSE
|
45
|
-
- README
|
79
|
+
- README.rdoc
|
46
80
|
files:
|
81
|
+
- LICENSE
|
82
|
+
- README.rdoc
|
83
|
+
- Rakefile
|
84
|
+
- VERSION
|
85
|
+
- lib/cv.rb
|
86
|
+
- lib/cv/description.rb
|
87
|
+
- lib/cv/param.rb
|
88
|
+
- lib/io/bookmark.rb
|
89
|
+
- lib/merge.rb
|
47
90
|
- lib/ms.rb
|
48
|
-
- lib/ms/
|
49
|
-
- lib/ms/
|
50
|
-
- lib/ms/
|
51
|
-
- lib/ms/
|
52
|
-
- lib/ms/
|
53
|
-
- lib/ms/
|
54
|
-
- lib/ms/
|
55
|
-
- lib/ms/
|
56
|
-
- lib/ms/
|
91
|
+
- lib/ms/cvlist.rb
|
92
|
+
- lib/ms/digester.rb
|
93
|
+
- lib/ms/fasta.rb
|
94
|
+
- lib/ms/ident.rb
|
95
|
+
- lib/ms/ident/peptide.rb
|
96
|
+
- lib/ms/ident/peptide/db.rb
|
97
|
+
- lib/ms/ident/peptide_hit.rb
|
98
|
+
- lib/ms/ident/peptide_hit/qvalue.rb
|
99
|
+
- lib/ms/ident/pepxml.rb
|
100
|
+
- lib/ms/ident/pepxml/modifications.rb
|
101
|
+
- lib/ms/ident/pepxml/msms_pipeline_analysis.rb
|
102
|
+
- lib/ms/ident/pepxml/msms_run_summary.rb
|
103
|
+
- lib/ms/ident/pepxml/parameters.rb
|
104
|
+
- lib/ms/ident/pepxml/sample_enzyme.rb
|
105
|
+
- lib/ms/ident/pepxml/search_database.rb
|
106
|
+
- lib/ms/ident/pepxml/search_hit.rb
|
107
|
+
- lib/ms/ident/pepxml/search_hit/modification_info.rb
|
108
|
+
- lib/ms/ident/pepxml/search_result.rb
|
109
|
+
- lib/ms/ident/pepxml/search_summary.rb
|
110
|
+
- lib/ms/ident/pepxml/spectrum_query.rb
|
111
|
+
- lib/ms/ident/protein.rb
|
112
|
+
- lib/ms/ident/protein_group.rb
|
113
|
+
- lib/ms/ident/search.rb
|
114
|
+
- lib/ms/isotope/aa.rb
|
115
|
+
- lib/ms/mascot.rb
|
116
|
+
- lib/ms/mass.rb
|
117
|
+
- lib/ms/mass/aa.rb
|
118
|
+
- lib/ms/mzml.rb
|
119
|
+
- lib/ms/mzml/index_list.rb
|
120
|
+
- lib/ms/mzml/plms1.rb
|
121
|
+
- lib/ms/obo.rb
|
122
|
+
- lib/ms/plms1.rb
|
123
|
+
- lib/ms/quant/qspec.rb
|
124
|
+
- lib/ms/quant/qspec/protein_group_comparison.rb
|
57
125
|
- lib/ms/spectrum.rb
|
58
|
-
-
|
59
|
-
-
|
60
|
-
-
|
61
|
-
|
62
|
-
|
126
|
+
- lib/msplat.rb
|
127
|
+
- lib/obo/ims.rb
|
128
|
+
- lib/obo/ms.rb
|
129
|
+
- lib/obo/ontology.rb
|
130
|
+
- lib/obo/unit.rb
|
131
|
+
- lib/openany.rb
|
132
|
+
- lib/write_file_or_string.rb
|
133
|
+
- obo/ims.obo
|
134
|
+
- obo/ms.obo
|
135
|
+
- obo/unit.obo
|
136
|
+
- spec/ms/cvlist_spec.rb
|
137
|
+
- spec/ms/digester_spec.rb
|
138
|
+
- spec/ms/fasta_spec.rb
|
139
|
+
- spec/ms/ident/peptide/db_spec.rb
|
140
|
+
- spec/ms/ident/pepxml/sample_enzyme_spec.rb
|
141
|
+
- spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
|
142
|
+
- spec/ms/ident/pepxml_spec.rb
|
143
|
+
- spec/ms/ident/protein_group_spec.rb
|
144
|
+
- spec/ms/mass_spec.rb
|
145
|
+
- spec/ms/mzml/index_list_spec.rb
|
146
|
+
- spec/ms/mzml/plms1_spec.rb
|
147
|
+
- spec/ms/mzml_spec.rb
|
148
|
+
- spec/ms/plms1_spec.rb
|
149
|
+
- spec/ms/quant/qspec_spec.rb
|
150
|
+
- spec/msplat_spec.rb
|
151
|
+
- spec/obo_spec.rb
|
152
|
+
- spec/spec_helper.rb
|
153
|
+
- spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta
|
154
|
+
- spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml
|
155
|
+
- spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML
|
156
|
+
- spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML
|
157
|
+
- spec/testfiles/ms/quant/kill_extra_tabs.rb
|
158
|
+
- spec/testfiles/ms/quant/max_quant_output.provenance.txt
|
159
|
+
- spec/testfiles/ms/quant/max_quant_output.txt
|
160
|
+
- spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv
|
161
|
+
- spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp
|
162
|
+
- spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv
|
163
|
+
- spec/testfiles/ms/quant/pdcd5_final.txt
|
164
|
+
- spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp
|
165
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv
|
166
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv
|
167
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv
|
168
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv
|
169
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp
|
170
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv
|
171
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt
|
172
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt
|
173
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp
|
174
|
+
- spec/testfiles/ms/quant/remove_rest_of_proteins.rb
|
175
|
+
- spec/testfiles/ms/quant/unlog_transform.rb
|
176
|
+
- spec/testfiles/plms1/output.key
|
177
|
+
homepage: http://github.com/princelab/mspire
|
178
|
+
licenses:
|
179
|
+
- MIT
|
63
180
|
post_install_message:
|
64
181
|
rdoc_options: []
|
65
182
|
|
66
183
|
require_paths:
|
67
184
|
- lib
|
68
185
|
required_ruby_version: !ruby/object:Gem::Requirement
|
186
|
+
none: false
|
69
187
|
requirements:
|
70
188
|
- - ">="
|
71
189
|
- !ruby/object:Gem::Version
|
72
190
|
version: "0"
|
73
|
-
version:
|
74
191
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
192
|
+
none: false
|
75
193
|
requirements:
|
76
194
|
- - ">="
|
77
195
|
- !ruby/object:Gem::Version
|
78
196
|
version: "0"
|
79
|
-
version:
|
80
197
|
requirements: []
|
81
198
|
|
82
|
-
rubyforge_project:
|
83
|
-
rubygems_version: 1.
|
199
|
+
rubyforge_project:
|
200
|
+
rubygems_version: 1.8.10
|
84
201
|
signing_key:
|
85
|
-
specification_version:
|
86
|
-
summary:
|
202
|
+
specification_version: 3
|
203
|
+
summary: mass spectrometry proteomics, lipidomics, and tools
|
87
204
|
test_files: []
|
88
205
|
|
data/README
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
= {Mspire}[http://mspire.rubyforge.org]
|
2
|
-
|
3
|
-
A library for working with mass spectrometry proteomics data.
|
4
|
-
|
5
|
-
<em> Mspire is going through a re-write as of version 0.5.0 to support a new
|
6
|
-
development model. Many modules are absent but will gradually be added back.
|
7
|
-
Use the 0.4 releases as necessary. </em>
|
8
|
-
|
9
|
-
== Description
|
10
|
-
|
11
|
-
mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
|
12
|
-
working with MS proteomics data in ruby. It seeks to provide support for open
|
13
|
-
standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
|
14
|
-
TPP) and contribute other useful functionality for working with mass
|
15
|
-
spectrometry data in ruby.
|
16
|
-
|
17
|
-
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
|
18
|
-
* Github[http://github.com/bahuvrihi/mspire/tree/master]
|
19
|
-
* {Google Group}[http://groups.google.com/group/mspire-forum]
|
20
|
-
|
21
|
-
--
|
22
|
-
=== Current Focus
|
23
|
-
|
24
|
-
The project is currently focusing on the following:
|
25
|
-
|
26
|
-
* SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
|
27
|
-
* mzXML
|
28
|
-
* mzData
|
29
|
-
* ProteinProphet
|
30
|
-
* Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
|
31
|
-
|
32
|
-
=== Features
|
33
|
-
|
34
|
-
* mzXML (version 1, 2, and 3) parsing
|
35
|
-
* mzData parsing
|
36
|
-
* bioworks .srf (binary files) reader
|
37
|
-
* read/write .sqt files
|
38
|
-
* bioworks to PeptideProphet input (pepXML files)
|
39
|
-
* lightweight APEX values parser
|
40
|
-
* histogram protein probabilities
|
41
|
-
* protein summary views with custom false ID cutoff values
|
42
|
-
* conversion to OBI-Warp input files
|
43
|
-
* portable: works across platforms
|
44
|
-
|
45
|
-
Validation by:
|
46
|
-
* Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
|
47
|
-
* Amino acid (e.g., search for unblocked cysteines)
|
48
|
-
* Transmembrane prediction (Phobius or TopPred)
|
49
|
-
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
50
|
-
* Defined sample
|
51
|
-
|
52
|
-
=== Spectra and Spectra Identification
|
53
|
-
|
54
|
-
The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
|
55
|
-
|
56
|
-
The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
|
57
|
-
|
58
|
-
=== Tutorials
|
59
|
-
|
60
|
-
* [Database Searching Tutorial](tutorial/database_searching/index.html) -
|
61
|
-
Demonstrates two methods for running and analysing Bioworks output to obtain
|
62
|
-
false positive rates using mspire executables.
|
63
|
-
++
|
64
|
-
|
65
|
-
== Installation
|
66
|
-
|
67
|
-
Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
|
68
|
-
|
69
|
-
% gem install mspire
|
70
|
-
|
71
|
-
= Warning
|
72
|
-
|
73
|
-
This is an experimental package. As such, all versions prior to version 1.0
|
74
|
-
may contain interface changes on minor revisions (major.minor.build) (e.g.,
|
75
|
-
0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
|
76
|
-
versioning scheme will be strictly adhered to (no interface changes except on
|
77
|
-
major revisions).
|
data/changelog.txt
DELETED
@@ -1,196 +0,0 @@
|
|
1
|
-
|
2
|
-
== version 0.1.7
|
3
|
-
|
4
|
-
1. A couple of scripts and subroutines were hashing peptides but not on the file
|
5
|
-
basename. This would result in slightly incorrect results (any time there
|
6
|
-
were overlapping scan numbers in multiple datasets, only the top one would be
|
7
|
-
chosen). The results would be correct for single runs.
|
8
|
-
|
9
|
-
Output files that could be affected:
|
10
|
-
*.top_per_scan.txt
|
11
|
-
*.all_peps_per_scan.txt
|
12
|
-
|
13
|
-
Scripts that could be affected:
|
14
|
-
script/top_hit_per_scan.rb
|
15
|
-
bin/filter_spec_id.rb
|
16
|
-
script/filter-peps.rb
|
17
|
-
bin/id_precision.rb
|
18
|
-
|
19
|
-
Subroutines that were affected:
|
20
|
-
spec_id.rb (pep_probs_by_* )
|
21
|
-
spec_id.rb (top_peps_prefilter!)
|
22
|
-
proph.rb uniq_by_seqcharge
|
23
|
-
align.rb called uniq_by_seqcharge
|
24
|
-
|
25
|
-
|
26
|
-
2. false_positive_rate.rb and protein_summary.rb (by extension) were using
|
27
|
-
number of true positives on the x axis while in reality I was plotting the
|
28
|
-
number of hits. I've updated x axis labels to reflect this change. In
|
29
|
-
addition, since the term 'false positive rate' has such a distinct definition
|
30
|
-
in classical ROC plots and binary statistics, I've decided to work primarily
|
31
|
-
in terms of precision (TP/(TP+FP)). I've purged the terms 'False Positive
|
32
|
-
Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
|
33
|
-
called the False Positive Predictive Rate (FPPR). I will probably implement
|
34
|
-
this in a future release.
|
35
|
-
|
36
|
-
== version 0.2.0
|
37
|
-
|
38
|
-
Revamped the way SpecID works (it is now mixed-in).
|
39
|
-
Added support for modifications to bioworks_to_pepxml.rb
|
40
|
-
Can read .srf files (nearly interchangeable with bioworks files)
|
41
|
-
Redid filter.rb
|
42
|
-
|
43
|
-
== version 0.2.1
|
44
|
-
|
45
|
-
minor bugfix
|
46
|
-
|
47
|
-
== version 0.2.2
|
48
|
-
|
49
|
-
made compatible with Bioworks fasta file reverser and updated tutorial.
|
50
|
-
Killed classify_by_prefix routine in favor of classify_by_false_flag which has
|
51
|
-
a prefix option
|
52
|
-
|
53
|
-
== version 0.2.3
|
54
|
-
|
55
|
-
in protein_summary.rb added handling for proteins with no annotation. (either
|
56
|
-
display NA or use gi2annnot to grab them from NCBI)
|
57
|
-
|
58
|
-
== version 0.2.5
|
59
|
-
|
60
|
-
renamed prep_list in roc (potential breaks in code)
|
61
|
-
|
62
|
-
== version 0.2.6
|
63
|
-
|
64
|
-
1. Massive refactorization of filtering and validation. Validation objects are
|
65
|
-
created and then can be used to validate just about anything.
|
66
|
-
2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
|
67
|
-
(including readw broken output), and mzData (even Thermo's broken output).
|
68
|
-
4. Moved all tests to specs (rspec).
|
69
|
-
5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
|
70
|
-
2.X)
|
71
|
-
|
72
|
-
Bugfixes:
|
73
|
-
1. The search_summary 'base_name' in pepxml output was incorrect (this did not
|
74
|
-
appear to influence our analyses, however). Fixed.
|
75
|
-
2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
|
76
|
-
missed cleavages if the last amino acid was a cut point. Fixed.
|
77
|
-
|
78
|
-
== version 0.2.7
|
79
|
-
|
80
|
-
1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
|
81
|
-
Now, the sample enzyme is set explicitly from the params file and the option
|
82
|
-
is not available. This can give more accurate pepxml files than from
|
83
|
-
previous depending on your enzyme.
|
84
|
-
|
85
|
-
== version 0.2.9
|
86
|
-
|
87
|
-
1. Added support for phobius transmembrane predictions
|
88
|
-
2. have filter_and_validate.rb working well (multiple validators allowed).
|
89
|
-
3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
|
90
|
-
4. Added a bias validator
|
91
|
-
|
92
|
-
== version 0.2.10
|
93
|
-
|
94
|
-
1. Fixed --hits_separate flag in spec_id/filter
|
95
|
-
|
96
|
-
== version 0.2.11
|
97
|
-
|
98
|
-
1. Added prob precision support and reorganized filter_and_validate libs
|
99
|
-
|
100
|
-
== version 0.2.12
|
101
|
-
|
102
|
-
1. Fixed bug in transmem for prob and others.
|
103
|
-
2. Can use axml (XMLParser based) or libxml depending on availability
|
104
|
-
|
105
|
-
== version 0.2.13
|
106
|
-
|
107
|
-
1. Fixed issue with --hits_separate
|
108
|
-
2. filter_and_validate.rb requires decoy validator if decoy proteins
|
109
|
-
(refactored code)
|
110
|
-
|
111
|
-
== version 0.2.14
|
112
|
-
|
113
|
-
1. Can read PeptideProphet files (should be able to read pepxml files, too)
|
114
|
-
2. API change: Some slight modifications to the Sequest::PepXML object
|
115
|
-
interfaces and implementations (using ArrayClass)
|
116
|
-
|
117
|
-
== version 0.2.15
|
118
|
-
|
119
|
-
1. can convert srf files to sqt files
|
120
|
-
|
121
|
-
== version 0.3.0
|
122
|
-
|
123
|
-
1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
|
124
|
-
2. SQT export is correct and works at least on 3.2 and 3.3.1.
|
125
|
-
|
126
|
-
== version 0.3.1
|
127
|
-
|
128
|
-
1. Bug fix in srf filtering (num_hits adjusted)
|
129
|
-
|
130
|
-
== version 0.3.2
|
131
|
-
|
132
|
-
1. Uses sequest peptide_mass_tolerance filter on srf group files by default
|
133
|
-
now.
|
134
|
-
|
135
|
-
== version 0.3.3
|
136
|
-
|
137
|
-
1. Worked out minor kinks in prob_precision.rb
|
138
|
-
|
139
|
-
== version 0.3.4
|
140
|
-
|
141
|
-
1. filters >= +3 charged ions now.
|
142
|
-
|
143
|
-
== version 0.3.5
|
144
|
-
|
145
|
-
1. fixed creation of background distribution in validators (hash_by base_name,
|
146
|
-
first_scan, charge now)
|
147
|
-
|
148
|
-
== version 0.3.6
|
149
|
-
|
150
|
-
1. split off bad_aa_est from bad_aa
|
151
|
-
|
152
|
-
== version 0.3.7
|
153
|
-
|
154
|
-
1. can deal with No_Enzyme searches now (while still capable of setting
|
155
|
-
sample_enzyme)
|
156
|
-
|
157
|
-
== version 0.3.8
|
158
|
-
|
159
|
-
1. can set a decoy to target ratio for decoy validation
|
160
|
-
2. added mass calculator in Mass::Calculator
|
161
|
-
|
162
|
-
== version 0.3.9
|
163
|
-
|
164
|
-
1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
|
165
|
-
|
166
|
-
== version 0.3.10
|
167
|
-
|
168
|
-
1. added run_percolator.rb script which makes running multiple files easy
|
169
|
-
|
170
|
-
== version 0.3.11
|
171
|
-
|
172
|
-
1. faster sensing of bad scan tags in mzXML v. 2.0 files
|
173
|
-
2. implemented lazy evaluation of spectrum in 2 different ways allowing much
|
174
|
-
larger files to be parsed
|
175
|
-
|
176
|
-
== version 0.4.0
|
177
|
-
|
178
|
-
1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
|
179
|
-
2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
|
180
|
-
3. lazy eval working on mzData
|
181
|
-
4. mzData not necessarily guaranteed to have precursor intensities on lazy
|
182
|
-
eval methods (however, the method intensity_at_mz will still work (causing
|
183
|
-
evaluation))
|
184
|
-
|
185
|
-
== version 0.4.1
|
186
|
-
|
187
|
-
1. added support for reading mzXML version 3.0 (may fail in some cases)
|
188
|
-
|
189
|
-
== version 0.4.2
|
190
|
-
|
191
|
-
1. added MS::MSRun.open method
|
192
|
-
2. added method to write dta files from SRF
|
193
|
-
|
194
|
-
== version 0.4.3
|
195
|
-
|
196
|
-
1. added to_mfg_file from SRF
|
data/lib/ms/calc.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
module Ms
|
2
|
-
module Calc
|
3
|
-
module_function
|
4
|
-
|
5
|
-
#
|
6
|
-
# ppm calculations... maybe use RUnit
|
7
|
-
#
|
8
|
-
|
9
|
-
def ppm_tol_at(mz, ppm)
|
10
|
-
1.0 * mz * ppm / 10**6
|
11
|
-
end
|
12
|
-
|
13
|
-
def ppm_span_at(mz, ppm)
|
14
|
-
tol = ppm_tol_at(mz, ppm)
|
15
|
-
[mz-tol, mz+tol]
|
16
|
-
end
|
17
|
-
|
18
|
-
def ppm_range_at(mz, ppm)
|
19
|
-
mz = mz.to_f
|
20
|
-
tol = ppm_tol_at(mz, ppm)
|
21
|
-
mz-tol...mz+tol
|
22
|
-
end
|
23
|
-
|
24
|
-
|
25
|
-
# Rounds n to the specified precision (ie number of decimal places)
|
26
|
-
# def round(n, precision)
|
27
|
-
# factor = 10**precision.to_i
|
28
|
-
# (n * factor).round.to_f / factor
|
29
|
-
# end
|
30
|
-
|
31
|
-
end
|
32
|
-
end
|
data/lib/ms/data/interleaved.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
require 'ms/data/simple'
|
2
|
-
|
3
|
-
module Ms
|
4
|
-
module Data
|
5
|
-
module_function
|
6
|
-
|
7
|
-
# Initializes a new interleaved data array.
|
8
|
-
def new_interleaved(unresolved_data, n=2)
|
9
|
-
Interleaved.new(unresolved_data, n=2)
|
10
|
-
end
|
11
|
-
|
12
|
-
# An Interleaved data array lazily evaluates its unresolved data as
|
13
|
-
# an interleaved array of n members. The unresolved data is evaluated
|
14
|
-
# into an array using to_a.
|
15
|
-
#
|
16
|
-
# i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
|
17
|
-
# i.unresolved_data # => [1,4,2,5,3,6]
|
18
|
-
# i.data # => []
|
19
|
-
# i[0] # => [1,2,3]
|
20
|
-
# i[1] # => [4,5,6]
|
21
|
-
# i.data # => [[1,2,3], [4,5,6]]
|
22
|
-
#
|
23
|
-
class Interleaved < Simple
|
24
|
-
attr_reader :n
|
25
|
-
|
26
|
-
def initialize(unresolved_data, n=2)
|
27
|
-
@n = 2
|
28
|
-
super(unresolved_data)
|
29
|
-
end
|
30
|
-
|
31
|
-
def [](index)
|
32
|
-
resolve.data[index]
|
33
|
-
end
|
34
|
-
|
35
|
-
def resolved?
|
36
|
-
!@data.empty?
|
37
|
-
end
|
38
|
-
|
39
|
-
def resolve
|
40
|
-
return(self) if resolved?
|
41
|
-
|
42
|
-
unresolved_data = @unresolved_data.to_a
|
43
|
-
|
44
|
-
unless unresolved_data.length % n == 0
|
45
|
-
raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
|
46
|
-
end
|
47
|
-
|
48
|
-
n.times { @data << [] }
|
49
|
-
map = @data * (unresolved_data.length/n)
|
50
|
-
|
51
|
-
unresolved_data.each_with_index do |item, i|
|
52
|
-
map[i] << item
|
53
|
-
end
|
54
|
-
|
55
|
-
self
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
data/lib/ms/data/lazy_io.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
module Ms
|
2
|
-
module Data
|
3
|
-
|
4
|
-
# LazyIO represents data to be lazily read from an IO. To read the data
|
5
|
-
# from the IO, either string or to_a may be called (to_a unpacks the
|
6
|
-
# string into an array using the decode_format and unpack_format).
|
7
|
-
#
|
8
|
-
# LazyIO is a suitable unresolved_data source for Ms::Data formats.
|
9
|
-
class LazyIO
|
10
|
-
NETWORK_FLOAT = 'g*'
|
11
|
-
NETWORK_DOUBLE = 'G*'
|
12
|
-
LITTLE_ENDIAN_FLOAT = 'e*'
|
13
|
-
LITTLE_ENDIAN_DOUBLE = 'E*'
|
14
|
-
BASE_64 = 'm'
|
15
|
-
|
16
|
-
class << self
|
17
|
-
# Returns the unpacking code for the given precision (32 or 64-bit)
|
18
|
-
# and network order (true for big-endian).
|
19
|
-
def unpack_code(precision, network_order)
|
20
|
-
case precision
|
21
|
-
when 32 then network_order ? NETWORK_FLOAT : LITTLE_ENDIAN_FLOAT
|
22
|
-
when 64 then network_order ? NETWORK_DOUBLE : LITTLE_ENDIAN_DOUBLE
|
23
|
-
else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# The IO from which string is read
|
29
|
-
attr_reader :io
|
30
|
-
|
31
|
-
# The start index for reading string
|
32
|
-
attr_reader :start_index
|
33
|
-
|
34
|
-
# The number of bytes to be read from io when evaluating string
|
35
|
-
attr_reader :num_bytes
|
36
|
-
|
37
|
-
# Indicates the unpacking format
|
38
|
-
attr_reader :unpack_format
|
39
|
-
|
40
|
-
# Indicates a decoding format, may be false to unpack string
|
41
|
-
# without decoding.
|
42
|
-
attr_reader :decode_format
|
43
|
-
|
44
|
-
def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
|
45
|
-
@io = io
|
46
|
-
@start_index = start_index
|
47
|
-
@num_bytes = num_bytes
|
48
|
-
@unpack_format = unpack_format
|
49
|
-
@decode_format = decode_format
|
50
|
-
end
|
51
|
-
|
52
|
-
# Positions io at start_index and reads a string of num_bytes length.
|
53
|
-
# The string is newly read from io each time string is called.
|
54
|
-
def string
|
55
|
-
io.pos = start_index unless io.pos == start_index
|
56
|
-
io.read(num_bytes)
|
57
|
-
end
|
58
|
-
|
59
|
-
# Resets the cached array (returned by to_a) so that the array will
|
60
|
-
# be re-read from io.
|
61
|
-
def reset
|
62
|
-
@array = nil
|
63
|
-
end
|
64
|
-
|
65
|
-
# Reads string and unpacks using decode_format and unpack_code. The
|
66
|
-
# array is cached internally; to re-read the array, use reset.
|
67
|
-
def to_a
|
68
|
-
@array ||= (decode_format ? string.unpack(decode_format)[0] : string).unpack(unpack_format)
|
69
|
-
end
|
70
|
-
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
data/lib/ms/data/lazy_string.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'ms/data/lazy_io'
|
2
|
-
require 'stringio'
|
3
|
-
|
4
|
-
module Ms
|
5
|
-
module Data
|
6
|
-
|
7
|
-
# LazyString is a LazyIO initialized from a string, which is converted into
|
8
|
-
# a StringIO.
|
9
|
-
class LazyString < LazyIO
|
10
|
-
def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
|
11
|
-
super(StringIO.new(string), 0, string.length, unpack_format, decode_format)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|