mspire 0.5.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
metadata
CHANGED
@@ -1,88 +1,205 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease:
|
5
|
+
version: 0.6.1
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
|
-
- John Prince
|
8
|
+
- John T. Prince
|
8
9
|
- Simon Chiang
|
9
10
|
autorequire:
|
10
11
|
bindir: bin
|
11
12
|
cert_chain: []
|
12
13
|
|
13
|
-
date:
|
14
|
-
default_executable:
|
14
|
+
date: 2012-01-25 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
17
|
+
name: nokogiri
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "1.5"
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: "2.6"
|
18
36
|
type: :development
|
19
|
-
|
20
|
-
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: jeweler
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
21
43
|
requirements:
|
22
|
-
- -
|
44
|
+
- - ~>
|
23
45
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
25
|
-
|
46
|
+
version: 1.5.2
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id003
|
26
49
|
- !ruby/object:Gem::Dependency
|
27
|
-
name:
|
50
|
+
name: rcov
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
28
58
|
type: :development
|
29
|
-
|
30
|
-
|
59
|
+
version_requirements: *id004
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: obo
|
62
|
+
prerelease: false
|
63
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
31
65
|
requirements:
|
32
|
-
- - "
|
66
|
+
- - ">="
|
33
67
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.
|
35
|
-
|
36
|
-
|
37
|
-
|
68
|
+
version: 0.1.0
|
69
|
+
type: :development
|
70
|
+
version_requirements: *id005
|
71
|
+
description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems
|
72
|
+
email: jtprince@gmail.com
|
38
73
|
executables: []
|
39
74
|
|
40
75
|
extensions: []
|
41
76
|
|
42
77
|
extra_rdoc_files:
|
43
|
-
- changelog.txt
|
44
78
|
- LICENSE
|
45
|
-
- README
|
79
|
+
- README.rdoc
|
46
80
|
files:
|
81
|
+
- LICENSE
|
82
|
+
- README.rdoc
|
83
|
+
- Rakefile
|
84
|
+
- VERSION
|
85
|
+
- lib/cv.rb
|
86
|
+
- lib/cv/description.rb
|
87
|
+
- lib/cv/param.rb
|
88
|
+
- lib/io/bookmark.rb
|
89
|
+
- lib/merge.rb
|
47
90
|
- lib/ms.rb
|
48
|
-
- lib/ms/
|
49
|
-
- lib/ms/
|
50
|
-
- lib/ms/
|
51
|
-
- lib/ms/
|
52
|
-
- lib/ms/
|
53
|
-
- lib/ms/
|
54
|
-
- lib/ms/
|
55
|
-
- lib/ms/
|
56
|
-
- lib/ms/
|
91
|
+
- lib/ms/cvlist.rb
|
92
|
+
- lib/ms/digester.rb
|
93
|
+
- lib/ms/fasta.rb
|
94
|
+
- lib/ms/ident.rb
|
95
|
+
- lib/ms/ident/peptide.rb
|
96
|
+
- lib/ms/ident/peptide/db.rb
|
97
|
+
- lib/ms/ident/peptide_hit.rb
|
98
|
+
- lib/ms/ident/peptide_hit/qvalue.rb
|
99
|
+
- lib/ms/ident/pepxml.rb
|
100
|
+
- lib/ms/ident/pepxml/modifications.rb
|
101
|
+
- lib/ms/ident/pepxml/msms_pipeline_analysis.rb
|
102
|
+
- lib/ms/ident/pepxml/msms_run_summary.rb
|
103
|
+
- lib/ms/ident/pepxml/parameters.rb
|
104
|
+
- lib/ms/ident/pepxml/sample_enzyme.rb
|
105
|
+
- lib/ms/ident/pepxml/search_database.rb
|
106
|
+
- lib/ms/ident/pepxml/search_hit.rb
|
107
|
+
- lib/ms/ident/pepxml/search_hit/modification_info.rb
|
108
|
+
- lib/ms/ident/pepxml/search_result.rb
|
109
|
+
- lib/ms/ident/pepxml/search_summary.rb
|
110
|
+
- lib/ms/ident/pepxml/spectrum_query.rb
|
111
|
+
- lib/ms/ident/protein.rb
|
112
|
+
- lib/ms/ident/protein_group.rb
|
113
|
+
- lib/ms/ident/search.rb
|
114
|
+
- lib/ms/isotope/aa.rb
|
115
|
+
- lib/ms/mascot.rb
|
116
|
+
- lib/ms/mass.rb
|
117
|
+
- lib/ms/mass/aa.rb
|
118
|
+
- lib/ms/mzml.rb
|
119
|
+
- lib/ms/mzml/index_list.rb
|
120
|
+
- lib/ms/mzml/plms1.rb
|
121
|
+
- lib/ms/obo.rb
|
122
|
+
- lib/ms/plms1.rb
|
123
|
+
- lib/ms/quant/qspec.rb
|
124
|
+
- lib/ms/quant/qspec/protein_group_comparison.rb
|
57
125
|
- lib/ms/spectrum.rb
|
58
|
-
-
|
59
|
-
-
|
60
|
-
-
|
61
|
-
|
62
|
-
|
126
|
+
- lib/msplat.rb
|
127
|
+
- lib/obo/ims.rb
|
128
|
+
- lib/obo/ms.rb
|
129
|
+
- lib/obo/ontology.rb
|
130
|
+
- lib/obo/unit.rb
|
131
|
+
- lib/openany.rb
|
132
|
+
- lib/write_file_or_string.rb
|
133
|
+
- obo/ims.obo
|
134
|
+
- obo/ms.obo
|
135
|
+
- obo/unit.obo
|
136
|
+
- spec/ms/cvlist_spec.rb
|
137
|
+
- spec/ms/digester_spec.rb
|
138
|
+
- spec/ms/fasta_spec.rb
|
139
|
+
- spec/ms/ident/peptide/db_spec.rb
|
140
|
+
- spec/ms/ident/pepxml/sample_enzyme_spec.rb
|
141
|
+
- spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
|
142
|
+
- spec/ms/ident/pepxml_spec.rb
|
143
|
+
- spec/ms/ident/protein_group_spec.rb
|
144
|
+
- spec/ms/mass_spec.rb
|
145
|
+
- spec/ms/mzml/index_list_spec.rb
|
146
|
+
- spec/ms/mzml/plms1_spec.rb
|
147
|
+
- spec/ms/mzml_spec.rb
|
148
|
+
- spec/ms/plms1_spec.rb
|
149
|
+
- spec/ms/quant/qspec_spec.rb
|
150
|
+
- spec/msplat_spec.rb
|
151
|
+
- spec/obo_spec.rb
|
152
|
+
- spec/spec_helper.rb
|
153
|
+
- spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta
|
154
|
+
- spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml
|
155
|
+
- spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML
|
156
|
+
- spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML
|
157
|
+
- spec/testfiles/ms/quant/kill_extra_tabs.rb
|
158
|
+
- spec/testfiles/ms/quant/max_quant_output.provenance.txt
|
159
|
+
- spec/testfiles/ms/quant/max_quant_output.txt
|
160
|
+
- spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv
|
161
|
+
- spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp
|
162
|
+
- spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv
|
163
|
+
- spec/testfiles/ms/quant/pdcd5_final.txt
|
164
|
+
- spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp
|
165
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv
|
166
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv
|
167
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv
|
168
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv
|
169
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp
|
170
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv
|
171
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt
|
172
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt
|
173
|
+
- spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp
|
174
|
+
- spec/testfiles/ms/quant/remove_rest_of_proteins.rb
|
175
|
+
- spec/testfiles/ms/quant/unlog_transform.rb
|
176
|
+
- spec/testfiles/plms1/output.key
|
177
|
+
homepage: http://github.com/princelab/mspire
|
178
|
+
licenses:
|
179
|
+
- MIT
|
63
180
|
post_install_message:
|
64
181
|
rdoc_options: []
|
65
182
|
|
66
183
|
require_paths:
|
67
184
|
- lib
|
68
185
|
required_ruby_version: !ruby/object:Gem::Requirement
|
186
|
+
none: false
|
69
187
|
requirements:
|
70
188
|
- - ">="
|
71
189
|
- !ruby/object:Gem::Version
|
72
190
|
version: "0"
|
73
|
-
version:
|
74
191
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
192
|
+
none: false
|
75
193
|
requirements:
|
76
194
|
- - ">="
|
77
195
|
- !ruby/object:Gem::Version
|
78
196
|
version: "0"
|
79
|
-
version:
|
80
197
|
requirements: []
|
81
198
|
|
82
|
-
rubyforge_project:
|
83
|
-
rubygems_version: 1.
|
199
|
+
rubyforge_project:
|
200
|
+
rubygems_version: 1.8.10
|
84
201
|
signing_key:
|
85
|
-
specification_version:
|
86
|
-
summary:
|
202
|
+
specification_version: 3
|
203
|
+
summary: mass spectrometry proteomics, lipidomics, and tools
|
87
204
|
test_files: []
|
88
205
|
|
data/README
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
= {Mspire}[http://mspire.rubyforge.org]
|
2
|
-
|
3
|
-
A library for working with mass spectrometry proteomics data.
|
4
|
-
|
5
|
-
<em> Mspire is going through a re-write as of version 0.5.0 to support a new
|
6
|
-
development model. Many modules are absent but will gradually be added back.
|
7
|
-
Use the 0.4 releases as necessary. </em>
|
8
|
-
|
9
|
-
== Description
|
10
|
-
|
11
|
-
mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
|
12
|
-
working with MS proteomics data in ruby. It seeks to provide support for open
|
13
|
-
standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
|
14
|
-
TPP) and contribute other useful functionality for working with mass
|
15
|
-
spectrometry data in ruby.
|
16
|
-
|
17
|
-
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
|
18
|
-
* Github[http://github.com/bahuvrihi/mspire/tree/master]
|
19
|
-
* {Google Group}[http://groups.google.com/group/mspire-forum]
|
20
|
-
|
21
|
-
--
|
22
|
-
=== Current Focus
|
23
|
-
|
24
|
-
The project is currently focusing on the following:
|
25
|
-
|
26
|
-
* SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
|
27
|
-
* mzXML
|
28
|
-
* mzData
|
29
|
-
* ProteinProphet
|
30
|
-
* Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
|
31
|
-
|
32
|
-
=== Features
|
33
|
-
|
34
|
-
* mzXML (version 1, 2, and 3) parsing
|
35
|
-
* mzData parsing
|
36
|
-
* bioworks .srf (binary files) reader
|
37
|
-
* read/write .sqt files
|
38
|
-
* bioworks to PeptideProphet input (pepXML files)
|
39
|
-
* lightweight APEX values parser
|
40
|
-
* histogram protein probabilities
|
41
|
-
* protein summary views with custom false ID cutoff values
|
42
|
-
* conversion to OBI-Warp input files
|
43
|
-
* portable: works across platforms
|
44
|
-
|
45
|
-
Validation by:
|
46
|
-
* Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
|
47
|
-
* Amino acid (e.g., search for unblocked cysteines)
|
48
|
-
* Transmembrane prediction (Phobius or TopPred)
|
49
|
-
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
50
|
-
* Defined sample
|
51
|
-
|
52
|
-
=== Spectra and Spectra Identification
|
53
|
-
|
54
|
-
The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
|
55
|
-
|
56
|
-
The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
|
57
|
-
|
58
|
-
=== Tutorials
|
59
|
-
|
60
|
-
* [Database Searching Tutorial](tutorial/database_searching/index.html) -
|
61
|
-
Demonstrates two methods for running and analysing Bioworks output to obtain
|
62
|
-
false positive rates using mspire executables.
|
63
|
-
++
|
64
|
-
|
65
|
-
== Installation
|
66
|
-
|
67
|
-
Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
|
68
|
-
|
69
|
-
% gem install mspire
|
70
|
-
|
71
|
-
= Warning
|
72
|
-
|
73
|
-
This is an experimental package. As such, all versions prior to version 1.0
|
74
|
-
may contain interface changes on minor revisions (major.minor.build) (e.g.,
|
75
|
-
0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
|
76
|
-
versioning scheme will be strictly adhered to (no interface changes except on
|
77
|
-
major revisions).
|
data/changelog.txt
DELETED
@@ -1,196 +0,0 @@
|
|
1
|
-
|
2
|
-
== version 0.1.7
|
3
|
-
|
4
|
-
1. A couple of scripts and subroutines were hashing peptides but not on the file
|
5
|
-
basename. This would result in slightly incorrect results (any time there
|
6
|
-
were overlapping scan numbers in multiple datasets, only the top one would be
|
7
|
-
chosen). The results would be correct for single runs.
|
8
|
-
|
9
|
-
Output files that could be affected:
|
10
|
-
*.top_per_scan.txt
|
11
|
-
*.all_peps_per_scan.txt
|
12
|
-
|
13
|
-
Scripts that could be affected:
|
14
|
-
script/top_hit_per_scan.rb
|
15
|
-
bin/filter_spec_id.rb
|
16
|
-
script/filter-peps.rb
|
17
|
-
bin/id_precision.rb
|
18
|
-
|
19
|
-
Subroutines that were affected:
|
20
|
-
spec_id.rb (pep_probs_by_* )
|
21
|
-
spec_id.rb (top_peps_prefilter!)
|
22
|
-
proph.rb uniq_by_seqcharge
|
23
|
-
align.rb called uniq_by_seqcharge
|
24
|
-
|
25
|
-
|
26
|
-
2. false_positive_rate.rb and protein_summary.rb (by extension) were using
|
27
|
-
number of true positives on the x axis while in reality I was plotting the
|
28
|
-
number of hits. I've updated x axis labels to reflect this change. In
|
29
|
-
addition, since the term 'false positive rate' has such a distinct definition
|
30
|
-
in classical ROC plots and binary statistics, I've decided to work primarily
|
31
|
-
in terms of precision (TP/(TP+FP)). I've purged the terms 'False Positive
|
32
|
-
Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
|
33
|
-
called the False Positive Predictive Rate (FPPR). I will probably implement
|
34
|
-
this in a future release.
|
35
|
-
|
36
|
-
== version 0.2.0
|
37
|
-
|
38
|
-
Revamped the way SpecID works (it is now mixed-in).
|
39
|
-
Added support for modifications to bioworks_to_pepxml.rb
|
40
|
-
Can read .srf files (nearly interchangeable with bioworks files)
|
41
|
-
Redid filter.rb
|
42
|
-
|
43
|
-
== version 0.2.1
|
44
|
-
|
45
|
-
minor bugfix
|
46
|
-
|
47
|
-
== version 0.2.2
|
48
|
-
|
49
|
-
made compatible with Bioworks fasta file reverser and updated tutorial.
|
50
|
-
Killed classify_by_prefix routine in favor of classify_by_false_flag which has
|
51
|
-
a prefix option
|
52
|
-
|
53
|
-
== version 0.2.3
|
54
|
-
|
55
|
-
in protein_summary.rb added handling for proteins with no annotation. (either
|
56
|
-
display NA or use gi2annnot to grab them from NCBI)
|
57
|
-
|
58
|
-
== version 0.2.5
|
59
|
-
|
60
|
-
renamed prep_list in roc (potential breaks in code)
|
61
|
-
|
62
|
-
== version 0.2.6
|
63
|
-
|
64
|
-
1. Massive refactorization of filtering and validation. Validation objects are
|
65
|
-
created and then can be used to validate just about anything.
|
66
|
-
2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
|
67
|
-
(including readw broken output), and mzData (even Thermo's broken output).
|
68
|
-
4. Moved all tests to specs (rspec).
|
69
|
-
5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
|
70
|
-
2.X)
|
71
|
-
|
72
|
-
Bugfixes:
|
73
|
-
1. The search_summary 'base_name' in pepxml output was incorrect (this did not
|
74
|
-
appear to influence our analyses, however). Fixed.
|
75
|
-
2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
|
76
|
-
missed cleavages if the last amino acid was a cut point. Fixed.
|
77
|
-
|
78
|
-
== version 0.2.7
|
79
|
-
|
80
|
-
1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
|
81
|
-
Now, the sample enzyme is set explicitly from the params file and the option
|
82
|
-
is not available. This can give more accurate pepxml files than from
|
83
|
-
previous depending on your enzyme.
|
84
|
-
|
85
|
-
== version 0.2.9
|
86
|
-
|
87
|
-
1. Added support for phobius transmembrane predictions
|
88
|
-
2. have filter_and_validate.rb working well (multiple validators allowed).
|
89
|
-
3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
|
90
|
-
4. Added a bias validator
|
91
|
-
|
92
|
-
== version 0.2.10
|
93
|
-
|
94
|
-
1. Fixed --hits_separate flag in spec_id/filter
|
95
|
-
|
96
|
-
== version 0.2.11
|
97
|
-
|
98
|
-
1. Added prob precision support and reorganized filter_and_validate libs
|
99
|
-
|
100
|
-
== version 0.2.12
|
101
|
-
|
102
|
-
1. Fixed bug in transmem for prob and others.
|
103
|
-
2. Can use axml (XMLParser based) or libxml depending on availability
|
104
|
-
|
105
|
-
== version 0.2.13
|
106
|
-
|
107
|
-
1. Fixed issue with --hits_separate
|
108
|
-
2. filter_and_validate.rb requires decoy validator if decoy proteins
|
109
|
-
(refactored code)
|
110
|
-
|
111
|
-
== version 0.2.14
|
112
|
-
|
113
|
-
1. Can read PeptideProphet files (should be able to read pepxml files, too)
|
114
|
-
2. API change: Some slight modifications to the Sequest::PepXML object
|
115
|
-
interfaces and implementations (using ArrayClass)
|
116
|
-
|
117
|
-
== version 0.2.15
|
118
|
-
|
119
|
-
1. can convert srf files to sqt files
|
120
|
-
|
121
|
-
== version 0.3.0
|
122
|
-
|
123
|
-
1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
|
124
|
-
2. SQT export is correct and works at least on 3.2 and 3.3.1.
|
125
|
-
|
126
|
-
== version 0.3.1
|
127
|
-
|
128
|
-
1. Bug fix in srf filtering (num_hits adjusted)
|
129
|
-
|
130
|
-
== version 0.3.2
|
131
|
-
|
132
|
-
1. Uses sequest peptide_mass_tolerance filter on srf group files by default
|
133
|
-
now.
|
134
|
-
|
135
|
-
== version 0.3.3
|
136
|
-
|
137
|
-
1. Worked out minor kinks in prob_precision.rb
|
138
|
-
|
139
|
-
== version 0.3.4
|
140
|
-
|
141
|
-
1. filters >= +3 charged ions now.
|
142
|
-
|
143
|
-
== version 0.3.5
|
144
|
-
|
145
|
-
1. fixed creation of background distribution in validators (hash_by base_name,
|
146
|
-
first_scan, charge now)
|
147
|
-
|
148
|
-
== version 0.3.6
|
149
|
-
|
150
|
-
1. split off bad_aa_est from bad_aa
|
151
|
-
|
152
|
-
== version 0.3.7
|
153
|
-
|
154
|
-
1. can deal with No_Enzyme searches now (while still capable of setting
|
155
|
-
sample_enzyme)
|
156
|
-
|
157
|
-
== version 0.3.8
|
158
|
-
|
159
|
-
1. can set a decoy to target ratio for decoy validation
|
160
|
-
2. added mass calculator in Mass::Calculator
|
161
|
-
|
162
|
-
== version 0.3.9
|
163
|
-
|
164
|
-
1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
|
165
|
-
|
166
|
-
== version 0.3.10
|
167
|
-
|
168
|
-
1. added run_percolator.rb script which makes running multiple files easy
|
169
|
-
|
170
|
-
== version 0.3.11
|
171
|
-
|
172
|
-
1. faster sensing of bad scan tags in mzXML v. 2.0 files
|
173
|
-
2. implemented lazy evaluation of spectrum in 2 different ways allowing much
|
174
|
-
larger files to be parsed
|
175
|
-
|
176
|
-
== version 0.4.0
|
177
|
-
|
178
|
-
1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
|
179
|
-
2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
|
180
|
-
3. lazy eval working on mzData
|
181
|
-
4. mzData not necessarily guaranteed to have precursor intensities on lazy
|
182
|
-
eval methods (however, the method intensity_at_mz will still work (causing
|
183
|
-
evaluation))
|
184
|
-
|
185
|
-
== version 0.4.1
|
186
|
-
|
187
|
-
1. added support for reading mzXML version 3.0 (may fail in some cases)
|
188
|
-
|
189
|
-
== version 0.4.2
|
190
|
-
|
191
|
-
1. added MS::MSRun.open method
|
192
|
-
2. added method to write dta files from SRF
|
193
|
-
|
194
|
-
== version 0.4.3
|
195
|
-
|
196
|
-
1. added to_mfg_file from SRF
|
data/lib/ms/calc.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
module Ms
|
2
|
-
module Calc
|
3
|
-
module_function
|
4
|
-
|
5
|
-
#
|
6
|
-
# ppm calculations... maybe use RUnit
|
7
|
-
#
|
8
|
-
|
9
|
-
def ppm_tol_at(mz, ppm)
|
10
|
-
1.0 * mz * ppm / 10**6
|
11
|
-
end
|
12
|
-
|
13
|
-
def ppm_span_at(mz, ppm)
|
14
|
-
tol = ppm_tol_at(mz, ppm)
|
15
|
-
[mz-tol, mz+tol]
|
16
|
-
end
|
17
|
-
|
18
|
-
def ppm_range_at(mz, ppm)
|
19
|
-
mz = mz.to_f
|
20
|
-
tol = ppm_tol_at(mz, ppm)
|
21
|
-
mz-tol...mz+tol
|
22
|
-
end
|
23
|
-
|
24
|
-
|
25
|
-
# Rounds n to the specified precision (ie number of decimal places)
|
26
|
-
# def round(n, precision)
|
27
|
-
# factor = 10**precision.to_i
|
28
|
-
# (n * factor).round.to_f / factor
|
29
|
-
# end
|
30
|
-
|
31
|
-
end
|
32
|
-
end
|
data/lib/ms/data/interleaved.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
require 'ms/data/simple'
|
2
|
-
|
3
|
-
module Ms
|
4
|
-
module Data
|
5
|
-
module_function
|
6
|
-
|
7
|
-
# Initializes a new interleaved data array.
|
8
|
-
def new_interleaved(unresolved_data, n=2)
|
9
|
-
Interleaved.new(unresolved_data, n=2)
|
10
|
-
end
|
11
|
-
|
12
|
-
# An Interleaved data array lazily evaluates its unresolved data as
|
13
|
-
# an interleaved array of n members. The unresolved data is evaluated
|
14
|
-
# into an array using to_a.
|
15
|
-
#
|
16
|
-
# i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
|
17
|
-
# i.unresolved_data # => [1,4,2,5,3,6]
|
18
|
-
# i.data # => []
|
19
|
-
# i[0] # => [1,2,3]
|
20
|
-
# i[1] # => [4,5,6]
|
21
|
-
# i.data # => [[1,2,3], [4,5,6]]
|
22
|
-
#
|
23
|
-
class Interleaved < Simple
|
24
|
-
attr_reader :n
|
25
|
-
|
26
|
-
def initialize(unresolved_data, n=2)
|
27
|
-
@n = 2
|
28
|
-
super(unresolved_data)
|
29
|
-
end
|
30
|
-
|
31
|
-
def [](index)
|
32
|
-
resolve.data[index]
|
33
|
-
end
|
34
|
-
|
35
|
-
def resolved?
|
36
|
-
!@data.empty?
|
37
|
-
end
|
38
|
-
|
39
|
-
def resolve
|
40
|
-
return(self) if resolved?
|
41
|
-
|
42
|
-
unresolved_data = @unresolved_data.to_a
|
43
|
-
|
44
|
-
unless unresolved_data.length % n == 0
|
45
|
-
raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
|
46
|
-
end
|
47
|
-
|
48
|
-
n.times { @data << [] }
|
49
|
-
map = @data * (unresolved_data.length/n)
|
50
|
-
|
51
|
-
unresolved_data.each_with_index do |item, i|
|
52
|
-
map[i] << item
|
53
|
-
end
|
54
|
-
|
55
|
-
self
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
data/lib/ms/data/lazy_io.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
module Ms
|
2
|
-
module Data
|
3
|
-
|
4
|
-
# LazyIO represents data to be lazily read from an IO. To read the data
|
5
|
-
# from the IO, either string or to_a may be called (to_a unpacks the
|
6
|
-
# string into an array using the decode_format and unpack_format).
|
7
|
-
#
|
8
|
-
# LazyIO is a suitable unresolved_data source for Ms::Data formats.
|
9
|
-
class LazyIO
|
10
|
-
NETWORK_FLOAT = 'g*'
|
11
|
-
NETWORK_DOUBLE = 'G*'
|
12
|
-
LITTLE_ENDIAN_FLOAT = 'e*'
|
13
|
-
LITTLE_ENDIAN_DOUBLE = 'E*'
|
14
|
-
BASE_64 = 'm'
|
15
|
-
|
16
|
-
class << self
|
17
|
-
# Returns the unpacking code for the given precision (32 or 64-bit)
|
18
|
-
# and network order (true for big-endian).
|
19
|
-
def unpack_code(precision, network_order)
|
20
|
-
case precision
|
21
|
-
when 32 then network_order ? NETWORK_FLOAT : LITTLE_ENDIAN_FLOAT
|
22
|
-
when 64 then network_order ? NETWORK_DOUBLE : LITTLE_ENDIAN_DOUBLE
|
23
|
-
else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# The IO from which string is read
|
29
|
-
attr_reader :io
|
30
|
-
|
31
|
-
# The start index for reading string
|
32
|
-
attr_reader :start_index
|
33
|
-
|
34
|
-
# The number of bytes to be read from io when evaluating string
|
35
|
-
attr_reader :num_bytes
|
36
|
-
|
37
|
-
# Indicates the unpacking format
|
38
|
-
attr_reader :unpack_format
|
39
|
-
|
40
|
-
# Indicates a decoding format, may be false to unpack string
|
41
|
-
# without decoding.
|
42
|
-
attr_reader :decode_format
|
43
|
-
|
44
|
-
def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
|
45
|
-
@io = io
|
46
|
-
@start_index = start_index
|
47
|
-
@num_bytes = num_bytes
|
48
|
-
@unpack_format = unpack_format
|
49
|
-
@decode_format = decode_format
|
50
|
-
end
|
51
|
-
|
52
|
-
# Positions io at start_index and reads a string of num_bytes length.
|
53
|
-
# The string is newly read from io each time string is called.
|
54
|
-
def string
|
55
|
-
io.pos = start_index unless io.pos == start_index
|
56
|
-
io.read(num_bytes)
|
57
|
-
end
|
58
|
-
|
59
|
-
# Resets the cached array (returned by to_a) so that the array will
|
60
|
-
# be re-read from io.
|
61
|
-
def reset
|
62
|
-
@array = nil
|
63
|
-
end
|
64
|
-
|
65
|
-
# Reads string and unpacks using decode_format and unpack_code. The
|
66
|
-
# array is cached internally; to re-read the array, use reset.
|
67
|
-
def to_a
|
68
|
-
@array ||= (decode_format ? string.unpack(decode_format)[0] : string).unpack(unpack_format)
|
69
|
-
end
|
70
|
-
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
data/lib/ms/data/lazy_string.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'ms/data/lazy_io'
|
2
|
-
require 'stringio'
|
3
|
-
|
4
|
-
module Ms
|
5
|
-
module Data
|
6
|
-
|
7
|
-
# LazyString is a LazyIO initialized from a string, which is converted into
|
8
|
-
# a StringIO.
|
9
|
-
class LazyString < LazyIO
|
10
|
-
def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
|
11
|
-
super(StringIO.new(string), 0, string.length, unpack_format, decode_format)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|