mspire 0.3.9 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +24 -7
- data/README +15 -13
- data/README.rdoc +18 -0
- data/Rakefile +50 -14
- data/bin/aafreqs.rb +0 -0
- data/bin/bioworks2excel.rb +0 -0
- data/bin/bioworks_to_pepxml.rb +2 -1
- data/bin/bioworks_to_pepxml_gui.rb +0 -0
- data/bin/fasta_shaker.rb +0 -0
- data/bin/filter_and_validate.rb +0 -0
- data/bin/gi2annot.rb +0 -0
- data/bin/id_class_anal.rb +0 -0
- data/bin/id_precision.rb +0 -0
- data/bin/ms_to_lmat.rb +0 -0
- data/bin/pepproph_filter.rb +0 -0
- data/bin/protein_summary.rb +0 -0
- data/bin/protxml2prots_peps.rb +0 -0
- data/bin/raw_to_mzXML.rb +3 -3
- data/bin/run_percolator.rb +122 -0
- data/bin/sqt_group.rb +0 -0
- data/bin/srf_group.rb +0 -0
- data/changelog.txt +29 -0
- data/lib/ms/gradient_program.rb +0 -1
- data/lib/ms/msrun.rb +62 -29
- data/lib/ms/parser/mzdata/axml.rb +55 -0
- data/lib/ms/parser/mzdata/dom.rb +51 -36
- data/lib/ms/parser/mzdata.rb +8 -2
- data/lib/ms/parser/mzxml/axml.rb +59 -0
- data/lib/ms/parser/mzxml/dom.rb +80 -57
- data/lib/ms/parser/mzxml/hpricot.rb +1 -1
- data/lib/ms/parser/mzxml/libxml.rb +6 -2
- data/lib/ms/parser/mzxml.rb +110 -3
- data/lib/ms/parser.rb +4 -4
- data/lib/ms/precursor.rb +19 -4
- data/lib/ms/scan.rb +7 -7
- data/lib/ms/spectrum.rb +249 -58
- data/lib/mspire.rb +1 -1
- data/lib/spec_id/bioworks.rb +2 -2
- data/lib/spec_id/precision/filter/cmdline.rb +8 -1
- data/lib/spec_id/precision/prob/cmdline.rb +2 -2
- data/lib/spec_id/precision/prob.rb +1 -0
- data/lib/spec_id/proph/pep_summary.rb +3 -4
- data/lib/spec_id/proph/prot_summary.rb +3 -3
- data/lib/spec_id/protein_summary.rb +1 -1
- data/lib/spec_id/sequest/pepxml.rb +5 -5
- data/lib/spec_id/sqt.rb +4 -4
- data/lib/spec_id/srf.rb +49 -8
- data/lib/spec_id.rb +5 -0
- data/lib/xml_style_parser.rb +16 -2
- data/script/compile_and_plot_smriti_final.rb +0 -0
- data/script/create_little_pepxml.rb +0 -0
- data/script/degenerate_peptides.rb +0 -0
- data/script/estimate_fpr_by_cysteine.rb +0 -0
- data/script/extract_gradient_programs.rb +1 -1
- data/script/find_cysteine_background.rb +0 -0
- data/script/genuine_tps_and_probs.rb +0 -0
- data/script/get_apex_values_rexml.rb +0 -0
- data/script/mascot_fix_pepxml.rb +123 -0
- data/script/msvis.rb +0 -0
- data/script/mzXML2timeIndex.rb +0 -0
- data/script/peps_per_bin.rb +0 -0
- data/script/prep_dir.rb +0 -0
- data/script/simple_protein_digestion.rb +0 -0
- data/script/smriti_final_analysis.rb +0 -0
- data/script/sqt_to_meta.rb +0 -0
- data/script/top_hit_per_scan.rb +0 -0
- data/script/toppred_to_yaml.rb +0 -0
- data/script/tpp_installer.rb +0 -0
- data/specs/bin/prob_validate_spec.rb +5 -2
- data/specs/bin/protein_summary_spec.rb +5 -1
- data/specs/ms/msrun_spec.rb +176 -133
- data/specs/ms/parser_spec.rb +3 -3
- data/specs/ms/spectrum_spec.rb +0 -2
- data/specs/spec_id/precision/filter_spec.rb +4 -1
- data/specs/spec_id/precision/prob_spec.rb +2 -2
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
- data/specs/spec_id/sqt_spec.rb +5 -5
- data/specs/spec_id/srf_spec.rb +56 -93
- data/specs/spec_id/srf_spec_helper.rb +121 -284
- data/specs/spec_id_spec.rb +3 -0
- data/specs/transmem/toppred_spec.rb +1 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
- metadata +247 -229
data/INSTALL
CHANGED
|
@@ -4,24 +4,35 @@ Prerequisites
|
|
|
4
4
|
|
|
5
5
|
Much of the package will work without any prerequisites at all. Some functionality may require additional ruby packages or other converters. These are listed in current order of importance:
|
|
6
6
|
|
|
7
|
+
* libjtp - generic library installed automatically if you install mspire with rubygems (or 'gem install libjtp')
|
|
7
8
|
* [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
|
|
8
|
-
* [
|
|
9
|
-
* ['t2x'](
|
|
10
|
-
|
|
9
|
+
* [axml](http://axml.rubyforge.org/) dom wrapper for xmlparser. ('gem install axml')
|
|
10
|
+
* ['t2x'](archive/t2x) linux executable to convert .RAW files (Xcalibur 1.x) to version 1 mzXML files
|
|
11
|
+
|
|
12
|
+
Optional:
|
|
13
|
+
* [libxml](http://libxml.rubyforge.org/) can be used instead of xmlparser. In Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
|
|
14
|
+
* [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). For some plotting. Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
|
|
11
15
|
|
|
12
16
|
Installation
|
|
13
17
|
------------
|
|
14
18
|
|
|
15
19
|
gem install mspire
|
|
16
20
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
gem install -t mspire (to run tests)
|
|
21
|
+
See [installation under cygwin](cygwin.html) if you're on Windows.
|
|
20
22
|
|
|
21
23
|
Development
|
|
22
24
|
-----------
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
anonymous svn checkout:
|
|
27
|
+
|
|
28
|
+
svn checkout svn://rubyforge.org/var/svn/mspire
|
|
29
|
+
|
|
30
|
+
prerequisites:
|
|
31
|
+
|
|
32
|
+
* [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
|
|
33
|
+
* [rspec](http://rspec.info/) (with rubygems: 'gem install rspec --remote')
|
|
34
|
+
|
|
35
|
+
Use rake:
|
|
25
36
|
|
|
26
37
|
% rake -T
|
|
27
38
|
rake clean # Remove any temporary products.
|
|
@@ -32,3 +43,9 @@ prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --r
|
|
|
32
43
|
rake upload_docs # create and upload docs to server
|
|
33
44
|
...etc...
|
|
34
45
|
|
|
46
|
+
run tests: rake spec
|
|
47
|
+
(or): rake specl
|
|
48
|
+
|
|
49
|
+
run tests with large files: rake spec SPEC_LARGE=t
|
|
50
|
+
|
|
51
|
+
run test on one file: rake spec SPEC=specs/{path_to_spec_file}
|
data/README
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
mspire
|
|
2
|
-
======
|
|
3
1
|
|
|
4
2
|
mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
|
|
5
3
|
working with MS proteomics data in ruby. It seeks to provide support for open
|
|
@@ -12,7 +10,7 @@ Current Focus
|
|
|
12
10
|
|
|
13
11
|
The project is currently focusing on the following:
|
|
14
12
|
|
|
15
|
-
* SEQUEST data (particularly the output of Bioworks 3.2)
|
|
13
|
+
* SEQUEST data (particularly the output of Bioworks 3.2-3.3)
|
|
16
14
|
* mzXML
|
|
17
15
|
* mzData
|
|
18
16
|
* ProteinProphet
|
|
@@ -21,15 +19,16 @@ The project is currently focusing on the following:
|
|
|
21
19
|
Features
|
|
22
20
|
--------
|
|
23
21
|
|
|
24
|
-
* mzXML (version 1
|
|
22
|
+
* mzXML (version 1, 2, and 3) parsing
|
|
25
23
|
* mzData parsing
|
|
26
24
|
* bioworks .srf (binary files) reader
|
|
25
|
+
* read/write .sqt files
|
|
27
26
|
* bioworks to PeptideProphet input (pepXML files)
|
|
28
27
|
* lightweight APEX values parser
|
|
29
28
|
* histogram protein probabilities
|
|
30
|
-
* developed for Linux, should port easily to Windows or others
|
|
31
29
|
* protein summary views with custom false ID cutoff values
|
|
32
30
|
* conversion to OBI-Warp input files
|
|
31
|
+
* portable: works across platforms
|
|
33
32
|
|
|
34
33
|
Validation by:
|
|
35
34
|
* Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
|
|
@@ -38,10 +37,12 @@ Validation by:
|
|
|
38
37
|
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
|
39
38
|
* Defined sample
|
|
40
39
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
Spectra and Spectra Identification
|
|
41
|
+
----------------------------------
|
|
42
|
+
|
|
43
|
+
The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
|
|
44
|
+
|
|
45
|
+
The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
|
|
45
46
|
|
|
46
47
|
Tutorials
|
|
47
48
|
---------
|
|
@@ -54,12 +55,13 @@ Warning
|
|
|
54
55
|
-------
|
|
55
56
|
|
|
56
57
|
This is an experimental package. As such, all versions prior to version 1.0
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
58
|
+
may contain interface changes on minor revisions (major.minor.build) (e.g.,
|
|
59
|
+
0.4.0 may contain interface changes from 0.3.9). Beyond version 1.0, the
|
|
60
|
+
versioning scheme will be strictly adhered to (no interface changes except on
|
|
61
|
+
major revisions).
|
|
60
62
|
|
|
61
63
|
Installation
|
|
62
64
|
------------
|
|
63
65
|
|
|
64
|
-
see [Install](install.html)
|
|
66
|
+
see [Install](install/index.html)
|
|
65
67
|
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
|
|
2
|
+
= mspire
|
|
3
|
+
|
|
4
|
+
mass spectrometry proteomics in ruby
|
|
5
|
+
|
|
6
|
+
Please refer to the latest Documentation[http://mspire.rubyforge.org]
|
|
7
|
+
|
|
8
|
+
Please see Installation[http://mspire.rubyforge.org/install/index.html]
|
|
9
|
+
|
|
10
|
+
== Data Models and Examples
|
|
11
|
+
|
|
12
|
+
Object models and usage examples are online:
|
|
13
|
+
|
|
14
|
+
[MS::MSRun] http://mspire.rubyforge.org/ms/msrun.html
|
|
15
|
+
[SpecID] http://mspire.rubyforge.org/spec_id/spec_id.html
|
|
16
|
+
[SRF] http://mspire.rubyforge.org/spec_id/srf.html
|
|
17
|
+
[False Identification Rate Determination] http://mspire.rubyforge.org/spec_id/fir/index.html
|
|
18
|
+
[OBI-Warp] http://mspire.rubyforge.org/ms/obiwarp.html
|
data/Rakefile
CHANGED
|
@@ -17,7 +17,7 @@ $dependencies = %w(libjtp)
|
|
|
17
17
|
$tfiles_large = 'test_files_large'
|
|
18
18
|
changelog = "changelog.txt"
|
|
19
19
|
|
|
20
|
-
core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
|
|
20
|
+
core_files = FL["INSTALL", "README", "README.rdoc", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
|
|
21
21
|
big_dist_files = core_files + FL["test_files_large/**/*"]
|
|
22
22
|
|
|
23
23
|
dist_files = core_files
|
|
@@ -43,7 +43,7 @@ def move_and_add_webgen_header(file, newfile, src_dir, heading)
|
|
|
43
43
|
string = IO.read file
|
|
44
44
|
with_header = heading + string
|
|
45
45
|
File.open(newfile, 'w') {|v| v.print with_header }
|
|
46
|
-
FileUtils.mv newfile, src_dir
|
|
46
|
+
FileUtils.mv newfile, src_dir, :force => true
|
|
47
47
|
end
|
|
48
48
|
|
|
49
49
|
desc "copy top level files into doc/src"
|
|
@@ -55,27 +55,40 @@ directoryName: mspire
|
|
|
55
55
|
---\n"
|
|
56
56
|
src = "doc/src"
|
|
57
57
|
move_and_add_webgen_header('README', 'index.page', src, string.sub('TITLE', 'Home'))
|
|
58
|
-
move_and_add_webgen_header('INSTALL', '
|
|
58
|
+
move_and_add_webgen_header('INSTALL', 'index.page', src + '/install', string.sub('TITLE', 'Install').sub('mspire', 'Install').sub("inMenu: true\n", ''))
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
-
desc "
|
|
62
|
-
task :upload_docs
|
|
61
|
+
desc "upload docs (doc/output) to server"
|
|
62
|
+
task :upload_docs do
|
|
63
63
|
sh "scp -i ~/.ssh/rubyforge_key -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
|
|
64
64
|
end
|
|
65
65
|
|
|
66
|
+
# best to use webgen 0.3.8 right now
|
|
67
|
+
# to get working (may not require all these steps):
|
|
68
|
+
# gem install RedCloth
|
|
69
|
+
# gem install BlueCloth
|
|
70
|
+
# soft link the bluecloth binary into path
|
|
66
71
|
desc "creates docs in doc/html"
|
|
67
72
|
task :html_docs => [:cp_top_level_docs] do
|
|
68
73
|
FileUtils.cd 'doc' do
|
|
69
74
|
sh "webgen"
|
|
70
75
|
end
|
|
76
|
+
FileUtils.cp 'doc/src/archive/t2x', 'doc/output/archive/t2x'
|
|
71
77
|
end
|
|
72
78
|
|
|
73
|
-
|
|
74
|
-
|
|
79
|
+
desc "does html_docs and rdoc and puts rdoc inside html_docs"
|
|
80
|
+
task :all_docs => [:html_docs, :rdoc] do
|
|
81
|
+
FileUtils.mv 'html', 'doc/output/rdoc'
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
#rdoc_options = ['--main', 'README', '--title', NAME]
|
|
85
|
+
rdoc_options = ['--main', 'README.rdoc', '--title', NAME]
|
|
86
|
+
#rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
|
|
87
|
+
rdoc_extra_includes = ['README.rdoc']
|
|
75
88
|
|
|
76
89
|
Rake::RDocTask.new do |rd|
|
|
77
|
-
rd.main = "README"
|
|
78
|
-
rd.rdoc_files.include rdoc_extra_includes
|
|
90
|
+
rd.main = "README.rdoc"
|
|
91
|
+
rd.rdoc_files.include("lib/**/*.rb", *rdoc_extra_includes )
|
|
79
92
|
rd.options.push( *rdoc_options )
|
|
80
93
|
end
|
|
81
94
|
|
|
@@ -124,7 +137,12 @@ Spec::Rake::SpecTask.new('spec') do |t|
|
|
|
124
137
|
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
|
125
138
|
Rake::Task[:ensure_dependencies].invoke
|
|
126
139
|
Rake::Task[:ensure_large_testfiles].invoke
|
|
127
|
-
t.libs =
|
|
140
|
+
t.libs =
|
|
141
|
+
if !ENV['LIB'].nil?
|
|
142
|
+
[ENV['LIB']]
|
|
143
|
+
else
|
|
144
|
+
['lib']
|
|
145
|
+
end
|
|
128
146
|
#t.ruby_opts = ['-I', 'lib']
|
|
129
147
|
t.spec_files = FileList['specs/**/*_spec.rb']
|
|
130
148
|
end
|
|
@@ -135,7 +153,13 @@ Spec::Rake::SpecTask.new('specl') do |t|
|
|
|
135
153
|
Rake::Task[:ensure_dependencies].invoke
|
|
136
154
|
Rake::Task[:ensure_large_testfiles].invoke
|
|
137
155
|
t.spec_files = FileList['specs/**/*_spec.rb']
|
|
138
|
-
t.libs =
|
|
156
|
+
t.libs =
|
|
157
|
+
if !ENV['LIB'].nil?
|
|
158
|
+
[ENV['LIB']]
|
|
159
|
+
else
|
|
160
|
+
['lib']
|
|
161
|
+
end
|
|
162
|
+
#t.libs = ['lib']
|
|
139
163
|
#t.ruby_opts = ['-I', 'lib']
|
|
140
164
|
t.spec_opts = ['--format', 'specdoc' ]
|
|
141
165
|
end
|
|
@@ -147,7 +171,12 @@ Spec::Rake::SpecTask.new('rcov') do |t|
|
|
|
147
171
|
Rake::Task[:ensure_large_testfiles].invoke
|
|
148
172
|
t.spec_files = FileList['specs/**/*_spec.rb']
|
|
149
173
|
t.rcov = true
|
|
150
|
-
t.libs =
|
|
174
|
+
t.libs =
|
|
175
|
+
if !ENV['LIB'].nil?
|
|
176
|
+
[ENV['LIB']]
|
|
177
|
+
else
|
|
178
|
+
['lib']
|
|
179
|
+
end
|
|
151
180
|
#t.ruby_opts = ['-I', 'lib']
|
|
152
181
|
t.rcov_opts = ['--exclude', 'specs']
|
|
153
182
|
end
|
|
@@ -160,9 +189,15 @@ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_
|
|
|
160
189
|
file.include?(ENV['SPECM'])
|
|
161
190
|
end
|
|
162
191
|
end
|
|
192
|
+
lib =
|
|
193
|
+
if !ENV['LIB'].nil?
|
|
194
|
+
ENV['LIB']
|
|
195
|
+
else
|
|
196
|
+
'lib'
|
|
197
|
+
end
|
|
163
198
|
files_to_run.each do |spc|
|
|
164
199
|
puts "------ SPEC=#{spc} ------"
|
|
165
|
-
system "ruby -I lib -S spec #{spc} --format specdoc"
|
|
200
|
+
system "ruby -I #{lib} -S spec #{spc} --format specdoc"
|
|
166
201
|
end
|
|
167
202
|
end
|
|
168
203
|
|
|
@@ -238,8 +273,9 @@ spec = Gem::Specification.new do |s|
|
|
|
238
273
|
s.rdoc_options = rdoc_options
|
|
239
274
|
s.extra_rdoc_files = rdoc_extra_includes
|
|
240
275
|
s.executables = FL["bin/*"].map {|file| File.basename(file) }
|
|
241
|
-
s.add_dependency('libjtp', '~> 0.2.
|
|
276
|
+
s.add_dependency('libjtp', '~> 0.2.14')
|
|
242
277
|
s.add_dependency('axml', '~> 0.0.0')
|
|
278
|
+
s.add_dependency('arrayclass', '~> 0.1.0')
|
|
243
279
|
s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
|
|
244
280
|
s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
|
|
245
281
|
s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
|
data/bin/aafreqs.rb
CHANGED
|
File without changes
|
data/bin/bioworks2excel.rb
CHANGED
|
File without changes
|
data/bin/bioworks_to_pepxml.rb
CHANGED
|
@@ -12,7 +12,8 @@ DEFAULT_MS_MODEL = 'LCQ'
|
|
|
12
12
|
DEFAULT_MASS_ANALYZER = 'Ion Trap'
|
|
13
13
|
##############################################################
|
|
14
14
|
|
|
15
|
-
require 'spec_id/sequest/pepxml'
|
|
15
|
+
#require 'spec_id/sequest/pepxml' # dies if this guy is called (why???)
|
|
16
|
+
require 'spec_id/proph/pep_summary' # <- he requests the above...hmmm
|
|
16
17
|
require 'spec_id'
|
|
17
18
|
require 'optparse'
|
|
18
19
|
require 'ostruct'
|
|
File without changes
|
data/bin/fasta_shaker.rb
CHANGED
|
File without changes
|
data/bin/filter_and_validate.rb
CHANGED
|
File without changes
|
data/bin/gi2annot.rb
CHANGED
|
File without changes
|
data/bin/id_class_anal.rb
CHANGED
|
File without changes
|
data/bin/id_precision.rb
CHANGED
|
File without changes
|
data/bin/ms_to_lmat.rb
CHANGED
|
File without changes
|
data/bin/pepproph_filter.rb
CHANGED
|
File without changes
|
data/bin/protein_summary.rb
CHANGED
|
File without changes
|
data/bin/protxml2prots_peps.rb
CHANGED
|
File without changes
|
data/bin/raw_to_mzXML.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/ruby -w
|
|
2
2
|
|
|
3
3
|
require 'optparse'
|
|
4
|
-
require '
|
|
4
|
+
require 'ms/converter/mzxml'
|
|
5
5
|
require 'fileutils'
|
|
6
6
|
|
|
7
7
|
progname = File.basename(__FILE__)
|
|
@@ -21,11 +21,11 @@ if ARGV.size == 0
|
|
|
21
21
|
exit
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
converter = MS::MzXML.find_mzxml_converter
|
|
24
|
+
converter = MS::Converter::MzXML.find_mzxml_converter
|
|
25
25
|
if converter
|
|
26
26
|
$stderr.puts "using #{converter} to convert files"
|
|
27
27
|
else
|
|
28
|
-
puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
|
|
28
|
+
puts "cannot find [#{MS::Converter::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
|
|
29
29
|
puts ENV['PATH'].split(/[:;]/).join(", ")
|
|
30
30
|
abort
|
|
31
31
|
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
|
|
3
|
+
perc_cmd = 'percolator'
|
|
4
|
+
|
|
5
|
+
require 'optparse'
|
|
6
|
+
require 'spec_id/srf'
|
|
7
|
+
|
|
8
|
+
# percolator_v1.02_32bit_linux -o reverse_meta.sqm normal_NOCYS/meta.sqm reverse_NOCYS/meta.sqm
|
|
9
|
+
# percolator_v1.02_32bit_linux -o reverse_cat_meta.sqm -P INV_ reverse_cat_NOCYS/meta.sqm &
|
|
10
|
+
|
|
11
|
+
file_hash = {
|
|
12
|
+
:srg => "bioworks.srg",
|
|
13
|
+
:sqg_in => "bioworks.sqg",
|
|
14
|
+
:sqg_decoy => "decoy.sqg",
|
|
15
|
+
:perc_out => "perc.sqg",
|
|
16
|
+
:perc_stdout => "perc.stdout",
|
|
17
|
+
:perc_stderr => "perc.stderr",
|
|
18
|
+
:perc_ext => ".psqt",
|
|
19
|
+
}
|
|
20
|
+
(default_srg, sqg_in, perc_out, sqg_decoy, perc_stdout, perc_stderr, perc_ext) = file_hash.values_at(:srg, :sqg_in, :perc_out, :sqg_decoy, :perc_stdout, :perc_stderr, :perc_ext)
|
|
21
|
+
|
|
22
|
+
opt = {}
|
|
23
|
+
toclean = []
|
|
24
|
+
opts = OptionParser.new do |op|
|
|
25
|
+
op.banner = "usage: #{File.basename(__FILE__)} -d PATTERN <file>.srf ..."
|
|
26
|
+
op.separator " #{File.basename(__FILE__)} -d PATTERN <file>.srg"
|
|
27
|
+
op.separator " #{File.basename(__FILE__)} <normal>.srg <decoy>.srg"
|
|
28
|
+
op.separator ""
|
|
29
|
+
op.separator " creates necessary meta files in current working directory and"
|
|
30
|
+
op.separator " runs command '#{perc_cmd}'"
|
|
31
|
+
op.separator ""
|
|
32
|
+
op.separator " (all in current working directory)"
|
|
33
|
+
op.separator " 1) (if given .srf files) creates file: #{default_srg}"
|
|
34
|
+
op.separator " 2) creates .sqt file for each srf file (placed in dir with srf file)"
|
|
35
|
+
op.separator " 3) creates percolator (meta) input file(s): #{sqg_in}"
|
|
36
|
+
op.separator " [and for separate searches: #{sqg_decoy}]"
|
|
37
|
+
op.separator " 4) creates a percolator (meta) output file: #{perc_out}"
|
|
38
|
+
op.separator " 5) runs percolator which creates a a #{perc_ext} for each .srf file"
|
|
39
|
+
op.separator " 6) captures stdout in #{perc_stdout} and stderr in #{perc_stderr}"
|
|
40
|
+
op.separator ""
|
|
41
|
+
op.separator " .srg files are text files with full paths to .srf files"
|
|
42
|
+
op.separator " create with command 'srf_group.rb'"
|
|
43
|
+
op.separator ""
|
|
44
|
+
op.on("-d", "--decoy <pattern>", "decoy pattern, eg.: -d REVERSE_") {|v| opt[:decoy] = v }
|
|
45
|
+
op.on("-c", "--clean", "removes ALL generated files except #{perc_ext}") {|v| opt[:clean] = v }
|
|
46
|
+
op.on("-v", "--verbose", "spits out info") {|v| $VERBOSE = v }
|
|
47
|
+
end
|
|
48
|
+
opts.parse!
|
|
49
|
+
|
|
50
|
+
if ARGV.size == 0 or (!opt[:decoy] && (ARGV.size != 2))
|
|
51
|
+
puts opts.to_s
|
|
52
|
+
exit
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
#raise RunTimeError, "command #{perc_cmd} must be callable!" unless `#{perc_cmd}`.match(/Usage/)
|
|
56
|
+
|
|
57
|
+
files = ARGV.to_a
|
|
58
|
+
|
|
59
|
+
# create srg file:
|
|
60
|
+
srg_files =
|
|
61
|
+
if files[0] =~ /\.srf$/i
|
|
62
|
+
obj = SRFGroup.new
|
|
63
|
+
obj.filenames = files.to_a
|
|
64
|
+
puts("CREATING: #{default_srg}") if $VERBOSE
|
|
65
|
+
obj.to_srg(default_srg)
|
|
66
|
+
toclean << default_srg
|
|
67
|
+
[default_srg]
|
|
68
|
+
elsif files[0] =~ /\.srg$/i
|
|
69
|
+
files
|
|
70
|
+
else
|
|
71
|
+
abort "files must have proper extensions"
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# create the sqt files:
|
|
75
|
+
all_sqt_filenames = srg_files.map do |srg_file|
|
|
76
|
+
srf_filenames = SRFGroup.srg_to_paths(srg_file)
|
|
77
|
+
srf_filenames.map do |file|
|
|
78
|
+
new_filename = file.sub(/\.srf$/i, '.sqt')
|
|
79
|
+
puts("CREATING: #{new_filename}") if $VERBOSE
|
|
80
|
+
SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename)
|
|
81
|
+
toclean << new_filename
|
|
82
|
+
new_filename
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# create the percolator input file:
|
|
87
|
+
all_sqt_filenames.zip(file_hash.values_at(:sqg_in, :sqg_decoy)) do |sqt_filenames,filename|
|
|
88
|
+
puts("CREATING: #{filename}") if $VERBOSE
|
|
89
|
+
File.open(filename, 'w') {|fh| fh.puts(sqt_filenames.join("\n")) }
|
|
90
|
+
toclean << filename
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# create the percolator output file:
|
|
94
|
+
psqt_filenames = all_sqt_filenames[0].map do |file|
|
|
95
|
+
file.sub(/\.sqt$/, perc_ext)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
puts("CREATING: #{perc_out}") if $VERBOSE
|
|
99
|
+
File.open(perc_out, 'w') {|fh| fh.puts(psqt_filenames.join("\n")) }
|
|
100
|
+
toclean << perc_out
|
|
101
|
+
|
|
102
|
+
# run percolator
|
|
103
|
+
to_run =
|
|
104
|
+
if opt[:decoy]
|
|
105
|
+
"#{perc_cmd} -o #{perc_out} -P #{opt[:decoy]} #{sqg_in} 1>#{perc_stdout} 2>#{perc_stderr}"
|
|
106
|
+
else
|
|
107
|
+
"#{perc_cmd} -o #{perc_out} #{sqg_in} #{sqg_decoy} 1>#{perc_stdout} 2>#{perc_stderr}"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
puts("RUNNING: #{to_run}") if $VERBOSE
|
|
111
|
+
`#{to_run}`
|
|
112
|
+
|
|
113
|
+
toclean << perc_stdout
|
|
114
|
+
toclean << perc_stderr
|
|
115
|
+
|
|
116
|
+
if opt[:clean]
|
|
117
|
+
toclean.each do |file|
|
|
118
|
+
puts("REMOVING: #{file}") if $VERBOSE
|
|
119
|
+
File.unlink(file) if File.exist?(file)
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
data/bin/sqt_group.rb
CHANGED
|
File without changes
|
data/bin/srf_group.rb
CHANGED
|
File without changes
|
data/changelog.txt
CHANGED
|
@@ -162,3 +162,32 @@ sample_enzyme)
|
|
|
162
162
|
## version 0.3.9
|
|
163
163
|
|
|
164
164
|
1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
|
|
165
|
+
|
|
166
|
+
## version 0.3.10
|
|
167
|
+
|
|
168
|
+
1. added run_percolator.rb script which makes running multiple files easy
|
|
169
|
+
|
|
170
|
+
## version 0.3.11
|
|
171
|
+
|
|
172
|
+
1. faster sensing of bad scan tags in mzXML v. 2.0 files
|
|
173
|
+
2. implemented lazy evaluation of spectrum in 2 different ways allowing much
|
|
174
|
+
larger files to be parsed
|
|
175
|
+
|
|
176
|
+
## version 0.4.0
|
|
177
|
+
|
|
178
|
+
1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
|
|
179
|
+
2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
|
|
180
|
+
3. lazy eval working on mzData
|
|
181
|
+
4. mzData not necessarily guaranteed to have precursor intensities on lazy
|
|
182
|
+
eval methods (however, the method intensity_at_mz will still work (causing
|
|
183
|
+
evaluation))
|
|
184
|
+
|
|
185
|
+
## version 0.4.1
|
|
186
|
+
|
|
187
|
+
1. added support for reading mzXML version 3.0 (may fail in some cases)
|
|
188
|
+
|
|
189
|
+
## version 0.4.2
|
|
190
|
+
|
|
191
|
+
1. added MS::MSRun.open method
|
|
192
|
+
2. added method to write dta files from SRF
|
|
193
|
+
|
data/lib/ms/gradient_program.rb
CHANGED
data/lib/ms/msrun.rb
CHANGED
|
@@ -9,6 +9,8 @@ require 'ms/converter/mzxml'
|
|
|
9
9
|
|
|
10
10
|
module MS; end
|
|
11
11
|
class MS::MSRun
|
|
12
|
+
|
|
13
|
+
MSRunDefaultOpts = { :lazy => :string }
|
|
12
14
|
|
|
13
15
|
attr_accessor :start_time, :end_time
|
|
14
16
|
attr_accessor :scans
|
|
@@ -19,24 +21,56 @@ class MS::MSRun
|
|
|
19
21
|
# the total number of scans
|
|
20
22
|
attr_writer :scan_count
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
24
|
+
|
|
25
|
+
#### # [note: precursor intensities not guaranteed to exist unless :
|
|
26
|
+
# TODO: may need to eliminate unavailable precursor intensities if they
|
|
27
|
+
# doing lazy evaluation?? or it becomes lazy too??
|
|
28
|
+
|
|
26
29
|
# OPTIONS:
|
|
27
|
-
# :
|
|
28
|
-
#
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
30
|
+
# :lazy => :string | :not | :no_spectra | :io
|
|
31
|
+
# :string = (default) stores each spectrum as a base64 decoded
|
|
32
|
+
# string that is further processed into Arrays of Floats when m/z
|
|
33
|
+
# or intensity information is accessed. This lazy evaluation
|
|
34
|
+
# should work on most files.
|
|
35
|
+
# :not = all information is read into memory and parsed into
|
|
36
|
+
# objects. Should only be used for small-medium files (< 80MB on
|
|
37
|
+
# a machine with 2GB memory)
|
|
38
|
+
# :no_spectra = if no peak information is required use this to
|
|
39
|
+
# avoid the overhead of parsing and creating spectra.
|
|
40
|
+
# :io = stores the io object and indices into spectrum data.
|
|
41
|
+
# When spectral information is requested (m/z or intensity
|
|
42
|
+
# information) then the spectrum is read from the io object and
|
|
43
|
+
# evaluated (requires an open io object when spectrum information
|
|
44
|
+
# is requested)
|
|
45
|
+
def initialize(file_or_io=nil, opts={})
|
|
46
|
+
if opts[:lazy] == :io
|
|
47
|
+
if !file_or_io.is_a?(IO)
|
|
48
|
+
raise ArgumentError, "Caller must provide an IO object (rather than filename) if using {:lazy => :io}"
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
myopts = MSRunDefaultOpts.merge(opts)
|
|
52
|
+
myopts[:msrun] = self
|
|
53
|
+
if file_or_io
|
|
54
|
+
filetype_and_version = MS::Parser.filetype_and_version(file_or_io)
|
|
55
|
+
parser = MS::Parser.new(filetype_and_version, :msrun, myopts)
|
|
56
|
+
parser.parse(file_or_io, myopts)
|
|
35
57
|
#MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
|
|
36
58
|
(@filetype, @version) = filetype_and_version
|
|
37
59
|
end
|
|
38
60
|
end
|
|
39
61
|
|
|
62
|
+
# This will automatically use :lazy => :io, open the file, and close it
|
|
63
|
+
# after the block returns.
|
|
64
|
+
# MS::MSRun.open("file.mzXML") do |ms|
|
|
65
|
+
# ms.scans.each {|scan| ... do something }
|
|
66
|
+
# end
|
|
67
|
+
def self.open(filename, opts={})
|
|
68
|
+
File.open(filename) do |fh|
|
|
69
|
+
ms = MS::MSRun.new(fh, {:lazy => :io}.merge(opts))
|
|
70
|
+
yield(ms)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
40
74
|
# returns an array indexed by ms_level whose values are the number of scans at that level: [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
|
|
41
75
|
# ...
|
|
42
76
|
def scan_counts
|
|
@@ -95,9 +129,9 @@ class MS::MSRun
|
|
|
95
129
|
lo_mz = sc.start_mz
|
|
96
130
|
hi_mz = sc.end_mz
|
|
97
131
|
else
|
|
98
|
-
|
|
99
|
-
hi_mz =
|
|
100
|
-
lo_mz =
|
|
132
|
+
mz_ar = sc.spectrum.mzs
|
|
133
|
+
hi_mz = mz_ar.last
|
|
134
|
+
lo_mz = mz_ar.first
|
|
101
135
|
end
|
|
102
136
|
break
|
|
103
137
|
end
|
|
@@ -117,12 +151,12 @@ class MS::MSRun
|
|
|
117
151
|
# didn't have the attributes (find by brute force)
|
|
118
152
|
scans.each do |sc|
|
|
119
153
|
if sc.ms_level == mslevel
|
|
120
|
-
|
|
121
|
-
if
|
|
122
|
-
hi_mz =
|
|
154
|
+
mz_ar = sc.spectrum.mzs
|
|
155
|
+
if mz_ar.last > hi_mz
|
|
156
|
+
hi_mz = mz_ar.last
|
|
123
157
|
end
|
|
124
|
-
if
|
|
125
|
-
lo_mz =
|
|
158
|
+
if mz_ar.last < lo_mz
|
|
159
|
+
lo_mz = mz_ar.last
|
|
126
160
|
end
|
|
127
161
|
end
|
|
128
162
|
end
|
|
@@ -135,7 +169,7 @@ class MS::MSRun
|
|
|
135
169
|
def precursor_mz_by_scan_num
|
|
136
170
|
ar = Array.new(@scans.size + 1)
|
|
137
171
|
@scans.each do |scan|
|
|
138
|
-
if prec = scan.
|
|
172
|
+
if prec = scan.precursor
|
|
139
173
|
ar[scan.num] = prec.mz
|
|
140
174
|
else
|
|
141
175
|
ar[scan.num] = nil
|
|
@@ -170,7 +204,7 @@ class MS::MSRun
|
|
|
170
204
|
# same as the instance method (creates an object without spectrum and calls
|
|
171
205
|
# instance method of the same name)
|
|
172
206
|
def self.precursor_mz_by_scan_num(file)
|
|
173
|
-
self.new(file, :
|
|
207
|
+
self.new(file, :lazy => :no_spectra, :fix_bad_tags => true).precursor_mz_by_scan_num
|
|
174
208
|
end
|
|
175
209
|
|
|
176
210
|
# only adds the parent if one is not already present!
|
|
@@ -190,13 +224,12 @@ class MS::MSRun
|
|
|
190
224
|
(prev_level - level).times do parent_stack.shift end
|
|
191
225
|
end
|
|
192
226
|
if scan.ms_level > 1
|
|
193
|
-
scan.
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
end
|
|
227
|
+
precursor = scan.precursor
|
|
228
|
+
#precursor.parent = parent_stack.first # that's the next line's
|
|
229
|
+
precursor[2] = parent_stack.first unless precursor[2]
|
|
230
|
+
#precursor.intensity
|
|
231
|
+
if add_intensities
|
|
232
|
+
precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
|
|
200
233
|
end
|
|
201
234
|
end
|
|
202
235
|
prev_level = level
|