mspire 0.3.9 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +24 -7
- data/README +15 -13
- data/README.rdoc +18 -0
- data/Rakefile +50 -14
- data/bin/aafreqs.rb +0 -0
- data/bin/bioworks2excel.rb +0 -0
- data/bin/bioworks_to_pepxml.rb +2 -1
- data/bin/bioworks_to_pepxml_gui.rb +0 -0
- data/bin/fasta_shaker.rb +0 -0
- data/bin/filter_and_validate.rb +0 -0
- data/bin/gi2annot.rb +0 -0
- data/bin/id_class_anal.rb +0 -0
- data/bin/id_precision.rb +0 -0
- data/bin/ms_to_lmat.rb +0 -0
- data/bin/pepproph_filter.rb +0 -0
- data/bin/protein_summary.rb +0 -0
- data/bin/protxml2prots_peps.rb +0 -0
- data/bin/raw_to_mzXML.rb +3 -3
- data/bin/run_percolator.rb +122 -0
- data/bin/sqt_group.rb +0 -0
- data/bin/srf_group.rb +0 -0
- data/changelog.txt +29 -0
- data/lib/ms/gradient_program.rb +0 -1
- data/lib/ms/msrun.rb +62 -29
- data/lib/ms/parser/mzdata/axml.rb +55 -0
- data/lib/ms/parser/mzdata/dom.rb +51 -36
- data/lib/ms/parser/mzdata.rb +8 -2
- data/lib/ms/parser/mzxml/axml.rb +59 -0
- data/lib/ms/parser/mzxml/dom.rb +80 -57
- data/lib/ms/parser/mzxml/hpricot.rb +1 -1
- data/lib/ms/parser/mzxml/libxml.rb +6 -2
- data/lib/ms/parser/mzxml.rb +110 -3
- data/lib/ms/parser.rb +4 -4
- data/lib/ms/precursor.rb +19 -4
- data/lib/ms/scan.rb +7 -7
- data/lib/ms/spectrum.rb +249 -58
- data/lib/mspire.rb +1 -1
- data/lib/spec_id/bioworks.rb +2 -2
- data/lib/spec_id/precision/filter/cmdline.rb +8 -1
- data/lib/spec_id/precision/prob/cmdline.rb +2 -2
- data/lib/spec_id/precision/prob.rb +1 -0
- data/lib/spec_id/proph/pep_summary.rb +3 -4
- data/lib/spec_id/proph/prot_summary.rb +3 -3
- data/lib/spec_id/protein_summary.rb +1 -1
- data/lib/spec_id/sequest/pepxml.rb +5 -5
- data/lib/spec_id/sqt.rb +4 -4
- data/lib/spec_id/srf.rb +49 -8
- data/lib/spec_id.rb +5 -0
- data/lib/xml_style_parser.rb +16 -2
- data/script/compile_and_plot_smriti_final.rb +0 -0
- data/script/create_little_pepxml.rb +0 -0
- data/script/degenerate_peptides.rb +0 -0
- data/script/estimate_fpr_by_cysteine.rb +0 -0
- data/script/extract_gradient_programs.rb +1 -1
- data/script/find_cysteine_background.rb +0 -0
- data/script/genuine_tps_and_probs.rb +0 -0
- data/script/get_apex_values_rexml.rb +0 -0
- data/script/mascot_fix_pepxml.rb +123 -0
- data/script/msvis.rb +0 -0
- data/script/mzXML2timeIndex.rb +0 -0
- data/script/peps_per_bin.rb +0 -0
- data/script/prep_dir.rb +0 -0
- data/script/simple_protein_digestion.rb +0 -0
- data/script/smriti_final_analysis.rb +0 -0
- data/script/sqt_to_meta.rb +0 -0
- data/script/top_hit_per_scan.rb +0 -0
- data/script/toppred_to_yaml.rb +0 -0
- data/script/tpp_installer.rb +0 -0
- data/specs/bin/prob_validate_spec.rb +5 -2
- data/specs/bin/protein_summary_spec.rb +5 -1
- data/specs/ms/msrun_spec.rb +176 -133
- data/specs/ms/parser_spec.rb +3 -3
- data/specs/ms/spectrum_spec.rb +0 -2
- data/specs/spec_id/precision/filter_spec.rb +4 -1
- data/specs/spec_id/precision/prob_spec.rb +2 -2
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
- data/specs/spec_id/sqt_spec.rb +5 -5
- data/specs/spec_id/srf_spec.rb +56 -93
- data/specs/spec_id/srf_spec_helper.rb +121 -284
- data/specs/spec_id_spec.rb +3 -0
- data/specs/transmem/toppred_spec.rb +1 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
- metadata +247 -229
data/INSTALL
CHANGED
@@ -4,24 +4,35 @@ Prerequisites
|
|
4
4
|
|
5
5
|
Much of the package will work without any prerequisites at all. Some functionality may require additional ruby packages or other converters. These are listed in current order of importance:
|
6
6
|
|
7
|
+
* libjtp - generic library installed automatically if you install mspire with rubygems (or 'gem install libjtp')
|
7
8
|
* [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
|
8
|
-
* [
|
9
|
-
* ['t2x'](
|
10
|
-
|
9
|
+
* [axml](http://axml.rubyforge.org/) dom wrapper for xmlparser. ('gem install axml')
|
10
|
+
* ['t2x'](archive/t2x) linux executable to convert .RAW files (Xcalibur 1.x) to version 1 mzXML files
|
11
|
+
|
12
|
+
Optional:
|
13
|
+
* [libxml](http://libxml.rubyforge.org/) can be used instead of xmlparser. In Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
|
14
|
+
* [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). For some plotting. Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
|
11
15
|
|
12
16
|
Installation
|
13
17
|
------------
|
14
18
|
|
15
19
|
gem install mspire
|
16
20
|
|
17
|
-
|
18
|
-
|
19
|
-
gem install -t mspire (to run tests)
|
21
|
+
See [installation under cygwin](cygwin.html) if you're on Windows.
|
20
22
|
|
21
23
|
Development
|
22
24
|
-----------
|
23
25
|
|
24
|
-
|
26
|
+
anonymous svn checkout:
|
27
|
+
|
28
|
+
svn checkout svn://rubyforge.org/var/svn/mspire
|
29
|
+
|
30
|
+
prerequisites:
|
31
|
+
|
32
|
+
* [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
|
33
|
+
* [rspec](http://rspec.info/) (with rubygems: 'gem install rspec --remote')
|
34
|
+
|
35
|
+
Use rake:
|
25
36
|
|
26
37
|
% rake -T
|
27
38
|
rake clean # Remove any temporary products.
|
@@ -32,3 +43,9 @@ prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --r
|
|
32
43
|
rake upload_docs # create and upload docs to server
|
33
44
|
...etc...
|
34
45
|
|
46
|
+
run tests: rake spec
|
47
|
+
(or): rake specl
|
48
|
+
|
49
|
+
run tests with large files: rake spec SPEC_LARGE=t
|
50
|
+
|
51
|
+
run test on one file: rake spec SPEC=specs/{path_to_spec_file}
|
data/README
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
mspire
|
2
|
-
======
|
3
1
|
|
4
2
|
mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
|
5
3
|
working with MS proteomics data in ruby. It seeks to provide support for open
|
@@ -12,7 +10,7 @@ Current Focus
|
|
12
10
|
|
13
11
|
The project is currently focusing on the following:
|
14
12
|
|
15
|
-
* SEQUEST data (particularly the output of Bioworks 3.2)
|
13
|
+
* SEQUEST data (particularly the output of Bioworks 3.2-3.3)
|
16
14
|
* mzXML
|
17
15
|
* mzData
|
18
16
|
* ProteinProphet
|
@@ -21,15 +19,16 @@ The project is currently focusing on the following:
|
|
21
19
|
Features
|
22
20
|
--------
|
23
21
|
|
24
|
-
* mzXML (version 1
|
22
|
+
* mzXML (version 1, 2, and 3) parsing
|
25
23
|
* mzData parsing
|
26
24
|
* bioworks .srf (binary files) reader
|
25
|
+
* read/write .sqt files
|
27
26
|
* bioworks to PeptideProphet input (pepXML files)
|
28
27
|
* lightweight APEX values parser
|
29
28
|
* histogram protein probabilities
|
30
|
-
* developed for Linux, should port easily to Windows or others
|
31
29
|
* protein summary views with custom false ID cutoff values
|
32
30
|
* conversion to OBI-Warp input files
|
31
|
+
* portable: works across platforms
|
33
32
|
|
34
33
|
Validation by:
|
35
34
|
* Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
|
@@ -38,10 +37,12 @@ Validation by:
|
|
38
37
|
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
39
38
|
* Defined sample
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
Spectra and Spectra Identification
|
41
|
+
----------------------------------
|
42
|
+
|
43
|
+
The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
|
44
|
+
|
45
|
+
The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
|
45
46
|
|
46
47
|
Tutorials
|
47
48
|
---------
|
@@ -54,12 +55,13 @@ Warning
|
|
54
55
|
-------
|
55
56
|
|
56
57
|
This is an experimental package. As such, all versions prior to version 1.0
|
57
|
-
|
58
|
-
|
59
|
-
|
58
|
+
may contain interface changes on minor revisions (major.minor.build) (e.g.,
|
59
|
+
0.4.0 may contain interface changes from 0.3.9). Beyond version 1.0, the
|
60
|
+
versioning scheme will be strictly adhered to (no interface changes except on
|
61
|
+
major revisions).
|
60
62
|
|
61
63
|
Installation
|
62
64
|
------------
|
63
65
|
|
64
|
-
see [Install](install.html)
|
66
|
+
see [Install](install/index.html)
|
65
67
|
|
data/README.rdoc
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
= mspire
|
3
|
+
|
4
|
+
mass spectrometry proteomics in ruby
|
5
|
+
|
6
|
+
Please refer to the latest Documentation[http://mspire.rubyforge.org]
|
7
|
+
|
8
|
+
Please see Installation[http://mspire.rubyforge.org/install/index.html]
|
9
|
+
|
10
|
+
== Data Models and Examples
|
11
|
+
|
12
|
+
Object models and usage examples are online:
|
13
|
+
|
14
|
+
[MS::MSRun] http://mspire.rubyforge.org/ms/msrun.html
|
15
|
+
[SpecID] http://mspire.rubyforge.org/spec_id/spec_id.html
|
16
|
+
[SRF] http://mspire.rubyforge.org/spec_id/srf.html
|
17
|
+
[False Identification Rate Determination] http://mspire.rubyforge.org/spec_id/fir/index.html
|
18
|
+
[OBI-Warp] http://mspire.rubyforge.org/ms/obiwarp.html
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ $dependencies = %w(libjtp)
|
|
17
17
|
$tfiles_large = 'test_files_large'
|
18
18
|
changelog = "changelog.txt"
|
19
19
|
|
20
|
-
core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
|
20
|
+
core_files = FL["INSTALL", "README", "README.rdoc", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
|
21
21
|
big_dist_files = core_files + FL["test_files_large/**/*"]
|
22
22
|
|
23
23
|
dist_files = core_files
|
@@ -43,7 +43,7 @@ def move_and_add_webgen_header(file, newfile, src_dir, heading)
|
|
43
43
|
string = IO.read file
|
44
44
|
with_header = heading + string
|
45
45
|
File.open(newfile, 'w') {|v| v.print with_header }
|
46
|
-
FileUtils.mv newfile, src_dir
|
46
|
+
FileUtils.mv newfile, src_dir, :force => true
|
47
47
|
end
|
48
48
|
|
49
49
|
desc "copy top level files into doc/src"
|
@@ -55,27 +55,40 @@ directoryName: mspire
|
|
55
55
|
---\n"
|
56
56
|
src = "doc/src"
|
57
57
|
move_and_add_webgen_header('README', 'index.page', src, string.sub('TITLE', 'Home'))
|
58
|
-
move_and_add_webgen_header('INSTALL', '
|
58
|
+
move_and_add_webgen_header('INSTALL', 'index.page', src + '/install', string.sub('TITLE', 'Install').sub('mspire', 'Install').sub("inMenu: true\n", ''))
|
59
59
|
end
|
60
60
|
|
61
|
-
desc "
|
62
|
-
task :upload_docs
|
61
|
+
desc "upload docs (doc/output) to server"
|
62
|
+
task :upload_docs do
|
63
63
|
sh "scp -i ~/.ssh/rubyforge_key -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
|
64
64
|
end
|
65
65
|
|
66
|
+
# best to use webgen 0.3.8 right now
|
67
|
+
# to get working (may not require all these steps):
|
68
|
+
# gem install RedCloth
|
69
|
+
# gem install BlueCloth
|
70
|
+
# soft link the bluecloth binary into path
|
66
71
|
desc "creates docs in doc/html"
|
67
72
|
task :html_docs => [:cp_top_level_docs] do
|
68
73
|
FileUtils.cd 'doc' do
|
69
74
|
sh "webgen"
|
70
75
|
end
|
76
|
+
FileUtils.cp 'doc/src/archive/t2x', 'doc/output/archive/t2x'
|
71
77
|
end
|
72
78
|
|
73
|
-
|
74
|
-
|
79
|
+
desc "does html_docs and rdoc and puts rdoc inside html_docs"
|
80
|
+
task :all_docs => [:html_docs, :rdoc] do
|
81
|
+
FileUtils.mv 'html', 'doc/output/rdoc'
|
82
|
+
end
|
83
|
+
|
84
|
+
#rdoc_options = ['--main', 'README', '--title', NAME]
|
85
|
+
rdoc_options = ['--main', 'README.rdoc', '--title', NAME]
|
86
|
+
#rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
|
87
|
+
rdoc_extra_includes = ['README.rdoc']
|
75
88
|
|
76
89
|
Rake::RDocTask.new do |rd|
|
77
|
-
rd.main = "README"
|
78
|
-
rd.rdoc_files.include rdoc_extra_includes
|
90
|
+
rd.main = "README.rdoc"
|
91
|
+
rd.rdoc_files.include("lib/**/*.rb", *rdoc_extra_includes )
|
79
92
|
rd.options.push( *rdoc_options )
|
80
93
|
end
|
81
94
|
|
@@ -124,7 +137,12 @@ Spec::Rake::SpecTask.new('spec') do |t|
|
|
124
137
|
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
125
138
|
Rake::Task[:ensure_dependencies].invoke
|
126
139
|
Rake::Task[:ensure_large_testfiles].invoke
|
127
|
-
t.libs =
|
140
|
+
t.libs =
|
141
|
+
if !ENV['LIB'].nil?
|
142
|
+
[ENV['LIB']]
|
143
|
+
else
|
144
|
+
['lib']
|
145
|
+
end
|
128
146
|
#t.ruby_opts = ['-I', 'lib']
|
129
147
|
t.spec_files = FileList['specs/**/*_spec.rb']
|
130
148
|
end
|
@@ -135,7 +153,13 @@ Spec::Rake::SpecTask.new('specl') do |t|
|
|
135
153
|
Rake::Task[:ensure_dependencies].invoke
|
136
154
|
Rake::Task[:ensure_large_testfiles].invoke
|
137
155
|
t.spec_files = FileList['specs/**/*_spec.rb']
|
138
|
-
t.libs =
|
156
|
+
t.libs =
|
157
|
+
if !ENV['LIB'].nil?
|
158
|
+
[ENV['LIB']]
|
159
|
+
else
|
160
|
+
['lib']
|
161
|
+
end
|
162
|
+
#t.libs = ['lib']
|
139
163
|
#t.ruby_opts = ['-I', 'lib']
|
140
164
|
t.spec_opts = ['--format', 'specdoc' ]
|
141
165
|
end
|
@@ -147,7 +171,12 @@ Spec::Rake::SpecTask.new('rcov') do |t|
|
|
147
171
|
Rake::Task[:ensure_large_testfiles].invoke
|
148
172
|
t.spec_files = FileList['specs/**/*_spec.rb']
|
149
173
|
t.rcov = true
|
150
|
-
t.libs =
|
174
|
+
t.libs =
|
175
|
+
if !ENV['LIB'].nil?
|
176
|
+
[ENV['LIB']]
|
177
|
+
else
|
178
|
+
['lib']
|
179
|
+
end
|
151
180
|
#t.ruby_opts = ['-I', 'lib']
|
152
181
|
t.rcov_opts = ['--exclude', 'specs']
|
153
182
|
end
|
@@ -160,9 +189,15 @@ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_
|
|
160
189
|
file.include?(ENV['SPECM'])
|
161
190
|
end
|
162
191
|
end
|
192
|
+
lib =
|
193
|
+
if !ENV['LIB'].nil?
|
194
|
+
ENV['LIB']
|
195
|
+
else
|
196
|
+
'lib'
|
197
|
+
end
|
163
198
|
files_to_run.each do |spc|
|
164
199
|
puts "------ SPEC=#{spc} ------"
|
165
|
-
system "ruby -I lib -S spec #{spc} --format specdoc"
|
200
|
+
system "ruby -I #{lib} -S spec #{spc} --format specdoc"
|
166
201
|
end
|
167
202
|
end
|
168
203
|
|
@@ -238,8 +273,9 @@ spec = Gem::Specification.new do |s|
|
|
238
273
|
s.rdoc_options = rdoc_options
|
239
274
|
s.extra_rdoc_files = rdoc_extra_includes
|
240
275
|
s.executables = FL["bin/*"].map {|file| File.basename(file) }
|
241
|
-
s.add_dependency('libjtp', '~> 0.2.
|
276
|
+
s.add_dependency('libjtp', '~> 0.2.14')
|
242
277
|
s.add_dependency('axml', '~> 0.0.0')
|
278
|
+
s.add_dependency('arrayclass', '~> 0.1.0')
|
243
279
|
s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
|
244
280
|
s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
|
245
281
|
s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
|
data/bin/aafreqs.rb
CHANGED
File without changes
|
data/bin/bioworks2excel.rb
CHANGED
File without changes
|
data/bin/bioworks_to_pepxml.rb
CHANGED
@@ -12,7 +12,8 @@ DEFAULT_MS_MODEL = 'LCQ'
|
|
12
12
|
DEFAULT_MASS_ANALYZER = 'Ion Trap'
|
13
13
|
##############################################################
|
14
14
|
|
15
|
-
require 'spec_id/sequest/pepxml'
|
15
|
+
#require 'spec_id/sequest/pepxml' # dies if this guy is called (why???)
|
16
|
+
require 'spec_id/proph/pep_summary' # <- he requests the above...hmmm
|
16
17
|
require 'spec_id'
|
17
18
|
require 'optparse'
|
18
19
|
require 'ostruct'
|
File without changes
|
data/bin/fasta_shaker.rb
CHANGED
File without changes
|
data/bin/filter_and_validate.rb
CHANGED
File without changes
|
data/bin/gi2annot.rb
CHANGED
File without changes
|
data/bin/id_class_anal.rb
CHANGED
File without changes
|
data/bin/id_precision.rb
CHANGED
File without changes
|
data/bin/ms_to_lmat.rb
CHANGED
File without changes
|
data/bin/pepproph_filter.rb
CHANGED
File without changes
|
data/bin/protein_summary.rb
CHANGED
File without changes
|
data/bin/protxml2prots_peps.rb
CHANGED
File without changes
|
data/bin/raw_to_mzXML.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
2
|
|
3
3
|
require 'optparse'
|
4
|
-
require '
|
4
|
+
require 'ms/converter/mzxml'
|
5
5
|
require 'fileutils'
|
6
6
|
|
7
7
|
progname = File.basename(__FILE__)
|
@@ -21,11 +21,11 @@ if ARGV.size == 0
|
|
21
21
|
exit
|
22
22
|
end
|
23
23
|
|
24
|
-
converter = MS::MzXML.find_mzxml_converter
|
24
|
+
converter = MS::Converter::MzXML.find_mzxml_converter
|
25
25
|
if converter
|
26
26
|
$stderr.puts "using #{converter} to convert files"
|
27
27
|
else
|
28
|
-
puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
|
28
|
+
puts "cannot find [#{MS::Converter::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
|
29
29
|
puts ENV['PATH'].split(/[:;]/).join(", ")
|
30
30
|
abort
|
31
31
|
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
perc_cmd = 'percolator'
|
4
|
+
|
5
|
+
require 'optparse'
|
6
|
+
require 'spec_id/srf'
|
7
|
+
|
8
|
+
# percolator_v1.02_32bit_linux -o reverse_meta.sqm normal_NOCYS/meta.sqm reverse_NOCYS/meta.sqm
|
9
|
+
# percolator_v1.02_32bit_linux -o reverse_cat_meta.sqm -P INV_ reverse_cat_NOCYS/meta.sqm &
|
10
|
+
|
11
|
+
file_hash = {
|
12
|
+
:srg => "bioworks.srg",
|
13
|
+
:sqg_in => "bioworks.sqg",
|
14
|
+
:sqg_decoy => "decoy.sqg",
|
15
|
+
:perc_out => "perc.sqg",
|
16
|
+
:perc_stdout => "perc.stdout",
|
17
|
+
:perc_stderr => "perc.stderr",
|
18
|
+
:perc_ext => ".psqt",
|
19
|
+
}
|
20
|
+
(default_srg, sqg_in, perc_out, sqg_decoy, perc_stdout, perc_stderr, perc_ext) = file_hash.values_at(:srg, :sqg_in, :perc_out, :sqg_decoy, :perc_stdout, :perc_stderr, :perc_ext)
|
21
|
+
|
22
|
+
opt = {}
|
23
|
+
toclean = []
|
24
|
+
opts = OptionParser.new do |op|
|
25
|
+
op.banner = "usage: #{File.basename(__FILE__)} -d PATTERN <file>.srf ..."
|
26
|
+
op.separator " #{File.basename(__FILE__)} -d PATTERN <file>.srg"
|
27
|
+
op.separator " #{File.basename(__FILE__)} <normal>.srg <decoy>.srg"
|
28
|
+
op.separator ""
|
29
|
+
op.separator " creates necessary meta files in current working directory and"
|
30
|
+
op.separator " runs command '#{perc_cmd}'"
|
31
|
+
op.separator ""
|
32
|
+
op.separator " (all in current working directory)"
|
33
|
+
op.separator " 1) (if given .srf files) creates file: #{default_srg}"
|
34
|
+
op.separator " 2) creates .sqt file for each srf file (placed in dir with srf file)"
|
35
|
+
op.separator " 3) creates percolator (meta) input file(s): #{sqg_in}"
|
36
|
+
op.separator " [and for separate searches: #{sqg_decoy}]"
|
37
|
+
op.separator " 4) creates a percolator (meta) output file: #{perc_out}"
|
38
|
+
op.separator " 5) runs percolator which creates a a #{perc_ext} for each .srf file"
|
39
|
+
op.separator " 6) captures stdout in #{perc_stdout} and stderr in #{perc_stderr}"
|
40
|
+
op.separator ""
|
41
|
+
op.separator " .srg files are text files with full paths to .srf files"
|
42
|
+
op.separator " create with command 'srf_group.rb'"
|
43
|
+
op.separator ""
|
44
|
+
op.on("-d", "--decoy <pattern>", "decoy pattern, eg.: -d REVERSE_") {|v| opt[:decoy] = v }
|
45
|
+
op.on("-c", "--clean", "removes ALL generated files except #{perc_ext}") {|v| opt[:clean] = v }
|
46
|
+
op.on("-v", "--verbose", "spits out info") {|v| $VERBOSE = v }
|
47
|
+
end
|
48
|
+
opts.parse!
|
49
|
+
|
50
|
+
if ARGV.size == 0 or (!opt[:decoy] && (ARGV.size != 2))
|
51
|
+
puts opts.to_s
|
52
|
+
exit
|
53
|
+
end
|
54
|
+
|
55
|
+
#raise RunTimeError, "command #{perc_cmd} must be callable!" unless `#{perc_cmd}`.match(/Usage/)
|
56
|
+
|
57
|
+
files = ARGV.to_a
|
58
|
+
|
59
|
+
# create srg file:
|
60
|
+
srg_files =
|
61
|
+
if files[0] =~ /\.srf$/i
|
62
|
+
obj = SRFGroup.new
|
63
|
+
obj.filenames = files.to_a
|
64
|
+
puts("CREATING: #{default_srg}") if $VERBOSE
|
65
|
+
obj.to_srg(default_srg)
|
66
|
+
toclean << default_srg
|
67
|
+
[default_srg]
|
68
|
+
elsif files[0] =~ /\.srg$/i
|
69
|
+
files
|
70
|
+
else
|
71
|
+
abort "files must have proper extensions"
|
72
|
+
end
|
73
|
+
|
74
|
+
# create the sqt files:
|
75
|
+
all_sqt_filenames = srg_files.map do |srg_file|
|
76
|
+
srf_filenames = SRFGroup.srg_to_paths(srg_file)
|
77
|
+
srf_filenames.map do |file|
|
78
|
+
new_filename = file.sub(/\.srf$/i, '.sqt')
|
79
|
+
puts("CREATING: #{new_filename}") if $VERBOSE
|
80
|
+
SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename)
|
81
|
+
toclean << new_filename
|
82
|
+
new_filename
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# create the percolator input file:
|
87
|
+
all_sqt_filenames.zip(file_hash.values_at(:sqg_in, :sqg_decoy)) do |sqt_filenames,filename|
|
88
|
+
puts("CREATING: #{filename}") if $VERBOSE
|
89
|
+
File.open(filename, 'w') {|fh| fh.puts(sqt_filenames.join("\n")) }
|
90
|
+
toclean << filename
|
91
|
+
end
|
92
|
+
|
93
|
+
# create the percolator output file:
|
94
|
+
psqt_filenames = all_sqt_filenames[0].map do |file|
|
95
|
+
file.sub(/\.sqt$/, perc_ext)
|
96
|
+
end
|
97
|
+
|
98
|
+
puts("CREATING: #{perc_out}") if $VERBOSE
|
99
|
+
File.open(perc_out, 'w') {|fh| fh.puts(psqt_filenames.join("\n")) }
|
100
|
+
toclean << perc_out
|
101
|
+
|
102
|
+
# run percolator
|
103
|
+
to_run =
|
104
|
+
if opt[:decoy]
|
105
|
+
"#{perc_cmd} -o #{perc_out} -P #{opt[:decoy]} #{sqg_in} 1>#{perc_stdout} 2>#{perc_stderr}"
|
106
|
+
else
|
107
|
+
"#{perc_cmd} -o #{perc_out} #{sqg_in} #{sqg_decoy} 1>#{perc_stdout} 2>#{perc_stderr}"
|
108
|
+
end
|
109
|
+
|
110
|
+
puts("RUNNING: #{to_run}") if $VERBOSE
|
111
|
+
`#{to_run}`
|
112
|
+
|
113
|
+
toclean << perc_stdout
|
114
|
+
toclean << perc_stderr
|
115
|
+
|
116
|
+
if opt[:clean]
|
117
|
+
toclean.each do |file|
|
118
|
+
puts("REMOVING: #{file}") if $VERBOSE
|
119
|
+
File.unlink(file) if File.exist?(file)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
data/bin/sqt_group.rb
CHANGED
File without changes
|
data/bin/srf_group.rb
CHANGED
File without changes
|
data/changelog.txt
CHANGED
@@ -162,3 +162,32 @@ sample_enzyme)
|
|
162
162
|
## version 0.3.9
|
163
163
|
|
164
164
|
1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
|
165
|
+
|
166
|
+
## version 0.3.10
|
167
|
+
|
168
|
+
1. added run_percolator.rb script which makes running multiple files easy
|
169
|
+
|
170
|
+
## version 0.3.11
|
171
|
+
|
172
|
+
1. faster sensing of bad scan tags in mzXML v. 2.0 files
|
173
|
+
2. implemented lazy evaluation of spectrum in 2 different ways allowing much
|
174
|
+
larger files to be parsed
|
175
|
+
|
176
|
+
## version 0.4.0
|
177
|
+
|
178
|
+
1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
|
179
|
+
2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
|
180
|
+
3. lazy eval working on mzData
|
181
|
+
4. mzData not necessarily guaranteed to have precursor intensities on lazy
|
182
|
+
eval methods (however, the method intensity_at_mz will still work (causing
|
183
|
+
evaluation))
|
184
|
+
|
185
|
+
## version 0.4.1
|
186
|
+
|
187
|
+
1. added support for reading mzXML version 3.0 (may fail in some cases)
|
188
|
+
|
189
|
+
## version 0.4.2
|
190
|
+
|
191
|
+
1. added MS::MSRun.open method
|
192
|
+
2. added method to write dta files from SRF
|
193
|
+
|
data/lib/ms/gradient_program.rb
CHANGED
data/lib/ms/msrun.rb
CHANGED
@@ -9,6 +9,8 @@ require 'ms/converter/mzxml'
|
|
9
9
|
|
10
10
|
module MS; end
|
11
11
|
class MS::MSRun
|
12
|
+
|
13
|
+
MSRunDefaultOpts = { :lazy => :string }
|
12
14
|
|
13
15
|
attr_accessor :start_time, :end_time
|
14
16
|
attr_accessor :scans
|
@@ -19,24 +21,56 @@ class MS::MSRun
|
|
19
21
|
# the total number of scans
|
20
22
|
attr_writer :scan_count
|
21
23
|
|
22
|
-
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
24
|
+
|
25
|
+
#### # [note: precursor intensities not guaranteed to exist unless :
|
26
|
+
# TODO: may need to eliminate unavailable precursor intensities if they
|
27
|
+
# doing lazy evaluation?? or it becomes lazy too??
|
28
|
+
|
26
29
|
# OPTIONS:
|
27
|
-
# :
|
28
|
-
#
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
# :lazy => :string | :not | :no_spectra | :io
|
31
|
+
# :string = (default) stores each spectrum as a base64 decoded
|
32
|
+
# string that is further processed into Arrays of Floats when m/z
|
33
|
+
# or intensity information is accessed. This lazy evaluation
|
34
|
+
# should work on most files.
|
35
|
+
# :not = all information is read into memory and parsed into
|
36
|
+
# objects. Should only be used for small-medium files (< 80MB on
|
37
|
+
# a machine with 2GB memory)
|
38
|
+
# :no_spectra = if no peak information is required use this to
|
39
|
+
# avoid the overhead of parsing and creating spectra.
|
40
|
+
# :io = stores the io object and indices into spectrum data.
|
41
|
+
# When spectral information is requested (m/z or intensity
|
42
|
+
# information) then the spectrum is read from the io object and
|
43
|
+
# evaluated (requires an open io object when spectrum information
|
44
|
+
# is requested)
|
45
|
+
def initialize(file_or_io=nil, opts={})
|
46
|
+
if opts[:lazy] == :io
|
47
|
+
if !file_or_io.is_a?(IO)
|
48
|
+
raise ArgumentError, "Caller must provide an IO object (rather than filename) if using {:lazy => :io}"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
myopts = MSRunDefaultOpts.merge(opts)
|
52
|
+
myopts[:msrun] = self
|
53
|
+
if file_or_io
|
54
|
+
filetype_and_version = MS::Parser.filetype_and_version(file_or_io)
|
55
|
+
parser = MS::Parser.new(filetype_and_version, :msrun, myopts)
|
56
|
+
parser.parse(file_or_io, myopts)
|
35
57
|
#MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
|
36
58
|
(@filetype, @version) = filetype_and_version
|
37
59
|
end
|
38
60
|
end
|
39
61
|
|
62
|
+
# This will automatically use :lazy => :io, open the file, and close it
|
63
|
+
# after the block returns.
|
64
|
+
# MS::MSRun.open("file.mzXML") do |ms|
|
65
|
+
# ms.scans.each {|scan| ... do something }
|
66
|
+
# end
|
67
|
+
def self.open(filename, opts={})
|
68
|
+
File.open(filename) do |fh|
|
69
|
+
ms = MS::MSRun.new(fh, {:lazy => :io}.merge(opts))
|
70
|
+
yield(ms)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
40
74
|
# returns an array whose indices are ms levels and whose values are the number of scans at that level: [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
|
41
75
|
# ...
|
42
76
|
def scan_counts
|
@@ -95,9 +129,9 @@ class MS::MSRun
|
|
95
129
|
lo_mz = sc.start_mz
|
96
130
|
hi_mz = sc.end_mz
|
97
131
|
else
|
98
|
-
|
99
|
-
hi_mz =
|
100
|
-
lo_mz =
|
132
|
+
mz_ar = sc.spectrum.mzs
|
133
|
+
hi_mz = mz_ar.last
|
134
|
+
lo_mz = mz_ar.first
|
101
135
|
end
|
102
136
|
break
|
103
137
|
end
|
@@ -117,12 +151,12 @@ class MS::MSRun
|
|
117
151
|
# didn't have the attributes (find by brute force)
|
118
152
|
scans.each do |sc|
|
119
153
|
if sc.ms_level == mslevel
|
120
|
-
|
121
|
-
if
|
122
|
-
hi_mz =
|
154
|
+
mz_ar = sc.spectrum.mzs
|
155
|
+
if mz_ar.last > hi_mz
|
156
|
+
hi_mz = mz_ar.last
|
123
157
|
end
|
124
|
-
if
|
125
|
-
lo_mz =
|
158
|
+
if mz_ar.last < lo_mz
|
159
|
+
lo_mz = mz_ar.last
|
126
160
|
end
|
127
161
|
end
|
128
162
|
end
|
@@ -135,7 +169,7 @@ class MS::MSRun
|
|
135
169
|
def precursor_mz_by_scan_num
|
136
170
|
ar = Array.new(@scans.size + 1)
|
137
171
|
@scans.each do |scan|
|
138
|
-
if prec = scan.
|
172
|
+
if prec = scan.precursor
|
139
173
|
ar[scan.num] = prec.mz
|
140
174
|
else
|
141
175
|
ar[scan.num] = nil
|
@@ -170,7 +204,7 @@ class MS::MSRun
|
|
170
204
|
# same as the instance method (creates an object without spectrum and calls
|
171
205
|
# instance method of the same name)
|
172
206
|
def self.precursor_mz_by_scan_num(file)
|
173
|
-
self.new(file, :
|
207
|
+
self.new(file, :lazy => :no_spectra, :fix_bad_tags => true).precursor_mz_by_scan_num
|
174
208
|
end
|
175
209
|
|
176
210
|
# only adds the parent if one is not already present!
|
@@ -190,13 +224,12 @@ class MS::MSRun
|
|
190
224
|
(prev_level - level).times do parent_stack.shift end
|
191
225
|
end
|
192
226
|
if scan.ms_level > 1
|
193
|
-
scan.
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
end
|
227
|
+
precursor = scan.precursor
|
228
|
+
#precursor.parent = parent_stack.first # that's the next line's
|
229
|
+
precursor[2] = parent_stack.first unless precursor[2]
|
230
|
+
#precursor.intensity
|
231
|
+
if add_intensities
|
232
|
+
precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
|
200
233
|
end
|
201
234
|
end
|
202
235
|
prev_level = level
|