mspire 0.3.9 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/INSTALL +24 -7
  2. data/README +15 -13
  3. data/README.rdoc +18 -0
  4. data/Rakefile +50 -14
  5. data/bin/aafreqs.rb +0 -0
  6. data/bin/bioworks2excel.rb +0 -0
  7. data/bin/bioworks_to_pepxml.rb +2 -1
  8. data/bin/bioworks_to_pepxml_gui.rb +0 -0
  9. data/bin/fasta_shaker.rb +0 -0
  10. data/bin/filter_and_validate.rb +0 -0
  11. data/bin/gi2annot.rb +0 -0
  12. data/bin/id_class_anal.rb +0 -0
  13. data/bin/id_precision.rb +0 -0
  14. data/bin/ms_to_lmat.rb +0 -0
  15. data/bin/pepproph_filter.rb +0 -0
  16. data/bin/protein_summary.rb +0 -0
  17. data/bin/protxml2prots_peps.rb +0 -0
  18. data/bin/raw_to_mzXML.rb +3 -3
  19. data/bin/run_percolator.rb +122 -0
  20. data/bin/sqt_group.rb +0 -0
  21. data/bin/srf_group.rb +0 -0
  22. data/changelog.txt +29 -0
  23. data/lib/ms/gradient_program.rb +0 -1
  24. data/lib/ms/msrun.rb +62 -29
  25. data/lib/ms/parser/mzdata/axml.rb +55 -0
  26. data/lib/ms/parser/mzdata/dom.rb +51 -36
  27. data/lib/ms/parser/mzdata.rb +8 -2
  28. data/lib/ms/parser/mzxml/axml.rb +59 -0
  29. data/lib/ms/parser/mzxml/dom.rb +80 -57
  30. data/lib/ms/parser/mzxml/hpricot.rb +1 -1
  31. data/lib/ms/parser/mzxml/libxml.rb +6 -2
  32. data/lib/ms/parser/mzxml.rb +110 -3
  33. data/lib/ms/parser.rb +4 -4
  34. data/lib/ms/precursor.rb +19 -4
  35. data/lib/ms/scan.rb +7 -7
  36. data/lib/ms/spectrum.rb +249 -58
  37. data/lib/mspire.rb +1 -1
  38. data/lib/spec_id/bioworks.rb +2 -2
  39. data/lib/spec_id/precision/filter/cmdline.rb +8 -1
  40. data/lib/spec_id/precision/prob/cmdline.rb +2 -2
  41. data/lib/spec_id/precision/prob.rb +1 -0
  42. data/lib/spec_id/proph/pep_summary.rb +3 -4
  43. data/lib/spec_id/proph/prot_summary.rb +3 -3
  44. data/lib/spec_id/protein_summary.rb +1 -1
  45. data/lib/spec_id/sequest/pepxml.rb +5 -5
  46. data/lib/spec_id/sqt.rb +4 -4
  47. data/lib/spec_id/srf.rb +49 -8
  48. data/lib/spec_id.rb +5 -0
  49. data/lib/xml_style_parser.rb +16 -2
  50. data/script/compile_and_plot_smriti_final.rb +0 -0
  51. data/script/create_little_pepxml.rb +0 -0
  52. data/script/degenerate_peptides.rb +0 -0
  53. data/script/estimate_fpr_by_cysteine.rb +0 -0
  54. data/script/extract_gradient_programs.rb +1 -1
  55. data/script/find_cysteine_background.rb +0 -0
  56. data/script/genuine_tps_and_probs.rb +0 -0
  57. data/script/get_apex_values_rexml.rb +0 -0
  58. data/script/mascot_fix_pepxml.rb +123 -0
  59. data/script/msvis.rb +0 -0
  60. data/script/mzXML2timeIndex.rb +0 -0
  61. data/script/peps_per_bin.rb +0 -0
  62. data/script/prep_dir.rb +0 -0
  63. data/script/simple_protein_digestion.rb +0 -0
  64. data/script/smriti_final_analysis.rb +0 -0
  65. data/script/sqt_to_meta.rb +0 -0
  66. data/script/top_hit_per_scan.rb +0 -0
  67. data/script/toppred_to_yaml.rb +0 -0
  68. data/script/tpp_installer.rb +0 -0
  69. data/specs/bin/prob_validate_spec.rb +5 -2
  70. data/specs/bin/protein_summary_spec.rb +5 -1
  71. data/specs/ms/msrun_spec.rb +176 -133
  72. data/specs/ms/parser_spec.rb +3 -3
  73. data/specs/ms/spectrum_spec.rb +0 -2
  74. data/specs/spec_id/precision/filter_spec.rb +4 -1
  75. data/specs/spec_id/precision/prob_spec.rb +2 -2
  76. data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
  77. data/specs/spec_id/sqt_spec.rb +5 -5
  78. data/specs/spec_id/srf_spec.rb +56 -93
  79. data/specs/spec_id/srf_spec_helper.rb +121 -284
  80. data/specs/spec_id_spec.rb +3 -0
  81. data/specs/transmem/toppred_spec.rb +1 -0
  82. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
  83. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
  84. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
  85. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
  86. data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
  87. metadata +247 -229
data/INSTALL CHANGED
@@ -4,24 +4,35 @@ Prerequisites
4
4
 
5
5
  Much of the package will work without any prerequisites at all. Some functionality may require additional ruby packages or other converters. These are listed in current order of importance:
6
6
 
7
+ * libjtp - generic library installed automatically if you install mspire with rubygems (or 'gem install libjtp')
7
8
  * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
8
- * [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
9
- * ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
10
- * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
9
+ * [axml](http://axml.rubyforge.org/) dom wrapper for xmlparser. ('gem install axml')
10
+ * ['t2x'](archive/t2x) linux executable to convert .RAW files (Xcalibur 1.x) to version 1 mzXML files
11
+
12
+ Optional:
13
+ * [libxml](http://libxml.rubyforge.org/) can be used instead of xmlparser. In Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
14
+ * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). For some plotting. Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
11
15
 
12
16
  Installation
13
17
  ------------
14
18
 
15
19
  gem install mspire
16
20
 
17
- or
18
-
19
- gem install -t mspire (to run tests)
21
+ See [installation under cygwin](cygwin.html) if you're on Windows.
20
22
 
21
23
  Development
22
24
  -----------
23
25
 
24
- prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
26
+ anonymous svn checkout:
27
+
28
+ svn checkout svn://rubyforge.org/var/svn/mspire
29
+
30
+ prerequisites:
31
+
32
+ * [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
33
+ * [rspec](http://rspec.info/) (with rubygems: 'gem install rspec --remote')
34
+
35
+ Use rake:
25
36
 
26
37
  % rake -T
27
38
  rake clean # Remove any temporary products.
@@ -32,3 +43,9 @@ prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --r
32
43
  rake upload_docs # create and upload docs to server
33
44
  ...etc...
34
45
 
46
+ run tests: rake spec
47
+ (or): rake specl
48
+
49
+ run tests with large files: rake spec SPEC_LARGE=t
50
+
51
+ run test on one file: rake spec SPEC=specs/{path_to_spec_file}
data/README CHANGED
@@ -1,5 +1,3 @@
1
- mspire
2
- ======
3
1
 
4
2
  mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
5
3
  working with MS proteomics data in ruby. It seeks to provide support for open
@@ -12,7 +10,7 @@ Current Focus
12
10
 
13
11
  The project is currently focusing on the following:
14
12
 
15
- * SEQUEST data (particularly the output of Bioworks 3.2)
13
+ * SEQUEST data (particularly the output of Bioworks 3.2-3.3)
16
14
  * mzXML
17
15
  * mzData
18
16
  * ProteinProphet
@@ -21,15 +19,16 @@ The project is currently focusing on the following:
21
19
  Features
22
20
  --------
23
21
 
24
- * mzXML (version 1 & 2) parsing
22
+ * mzXML (version 1, 2, and 3) parsing
25
23
  * mzData parsing
26
24
  * bioworks .srf (binary files) reader
25
+ * read/write .sqt files
27
26
  * bioworks to PeptideProphet input (pepXML files)
28
27
  * lightweight APEX values parser
29
28
  * histogram protein probabilities
30
- * developed for Linux, should port easily to Windows or others
31
29
  * protein summary views with custom false ID cutoff values
32
30
  * conversion to OBI-Warp input files
31
+ * portable: works across platforms
33
32
 
34
33
  Validation by:
35
34
  * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
@@ -38,10 +37,12 @@ Validation by:
38
37
  * Generic sample bias (e.g., low abundance/high abundance proteins)
39
38
  * Defined sample
40
39
 
41
- Working with:
42
- * Bioworks (3.2-3.3.1)
43
- * Peptide/Protein Prophet
44
- * Easily extensible to others
40
+ Spectra and Spectra Identification
41
+ ----------------------------------
42
+
43
+ The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
44
+
45
+ The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
45
46
 
46
47
  Tutorials
47
48
  ---------
@@ -54,12 +55,13 @@ Warning
54
55
  -------
55
56
 
56
57
  This is an experimental package. As such, all versions prior to version 1.0
57
- will only loosely follow the rubygems versioning scheme: interfaces are
58
- subject to change without a major change in version number prior to version
59
- 1.0. Beyond version 1.0, the versioning scheme will be strictly adhered to.
58
+ may contain interface changes on minor revisions (major.minor.build) (e.g.,
59
+ 0.4.0 may contain interface changes from 0.3.9). Beyond version 1.0, the
60
+ versioning scheme will be strictly adhered to (no interface changes except on
61
+ major revisions).
60
62
 
61
63
  Installation
62
64
  ------------
63
65
 
64
- see [Install](install.html)
66
+ see [Install](install/index.html)
65
67
 
data/README.rdoc ADDED
@@ -0,0 +1,18 @@
1
+
2
+ = mspire
3
+
4
+ mass spectrometry proteomics in ruby
5
+
6
+ Please refer to the latest Documentation[http://mspire.rubyforge.org]
7
+
8
+ Please see Installation[http://mspire.rubyforge.org/install/index.html]
9
+
10
+ == Data Models and Examples
11
+
12
+ Object models and usage examples are online:
13
+
14
+ [MS::MSRun] http://mspire.rubyforge.org/ms/msrun.html
15
+ [SpecID] http://mspire.rubyforge.org/spec_id/spec_id.html
16
+ [SRF] http://mspire.rubyforge.org/spec_id/srf.html
17
+ [False Identification Rate Determination] http://mspire.rubyforge.org/spec_id/fir/index.html
18
+ [OBI-Warp] http://mspire.rubyforge.org/ms/obiwarp.html
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ $dependencies = %w(libjtp)
17
17
  $tfiles_large = 'test_files_large'
18
18
  changelog = "changelog.txt"
19
19
 
20
- core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
20
+ core_files = FL["INSTALL", "README", "README.rdoc", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
21
21
  big_dist_files = core_files + FL["test_files_large/**/*"]
22
22
 
23
23
  dist_files = core_files
@@ -43,7 +43,7 @@ def move_and_add_webgen_header(file, newfile, src_dir, heading)
43
43
  string = IO.read file
44
44
  with_header = heading + string
45
45
  File.open(newfile, 'w') {|v| v.print with_header }
46
- FileUtils.mv newfile, src_dir
46
+ FileUtils.mv newfile, src_dir, :force => true
47
47
  end
48
48
 
49
49
  desc "copy top level files into doc/src"
@@ -55,27 +55,40 @@ directoryName: mspire
55
55
  ---\n"
56
56
  src = "doc/src"
57
57
  move_and_add_webgen_header('README', 'index.page', src, string.sub('TITLE', 'Home'))
58
- move_and_add_webgen_header('INSTALL', 'install.page', src, string.sub('TITLE', 'Install'))
58
+ move_and_add_webgen_header('INSTALL', 'index.page', src + '/install', string.sub('TITLE', 'Install').sub('mspire', 'Install').sub("inMenu: true\n", ''))
59
59
  end
60
60
 
61
- desc "create and upload docs to server"
62
- task :upload_docs => :html_docs do
61
+ desc "upload docs (doc/output) to server"
62
+ task :upload_docs do
63
63
  sh "scp -i ~/.ssh/rubyforge_key -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
64
64
  end
65
65
 
66
+ # best to use webgen 0.3.8 right now
67
+ # to get working (may not require all these steps):
68
+ # gem install RedCloth
69
+ # gem install BlueCloth
70
+ # soft link the bluecloth binary into path
66
71
  desc "creates docs in doc/html"
67
72
  task :html_docs => [:cp_top_level_docs] do
68
73
  FileUtils.cd 'doc' do
69
74
  sh "webgen"
70
75
  end
76
+ FileUtils.cp 'doc/src/archive/t2x', 'doc/output/archive/t2x'
71
77
  end
72
78
 
73
- rdoc_options = ['--main', 'README', '--title', NAME]
74
- rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
79
+ desc "does html_docs and rdoc and puts rdoc inside html_docs"
80
+ task :all_docs => [:html_docs, :rdoc] do
81
+ FileUtils.mv 'html', 'doc/output/rdoc'
82
+ end
83
+
84
+ #rdoc_options = ['--main', 'README', '--title', NAME]
85
+ rdoc_options = ['--main', 'README.rdoc', '--title', NAME]
86
+ #rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
87
+ rdoc_extra_includes = ['README.rdoc']
75
88
 
76
89
  Rake::RDocTask.new do |rd|
77
- rd.main = "README"
78
- rd.rdoc_files.include rdoc_extra_includes
90
+ rd.main = "README.rdoc"
91
+ rd.rdoc_files.include("lib/**/*.rb", *rdoc_extra_includes )
79
92
  rd.options.push( *rdoc_options )
80
93
  end
81
94
 
@@ -124,7 +137,12 @@ Spec::Rake::SpecTask.new('spec') do |t|
124
137
  Rake::Task[:ensure_gem_is_uninstalled].invoke
125
138
  Rake::Task[:ensure_dependencies].invoke
126
139
  Rake::Task[:ensure_large_testfiles].invoke
127
- t.libs = ['lib']
140
+ t.libs =
141
+ if !ENV['LIB'].nil?
142
+ [ENV['LIB']]
143
+ else
144
+ ['lib']
145
+ end
128
146
  #t.ruby_opts = ['-I', 'lib']
129
147
  t.spec_files = FileList['specs/**/*_spec.rb']
130
148
  end
@@ -135,7 +153,13 @@ Spec::Rake::SpecTask.new('specl') do |t|
135
153
  Rake::Task[:ensure_dependencies].invoke
136
154
  Rake::Task[:ensure_large_testfiles].invoke
137
155
  t.spec_files = FileList['specs/**/*_spec.rb']
138
- t.libs = ['lib']
156
+ t.libs =
157
+ if !ENV['LIB'].nil?
158
+ [ENV['LIB']]
159
+ else
160
+ ['lib']
161
+ end
162
+ #t.libs = ['lib']
139
163
  #t.ruby_opts = ['-I', 'lib']
140
164
  t.spec_opts = ['--format', 'specdoc' ]
141
165
  end
@@ -147,7 +171,12 @@ Spec::Rake::SpecTask.new('rcov') do |t|
147
171
  Rake::Task[:ensure_large_testfiles].invoke
148
172
  t.spec_files = FileList['specs/**/*_spec.rb']
149
173
  t.rcov = true
150
- t.libs = ['lib']
174
+ t.libs =
175
+ if !ENV['LIB'].nil?
176
+ [ENV['LIB']]
177
+ else
178
+ ['lib']
179
+ end
151
180
  #t.ruby_opts = ['-I', 'lib']
152
181
  t.rcov_opts = ['--exclude', 'specs']
153
182
  end
@@ -160,9 +189,15 @@ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_
160
189
  file.include?(ENV['SPECM'])
161
190
  end
162
191
  end
192
+ lib =
193
+ if !ENV['LIB'].nil?
194
+ ENV['LIB']
195
+ else
196
+ 'lib'
197
+ end
163
198
  files_to_run.each do |spc|
164
199
  puts "------ SPEC=#{spc} ------"
165
- system "ruby -I lib -S spec #{spc} --format specdoc"
200
+ system "ruby -I #{lib} -S spec #{spc} --format specdoc"
166
201
  end
167
202
  end
168
203
 
@@ -238,8 +273,9 @@ spec = Gem::Specification.new do |s|
238
273
  s.rdoc_options = rdoc_options
239
274
  s.extra_rdoc_files = rdoc_extra_includes
240
275
  s.executables = FL["bin/*"].map {|file| File.basename(file) }
241
- s.add_dependency('libjtp', '~> 0.2.13')
276
+ s.add_dependency('libjtp', '~> 0.2.14')
242
277
  s.add_dependency('axml', '~> 0.0.0')
278
+ s.add_dependency('arrayclass', '~> 0.1.0')
243
279
  s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
244
280
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
245
281
  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
data/bin/aafreqs.rb CHANGED
File without changes
File without changes
@@ -12,7 +12,8 @@ DEFAULT_MS_MODEL = 'LCQ'
12
12
  DEFAULT_MASS_ANALYZER = 'Ion Trap'
13
13
  ##############################################################
14
14
 
15
- require 'spec_id/sequest/pepxml'
15
+ #require 'spec_id/sequest/pepxml' # dies if this guy is called (why???)
16
+ require 'spec_id/proph/pep_summary' # <- he requests the above...hmmm
16
17
  require 'spec_id'
17
18
  require 'optparse'
18
19
  require 'ostruct'
File without changes
data/bin/fasta_shaker.rb CHANGED
File without changes
File without changes
data/bin/gi2annot.rb CHANGED
File without changes
data/bin/id_class_anal.rb CHANGED
File without changes
data/bin/id_precision.rb CHANGED
File without changes
data/bin/ms_to_lmat.rb CHANGED
File without changes
File without changes
File without changes
File without changes
data/bin/raw_to_mzXML.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/ruby -w
2
2
 
3
3
  require 'optparse'
4
- require 'spec/mzxml'
4
+ require 'ms/converter/mzxml'
5
5
  require 'fileutils'
6
6
 
7
7
  progname = File.basename(__FILE__)
@@ -21,11 +21,11 @@ if ARGV.size == 0
21
21
  exit
22
22
  end
23
23
 
24
- converter = MS::MzXML.find_mzxml_converter
24
+ converter = MS::Converter::MzXML.find_mzxml_converter
25
25
  if converter
26
26
  $stderr.puts "using #{converter} to convert files"
27
27
  else
28
- puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
28
+ puts "cannot find [#{MS::Converter::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
29
29
  puts ENV['PATH'].split(/[:;]/).join(", ")
30
30
  abort
31
31
  end
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/ruby
2
+
3
+ perc_cmd = 'percolator'
4
+
5
+ require 'optparse'
6
+ require 'spec_id/srf'
7
+
8
+ # percolator_v1.02_32bit_linux -o reverse_meta.sqm normal_NOCYS/meta.sqm reverse_NOCYS/meta.sqm
9
+ # percolator_v1.02_32bit_linux -o reverse_cat_meta.sqm -P INV_ reverse_cat_NOCYS/meta.sqm &
10
+
11
+ file_hash = {
12
+ :srg => "bioworks.srg",
13
+ :sqg_in => "bioworks.sqg",
14
+ :sqg_decoy => "decoy.sqg",
15
+ :perc_out => "perc.sqg",
16
+ :perc_stdout => "perc.stdout",
17
+ :perc_stderr => "perc.stderr",
18
+ :perc_ext => ".psqt",
19
+ }
20
+ (default_srg, sqg_in, perc_out, sqg_decoy, perc_stdout, perc_stderr, perc_ext) = file_hash.values_at(:srg, :sqg_in, :perc_out, :sqg_decoy, :perc_stdout, :perc_stderr, :perc_ext)
21
+
22
+ opt = {}
23
+ toclean = []
24
+ opts = OptionParser.new do |op|
25
+ op.banner = "usage: #{File.basename(__FILE__)} -d PATTERN <file>.srf ..."
26
+ op.separator " #{File.basename(__FILE__)} -d PATTERN <file>.srg"
27
+ op.separator " #{File.basename(__FILE__)} <normal>.srg <decoy>.srg"
28
+ op.separator ""
29
+ op.separator " creates necessary meta files in current working directory and"
30
+ op.separator " runs command '#{perc_cmd}'"
31
+ op.separator ""
32
+ op.separator " (all in current working directory)"
33
+ op.separator " 1) (if given .srf files) creates file: #{default_srg}"
34
+ op.separator " 2) creates .sqt file for each srf file (placed in dir with srf file)"
35
+ op.separator " 3) creates percolator (meta) input file(s): #{sqg_in}"
36
+ op.separator " [and for separate searches: #{sqg_decoy}]"
37
+ op.separator " 4) creates a percolator (meta) output file: #{perc_out}"
38
+ op.separator " 5) runs percolator which creates a a #{perc_ext} for each .srf file"
39
+ op.separator " 6) captures stdout in #{perc_stdout} and stderr in #{perc_stderr}"
40
+ op.separator ""
41
+ op.separator " .srg files are text files with full paths to .srf files"
42
+ op.separator " create with command 'srf_group.rb'"
43
+ op.separator ""
44
+ op.on("-d", "--decoy <pattern>", "decoy pattern, eg.: -d REVERSE_") {|v| opt[:decoy] = v }
45
+ op.on("-c", "--clean", "removes ALL generated files except #{perc_ext}") {|v| opt[:clean] = v }
46
+ op.on("-v", "--verbose", "spits out info") {|v| $VERBOSE = v }
47
+ end
48
+ opts.parse!
49
+
50
+ if ARGV.size == 0 or (!opt[:decoy] && (ARGV.size != 2))
51
+ puts opts.to_s
52
+ exit
53
+ end
54
+
55
+ #raise RunTimeError, "command #{perc_cmd} must be callable!" unless `#{perc_cmd}`.match(/Usage/)
56
+
57
+ files = ARGV.to_a
58
+
59
+ # create srg file:
60
+ srg_files =
61
+ if files[0] =~ /\.srf$/i
62
+ obj = SRFGroup.new
63
+ obj.filenames = files.to_a
64
+ puts("CREATING: #{default_srg}") if $VERBOSE
65
+ obj.to_srg(default_srg)
66
+ toclean << default_srg
67
+ [default_srg]
68
+ elsif files[0] =~ /\.srg$/i
69
+ files
70
+ else
71
+ abort "files must have proper extensions"
72
+ end
73
+
74
+ # create the sqt files:
75
+ all_sqt_filenames = srg_files.map do |srg_file|
76
+ srf_filenames = SRFGroup.srg_to_paths(srg_file)
77
+ srf_filenames.map do |file|
78
+ new_filename = file.sub(/\.srf$/i, '.sqt')
79
+ puts("CREATING: #{new_filename}") if $VERBOSE
80
+ SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename)
81
+ toclean << new_filename
82
+ new_filename
83
+ end
84
+ end
85
+
86
+ # create the percolator input file:
87
+ all_sqt_filenames.zip(file_hash.values_at(:sqg_in, :sqg_decoy)) do |sqt_filenames,filename|
88
+ puts("CREATING: #{filename}") if $VERBOSE
89
+ File.open(filename, 'w') {|fh| fh.puts(sqt_filenames.join("\n")) }
90
+ toclean << filename
91
+ end
92
+
93
+ # create the percolator output file:
94
+ psqt_filenames = all_sqt_filenames[0].map do |file|
95
+ file.sub(/\.sqt$/, perc_ext)
96
+ end
97
+
98
+ puts("CREATING: #{perc_out}") if $VERBOSE
99
+ File.open(perc_out, 'w') {|fh| fh.puts(psqt_filenames.join("\n")) }
100
+ toclean << perc_out
101
+
102
+ # run percolator
103
+ to_run =
104
+ if opt[:decoy]
105
+ "#{perc_cmd} -o #{perc_out} -P #{opt[:decoy]} #{sqg_in} 1>#{perc_stdout} 2>#{perc_stderr}"
106
+ else
107
+ "#{perc_cmd} -o #{perc_out} #{sqg_in} #{sqg_decoy} 1>#{perc_stdout} 2>#{perc_stderr}"
108
+ end
109
+
110
+ puts("RUNNING: #{to_run}") if $VERBOSE
111
+ `#{to_run}`
112
+
113
+ toclean << perc_stdout
114
+ toclean << perc_stderr
115
+
116
+ if opt[:clean]
117
+ toclean.each do |file|
118
+ puts("REMOVING: #{file}") if $VERBOSE
119
+ File.unlink(file) if File.exist?(file)
120
+ end
121
+ end
122
+
data/bin/sqt_group.rb CHANGED
File without changes
data/bin/srf_group.rb CHANGED
File without changes
data/changelog.txt CHANGED
@@ -162,3 +162,32 @@ sample_enzyme)
162
162
  ## version 0.3.9
163
163
 
164
164
  1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
+
166
+ ## version 0.3.10
167
+
168
+ 1. added run_percolator.rb script which makes running multiple files easy
169
+
170
+ ## version 0.3.11
171
+
172
+ 1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
+ 2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
+ larger files to be parsed
175
+
176
+ ## version 0.4.0
177
+
178
+ 1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
+ 2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
180
+ 3. lazy eval working on mzData
181
+ 4. mzData not necessarily guaranteed to have precursor intensities on lazy
182
+ eval methods (however, the method intensity_at_mz will still work (causing
183
+ evaluation))
184
+
185
+ ## version 0.4.1
186
+
187
+ 1. added support for reading mzXML version 3.0 (may fail in some cases)
188
+
189
+ ## version 0.4.2
190
+
191
+ 1. added MS::MSRun.open method
192
+ 2. added method to write dta files from SRF
193
+
@@ -1,4 +1,3 @@
1
- require 'array_class'
2
1
 
3
2
  # This is modeled after the Thermo gradient
4
3
  class GradientProgram
data/lib/ms/msrun.rb CHANGED
@@ -9,6 +9,8 @@ require 'ms/converter/mzxml'
9
9
 
10
10
  module MS; end
11
11
  class MS::MSRun
12
+
13
+ MSRunDefaultOpts = { :lazy => :string }
12
14
 
13
15
  attr_accessor :start_time, :end_time
14
16
  attr_accessor :scans
@@ -19,24 +21,56 @@ class MS::MSRun
19
21
  # the total number of scans
20
22
  attr_writer :scan_count
21
23
 
22
- # should be able to read basic information from a variety of files
23
- # this will be written in regexp's because REXML is way too slow, xmlparser
24
- # is not guaranteed to be on every system, xmlib is not on win32.
25
- # spectra is false, then spectra are not parsed out and included
24
+
25
+ #### # [note: precursor intensities not guaranteed to exist unless :
26
+ # TODO: may need to eliminate unavailable precursor intensities if they
27
+ # doing lazy evaluation?? or it becomes lazy too??
28
+
26
29
  # OPTIONS:
27
- # :spectra => *true|false # whether to parse out spectra
28
- # [note: precursor intensities not guaranteed to exist unless :spectra == true]
29
- def initialize(file=nil, opts={})
30
- myopts = opts.dup ; myopts[:msrun] = self
31
- if file
32
- filetype_and_version = MS::Parser.filetype_and_version(file)
33
- parser = MS::Parser.new(filetype_and_version, :msrun)
34
- parser.parse(file, myopts)
30
+ # :lazy => :string | :not | :no_spectra | :io
31
+ # :string = (default) stores each spectrum as a base64 decoded
32
+ # string that is further processed into Arrays of Floats when m/z
33
+ # or intensity information is accessed. This lazy evaluation
34
+ # should work on most files.
35
+ # :not = all information is read into memory and parsed into
36
+ # objects. Should only be used for small-medium files (< 80MB on
37
+ # a machine with 2GB memory)
38
+ # :no_spectra = if no peak information is required use this to
39
+ # avoid the overhead of parsing and creating spectra.
40
+ # :io = stores the io object and indices into spectrum data.
41
+ # When spectral information is requested (m/z or intensity
42
+ # information) then the spectrum is read from the io object and
43
+ # evaluated (requires an open io object when spectrum information
44
+ # is requested)
45
+ def initialize(file_or_io=nil, opts={})
46
+ if opts[:lazy] == :io
47
+ if !file_or_io.is_a?(IO)
48
+ raise ArgumentError, "Caller must provide an IO object (rather than filename) if using {:lazy => :io}"
49
+ end
50
+ end
51
+ myopts = MSRunDefaultOpts.merge(opts)
52
+ myopts[:msrun] = self
53
+ if file_or_io
54
+ filetype_and_version = MS::Parser.filetype_and_version(file_or_io)
55
+ parser = MS::Parser.new(filetype_and_version, :msrun, myopts)
56
+ parser.parse(file_or_io, myopts)
35
57
  #MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
36
58
  (@filetype, @version) = filetype_and_version
37
59
  end
38
60
  end
39
61
 
62
+ # This will automatically use :lazy => :io, open the file, and close it
63
+ # after the block returns.
64
+ # MS::MSRun.open("file.mzXML") do |ms|
65
+ # ms.scans.each {|scan| ... do something }
66
+ # end
67
+ def self.open(filename, opts={})
68
+ File.open(filename) do |fh|
69
+ ms = MS::MSRun.new(fh, {:lazy => :io}.merge(opts))
70
+ yield(ms)
71
+ end
72
+ end
73
+
40
74
  # returns an array, whose indices provide the number of scans in each index level the ms_levels, [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
41
75
  # ...
42
76
  def scan_counts
@@ -95,9 +129,9 @@ class MS::MSRun
95
129
  lo_mz = sc.start_mz
96
130
  hi_mz = sc.end_mz
97
131
  else
98
- mz = sc.spectrum.mz
99
- hi_mz = mz.last
100
- lo_mz = mz.first
132
+ mz_ar = sc.spectrum.mzs
133
+ hi_mz = mz_ar.last
134
+ lo_mz = mz_ar.first
101
135
  end
102
136
  break
103
137
  end
@@ -117,12 +151,12 @@ class MS::MSRun
117
151
  # didn't have the attributes (find by brute force)
118
152
  scans.each do |sc|
119
153
  if sc.ms_level == mslevel
120
- mz = sc.spectrum.mz
121
- if mz.last > hi_mz
122
- hi_mz = mz.last
154
+ mz_ar = sc.spectrum.mzs
155
+ if mz_ar.last > hi_mz
156
+ hi_mz = mz_ar.last
123
157
  end
124
- if mz.last < lo_mz
125
- lo_mz = mz.last
158
+ if mz_ar.last < lo_mz
159
+ lo_mz = mz_ar.last
126
160
  end
127
161
  end
128
162
  end
@@ -135,7 +169,7 @@ class MS::MSRun
135
169
  def precursor_mz_by_scan_num
136
170
  ar = Array.new(@scans.size + 1)
137
171
  @scans.each do |scan|
138
- if prec = scan.precursors.first
172
+ if prec = scan.precursor
139
173
  ar[scan.num] = prec.mz
140
174
  else
141
175
  ar[scan.num] = nil
@@ -170,7 +204,7 @@ class MS::MSRun
170
204
  # same as the instance method (creates an object without spectrum and calls
171
205
  # instance method of the same name)
172
206
  def self.precursor_mz_by_scan_num(file)
173
- self.new(file, :spectra => false).precursor_mz_by_scan_num
207
+ self.new(file, :lazy => :no_spectra, :fix_bad_tags => true).precursor_mz_by_scan_num
174
208
  end
175
209
 
176
210
  # only adds the parent if one is not already present!
@@ -190,13 +224,12 @@ class MS::MSRun
190
224
  (prev_level - level).times do parent_stack.shift end
191
225
  end
192
226
  if scan.ms_level > 1
193
- scan.precursors.each do |precursor|
194
- #precursor.parent = parent_stack.first # that's the next line's
195
- precursor[2] = parent_stack.first unless precursor[2]
196
- #precursor.intensity
197
- if add_intensities
198
- precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
199
- end
227
+ precursor = scan.precursor
228
+ #precursor.parent = parent_stack.first # that's the next line's
229
+ precursor[2] = parent_stack.first unless precursor[2]
230
+ #precursor.intensity
231
+ if add_intensities
232
+ precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
200
233
  end
201
234
  end
202
235
  prev_level = level