mspire 0.3.9 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/INSTALL +24 -7
  2. data/README +15 -13
  3. data/README.rdoc +18 -0
  4. data/Rakefile +50 -14
  5. data/bin/aafreqs.rb +0 -0
  6. data/bin/bioworks2excel.rb +0 -0
  7. data/bin/bioworks_to_pepxml.rb +2 -1
  8. data/bin/bioworks_to_pepxml_gui.rb +0 -0
  9. data/bin/fasta_shaker.rb +0 -0
  10. data/bin/filter_and_validate.rb +0 -0
  11. data/bin/gi2annot.rb +0 -0
  12. data/bin/id_class_anal.rb +0 -0
  13. data/bin/id_precision.rb +0 -0
  14. data/bin/ms_to_lmat.rb +0 -0
  15. data/bin/pepproph_filter.rb +0 -0
  16. data/bin/protein_summary.rb +0 -0
  17. data/bin/protxml2prots_peps.rb +0 -0
  18. data/bin/raw_to_mzXML.rb +3 -3
  19. data/bin/run_percolator.rb +122 -0
  20. data/bin/sqt_group.rb +0 -0
  21. data/bin/srf_group.rb +0 -0
  22. data/changelog.txt +29 -0
  23. data/lib/ms/gradient_program.rb +0 -1
  24. data/lib/ms/msrun.rb +62 -29
  25. data/lib/ms/parser/mzdata/axml.rb +55 -0
  26. data/lib/ms/parser/mzdata/dom.rb +51 -36
  27. data/lib/ms/parser/mzdata.rb +8 -2
  28. data/lib/ms/parser/mzxml/axml.rb +59 -0
  29. data/lib/ms/parser/mzxml/dom.rb +80 -57
  30. data/lib/ms/parser/mzxml/hpricot.rb +1 -1
  31. data/lib/ms/parser/mzxml/libxml.rb +6 -2
  32. data/lib/ms/parser/mzxml.rb +110 -3
  33. data/lib/ms/parser.rb +4 -4
  34. data/lib/ms/precursor.rb +19 -4
  35. data/lib/ms/scan.rb +7 -7
  36. data/lib/ms/spectrum.rb +249 -58
  37. data/lib/mspire.rb +1 -1
  38. data/lib/spec_id/bioworks.rb +2 -2
  39. data/lib/spec_id/precision/filter/cmdline.rb +8 -1
  40. data/lib/spec_id/precision/prob/cmdline.rb +2 -2
  41. data/lib/spec_id/precision/prob.rb +1 -0
  42. data/lib/spec_id/proph/pep_summary.rb +3 -4
  43. data/lib/spec_id/proph/prot_summary.rb +3 -3
  44. data/lib/spec_id/protein_summary.rb +1 -1
  45. data/lib/spec_id/sequest/pepxml.rb +5 -5
  46. data/lib/spec_id/sqt.rb +4 -4
  47. data/lib/spec_id/srf.rb +49 -8
  48. data/lib/spec_id.rb +5 -0
  49. data/lib/xml_style_parser.rb +16 -2
  50. data/script/compile_and_plot_smriti_final.rb +0 -0
  51. data/script/create_little_pepxml.rb +0 -0
  52. data/script/degenerate_peptides.rb +0 -0
  53. data/script/estimate_fpr_by_cysteine.rb +0 -0
  54. data/script/extract_gradient_programs.rb +1 -1
  55. data/script/find_cysteine_background.rb +0 -0
  56. data/script/genuine_tps_and_probs.rb +0 -0
  57. data/script/get_apex_values_rexml.rb +0 -0
  58. data/script/mascot_fix_pepxml.rb +123 -0
  59. data/script/msvis.rb +0 -0
  60. data/script/mzXML2timeIndex.rb +0 -0
  61. data/script/peps_per_bin.rb +0 -0
  62. data/script/prep_dir.rb +0 -0
  63. data/script/simple_protein_digestion.rb +0 -0
  64. data/script/smriti_final_analysis.rb +0 -0
  65. data/script/sqt_to_meta.rb +0 -0
  66. data/script/top_hit_per_scan.rb +0 -0
  67. data/script/toppred_to_yaml.rb +0 -0
  68. data/script/tpp_installer.rb +0 -0
  69. data/specs/bin/prob_validate_spec.rb +5 -2
  70. data/specs/bin/protein_summary_spec.rb +5 -1
  71. data/specs/ms/msrun_spec.rb +176 -133
  72. data/specs/ms/parser_spec.rb +3 -3
  73. data/specs/ms/spectrum_spec.rb +0 -2
  74. data/specs/spec_id/precision/filter_spec.rb +4 -1
  75. data/specs/spec_id/precision/prob_spec.rb +2 -2
  76. data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
  77. data/specs/spec_id/sqt_spec.rb +5 -5
  78. data/specs/spec_id/srf_spec.rb +56 -93
  79. data/specs/spec_id/srf_spec_helper.rb +121 -284
  80. data/specs/spec_id_spec.rb +3 -0
  81. data/specs/transmem/toppred_spec.rb +1 -0
  82. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
  83. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
  84. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
  85. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
  86. data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
  87. metadata +247 -229
data/INSTALL CHANGED
@@ -4,24 +4,35 @@ Prerequisites
4
4
 
5
5
  Much of the package will work without any prerequisites at all. Some functionality may require additional ruby packages or other converters. These are listed in current order of importance:
6
6
 
7
+ * libjtp - generic library installed automatically if you install mspire with rubygems (or 'gem install libjtp')
7
8
  * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
8
- * [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
9
- * ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
10
- * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
9
+ * [axml](http://axml.rubyforge.org/) dom wrapper for xmlparser. ('gem install axml')
10
+ * ['t2x'](archive/t2x) linux executable to convert .RAW files (Xcalibur 1.x) to version 1 mzXML files
11
+
12
+ Optional:
13
+ * [libxml](http://libxml.rubyforge.org/) can be used instead of xmlparser. In Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
14
+ * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). For some plotting. Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
11
15
 
12
16
  Installation
13
17
  ------------
14
18
 
15
19
  gem install mspire
16
20
 
17
- or
18
-
19
- gem install -t mspire (to run tests)
21
+ See [installation under cygwin](cygwin.html) if you're on Windows.
20
22
 
21
23
  Development
22
24
  -----------
23
25
 
24
- prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
26
+ anonymous svn checkout:
27
+
28
+ svn checkout svn://rubyforge.org/var/svn/mspire
29
+
30
+ prerequisites:
31
+
32
+ * [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
33
+ * [rspec](http://rspec.info/) (with rubygems: 'gem install rspec --remote')
34
+
35
+ Use rake:
25
36
 
26
37
  % rake -T
27
38
  rake clean # Remove any temporary products.
@@ -32,3 +43,9 @@ prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --r
32
43
  rake upload_docs # create and upload docs to server
33
44
  ...etc...
34
45
 
46
+ run tests: rake spec
47
+ (or): rake specl
48
+
49
+ run tests with large files: rake spec SPEC_LARGE=t
50
+
51
+ run test on one file: rake spec SPEC=specs/{path_to_spec_file}
data/README CHANGED
@@ -1,5 +1,3 @@
1
- mspire
2
- ======
3
1
 
4
2
  mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
5
3
  working with MS proteomics data in ruby. It seeks to provide support for open
@@ -12,7 +10,7 @@ Current Focus
12
10
 
13
11
  The project is currently focusing on the following:
14
12
 
15
- * SEQUEST data (particularly the output of Bioworks 3.2)
13
+ * SEQUEST data (particularly the output of Bioworks 3.2-3.3)
16
14
  * mzXML
17
15
  * mzData
18
16
  * ProteinProphet
@@ -21,15 +19,16 @@ The project is currently focusing on the following:
21
19
  Features
22
20
  --------
23
21
 
24
- * mzXML (version 1 & 2) parsing
22
+ * mzXML (version 1, 2, and 3) parsing
25
23
  * mzData parsing
26
24
  * bioworks .srf (binary files) reader
25
+ * read/write .sqt files
27
26
  * bioworks to PeptideProphet input (pepXML files)
28
27
  * lightweight APEX values parser
29
28
  * histogram protein probabilities
30
- * developed for Linux, should port easily to Windows or others
31
29
  * protein summary views with custom false ID cutoff values
32
30
  * conversion to OBI-Warp input files
31
+ * portable: works across platforms
33
32
 
34
33
  Validation by:
35
34
  * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
@@ -38,10 +37,12 @@ Validation by:
38
37
  * Generic sample bias (e.g., low abundance/high abundance proteins)
39
38
  * Defined sample
40
39
 
41
- Working with:
42
- * Bioworks (3.2-3.3.1)
43
- * Peptide/Protein Prophet
44
- * Easily extensible to others
40
+ Spectra and Spectra Identification
41
+ ----------------------------------
42
+
43
+ The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
44
+
45
+ The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
45
46
 
46
47
  Tutorials
47
48
  ---------
@@ -54,12 +55,13 @@ Warning
54
55
  -------
55
56
 
56
57
  This is an experimental package. As such, all versions prior to version 1.0
57
- will only loosely follow the rubygems versioning scheme: interfaces are
58
- subject to change without a major change in version number prior to version
59
- 1.0. Beyond version 1.0, the versioning scheme will be strictly adhered to.
58
+ may contain interface changes on minor revisions (major.minor.build) (e.g.,
59
+ 0.4.0 may contain interface changes from 0.3.9). Beyond version 1.0, the
60
+ versioning scheme will be strictly adhered to (no interface changes except on
61
+ major revisions).
60
62
 
61
63
  Installation
62
64
  ------------
63
65
 
64
- see [Install](install.html)
66
+ see [Install](install/index.html)
65
67
 
data/README.rdoc ADDED
@@ -0,0 +1,18 @@
1
+
2
+ = mspire
3
+
4
+ mass spectrometry proteomics in ruby
5
+
6
+ Please refer to the latest Documentation[http://mspire.rubyforge.org]
7
+
8
+ Please see Installation[http://mspire.rubyforge.org/install/index.html]
9
+
10
+ == Data Models and Examples
11
+
12
+ Object models and usage examples are online:
13
+
14
+ [MS::MSRun] http://mspire.rubyforge.org/ms/msrun.html
15
+ [SpecID] http://mspire.rubyforge.org/spec_id/spec_id.html
16
+ [SRF] http://mspire.rubyforge.org/spec_id/srf.html
17
+ [False Identification Rate Determination] http://mspire.rubyforge.org/spec_id/fir/index.html
18
+ [OBI-Warp] http://mspire.rubyforge.org/ms/obiwarp.html
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ $dependencies = %w(libjtp)
17
17
  $tfiles_large = 'test_files_large'
18
18
  changelog = "changelog.txt"
19
19
 
20
- core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
20
+ core_files = FL["INSTALL", "README", "README.rdoc", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
21
21
  big_dist_files = core_files + FL["test_files_large/**/*"]
22
22
 
23
23
  dist_files = core_files
@@ -43,7 +43,7 @@ def move_and_add_webgen_header(file, newfile, src_dir, heading)
43
43
  string = IO.read file
44
44
  with_header = heading + string
45
45
  File.open(newfile, 'w') {|v| v.print with_header }
46
- FileUtils.mv newfile, src_dir
46
+ FileUtils.mv newfile, src_dir, :force => true
47
47
  end
48
48
 
49
49
  desc "copy top level files into doc/src"
@@ -55,27 +55,40 @@ directoryName: mspire
55
55
  ---\n"
56
56
  src = "doc/src"
57
57
  move_and_add_webgen_header('README', 'index.page', src, string.sub('TITLE', 'Home'))
58
- move_and_add_webgen_header('INSTALL', 'install.page', src, string.sub('TITLE', 'Install'))
58
+ move_and_add_webgen_header('INSTALL', 'index.page', src + '/install', string.sub('TITLE', 'Install').sub('mspire', 'Install').sub("inMenu: true\n", ''))
59
59
  end
60
60
 
61
- desc "create and upload docs to server"
62
- task :upload_docs => :html_docs do
61
+ desc "upload docs (doc/output) to server"
62
+ task :upload_docs do
63
63
  sh "scp -i ~/.ssh/rubyforge_key -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
64
64
  end
65
65
 
66
+ # best to use webgen 0.3.8 right now
67
+ # to get working (may not require all these steps):
68
+ # gem install RedCloth
69
+ # gem install BlueCloth
70
+ # soft link the bluecloth binary into path
66
71
  desc "creates docs in doc/html"
67
72
  task :html_docs => [:cp_top_level_docs] do
68
73
  FileUtils.cd 'doc' do
69
74
  sh "webgen"
70
75
  end
76
+ FileUtils.cp 'doc/src/archive/t2x', 'doc/output/archive/t2x'
71
77
  end
72
78
 
73
- rdoc_options = ['--main', 'README', '--title', NAME]
74
- rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
79
+ desc "does html_docs and rdoc and puts rdoc inside html_docs"
80
+ task :all_docs => [:html_docs, :rdoc] do
81
+ FileUtils.mv 'html', 'doc/output/rdoc'
82
+ end
83
+
84
+ #rdoc_options = ['--main', 'README', '--title', NAME]
85
+ rdoc_options = ['--main', 'README.rdoc', '--title', NAME]
86
+ #rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
87
+ rdoc_extra_includes = ['README.rdoc']
75
88
 
76
89
  Rake::RDocTask.new do |rd|
77
- rd.main = "README"
78
- rd.rdoc_files.include rdoc_extra_includes
90
+ rd.main = "README.rdoc"
91
+ rd.rdoc_files.include("lib/**/*.rb", *rdoc_extra_includes )
79
92
  rd.options.push( *rdoc_options )
80
93
  end
81
94
 
@@ -124,7 +137,12 @@ Spec::Rake::SpecTask.new('spec') do |t|
124
137
  Rake::Task[:ensure_gem_is_uninstalled].invoke
125
138
  Rake::Task[:ensure_dependencies].invoke
126
139
  Rake::Task[:ensure_large_testfiles].invoke
127
- t.libs = ['lib']
140
+ t.libs =
141
+ if !ENV['LIB'].nil?
142
+ [ENV['LIB']]
143
+ else
144
+ ['lib']
145
+ end
128
146
  #t.ruby_opts = ['-I', 'lib']
129
147
  t.spec_files = FileList['specs/**/*_spec.rb']
130
148
  end
@@ -135,7 +153,13 @@ Spec::Rake::SpecTask.new('specl') do |t|
135
153
  Rake::Task[:ensure_dependencies].invoke
136
154
  Rake::Task[:ensure_large_testfiles].invoke
137
155
  t.spec_files = FileList['specs/**/*_spec.rb']
138
- t.libs = ['lib']
156
+ t.libs =
157
+ if !ENV['LIB'].nil?
158
+ [ENV['LIB']]
159
+ else
160
+ ['lib']
161
+ end
162
+ #t.libs = ['lib']
139
163
  #t.ruby_opts = ['-I', 'lib']
140
164
  t.spec_opts = ['--format', 'specdoc' ]
141
165
  end
@@ -147,7 +171,12 @@ Spec::Rake::SpecTask.new('rcov') do |t|
147
171
  Rake::Task[:ensure_large_testfiles].invoke
148
172
  t.spec_files = FileList['specs/**/*_spec.rb']
149
173
  t.rcov = true
150
- t.libs = ['lib']
174
+ t.libs =
175
+ if !ENV['LIB'].nil?
176
+ [ENV['LIB']]
177
+ else
178
+ ['lib']
179
+ end
151
180
  #t.ruby_opts = ['-I', 'lib']
152
181
  t.rcov_opts = ['--exclude', 'specs']
153
182
  end
@@ -160,9 +189,15 @@ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_
160
189
  file.include?(ENV['SPECM'])
161
190
  end
162
191
  end
192
+ lib =
193
+ if !ENV['LIB'].nil?
194
+ ENV['LIB']
195
+ else
196
+ 'lib'
197
+ end
163
198
  files_to_run.each do |spc|
164
199
  puts "------ SPEC=#{spc} ------"
165
- system "ruby -I lib -S spec #{spc} --format specdoc"
200
+ system "ruby -I #{lib} -S spec #{spc} --format specdoc"
166
201
  end
167
202
  end
168
203
 
@@ -238,8 +273,9 @@ spec = Gem::Specification.new do |s|
238
273
  s.rdoc_options = rdoc_options
239
274
  s.extra_rdoc_files = rdoc_extra_includes
240
275
  s.executables = FL["bin/*"].map {|file| File.basename(file) }
241
- s.add_dependency('libjtp', '~> 0.2.13')
276
+ s.add_dependency('libjtp', '~> 0.2.14')
242
277
  s.add_dependency('axml', '~> 0.0.0')
278
+ s.add_dependency('arrayclass', '~> 0.1.0')
243
279
  s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
244
280
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
245
281
  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
data/bin/aafreqs.rb CHANGED
File without changes
File without changes
@@ -12,7 +12,8 @@ DEFAULT_MS_MODEL = 'LCQ'
12
12
  DEFAULT_MASS_ANALYZER = 'Ion Trap'
13
13
  ##############################################################
14
14
 
15
- require 'spec_id/sequest/pepxml'
15
+ #require 'spec_id/sequest/pepxml' # dies if this guy is called (why???)
16
+ require 'spec_id/proph/pep_summary' # <- he requests the above...hmmm
16
17
  require 'spec_id'
17
18
  require 'optparse'
18
19
  require 'ostruct'
File without changes
data/bin/fasta_shaker.rb CHANGED
File without changes
File without changes
data/bin/gi2annot.rb CHANGED
File without changes
data/bin/id_class_anal.rb CHANGED
File without changes
data/bin/id_precision.rb CHANGED
File without changes
data/bin/ms_to_lmat.rb CHANGED
File without changes
File without changes
File without changes
File without changes
data/bin/raw_to_mzXML.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/ruby -w
2
2
 
3
3
  require 'optparse'
4
- require 'spec/mzxml'
4
+ require 'ms/converter/mzxml'
5
5
  require 'fileutils'
6
6
 
7
7
  progname = File.basename(__FILE__)
@@ -21,11 +21,11 @@ if ARGV.size == 0
21
21
  exit
22
22
  end
23
23
 
24
- converter = MS::MzXML.find_mzxml_converter
24
+ converter = MS::Converter::MzXML.find_mzxml_converter
25
25
  if converter
26
26
  $stderr.puts "using #{converter} to convert files"
27
27
  else
28
- puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
28
+ puts "cannot find [#{MS::Converter::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
29
29
  puts ENV['PATH'].split(/[:;]/).join(", ")
30
30
  abort
31
31
  end
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/ruby
2
+
3
+ perc_cmd = 'percolator'
4
+
5
+ require 'optparse'
6
+ require 'spec_id/srf'
7
+
8
+ # percolator_v1.02_32bit_linux -o reverse_meta.sqm normal_NOCYS/meta.sqm reverse_NOCYS/meta.sqm
9
+ # percolator_v1.02_32bit_linux -o reverse_cat_meta.sqm -P INV_ reverse_cat_NOCYS/meta.sqm &
10
+
11
+ file_hash = {
12
+ :srg => "bioworks.srg",
13
+ :sqg_in => "bioworks.sqg",
14
+ :sqg_decoy => "decoy.sqg",
15
+ :perc_out => "perc.sqg",
16
+ :perc_stdout => "perc.stdout",
17
+ :perc_stderr => "perc.stderr",
18
+ :perc_ext => ".psqt",
19
+ }
20
+ (default_srg, sqg_in, perc_out, sqg_decoy, perc_stdout, perc_stderr, perc_ext) = file_hash.values_at(:srg, :sqg_in, :perc_out, :sqg_decoy, :perc_stdout, :perc_stderr, :perc_ext)
21
+
22
+ opt = {}
23
+ toclean = []
24
+ opts = OptionParser.new do |op|
25
+ op.banner = "usage: #{File.basename(__FILE__)} -d PATTERN <file>.srf ..."
26
+ op.separator " #{File.basename(__FILE__)} -d PATTERN <file>.srg"
27
+ op.separator " #{File.basename(__FILE__)} <normal>.srg <decoy>.srg"
28
+ op.separator ""
29
+ op.separator " creates necessary meta files in current working directory and"
30
+ op.separator " runs command '#{perc_cmd}'"
31
+ op.separator ""
32
+ op.separator " (all in current working directory)"
33
+ op.separator " 1) (if given .srf files) creates file: #{default_srg}"
34
+ op.separator " 2) creates .sqt file for each srf file (placed in dir with srf file)"
35
+ op.separator " 3) creates percolator (meta) input file(s): #{sqg_in}"
36
+ op.separator " [and for separate searches: #{sqg_decoy}]"
37
+ op.separator " 4) creates a percolator (meta) output file: #{perc_out}"
38
+ op.separator " 5) runs percolator which creates a a #{perc_ext} for each .srf file"
39
+ op.separator " 6) captures stdout in #{perc_stdout} and stderr in #{perc_stderr}"
40
+ op.separator ""
41
+ op.separator " .srg files are text files with full paths to .srf files"
42
+ op.separator " create with command 'srf_group.rb'"
43
+ op.separator ""
44
+ op.on("-d", "--decoy <pattern>", "decoy pattern, eg.: -d REVERSE_") {|v| opt[:decoy] = v }
45
+ op.on("-c", "--clean", "removes ALL generated files except #{perc_ext}") {|v| opt[:clean] = v }
46
+ op.on("-v", "--verbose", "spits out info") {|v| $VERBOSE = v }
47
+ end
48
+ opts.parse!
49
+
50
+ if ARGV.size == 0 or (!opt[:decoy] && (ARGV.size != 2))
51
+ puts opts.to_s
52
+ exit
53
+ end
54
+
55
+ #raise RunTimeError, "command #{perc_cmd} must be callable!" unless `#{perc_cmd}`.match(/Usage/)
56
+
57
+ files = ARGV.to_a
58
+
59
+ # create srg file:
60
+ srg_files =
61
+ if files[0] =~ /\.srf$/i
62
+ obj = SRFGroup.new
63
+ obj.filenames = files.to_a
64
+ puts("CREATING: #{default_srg}") if $VERBOSE
65
+ obj.to_srg(default_srg)
66
+ toclean << default_srg
67
+ [default_srg]
68
+ elsif files[0] =~ /\.srg$/i
69
+ files
70
+ else
71
+ abort "files must have proper extensions"
72
+ end
73
+
74
+ # create the sqt files:
75
+ all_sqt_filenames = srg_files.map do |srg_file|
76
+ srf_filenames = SRFGroup.srg_to_paths(srg_file)
77
+ srf_filenames.map do |file|
78
+ new_filename = file.sub(/\.srf$/i, '.sqt')
79
+ puts("CREATING: #{new_filename}") if $VERBOSE
80
+ SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename)
81
+ toclean << new_filename
82
+ new_filename
83
+ end
84
+ end
85
+
86
+ # create the percolator input file:
87
+ all_sqt_filenames.zip(file_hash.values_at(:sqg_in, :sqg_decoy)) do |sqt_filenames,filename|
88
+ puts("CREATING: #{filename}") if $VERBOSE
89
+ File.open(filename, 'w') {|fh| fh.puts(sqt_filenames.join("\n")) }
90
+ toclean << filename
91
+ end
92
+
93
+ # create the percolator output file:
94
+ psqt_filenames = all_sqt_filenames[0].map do |file|
95
+ file.sub(/\.sqt$/, perc_ext)
96
+ end
97
+
98
+ puts("CREATING: #{perc_out}") if $VERBOSE
99
+ File.open(perc_out, 'w') {|fh| fh.puts(psqt_filenames.join("\n")) }
100
+ toclean << perc_out
101
+
102
+ # run percolator
103
+ to_run =
104
+ if opt[:decoy]
105
+ "#{perc_cmd} -o #{perc_out} -P #{opt[:decoy]} #{sqg_in} 1>#{perc_stdout} 2>#{perc_stderr}"
106
+ else
107
+ "#{perc_cmd} -o #{perc_out} #{sqg_in} #{sqg_decoy} 1>#{perc_stdout} 2>#{perc_stderr}"
108
+ end
109
+
110
+ puts("RUNNING: #{to_run}") if $VERBOSE
111
+ `#{to_run}`
112
+
113
+ toclean << perc_stdout
114
+ toclean << perc_stderr
115
+
116
+ if opt[:clean]
117
+ toclean.each do |file|
118
+ puts("REMOVING: #{file}") if $VERBOSE
119
+ File.unlink(file) if File.exist?(file)
120
+ end
121
+ end
122
+
data/bin/sqt_group.rb CHANGED
File without changes
data/bin/srf_group.rb CHANGED
File without changes
data/changelog.txt CHANGED
@@ -162,3 +162,32 @@ sample_enzyme)
162
162
  ## version 0.3.9
163
163
 
164
164
  1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
+
166
+ ## version 0.3.10
167
+
168
+ 1. added run_percolator.rb script which makes running multiple files easy
169
+
170
+ ## version 0.3.11
171
+
172
+ 1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
+ 2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
+ larger files to be parsed
175
+
176
+ ## version 0.4.0
177
+
178
+ 1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
+ 2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
180
+ 3. lazy eval working on mzData
181
+ 4. mzData not necessarily guaranteed to have precursor intensities on lazy
182
+ eval methods (however, the method intensity_at_mz will still work (causing
183
+ evaluation))
184
+
185
+ ## version 0.4.1
186
+
187
+ 1. added support for reading mzXML version 3.0 (may fail in some cases)
188
+
189
+ ## version 0.4.2
190
+
191
+ 1. added MS::MSRun.open method
192
+ 2. added method to write dta files from SRF
193
+
@@ -1,4 +1,3 @@
1
- require 'array_class'
2
1
 
3
2
  # This is modeled after the Thermo gradient
4
3
  class GradientProgram
data/lib/ms/msrun.rb CHANGED
@@ -9,6 +9,8 @@ require 'ms/converter/mzxml'
9
9
 
10
10
  module MS; end
11
11
  class MS::MSRun
12
+
13
+ MSRunDefaultOpts = { :lazy => :string }
12
14
 
13
15
  attr_accessor :start_time, :end_time
14
16
  attr_accessor :scans
@@ -19,24 +21,56 @@ class MS::MSRun
19
21
  # the total number of scans
20
22
  attr_writer :scan_count
21
23
 
22
- # should be able to read basic information from a variety of files
23
- # this will be written in regexp's because REXML is way too slow, xmlparser
24
- # is not guaranteed to be on every system, xmlib is not on win32.
25
- # spectra is false, then spectra are not parsed out and included
24
+
25
+ #### # [note: precursor intensities not guaranteed to exist unless :
26
+ # TODO: may need to eliminate unavailable precursor intensities if they
27
+ # doing lazy evaluation?? or it becomes lazy too??
28
+
26
29
  # OPTIONS:
27
- # :spectra => *true|false # whether to parse out spectra
28
- # [note: precursor intensities not guaranteed to exist unless :spectra == true]
29
- def initialize(file=nil, opts={})
30
- myopts = opts.dup ; myopts[:msrun] = self
31
- if file
32
- filetype_and_version = MS::Parser.filetype_and_version(file)
33
- parser = MS::Parser.new(filetype_and_version, :msrun)
34
- parser.parse(file, myopts)
30
+ # :lazy => :string | :not | :no_spectra | :io
31
+ # :string = (default) stores each spectrum as a base64 decoded
32
+ # string that is further processed into Arrays of Floats when m/z
33
+ # or intensity information is access. This lazy evaluation
34
+ # should work on most files.
35
+ # :not = all information is read into memory and parsed into
36
+ # objects. Should only be used for small-medium files (< 80MB on
37
+ # a machine with 2GB memory)
38
+ # :no_spectra = if no peak information is required use this to
39
+ # avoid the overhead of parsing and creating spectra.
40
+ # :io = stores the io object and indices into spectrum data.
41
+ # When spectral information is requested (m/z or intensity
42
+ # information) then the spectrum is read from the io object and
43
+ # evaluated (requires an open io object when spectrum information
44
+ # is requested)
45
+ def initialize(file_or_io=nil, opts={})
46
+ if opts[:lazy] == :io
47
+ if !file_or_io.is_a?(IO)
48
+ raise ArgumentError, "Caller must provide an IO object (rather than filename) if using {:lazy => :io}"
49
+ end
50
+ end
51
+ myopts = MSRunDefaultOpts.merge(opts)
52
+ myopts[:msrun] = self
53
+ if file_or_io
54
+ filetype_and_version = MS::Parser.filetype_and_version(file_or_io)
55
+ parser = MS::Parser.new(filetype_and_version, :msrun, myopts)
56
+ parser.parse(file_or_io, myopts)
35
57
  #MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
36
58
  (@filetype, @version) = filetype_and_version
37
59
  end
38
60
  end
39
61
 
62
+ # This will automatically use :lazy => :io, open the file, and close it
63
+ # after the block returns.
64
+ # MS::MSRun.open("file.mzXML") do |ms|
65
+ # ms.scans.each {|scan| ... do something }
66
+ # end
67
+ def self.open(filename, opts={})
68
+ File.open(filename) do |fh|
69
+ ms = MS::MSRun.new(fh, {:lazy => :io}.merge(opts))
70
+ yield(ms)
71
+ end
72
+ end
73
+
40
74
  # returns an array, whose indices provide the number of scans in each index level the ms_levels, [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
41
75
  # ...
42
76
  def scan_counts
@@ -95,9 +129,9 @@ class MS::MSRun
95
129
  lo_mz = sc.start_mz
96
130
  hi_mz = sc.end_mz
97
131
  else
98
- mz = sc.spectrum.mz
99
- hi_mz = mz.last
100
- lo_mz = mz.first
132
+ mz_ar = sc.spectrum.mzs
133
+ hi_mz = mz_ar.last
134
+ lo_mz = mz_ar.first
101
135
  end
102
136
  break
103
137
  end
@@ -117,12 +151,12 @@ class MS::MSRun
117
151
  # didn't have the attributes (find by brute force)
118
152
  scans.each do |sc|
119
153
  if sc.ms_level == mslevel
120
- mz = sc.spectrum.mz
121
- if mz.last > hi_mz
122
- hi_mz = mz.last
154
+ mz_ar = sc.spectrum.mzs
155
+ if mz_ar.last > hi_mz
156
+ hi_mz = mz_ar.last
123
157
  end
124
- if mz.last < lo_mz
125
- lo_mz = mz.last
158
+ if mz_ar.last < lo_mz
159
+ lo_mz = mz_ar.last
126
160
  end
127
161
  end
128
162
  end
@@ -135,7 +169,7 @@ class MS::MSRun
135
169
  def precursor_mz_by_scan_num
136
170
  ar = Array.new(@scans.size + 1)
137
171
  @scans.each do |scan|
138
- if prec = scan.precursors.first
172
+ if prec = scan.precursor
139
173
  ar[scan.num] = prec.mz
140
174
  else
141
175
  ar[scan.num] = nil
@@ -170,7 +204,7 @@ class MS::MSRun
170
204
  # same as the instance method (creates an object without spectrum and calls
171
205
  # instance method of the same name)
172
206
  def self.precursor_mz_by_scan_num(file)
173
- self.new(file, :spectra => false).precursor_mz_by_scan_num
207
+ self.new(file, :lazy => :no_spectra, :fix_bad_tags => true).precursor_mz_by_scan_num
174
208
  end
175
209
 
176
210
  # only adds the parent if one is not already present!
@@ -190,13 +224,12 @@ class MS::MSRun
190
224
  (prev_level - level).times do parent_stack.shift end
191
225
  end
192
226
  if scan.ms_level > 1
193
- scan.precursors.each do |precursor|
194
- #precursor.parent = parent_stack.first # that's the next line's
195
- precursor[2] = parent_stack.first unless precursor[2]
196
- #precursor.intensity
197
- if add_intensities
198
- precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
199
- end
227
+ precursor = scan.precursor
228
+ #precursor.parent = parent_stack.first # that's the next line's
229
+ precursor[2] = parent_stack.first unless precursor[2]
230
+ #precursor.intensity
231
+ if add_intensities
232
+ precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
200
233
  end
201
234
  end
202
235
  prev_level = level