RubyGems - mspire - Versions diffs - 0.3.9 → 0.4.2 - Mend

mspire 0.3.9 → 0.4.2

Files changed (87) hide show

data/INSTALL +24 -7
data/README +15 -13
data/README.rdoc +18 -0
data/Rakefile +50 -14
data/bin/aafreqs.rb +0 -0
data/bin/bioworks2excel.rb +0 -0
data/bin/bioworks_to_pepxml.rb +2 -1
data/bin/bioworks_to_pepxml_gui.rb +0 -0
data/bin/fasta_shaker.rb +0 -0
data/bin/filter_and_validate.rb +0 -0
data/bin/gi2annot.rb +0 -0
data/bin/id_class_anal.rb +0 -0
data/bin/id_precision.rb +0 -0
data/bin/ms_to_lmat.rb +0 -0
data/bin/pepproph_filter.rb +0 -0
data/bin/protein_summary.rb +0 -0
data/bin/protxml2prots_peps.rb +0 -0
data/bin/raw_to_mzXML.rb +3 -3
data/bin/run_percolator.rb +122 -0
data/bin/sqt_group.rb +0 -0
data/bin/srf_group.rb +0 -0
data/changelog.txt +29 -0
data/lib/ms/gradient_program.rb +0 -1
data/lib/ms/msrun.rb +62 -29
data/lib/ms/parser/mzdata/axml.rb +55 -0
data/lib/ms/parser/mzdata/dom.rb +51 -36
data/lib/ms/parser/mzdata.rb +8 -2
data/lib/ms/parser/mzxml/axml.rb +59 -0
data/lib/ms/parser/mzxml/dom.rb +80 -57
data/lib/ms/parser/mzxml/hpricot.rb +1 -1
data/lib/ms/parser/mzxml/libxml.rb +6 -2
data/lib/ms/parser/mzxml.rb +110 -3
data/lib/ms/parser.rb +4 -4
data/lib/ms/precursor.rb +19 -4
data/lib/ms/scan.rb +7 -7
data/lib/ms/spectrum.rb +249 -58
data/lib/mspire.rb +1 -1
data/lib/spec_id/bioworks.rb +2 -2
data/lib/spec_id/precision/filter/cmdline.rb +8 -1
data/lib/spec_id/precision/prob/cmdline.rb +2 -2
data/lib/spec_id/precision/prob.rb +1 -0
data/lib/spec_id/proph/pep_summary.rb +3 -4
data/lib/spec_id/proph/prot_summary.rb +3 -3
data/lib/spec_id/protein_summary.rb +1 -1
data/lib/spec_id/sequest/pepxml.rb +5 -5
data/lib/spec_id/sqt.rb +4 -4
data/lib/spec_id/srf.rb +49 -8
data/lib/spec_id.rb +5 -0
data/lib/xml_style_parser.rb +16 -2
data/script/compile_and_plot_smriti_final.rb +0 -0
data/script/create_little_pepxml.rb +0 -0
data/script/degenerate_peptides.rb +0 -0
data/script/estimate_fpr_by_cysteine.rb +0 -0
data/script/extract_gradient_programs.rb +1 -1
data/script/find_cysteine_background.rb +0 -0
data/script/genuine_tps_and_probs.rb +0 -0
data/script/get_apex_values_rexml.rb +0 -0
data/script/mascot_fix_pepxml.rb +123 -0
data/script/msvis.rb +0 -0
data/script/mzXML2timeIndex.rb +0 -0
data/script/peps_per_bin.rb +0 -0
data/script/prep_dir.rb +0 -0
data/script/simple_protein_digestion.rb +0 -0
data/script/smriti_final_analysis.rb +0 -0
data/script/sqt_to_meta.rb +0 -0
data/script/top_hit_per_scan.rb +0 -0
data/script/toppred_to_yaml.rb +0 -0
data/script/tpp_installer.rb +0 -0
data/specs/bin/prob_validate_spec.rb +5 -2
data/specs/bin/protein_summary_spec.rb +5 -1
data/specs/ms/msrun_spec.rb +176 -133
data/specs/ms/parser_spec.rb +3 -3
data/specs/ms/spectrum_spec.rb +0 -2
data/specs/spec_id/precision/filter_spec.rb +4 -1
data/specs/spec_id/precision/prob_spec.rb +2 -2
data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
data/specs/spec_id/sqt_spec.rb +5 -5
data/specs/spec_id/srf_spec.rb +56 -93
data/specs/spec_id/srf_spec_helper.rb +121 -284
data/specs/spec_id_spec.rb +3 -0
data/specs/transmem/toppred_spec.rb +1 -0
data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
metadata +247 -229

data/INSTALL CHANGED Viewed

@@ -4,24 +4,35 @@ Prerequisites
 Much of the package will work without any prerequisites at all.  Some functionality may require additional ruby packages or other converters.  These are listed in current order of importance:
+* libjtp - generic library installed automatically if you install mspire with rubygems (or 'gem install libjtp')
 * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
-* [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
-* ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
-* [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot').  Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work.  Under one-click installer for windows this package requires a little configuration.  It works with no configuration on cygwin (or linux).
+* [axml](http://axml.rubyforge.org/) dom wrapper for xmlparser. ('gem install axml')
+* ['t2x'](archive/t2x) linux executable to convert .RAW files (Xcalibur 1.x) to version 1 mzXML files
+Optional:
+* [libxml](http://libxml.rubyforge.org/) can be used instead of xmlparser.  In Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
+* [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot').  For some plotting.  Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work.  Under one-click installer for windows this package requires a little configuration.  It works with no configuration on cygwin (or linux).
 Installation
 ------------
     gem install mspire
-or
-    gem install -t mspire (to run tests)
+See [installation under cygwin](cygwin.html) if you're on Windows.
 Development
 -----------
-prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
+anonymous svn checkout:
+    svn checkout svn://rubyforge.org/var/svn/mspire
+prerequisites:
+* [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --remote')
+* [rspec](http://rspec.info/) (with rubygems: 'gem install rspec --remote')
+Use rake:
     % rake -T
     rake clean              # Remove any temporary products.
@@ -32,3 +43,9 @@ prereq: [rake](http://rake.rubyforge.org/) (with rubygems: 'gem install rake --r
     rake upload_docs        # create and upload docs to server
     ...etc...
+    run tests: rake spec
+         (or): rake specl
+    run tests with large files: rake spec SPEC_LARGE=t
+    run test on one file: rake spec SPEC=specs/{path_to_spec_file}

data/README CHANGED Viewed

@@ -1,5 +1,3 @@
-mspire
-======
 mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
 working with MS proteomics data in ruby.  It seeks to provide support for open
@@ -12,7 +10,7 @@ Current Focus
 The project is currently focusing on the following:
-* SEQUEST data (particularly the output of Bioworks 3.2)
+* SEQUEST data (particularly the output of Bioworks 3.2-3.3)
 * mzXML
 * mzData
 * ProteinProphet
@@ -21,15 +19,16 @@ The project is currently focusing on the following:
 Features
 --------
-* mzXML (version 1 & 2) parsing
+* mzXML (version 1, 2, and 3) parsing
 * mzData parsing
 * bioworks .srf (binary files) reader
+* read/write .sqt files
 * bioworks to PeptideProphet input (pepXML files)
 * lightweight APEX values parser
 * histogram protein probabilities
-* developed for Linux, should port easily to Windows or others
 * protein summary views with custom false ID cutoff values
 * conversion to OBI-Warp input files
+* portable: works across platforms
 Validation by:
   * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
@@ -38,10 +37,12 @@ Validation by:
   * Generic sample bias (e.g., low abundance/high abundance proteins)
   * Defined sample
-Working with:
-  * Bioworks (3.2-3.3.1)
-  * Peptide/Protein Prophet
-  * Easily extensible to others
+Spectra and Spectra Identification
+----------------------------------
+The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
+The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
 Tutorials
 ---------
@@ -54,12 +55,13 @@ Warning
 -------
 This is an experimental package.  As such, all versions prior to version 1.0
-will only loosely follow the rubygems versioning scheme: interfaces are
-subject to change without a major change in version number prior to version
-1.0.  Beyond version 1.0, the versioning scheme will be strictly adhered to.
+may contain interface changes on minor revisions (major.minor.build) (e.g.,
+0.4.0 may contain interface changes from 0.3.9).  Beyond version 1.0, the
+versioning scheme will be strictly adhered to (no interface changes except on
+major revisions).
 Installation
 ------------
-see [Install](install.html)
+see [Install](install/index.html)

data/README.rdoc ADDED Viewed

@@ -0,0 +1,18 @@
+= mspire
+mass spectrometry proteomics in ruby
+Please refer to the latest Documentation[http://mspire.rubyforge.org]
+Please see Installation[http://mspire.rubyforge.org/install/index.html]
+== Data Models and Examples
+Object models and usage examples are online:
+[MS::MSRun] http://mspire.rubyforge.org/ms/msrun.html
+[SpecID] http://mspire.rubyforge.org/spec_id/spec_id.html
+[SRF] http://mspire.rubyforge.org/spec_id/srf.html
+[False Identification Rate Determination] http://mspire.rubyforge.org/spec_id/fir/index.html
+[OBI-Warp] http://mspire.rubyforge.org/ms/obiwarp.html

data/Rakefile CHANGED Viewed

@@ -17,7 +17,7 @@ $dependencies = %w(libjtp)
 $tfiles_large = 'test_files_large'
 changelog = "changelog.txt"
-core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
+core_files = FL["INSTALL", "README", "README.rdoc", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
 big_dist_files = core_files + FL["test_files_large/**/*"]
 dist_files = core_files
@@ -43,7 +43,7 @@ def move_and_add_webgen_header(file, newfile, src_dir, heading)
   string = IO.read file
   with_header = heading + string
   File.open(newfile, 'w') {|v| v.print with_header }
-  FileUtils.mv newfile, src_dir
+  FileUtils.mv newfile, src_dir, :force => true
 end
 desc "copy top level files into doc/src"
@@ -55,27 +55,40 @@ directoryName: mspire
 ---\n"
   src = "doc/src"
   move_and_add_webgen_header('README', 'index.page', src, string.sub('TITLE', 'Home'))
-  move_and_add_webgen_header('INSTALL', 'install.page', src, string.sub('TITLE', 'Install'))
+  move_and_add_webgen_header('INSTALL', 'index.page', src + '/install', string.sub('TITLE', 'Install').sub('mspire', 'Install').sub("inMenu: true\n", ''))
 end
-desc "create and upload docs to server"
-task :upload_docs => :html_docs do
+desc "upload docs (doc/output) to server"
+task :upload_docs do
   sh "scp -i ~/.ssh/rubyforge_key -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
 end
+# best to use webgen 0.3.8 right now
+# to get working (may not require all these steps):
+#    gem install RedCloth
+#    gem install BlueCloth
+#    soft link the bluecloth binary into path
 desc "creates docs in doc/html"
 task :html_docs => [:cp_top_level_docs] do
   FileUtils.cd 'doc' do
     sh "webgen"
   end
+  FileUtils.cp 'doc/src/archive/t2x', 'doc/output/archive/t2x'
 end
-rdoc_options = ['--main', 'README', '--title', NAME]
-rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
+desc "does html_docs and rdoc and puts rdoc inside html_docs"
+task :all_docs => [:html_docs, :rdoc] do
+  FileUtils.mv 'html', 'doc/output/rdoc'
+end
+#rdoc_options = ['--main', 'README', '--title', NAME]
+rdoc_options = ['--main', 'README.rdoc', '--title', NAME]
+#rdoc_extra_includes = ["README", "INSTALL", "LICENSE"]
+rdoc_extra_includes = ['README.rdoc']
 Rake::RDocTask.new do |rd|
-  rd.main = "README"
-  rd.rdoc_files.include rdoc_extra_includes
+  rd.main = "README.rdoc"
+  rd.rdoc_files.include("lib/**/*.rb", *rdoc_extra_includes )
   rd.options.push( *rdoc_options )
 end
@@ -124,7 +137,12 @@ Spec::Rake::SpecTask.new('spec') do |t|
   Rake::Task[:ensure_gem_is_uninstalled].invoke
   Rake::Task[:ensure_dependencies].invoke
   Rake::Task[:ensure_large_testfiles].invoke
-  t.libs = ['lib']
+  t.libs =
+    if !ENV['LIB'].nil?
+      [ENV['LIB']]
+    else
+      ['lib']
+    end
   #t.ruby_opts = ['-I', 'lib']
   t.spec_files = FileList['specs/**/*_spec.rb']
 end
@@ -135,7 +153,13 @@ Spec::Rake::SpecTask.new('specl') do |t|
   Rake::Task[:ensure_dependencies].invoke
   Rake::Task[:ensure_large_testfiles].invoke
   t.spec_files = FileList['specs/**/*_spec.rb']
-  t.libs = ['lib']
+  t.libs =
+    if !ENV['LIB'].nil?
+      [ENV['LIB']]
+    else
+      ['lib']
+    end
+  #t.libs = ['lib']
   #t.ruby_opts = ['-I', 'lib']
   t.spec_opts = ['--format', 'specdoc' ]
 end
@@ -147,7 +171,12 @@ Spec::Rake::SpecTask.new('rcov') do |t|
   Rake::Task[:ensure_large_testfiles].invoke
   t.spec_files = FileList['specs/**/*_spec.rb']
   t.rcov = true
-  t.libs = ['lib']
+  t.libs =
+    if !ENV['LIB'].nil?
+      [ENV['LIB']]
+    else
+      ['lib']
+    end
   #t.ruby_opts = ['-I', 'lib']
   t.rcov_opts = ['--exclude', 'specs']
 end
@@ -160,9 +189,15 @@ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_
       file.include?(ENV['SPECM'])
     end
   end
+  lib =
+    if !ENV['LIB'].nil?
+      ENV['LIB']
+    else
+      'lib'
+    end
   files_to_run.each do |spc|
     puts "------ SPEC=#{spc} ------"
-    system "ruby -I lib -S spec #{spc} --format specdoc"
+    system "ruby -I #{lib} -S spec #{spc} --format specdoc"
   end
 end
@@ -238,8 +273,9 @@ spec = Gem::Specification.new do |s|
   s.rdoc_options = rdoc_options
   s.extra_rdoc_files = rdoc_extra_includes
   s.executables = FL["bin/*"].map {|file| File.basename(file) }
-  s.add_dependency('libjtp', '~> 0.2.13')
+  s.add_dependency('libjtp', '~> 0.2.14')
   s.add_dependency('axml', '~> 0.0.0')
+  s.add_dependency('arrayclass', '~> 0.1.0')
   s.requirements << '"libxml" is the prefered xml parser right now.  libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
   s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
   s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'

data/bin/aafreqs.rb CHANGED Viewed

File without changes

data/bin/bioworks2excel.rb CHANGED Viewed

File without changes

data/bin/bioworks_to_pepxml.rb CHANGED Viewed

@@ -12,7 +12,8 @@ DEFAULT_MS_MODEL = 'LCQ'
 DEFAULT_MASS_ANALYZER = 'Ion Trap'
 ##############################################################
-require 'spec_id/sequest/pepxml'
+#require 'spec_id/sequest/pepxml'  # dies if this guy is called (why???)
+require 'spec_id/proph/pep_summary'  # <- he requests the above...hmmm
 require 'spec_id'
 require 'optparse'
 require 'ostruct'

data/bin/bioworks_to_pepxml_gui.rb CHANGED Viewed

File without changes

data/bin/fasta_shaker.rb CHANGED Viewed

File without changes

data/bin/filter_and_validate.rb CHANGED Viewed

File without changes

data/bin/gi2annot.rb CHANGED Viewed

File without changes

data/bin/id_class_anal.rb CHANGED Viewed

File without changes

data/bin/id_precision.rb CHANGED Viewed

File without changes

data/bin/ms_to_lmat.rb CHANGED Viewed

File without changes

data/bin/pepproph_filter.rb CHANGED Viewed

File without changes

data/bin/protein_summary.rb CHANGED Viewed

File without changes

data/bin/protxml2prots_peps.rb CHANGED Viewed

File without changes

data/bin/raw_to_mzXML.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/ruby -w
 require 'optparse'
-require 'spec/mzxml'
+require 'ms/converter/mzxml'
 require 'fileutils'
 progname = File.basename(__FILE__)
@@ -21,11 +21,11 @@ if ARGV.size == 0
   exit
 end
-converter = MS::MzXML.find_mzxml_converter
+converter = MS::Converter::MzXML.find_mzxml_converter
 if converter
   $stderr.puts "using #{converter} to convert files"
 else
-  puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
+  puts "cannot find [#{MS::Converter::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
   puts ENV['PATH'].split(/[:;]/).join(", ")
   abort
 end

data/bin/run_percolator.rb ADDED Viewed

@@ -0,0 +1,122 @@
+#!/usr/bin/ruby
+perc_cmd = 'percolator'
+require 'optparse'
+require 'spec_id/srf'
+# percolator_v1.02_32bit_linux -o reverse_meta.sqm normal_NOCYS/meta.sqm reverse_NOCYS/meta.sqm
+# percolator_v1.02_32bit_linux -o reverse_cat_meta.sqm -P INV_ reverse_cat_NOCYS/meta.sqm &
+file_hash = {
+  :srg => "bioworks.srg",
+  :sqg_in => "bioworks.sqg",
+  :sqg_decoy => "decoy.sqg",
+  :perc_out => "perc.sqg",
+  :perc_stdout => "perc.stdout",
+  :perc_stderr => "perc.stderr",
+  :perc_ext => ".psqt",
+}
+(default_srg, sqg_in, perc_out, sqg_decoy, perc_stdout, perc_stderr, perc_ext) = file_hash.values_at(:srg, :sqg_in, :perc_out, :sqg_decoy, :perc_stdout, :perc_stderr, :perc_ext)
+opt = {}
+toclean = []
+opts = OptionParser.new do |op|
+  op.banner =  "usage: #{File.basename(__FILE__)} -d PATTERN <file>.srf ..."
+  op.separator "       #{File.basename(__FILE__)} -d PATTERN <file>.srg"
+  op.separator "       #{File.basename(__FILE__)} <normal>.srg <decoy>.srg"
+  op.separator ""
+  op.separator "  creates necessary meta files in current working directory and"
+  op.separator "  runs command '#{perc_cmd}'"
+  op.separator ""
+  op.separator "  (all in current working directory)"
+  op.separator "  1) (if given .srf files) creates file: #{default_srg}"
+  op.separator "  2) creates .sqt file for each srf file (placed in dir with srf file)"
+  op.separator "  3) creates percolator (meta) input file(s): #{sqg_in}"
+  op.separator "                  [and for separate searches: #{sqg_decoy}]"
+  op.separator "  4) creates a percolator (meta) output file: #{perc_out}"
+  op.separator "  5) runs percolator which creates a  a #{perc_ext} for each .srf file"
+  op.separator "  6) captures stdout in #{perc_stdout} and stderr in #{perc_stderr}"
+  op.separator ""
+  op.separator "  .srg files are text files with full paths to .srf files"
+  op.separator "  create with command 'srf_group.rb'"
+  op.separator ""
+  op.on("-d", "--decoy <pattern>", "decoy pattern, eg.: -d REVERSE_") {|v| opt[:decoy] = v }
+  op.on("-c", "--clean", "removes ALL generated files except #{perc_ext}") {|v| opt[:clean] = v }
+  op.on("-v", "--verbose", "spits out info") {|v| $VERBOSE = v }
+end
+opts.parse!
+if ARGV.size == 0 or (!opt[:decoy] && (ARGV.size != 2))
+  puts opts.to_s
+  exit
+end
+#raise RunTimeError, "command #{perc_cmd} must be callable!" unless `#{perc_cmd}`.match(/Usage/)
+files = ARGV.to_a
+# create srg file:
+srg_files =
+  if files[0] =~ /\.srf$/i
+    obj = SRFGroup.new
+    obj.filenames = files.to_a
+    puts("CREATING: #{default_srg}") if $VERBOSE
+    obj.to_srg(default_srg)
+    toclean << default_srg
+    [default_srg]
+  elsif files[0] =~ /\.srg$/i
+    files
+  else
+    abort "files must have proper extensions"
+  end
+# create the sqt files:
+all_sqt_filenames = srg_files.map do |srg_file|
+  srf_filenames = SRFGroup.srg_to_paths(srg_file)
+  srf_filenames.map do |file|
+    new_filename = file.sub(/\.srf$/i, '.sqt')
+    puts("CREATING: #{new_filename}") if $VERBOSE
+    SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename)
+    toclean << new_filename
+    new_filename
+  end
+end
+# create the percolator input file:
+all_sqt_filenames.zip(file_hash.values_at(:sqg_in, :sqg_decoy)) do |sqt_filenames,filename|
+  puts("CREATING: #{filename}") if $VERBOSE
+  File.open(filename, 'w') {|fh| fh.puts(sqt_filenames.join("\n")) }
+  toclean << filename
+end
+# create the percolator output file:
+psqt_filenames = all_sqt_filenames[0].map do |file|
+  file.sub(/\.sqt$/, perc_ext)
+end
+puts("CREATING: #{perc_out}") if $VERBOSE
+File.open(perc_out, 'w') {|fh| fh.puts(psqt_filenames.join("\n")) }
+toclean << perc_out
+# run percolator
+to_run =
+  if opt[:decoy]
+  "#{perc_cmd} -o #{perc_out} -P #{opt[:decoy]} #{sqg_in} 1>#{perc_stdout} 2>#{perc_stderr}"
+  else
+  "#{perc_cmd} -o #{perc_out} #{sqg_in} #{sqg_decoy} 1>#{perc_stdout} 2>#{perc_stderr}"
+  end
+puts("RUNNING: #{to_run}") if $VERBOSE
+`#{to_run}`
+toclean << perc_stdout
+toclean << perc_stderr
+if opt[:clean]
+  toclean.each do |file|
+    puts("REMOVING: #{file}") if $VERBOSE
+    File.unlink(file) if File.exist?(file)
+  end
+end

data/bin/sqt_group.rb CHANGED Viewed

File without changes

data/bin/srf_group.rb CHANGED Viewed

File without changes

data/changelog.txt CHANGED Viewed

@@ -162,3 +162,32 @@ sample_enzyme)
 ## version 0.3.9
 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
+## version 0.3.10
+1. added run_percolator.rb script which makes running multiple files easy
+## version 0.3.11
+1. faster sensing of bad scan tags in mzXML v. 2.0 files
+2. implemented lazy evaluation of spectrum in 2 different ways allowing much
+larger files to be parsed
+## version 0.4.0
+1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
+2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
+3. lazy eval working on mzData
+4. mzData not necessarily guaranteed to have precursor intensities on lazy
+eval methods (however, the method intensity_at_mz will still work (causing
+evaluation))
+## version 0.4.1
+1. added support for reading mzXML version 3.0 (may fail in some cases)
+## version 0.4.2
+1. added MS::MSRun.open method
+2. added method to write dta files from SRF

data/lib/ms/gradient_program.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-require 'array_class'
 # This is modeled after the Thermo gradient
 class GradientProgram

data/lib/ms/msrun.rb CHANGED Viewed

@@ -9,6 +9,8 @@ require 'ms/converter/mzxml'
 module MS; end
 class MS::MSRun
+  MSRunDefaultOpts = { :lazy => :string }
   attr_accessor :start_time, :end_time
   attr_accessor :scans
@@ -19,24 +21,56 @@ class MS::MSRun
   # the total number of scans
   attr_writer :scan_count
-  # should be able to read basic information from a variety of files
-  # this will be written in regexp's because REXML is way too slow, xmlparser
-  # is not guaranteed to be on every system, xmlib is not on win32.
-  # spectra is false, then spectra are not parsed out and included
+  #### # [note: precursor intensities not guaranteed to exist unless :
+  # TODO: may need to eliminate unavailable precursor intensities if they
+  # doing lazy evaluation??  or it becomes lazy too??
   # OPTIONS:
-  #   :spectra => *true|false   # whether to parse out spectra
-  # [note: precursor intensities not guaranteed to exist unless :spectra == true]
-  def initialize(file=nil, opts={})
-    myopts = opts.dup ; myopts[:msrun] = self
-    if file
-      filetype_and_version = MS::Parser.filetype_and_version(file)
-      parser = MS::Parser.new(filetype_and_version, :msrun)
-      parser.parse(file, myopts)
+  #   :lazy => :string | :not | :no_spectra | :io
+  #            :string = (default) stores each spectrum as a base64 decoded
+  #            string that is further processed into Arrays of Floats when m/z
+  #            or intensity information is accessed.  This lazy evaluation
+  #            should work on most files.
+  #            :not = all information is read into memory and parsed into
+  #            objects.  Should only be used for small-medium files (< 80MB on
+  #            a machine with 2GB memory)
+  #            :no_spectra = if no peak information is required use this to
+  #            avoid the overhead of parsing and creating spectra.
+  #            :io = stores the io object and indices into spectrum data.
+  #            When spectral information is requested (m/z or intensity
+  #            information) then the spectrum is read from the io object and
+  #            evaluated (requires an open io object when spectrum information
+  #            is requested)
+  def initialize(file_or_io=nil, opts={})
+    if opts[:lazy] == :io
+      if !file_or_io.is_a?(IO)
+        raise ArgumentError, "Caller must provide an IO object (rather than filename) if using {:lazy => :io}"
+      end
+    end
+    myopts = MSRunDefaultOpts.merge(opts)
+    myopts[:msrun] = self
+    if file_or_io
+      filetype_and_version = MS::Parser.filetype_and_version(file_or_io)
+      parser = MS::Parser.new(filetype_and_version, :msrun, myopts)
+      parser.parse(file_or_io, myopts)
       #MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
       (@filetype, @version) = filetype_and_version
     end
   end
+  # This will automatically use :lazy => :io, open the file, and close it
+  # after the block returns.
+  #     MS::MSRun.open("file.mzXML") do |ms|
+  #       ms.scans.each {|scan| ... do something }
+  #     end
+  def self.open(filename, opts={})
+    File.open(filename) do |fh|
+      ms = MS::MSRun.new(fh, {:lazy => :io}.merge(opts))
+      yield(ms)
+    end
+  end
   # returns an array, whose indices provide the number of scans in each index level the ms_levels, [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
   # ...
   def scan_counts
@@ -95,9 +129,9 @@ class MS::MSRun
           lo_mz = sc.start_mz
           hi_mz = sc.end_mz
         else
-          mz = sc.spectrum.mz
-          hi_mz = mz.last
-          lo_mz = mz.first
+          mz_ar = sc.spectrum.mzs
+          hi_mz = mz_ar.last
+          lo_mz = mz_ar.first
         end
         break
       end
@@ -117,12 +151,12 @@ class MS::MSRun
       # didn't have the attributes (find by brute force)
       scans.each do |sc|
         if sc.ms_level == mslevel
-          mz = sc.spectrum.mz
-          if mz.last > hi_mz
-            hi_mz = mz.last
+          mz_ar = sc.spectrum.mzs
+          if mz_ar.last > hi_mz
+            hi_mz = mz_ar.last
           end
-          if mz.last < lo_mz
-            lo_mz = mz.last
+          if mz_ar.last < lo_mz
+            lo_mz = mz_ar.last
           end
         end
       end
@@ -135,7 +169,7 @@ class MS::MSRun
   def precursor_mz_by_scan_num
     ar = Array.new(@scans.size + 1)
     @scans.each do |scan|
-      if prec = scan.precursors.first
+      if prec = scan.precursor
         ar[scan.num] = prec.mz
       else
         ar[scan.num] = nil
@@ -170,7 +204,7 @@ class MS::MSRun
   # same as the instance method (creates an object without spectrum and calls
   # instance method of the same name)
   def self.precursor_mz_by_scan_num(file)
-    self.new(file, :spectra => false).precursor_mz_by_scan_num
+    self.new(file, :lazy => :no_spectra, :fix_bad_tags => true).precursor_mz_by_scan_num
   end
   # only adds the parent if one is not already present!
@@ -190,13 +224,12 @@ class MS::MSRun
         (prev_level - level).times do parent_stack.shift end
       end
       if scan.ms_level > 1
-        scan.precursors.each do |precursor|
-          #precursor.parent = parent_stack.first  # that's the next line's
-          precursor[2] = parent_stack.first unless precursor[2]
-          #precursor.intensity
-          if add_intensities
-            precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
-          end
+        precursor = scan.precursor
+        #precursor.parent = parent_stack.first  # that's the next line's
+        precursor[2] = parent_stack.first unless precursor[2]
+        #precursor.intensity
+        if add_intensities
+          precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
         end
       end
       prev_level = level