RubyGems - mspire - Versions diffs - 0.2.4 → 0.3.0 - Mend

mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (233) hide show

data/INSTALL +1 -0
data/README +25 -0
data/Rakefile +129 -40
data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
data/bin/bioworks_to_pepxml.rb +1 -0
data/bin/fasta_shaker.rb +1 -96
data/bin/filter_and_validate.rb +5 -0
data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
data/bin/prob_validate.rb +6 -0
data/bin/raw_to_mzXML.rb +2 -2
data/bin/srf_group.rb +1 -0
data/bin/srf_to_sqt.rb +40 -0
data/changelog.txt +68 -0
data/lib/align/chams.rb +6 -6
data/lib/align.rb +4 -3
data/lib/bsearch.rb +120 -0
data/lib/fasta.rb +318 -86
data/lib/group_by.rb +10 -0
data/lib/index_by.rb +11 -0
data/lib/merge_deep.rb +21 -0
data/lib/{spec → ms/converter}/mzxml.rb +77 -109
data/lib/ms/gradient_program.rb +171 -0
data/lib/ms/msrun.rb +209 -0
data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
data/lib/ms/parser/mzdata/axml.rb +12 -0
data/lib/ms/parser/mzdata/dom.rb +160 -0
data/lib/ms/parser/mzdata/libxml.rb +7 -0
data/lib/ms/parser/mzdata.rb +25 -0
data/lib/ms/parser/mzxml/axml.rb +11 -0
data/lib/ms/parser/mzxml/dom.rb +159 -0
data/lib/ms/parser/mzxml/hpricot.rb +253 -0
data/lib/ms/parser/mzxml/libxml.rb +15 -0
data/lib/ms/parser/mzxml/regexp.rb +122 -0
data/lib/ms/parser/mzxml/rexml.rb +72 -0
data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
data/lib/ms/parser/mzxml.rb +175 -0
data/lib/ms/parser.rb +108 -0
data/lib/ms/precursor.rb +10 -0
data/lib/ms/scan.rb +81 -0
data/lib/ms/spectrum.rb +193 -0
data/lib/ms.rb +10 -0
data/lib/mspire.rb +4 -0
data/lib/roc.rb +61 -1
data/lib/sample_enzyme.rb +31 -8
data/lib/scan_i.rb +21 -0
data/lib/spec_id/aa_freqs.rb +7 -3
data/lib/spec_id/bioworks.rb +20 -14
data/lib/spec_id/digestor.rb +139 -0
data/lib/spec_id/mass.rb +116 -0
data/lib/spec_id/parser/proph.rb +236 -0
data/lib/spec_id/precision/filter/cmdline.rb +209 -0
data/lib/spec_id/precision/filter/interactive.rb +134 -0
data/lib/spec_id/precision/filter/output.rb +147 -0
data/lib/spec_id/precision/filter.rb +623 -0
data/lib/spec_id/precision/output.rb +60 -0
data/lib/spec_id/precision/prob/cmdline.rb +139 -0
data/lib/spec_id/precision/prob/output.rb +88 -0
data/lib/spec_id/precision/prob.rb +171 -0
data/lib/spec_id/proph/pep_summary.rb +92 -0
data/lib/spec_id/proph/prot_summary.rb +484 -0
data/lib/spec_id/proph.rb +2 -466
data/lib/spec_id/protein_summary.rb +2 -2
data/lib/spec_id/sequest/params.rb +316 -0
data/lib/spec_id/sequest/pepxml.rb +1513 -0
data/lib/spec_id/sequest.rb +2 -1672
data/lib/spec_id/srf.rb +445 -177
data/lib/spec_id.rb +183 -95
data/lib/spec_id_xml.rb +8 -10
data/lib/transmem/phobius.rb +147 -0
data/lib/transmem/toppred.rb +368 -0
data/lib/transmem.rb +157 -0
data/lib/validator/aa.rb +135 -0
data/lib/validator/background.rb +73 -0
data/lib/validator/bias.rb +95 -0
data/lib/validator/cmdline.rb +260 -0
data/lib/validator/decoy.rb +94 -0
data/lib/validator/digestion_based.rb +69 -0
data/lib/validator/probability.rb +48 -0
data/lib/validator/prot_from_pep.rb +234 -0
data/lib/validator/transmem.rb +272 -0
data/lib/validator/true_pos.rb +46 -0
data/lib/validator.rb +214 -0
data/lib/xml.rb +38 -0
data/lib/xml_style_parser.rb +105 -0
data/lib/xmlparser_wrapper.rb +19 -0
data/script/compile_and_plot_smriti_final.rb +97 -0
data/script/extract_gradient_programs.rb +56 -0
data/script/get_apex_values_rexml.rb +44 -0
data/script/mzXML2timeIndex.rb +1 -1
data/script/smriti_final_analysis.rb +103 -0
data/script/toppred_to_yaml.rb +47 -0
data/script/tpp_installer.rb +1 -1
data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
data/specs/bin/fasta_shaker_spec.rb +259 -0
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
data/specs/bin/filter_and_validate_spec.rb +124 -0
data/specs/bin/ms_to_lmat_spec.rb +34 -0
data/specs/bin/prob_validate_spec.rb +62 -0
data/specs/bin/protein_summary_spec.rb +10 -0
data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
data/specs/gi_spec.rb +22 -0
data/specs/load_bin_path.rb +7 -0
data/specs/merge_deep_spec.rb +13 -0
data/specs/ms/gradient_program_spec.rb +77 -0
data/specs/ms/msrun_spec.rb +455 -0
data/specs/ms/parser_spec.rb +92 -0
data/specs/ms/spectrum_spec.rb +89 -0
data/specs/roc_spec.rb +251 -0
data/specs/rspec_autotest.rb +149 -0
data/specs/sample_enzyme_spec.rb +41 -0
data/specs/spec_helper.rb +133 -0
data/specs/spec_id/aa_freqs_spec.rb +52 -0
data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
data/specs/spec_id/digestor_spec.rb +75 -0
data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
data/specs/spec_id/precision/filter/output_spec.rb +31 -0
data/specs/spec_id/precision/filter_spec.rb +243 -0
data/specs/spec_id/precision/prob_spec.rb +111 -0
data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
data/specs/spec_id/sequest/params_spec.rb +68 -0
data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
data/specs/spec_id/sqt_spec.rb +138 -0
data/specs/spec_id/srf_spec.rb +209 -0
data/specs/spec_id/srf_spec_helper.rb +302 -0
data/specs/spec_id_helper.rb +33 -0
data/specs/spec_id_spec.rb +361 -0
data/specs/spec_id_xml_spec.rb +33 -0
data/specs/transmem/phobius_spec.rb +423 -0
data/specs/transmem/toppred_spec.rb +297 -0
data/specs/transmem_spec.rb +60 -0
data/specs/transmem_spec_shared.rb +64 -0
data/specs/validator/aa_spec.rb +107 -0
data/specs/validator/background_spec.rb +51 -0
data/specs/validator/bias_spec.rb +146 -0
data/specs/validator/decoy_spec.rb +51 -0
data/specs/validator/fasta_helper.rb +26 -0
data/specs/validator/prot_from_pep_spec.rb +141 -0
data/specs/validator/transmem_spec.rb +145 -0
data/specs/validator/true_pos_spec.rb +58 -0
data/specs/validator_helper.rb +33 -0
data/specs/xml_spec.rb +12 -0
data/test_files/000_pepxml18_small.xml +206 -0
data/test_files/020a.mzXML.timeIndex +4710 -0
data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
data/test_files/4-03-03_small-prot.xml +321 -0
data/test_files/4-03-03_small.xml +3876 -0
data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
data/test_files/bioworks-3.3_10prots.xml +5999 -0
data/test_files/bioworks31.params +77 -0
data/test_files/bioworks32.params +62 -0
data/test_files/bioworks33.params +63 -0
data/test_files/bioworks_single_run_small.xml +7237 -0
data/test_files/bioworks_small.fasta +212 -0
data/test_files/bioworks_small.params +63 -0
data/test_files/bioworks_small.phobius +109 -0
data/test_files/bioworks_small.toppred.out +2847 -0
data/test_files/bioworks_small.xml +5610 -0
data/test_files/bioworks_with_INV_small.xml +3753 -0
data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
data/test_files/corrupted_900.srf +0 -0
data/test_files/head_of_7MIX.srf +0 -0
data/test_files/interact-opd1_mods_small-prot.xml +304 -0
data/test_files/messups.fasta +297 -0
data/test_files/opd1/000.my_answer.100lines.xml +101 -0
data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
data/test_files/opd1/000_020-prot.png +0 -0
data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
data/test_files/opd1/000_020_3prots-prot.xml +62 -0
data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
data/test_files/opd1/sequest.3.1.params +77 -0
data/test_files/opd1/sequest.3.2.params +62 -0
data/test_files/opd1/twenty_scans.mzXML +418 -0
data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
data/test_files/opd1/twenty_scans_answ.lmat +0 -0
data/test_files/opd1/twenty_scans_answ.lmata +9 -0
data/test_files/opd1_020_beginning.RAW +0 -0
data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
data/test_files/pepproph_small.xml +4691 -0
data/test_files/phobius.small.noheader.txt +50 -0
data/test_files/phobius.small.small.txt +53 -0
data/test_files/s01_anC1_ld020mM.key.txt +25 -0
data/test_files/s01_anC1_ld020mM.meth +0 -0
data/test_files/small.fasta +297 -0
data/test_files/smallraw.RAW +0 -0
data/test_files/tf_bioworks2excel.bioXML +14340 -0
data/test_files/tf_bioworks2excel.txt.actual +1035 -0
data/test_files/toppred.small.out +416 -0
data/test_files/toppred.xml.out +318 -0
data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
data/test_files/yeast_gly_small-prot.xml +265 -0
data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
data/test_files/yeast_gly_small.xml +3807 -0
data/test_files/yeast_gly_small2.parentTimes +6 -0
metadata +273 -57
data/bin/filter.rb +0 -6
data/bin/precision.rb +0 -5
data/lib/spec/mzdata/parser.rb +0 -108
data/lib/spec/mzdata.rb +0 -48
data/lib/spec/mzxml/parser.rb +0 -449
data/lib/spec/scan.rb +0 -55
data/lib/spec_id/filter.rb +0 -797
data/lib/spec_id/precision.rb +0 -421
data/lib/toppred.rb +0 -18
data/script/filter-peps.rb +0 -164
data/test/tc_aa_freqs.rb +0 -59
data/test/tc_fasta_shaker.rb +0 -149
data/test/tc_filter.rb +0 -203
data/test/tc_filter_peps.rb +0 -46
data/test/tc_gi.rb +0 -17
data/test/tc_id_class_anal.rb +0 -70
data/test/tc_id_precision.rb +0 -89
data/test/tc_msrun.rb +0 -88
data/test/tc_mzxml.rb +0 -88
data/test/tc_mzxml_to_lmat.rb +0 -36
data/test/tc_peptide_parent_times.rb +0 -27
data/test/tc_precision.rb +0 -60
data/test/tc_roc.rb +0 -166
data/test/tc_sample_enzyme.rb +0 -32
data/test/tc_scan.rb +0 -26
data/test/tc_sequest.rb +0 -336
data/test/tc_spec.rb +0 -78
data/test/tc_spec_id.rb +0 -201
data/test/tc_spec_id_xml.rb +0 -36
data/test/tc_srf.rb +0 -262

data/INSTALL CHANGED Viewed

@@ -5,6 +5,7 @@ Prerequisites
 Much of the package will work without any prerequisites at all.  Some functionality may require additional Ruby packages or other converters.  These are listed in current order of importance:
 * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
+* [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
 * ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
 * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot').  Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work.  Under one-click installer for windows this package requires a little configuration.  It works with no configuration on cygwin (or linux).

data/README CHANGED Viewed

@@ -18,6 +18,31 @@ The project is currently focusing on the following:
 * ProteinProphet
 * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
+Features
+--------
+* mzXML (version 1 & 2) parsing
+* mzData parsing
+* bioworks .srf (binary files) reader
+* bioworks to PeptideProphet input (pepXML files)
+* lightweight APEX values parser
+* histogram protein probabilities
+* developed for Linux, should port easily to Windows or others
+* protein summary views with custom false ID cutoff values
+* conversion to OBI-Warp input files
+Validation by:
+  * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
+  * Amino acid (e.g., search for unblocked cysteines)
+  * Transmembrane prediction (Phobius or TopPred)
+  * Generic sample bias (e.g., low abundance/high abundance proteins)
+  * Defined sample
+Working with:
+  * Bioworks (3.2-3.3.1)
+  * Peptide/Protein Prophet
+  * Easily extensible to others
 Tutorials
 ---------

data/Rakefile CHANGED Viewed

@@ -2,9 +2,9 @@ require 'rake'
 require 'rubygems'
 require 'rake/rdoctask'
 require 'rake/gempackagetask'
-require 'rake/testtask'
 require 'rake/clean'
 require 'fileutils'
+require 'spec/rake/spectask'
 ###############################################
 # GLOBAL
@@ -13,23 +13,25 @@ FL = FileList
 NAME = "mspire"
-lib_files = FL["lib/**/*"]
-test_dir_too = FL["test/**/*"]
+$dependencies = %w(libjtp)
+$tfiles_large = 'test_files_large'
+changelog = "changelog.txt"
-little_dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "changelog.txt", "release_notes.txt", "{bin,script,tutorial}/**/*"]
-dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "{bin,script,tutorial}/**/*", test_dir_too]
+core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
+big_dist_files = core_files + FL["test_files_large/**/*"]
-dist_files = little_dist_files # comment out to include test files
+dist_files = core_files
+# dist_files = big_dist_files
 ###############################################
 # ENVIRONMENT
 ###############################################
 ENV["OS"] == "Windows_NT" ? WIN32 = true : WIN32 = false
-gemcmd = "gem"
+$gemcmd = "gem"
 if WIN32
   unless ENV["TERM"] == "cygwin"
-    gemcmd << ".cmd"
+    $gemcmd << ".cmd"
   end
 end
@@ -81,40 +83,123 @@ end
 # TESTS
 ###############################################
-desc "Run unit tests."
-Rake::TestTask.new do |t|
-  reply = `#{gemcmd} list -l #{NAME}`
+namespace :spec do
+  task :autotest do
+    require './specs/rspec_autotest'
+    RspecAutotest.run
+  end
+end
+task :ensure_dependencies do
+  $dependencies.each do |dep|
+    unless `#{$gemcmd} list -l #{dep}`.include?(dep)
+      abort "ABORTING: install #{dep} before testing!"
+    end
+  end
+end
+task :ensure_large_testfiles do
+  if !File.exist?($tfiles_large) and !ENV['SPEC_LARGE'].nil?
+    warn "Not running with large files since #{$tfiles_large} does not exist!"
+    warn "Removing SPEC_LARGE from ENV!"
+    ENV.delete('SPEC_LARGE')
+  end
+end
+task :ensure_gem_is_uninstalled do
+  reply = `#{$gemcmd} list -l #{NAME}`
   if reply.include? NAME + " ("
     puts "GOING to uninstall gem '#{NAME}' for testing"
     if WIN32
-      %x( #{gemcmd} uninstall -x #{NAME} )
+      %x( #{$gemcmd} uninstall -x #{NAME} )
     else
-      %x( sudo #{gemcmd} uninstall -x #{NAME} )
+      %x( sudo #{$gemcmd} uninstall -x #{NAME} )
     end
   end
-  #  t.libs << "lib"  ## done by default
-  t.test_files = FL["test/tc_*.rb"]
-  #t.verbose = true
 end
+desc "Run all specs"
+Spec::Rake::SpecTask.new('spec') do |t|
+  Rake::Task[:ensure_gem_is_uninstalled].invoke
+  Rake::Task[:ensure_dependencies].invoke
+  Rake::Task[:ensure_large_testfiles].invoke
+  t.libs = ['lib']
+  #t.ruby_opts = ['-I', 'lib']
+  t.spec_files = FileList['specs/**/*_spec.rb']
+end
+desc "Run all specs"
+Spec::Rake::SpecTask.new('specl') do |t|
+  Rake::Task[:ensure_gem_is_uninstalled].invoke
+  Rake::Task[:ensure_dependencies].invoke
+  Rake::Task[:ensure_large_testfiles].invoke
+  t.spec_files = FileList['specs/**/*_spec.rb']
+  t.libs = ['lib']
+  #t.ruby_opts = ['-I', 'lib']
+  t.spec_opts = ['--format', 'specdoc' ]
+end
-desc "Run unit tests individual on each test"
-task :test_ind do |t|
-  reply = `#{gemcmd} list -l #{NAME}`
-  if reply.include? NAME + " ("
-    %x( sudo #{gemcmd} uninstall -x #{NAME} )
-  end
+desc "Run all specs with RCov"
+Spec::Rake::SpecTask.new('rcov') do |t|
+  Rake::Task[:ensure_gem_is_uninstalled].invoke
+  Rake::Task[:ensure_dependencies].invoke
+  Rake::Task[:ensure_large_testfiles].invoke
+  t.spec_files = FileList['specs/**/*_spec.rb']
+  t.rcov = true
+  t.libs = ['lib']
+  #t.ruby_opts = ['-I', 'lib']
+  t.rcov_opts = ['--exclude', 'specs']
+end
-  #  t.libs << "lib"  ## done by default
-  test_files = FL["test/tc_*.rb"]
-  test_files.each do |file|
-    puts "TESTING: #{file.sub(/test\//,'')}"
-    puts `ruby -I lib #{file}`
+task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_testfiles] do
+  # files that match a key word
+  files_to_run = ENV['SPEC'] || FileList['specs/**/*_spec.rb']
+  if ENV['SPECM']
+    files_to_run = files_to_run.select do |file|
+      file.include?(ENV['SPECM'])
+    end
+  end
+  files_to_run.each do |spc|
+    puts "------ SPEC=#{spc} ------"
+    system "ruby -I lib -S spec #{spc} --format specdoc"
   end
-  #t.verbose = true
 end
+#Spec::Rake::SpecTask.new(:spec) do |t|
+#  uninstall_gem
+#  t.spec_files = FileList['spec/**/spec_*.rb']
+#  t.libs = FileList['lib']
+#  t.spec_opts = ['--format', 'specdoc']
+#end
+#desc "Run unit tests."
+#Rake::TestTask.new do |t|
+#  uninstall_gem
+#  #  t.libs << "lib"  ## done by default
+#  t.test_files = FL["test/tc_*.rb"]
+#  #t.verbose = true
+#end
+#desc "Run unit tests individual on each test"
+#task :test_ind do |t|
+#  reply = `#{$gemcmd} list -l #{NAME}`
+#  if reply.include? NAME + " ("
+#    %x( sudo #{$gemcmd} uninstall -x #{NAME} )
+#  end
+#
+#  #  t.libs << "lib"  ## done by default
+#  test_files = FL["test/tc_*.rb"]
+#  test_files.each do |file|
+#    puts "TESTING: #{file.sub(/test\//,'')}"
+#    puts `ruby -I lib #{file}`
+#  end
+#  #t.verbose = true
+#end
@@ -140,7 +225,7 @@ tm = Time.now
 spec = Gem::Specification.new do |s|
   s.platform = Gem::Platform::RUBY
   s.name = NAME
-  s.version = "0.2.4"
+  s.version = IO.readlines(changelog).grep(/##.*version/).pop.split(/\s+/).last.chomp
   s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
   s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
   s.email = "jprince@icmb.utexas.edu"
@@ -149,17 +234,19 @@ spec = Gem::Specification.new do |s|
   s.description = "mspire is for working with mass spectrometry proteomics data"
   s.has_rdoc = true
   s.authors = ["John Prince"]
-  s.files = little_dist_files
+  s.files = dist_files
   s.rdoc_options = rdoc_options
   s.extra_rdoc_files = rdoc_extra_includes
   s.executables = FL["bin/*"].map {|file| File.basename(file) }
-  s.add_dependency('libjtp', '~> 0.1.4')
-  s.requirements << '"xmlparser" is the prefered xml parser right now.  REXML and regular expressions are used as fallback in some routines.'
+  s.add_dependency('libjtp', '~> 0.2.5')
+  s.add_dependency('axml')
+  s.requirements << '"libxml" is the prefered xml parser right now.  libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
   s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
   s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
   s.requirements << '"rake" is useful for development'
   s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
-  s.test_files = FL["test/tc_*.rb"]
+  #s.test_files = FL["test/tc_*.rb"]
+  s.test_files = FL["specs/**/*_spec.rb"]
 end
 desc "Create packages."
@@ -180,20 +267,22 @@ end
 #  t.package_task
 #end
+task :remove_pkg do
+  FileUtils.rm_rf "pkg"
+end
 task :install => [:reinstall]
 desc "uninstalls the package, packages a fresh one, and installs"
-task :reinstall => [:clean, :package] do
-  reply = `#{gemcmd} list -l #{NAME}`
-  if reply.include? NAME + " ("
-    %x( #{gemcmd} uninstall -x #{NAME} )
+task :reinstall => [:remove_pkg, :clean, :package] do
+  reply = `#{$gemcmd} list -l #{NAME}`
+  if reply.include?(NAME + " (")
+    %x( #{$gemcmd} uninstall -x #{NAME} )
   end
   FileUtils.cd("pkg") do
-    %x( #{gemcmd} install #{NAME} )
+    %x( #{$gemcmd} install #{NAME}*.gem )
   end
 end
 ###############################################

data/bin/{find_aa_freq.rb → aafreqs.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/ruby -w
+require 'fasta'
 require 'spec_id/aa_freqs'
 if ARGV.size < 1
@@ -10,7 +10,7 @@ if ARGV.size < 1
 end
 ARGV.each do |file|
-  obj = SpecID::AAFreqs.new(file)
+  obj = SpecID::AAFreqs.new(Fasta.new(file))
   puts file
   obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
     puts "#{k}: #{v}"

data/bin/bioworks_to_pepxml.rb CHANGED Viewed

@@ -12,6 +12,7 @@ DEFAULT_MS_MODEL = 'LCQ'
 DEFAULT_MASS_ANALYZER = 'Ion Trap'
 ##############################################################
+require 'spec_id/sequest/pepxml'
 require 'spec_id'
 require 'optparse'
 require 'ostruct'

data/bin/fasta_shaker.rb CHANGED Viewed

@@ -1,100 +1,5 @@
 #!/usr/bin/ruby
-# This is my second attempt at writing a simple interface for messing with
-# fasta files.  Acheiving simplicity (and power) is challenging.  It usually
-# only happens on the second (or sometimes more) try.  Of course, in
-# retrospect the simple solution seems sooo obvious.  But its deceptive.
-# It takes work to acheive simplicity for complex tasks.  That's my thought
-# for the day.
-# fasta_shaker as in a salt shaker.  Shake up your fasta proteins and let them
-# season your dinner (hopefully a protein dinner).  Mmmm.  Don't they taste
-# good all mixed up?  If you want, you can think of it as a pepper shaker.
-# I don't usually comment on my scripts (in my script, anyway), but this one
-# came out so nice and clean that I feel like I have room to spare.
 require 'fasta'
-require 'optparse'
-opt = {}
-opts = OptionParser.new do |op|
-  prog = File.basename(__FILE__)
-  op.banner = "usage: #{prog} <method> [OPTIONS] <file>.fasta"
-  op.separator "   <method> = reverse | shuffle"
-  op.on("-c", "--cat", "catenates the output to copy of original") {|v| opt[:cat] = v }
-  op.on("-o", "--out <string>", "name of output file (default is descriptive)") {|v| opt[:out] = v }
-  op.on("-p", "--prefix <string>", "give a header prefix to modified prots") {|v| opt[:prefix] = v }
-  op.on("-f", "--fraction <float>", "creates some fraction of proteins") {|v| opt[:fraction] = v }
-  op.separator "        [if fraction > 1 then the tag 'f<frac#>_' prefixed to proteins"
-  op.separator "         (after any given prefix) so that proteins are unique]"
-  op.on("--tryptic_peptides", "applies method to [KR][^P] peptides") {|v| opt[:tryptic_peptides] = v }
-  op.separator "EXAMPLES: "
-  op.separator "   #{prog} reverse file.fasta -o protein_aa_sequence_reversed.fasta"
-  op.separator "   #{prog} shuffle file.fasta -o protein_aa_sequence_shuffled.fasta"
-  op.separator "   #{prog} shuffle file.fasta -c -p SH_ -o normal_cat_shuffled_with_prefix.fasta"
-  op.separator "   #{prog} reverse file.fasta --tryptic_peptides tryptic_peptides_reversed.fasta"
-end
-opts.parse!
-if ARGV.size < 2
-  puts opts
-  exit
-end
-(method, file) = ARGV
-if opt[:cat] && !opt[:prefix]
-  puts "WARNING: concatenated proteins don't have unique headers"
-  puts "[you probably wanted to use the '--prefix' option!]"
-end
-# OUT filename:
-unless opt[:out]
-  filebase = file.sub(/\..*$/,'')
-  parts = [filebase]
-  parts << 'cat' if opt[:cat]
-  parts << method
-  parts << 'prefix' << opt[:prefix] if opt[:prefix]
-  parts << 'fraction' << opt[:fraction] if opt[:fraction]
-  parts << 'tryptic_peptides' if opt[:tryptic_peptides]
-  opt[:out] = parts.join("_") << ".fasta"
-end
-## READ the file
-fasta = Fasta.new.read_file(file)
-## CAT (save an original copy)
-fasta_orig = fasta.dup if opt[:cat]
-## FRACTION the proteins
-if f = opt[:fraction]
-  prefix = nil
-  f = f.to_f
-  if f > 1.0
-    prefix = proc {|cnt| "f#{cnt}_" }
-  end
-  fasta = fasta.fraction_of_prots(f, prefix)
-end
-## PREFIX the proteins
-if pre = opt[:prefix]
-  fasta.header_prefix!(pre)
-end
-## MODIFY the proteins
-fasta.aaseq!((method + '!').to_sym, opt[:tryptic_peptides])
-## CAT (finish it up)
-if opt[:cat]
-  fasta_orig << fasta
-  fasta = fasta_orig
-end
-## WRITE out the file
-fasta.write_file(opt[:out])
+FastaShaker.shake_from_argv(ARGV)

data/bin/filter_and_validate.rb ADDED Viewed

@@ -0,0 +1,5 @@
+#!/usr/bin/ruby
+require 'spec_id/precision/filter'
+SpecID::Precision::Filter.new.filter_and_validate_cmdline(ARGV)

data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/ruby
-require 'spec/mzxml/parser'
+require 'ms/msrun'
 require 'optparse'
 require 'ostruct'
 require 'lmat'
@@ -14,7 +14,8 @@ opt[:inc_mz] = 1.0
 # get options:
 opts = OptionParser.new do |op|
-  op.banner = "usage: #{File.basename(__FILE__)} [options] file.mzXML ..."
+  op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
+  op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
   op.separator ""
   op.separator "(sums m/z values that round to the same bin)"
   op.separator ""
@@ -32,10 +33,10 @@ if ARGV.size < 1
 end
 ARGV.each do |file|
-  parser = Spec::MzXML::Parser.new
-  (start_mz, end_mz) = parser.start_and_end_mz(file)
-  (times, spectra) = parser.times_and_spectra(file)
-  times.map! do |tm| tm.to_f end
+  msrun = MS::MSRun.new(file)
+  mslevel = 1
+  (start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
+  (times, spectra) = msrun.times_and_spectra(mslevel)
   args = {
     :start_mz => start_mz,
     :end_mz => end_mz,
@@ -45,7 +46,7 @@ ARGV.each do |file|
     :inc_tm => nil,
   }
   args.merge!(opt)
-  lmat = LMat.new.from_raw_spectra(times, spectra, args)
+  lmat = LMat.new.from_times_and_spectra(times, spectra, args)
   outfile = file.sub(/\.mzXML$/, opt[:newext])
   if args[:ascii]
     outfile << "a"

data/bin/prob_validate.rb ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/ruby
+require 'spec_id/precision/prob'
+SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ARGV)

data/bin/raw_to_mzXML.rb CHANGED Viewed

@@ -21,11 +21,11 @@ if ARGV.size == 0
   exit
 end
-converter = Spec::MzXML.find_mzxml_converter
+converter = MS::MzXML.find_mzxml_converter
 if converter
   $stderr.puts "using #{converter} to convert files"
 else
-  puts "cannot find [#{Spec::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
+  puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
   puts ENV['PATH'].split(/[:;]/).join(", ")
   abort
 end

data/bin/srf_group.rb CHANGED Viewed

@@ -18,6 +18,7 @@ end
 if ARGV.size == 0
   puts opts
+  exit
 end
 obj = SRFGroup.new

data/bin/srf_to_sqt.rb ADDED Viewed

@@ -0,0 +1,40 @@
+#!/usr/bin/ruby
+require 'spec_id/srf'
+require 'optparse'
+opt = {}
+opt['db-info'] = false
+opt['db-path'] = nil
+opt['filter'] = true
+opts = OptionParser.new do |op|
+  op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
+  op.separator "outputs: <file>.sqt ..."
+  op.separator ""
+  op.separator "OPTIONS"
+  op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt['db-info'] = v }
+  op.on("-p", "--db-path <path_to_dir>", "if your database path has changed",
+                                         "and you want db-info, then give the",
+                                         "path to the new *directory*",
+                                         "e.g. /my/new/path") {|v| opt['db-path'] = v }
+  op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt['db-update'] = v }
+  op.on("-n", "--no-filter", "by default, pephit must be within",
+                             "peptide_mass_tolerance (defined in params)",
+                             "to be displayed.  Turns this off.") {|v| opt['filter'] = false}
+  op.on("-r", "--round", "round floating point values reasonably") {|v| opt['round'] = v }
+end
+opts.parse!
+if ARGV.size == 0
+  puts opts.to_s
+  exit
+end
+ARGV.each do |file|
+  abort "file #{file} must be named .srf" if file !~ /\.srf$/i
+  new_filename = file.sub(/\.srf$/i, '.sqt')
+  SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename, :db_info => opt['db-info'], :new_db_path => opt['db-path'], :update_db_path => opt['db-update'], :round => opt['round'])
+end

data/changelog.txt CHANGED Viewed

@@ -54,3 +54,71 @@ a prefix option
 in protein_summary.rb added handling for proteins with no annotation. (either
 display NA or use gi2annnot to grab them from NCBI)
+## version 0.2.5
+renamed prep_list in roc (potential breaks in code)
+## version 0.2.6
+1. Massive refactorization of filtering and validation.  Validation objects are
+created and then can be used to validate just about anything.
+2. Massive redo of the parsing of MS runs.  Can parse mzXML v1, v2.X
+(including readw broken output), and mzData (even Thermo's broken output).
+4. Moved all tests to specs (rspec).
+5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
+2.X)
+Bugfixes:
+1. The search_summary 'base_name' in pepxml output was incorrect (this did not
+appear to influence our analyses, however). Fixed.
+2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
+missed cleavages if the last amino acid was a cut point. Fixed.
+## version 0.2.7
+1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
+Now, the sample enzyme is set explicitly from the params file and the option
+is not available.  This can give more accurate pepxml files than
+before, depending on your enzyme.
+## version 0.2.9
+1. Added support for phobius transmembrane predictions
+2. have filter_and_validate.rb working well (multiple validators allowed).
+3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
+4. Added a bias validator
+## version 0.2.10
+1. Fixed --hits_separate flag in spec_id/filter
+## version 0.2.11
+1. Added prob precision support and reorganized filter_and_validate libs
+## version 0.2.12
+1. Fixed bug in transmem for prob and others.
+2. Can use axml (XMLParser based) or libxml depending on availability
+## version 0.2.13
+1. Fixed issue with --hits_separate
+2. filter_and_validate.rb requires decoy validator if decoy proteins
+(refactored code)
+## version 0.2.14
+1. Can read PeptideProphet files (should be able to read pepxml files, too)
+2. API change: Some slight modifications to the Sequest::PepXML object
+interfaces and implementations (using ArrayClass)
+## version 0.2.15
+1. can convert srf files to sqt files
+## version 0.3.0
+1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
+2. SQT export is correct and works at least on 3.2 and 3.3.1.

data/lib/align/chams.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec/msrun'
+require 'ms/msrun'
 module Align; end
 class Align::CHAMS
@@ -8,7 +8,9 @@ class Align::CHAMS
   # Scan1	Scan2	Edge_cost	Path_cost	Edge_direction
   attr_accessor :avg_score, :time_mscans, :time_nscans, :mscans, :nscans, :edge_costs, :path_costs, :directions
-  def initialize(chams_file, timeIndex_file1, timeIndex_file2)
+  # requires an object that will respond to [<scan_num>] to give time
+  # (seconds) for each file
+  def initialize(chams_file, time_by_scan_num1, time_by_scan_num2)
     @time_mscans = []
     @time_nscans = []
     @mscans = []
@@ -17,13 +19,11 @@ class Align::CHAMS
     @path_costs = []
     @directions = []
     read_chams_file(chams_file)
-    scans_by_num1 = Spec::MSRunIndex.new(timeIndex_file1).scans_by_num
-    scans_by_num2 = Spec::MSRunIndex.new(timeIndex_file2).scans_by_num
     @mscans.each_with_index do |scan,i|
-      @time_mscans[i] = scans_by_num1[scan].time
+      @time_mscans[i] = time_by_scan_num1[scan]
     end
     @nscans.each_with_index do |scan,i|
-      @time_nscans[i] = scans_by_num2[scan].time
+      @time_nscans[i] = time_by_scan_num2[scan]
     end
   end

data/lib/align.rb CHANGED Viewed

@@ -1,6 +1,7 @@
-require 'spec/mzxml/parser'
-require 'spec/msrun'
+#require 'ms/parser'
+#require 'ms/parser/mzxml'
+require 'ms/msrun'
 require 'spec_id/proph'
 require 'vec'
@@ -18,7 +19,7 @@ class Align
     ## Create scan indices on msrun name
     if mztimes.class != Array ; mztimes = [mztimes] end
-    msrun_indices = mztimes.collect do |file| Spec::MSRunIndex.new(file) end
+    msrun_indices = mztimes.collect do |file| MS::MSRunIndex.new(file) end
     scanindex_by_basename_noext = {}
     msrun_indices.each do |runindex|
       scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num