RubyGems - mspire - Versions diffs - 0.2.4 → 0.3.0 - Mend

mspire 0.2.4 → 0.3.0

Files changed (233) hide show

data/INSTALL +1 -0
data/README +25 -0
data/Rakefile +129 -40
data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
data/bin/bioworks_to_pepxml.rb +1 -0
data/bin/fasta_shaker.rb +1 -96
data/bin/filter_and_validate.rb +5 -0
data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
data/bin/prob_validate.rb +6 -0
data/bin/raw_to_mzXML.rb +2 -2
data/bin/srf_group.rb +1 -0
data/bin/srf_to_sqt.rb +40 -0
data/changelog.txt +68 -0
data/lib/align/chams.rb +6 -6
data/lib/align.rb +4 -3
data/lib/bsearch.rb +120 -0
data/lib/fasta.rb +318 -86
data/lib/group_by.rb +10 -0
data/lib/index_by.rb +11 -0
data/lib/merge_deep.rb +21 -0
data/lib/{spec → ms/converter}/mzxml.rb +77 -109
data/lib/ms/gradient_program.rb +171 -0
data/lib/ms/msrun.rb +209 -0
data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
data/lib/ms/parser/mzdata/axml.rb +12 -0
data/lib/ms/parser/mzdata/dom.rb +160 -0
data/lib/ms/parser/mzdata/libxml.rb +7 -0
data/lib/ms/parser/mzdata.rb +25 -0
data/lib/ms/parser/mzxml/axml.rb +11 -0
data/lib/ms/parser/mzxml/dom.rb +159 -0
data/lib/ms/parser/mzxml/hpricot.rb +253 -0
data/lib/ms/parser/mzxml/libxml.rb +15 -0
data/lib/ms/parser/mzxml/regexp.rb +122 -0
data/lib/ms/parser/mzxml/rexml.rb +72 -0
data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
data/lib/ms/parser/mzxml.rb +175 -0
data/lib/ms/parser.rb +108 -0
data/lib/ms/precursor.rb +10 -0
data/lib/ms/scan.rb +81 -0
data/lib/ms/spectrum.rb +193 -0
data/lib/ms.rb +10 -0
data/lib/mspire.rb +4 -0
data/lib/roc.rb +61 -1
data/lib/sample_enzyme.rb +31 -8
data/lib/scan_i.rb +21 -0
data/lib/spec_id/aa_freqs.rb +7 -3
data/lib/spec_id/bioworks.rb +20 -14
data/lib/spec_id/digestor.rb +139 -0
data/lib/spec_id/mass.rb +116 -0
data/lib/spec_id/parser/proph.rb +236 -0
data/lib/spec_id/precision/filter/cmdline.rb +209 -0
data/lib/spec_id/precision/filter/interactive.rb +134 -0
data/lib/spec_id/precision/filter/output.rb +147 -0
data/lib/spec_id/precision/filter.rb +623 -0
data/lib/spec_id/precision/output.rb +60 -0
data/lib/spec_id/precision/prob/cmdline.rb +139 -0
data/lib/spec_id/precision/prob/output.rb +88 -0
data/lib/spec_id/precision/prob.rb +171 -0
data/lib/spec_id/proph/pep_summary.rb +92 -0
data/lib/spec_id/proph/prot_summary.rb +484 -0
data/lib/spec_id/proph.rb +2 -466
data/lib/spec_id/protein_summary.rb +2 -2
data/lib/spec_id/sequest/params.rb +316 -0
data/lib/spec_id/sequest/pepxml.rb +1513 -0
data/lib/spec_id/sequest.rb +2 -1672
data/lib/spec_id/srf.rb +445 -177
data/lib/spec_id.rb +183 -95
data/lib/spec_id_xml.rb +8 -10
data/lib/transmem/phobius.rb +147 -0
data/lib/transmem/toppred.rb +368 -0
data/lib/transmem.rb +157 -0
data/lib/validator/aa.rb +135 -0
data/lib/validator/background.rb +73 -0
data/lib/validator/bias.rb +95 -0
data/lib/validator/cmdline.rb +260 -0
data/lib/validator/decoy.rb +94 -0
data/lib/validator/digestion_based.rb +69 -0
data/lib/validator/probability.rb +48 -0
data/lib/validator/prot_from_pep.rb +234 -0
data/lib/validator/transmem.rb +272 -0
data/lib/validator/true_pos.rb +46 -0
data/lib/validator.rb +214 -0
data/lib/xml.rb +38 -0
data/lib/xml_style_parser.rb +105 -0
data/lib/xmlparser_wrapper.rb +19 -0
data/script/compile_and_plot_smriti_final.rb +97 -0
data/script/extract_gradient_programs.rb +56 -0
data/script/get_apex_values_rexml.rb +44 -0
data/script/mzXML2timeIndex.rb +1 -1
data/script/smriti_final_analysis.rb +103 -0
data/script/toppred_to_yaml.rb +47 -0
data/script/tpp_installer.rb +1 -1
data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
data/specs/bin/fasta_shaker_spec.rb +259 -0
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
data/specs/bin/filter_and_validate_spec.rb +124 -0
data/specs/bin/ms_to_lmat_spec.rb +34 -0
data/specs/bin/prob_validate_spec.rb +62 -0
data/specs/bin/protein_summary_spec.rb +10 -0
data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
data/specs/gi_spec.rb +22 -0
data/specs/load_bin_path.rb +7 -0
data/specs/merge_deep_spec.rb +13 -0
data/specs/ms/gradient_program_spec.rb +77 -0
data/specs/ms/msrun_spec.rb +455 -0
data/specs/ms/parser_spec.rb +92 -0
data/specs/ms/spectrum_spec.rb +89 -0
data/specs/roc_spec.rb +251 -0
data/specs/rspec_autotest.rb +149 -0
data/specs/sample_enzyme_spec.rb +41 -0
data/specs/spec_helper.rb +133 -0
data/specs/spec_id/aa_freqs_spec.rb +52 -0
data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
data/specs/spec_id/digestor_spec.rb +75 -0
data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
data/specs/spec_id/precision/filter/output_spec.rb +31 -0
data/specs/spec_id/precision/filter_spec.rb +243 -0
data/specs/spec_id/precision/prob_spec.rb +111 -0
data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
data/specs/spec_id/sequest/params_spec.rb +68 -0
data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
data/specs/spec_id/sqt_spec.rb +138 -0
data/specs/spec_id/srf_spec.rb +209 -0
data/specs/spec_id/srf_spec_helper.rb +302 -0
data/specs/spec_id_helper.rb +33 -0
data/specs/spec_id_spec.rb +361 -0
data/specs/spec_id_xml_spec.rb +33 -0
data/specs/transmem/phobius_spec.rb +423 -0
data/specs/transmem/toppred_spec.rb +297 -0
data/specs/transmem_spec.rb +60 -0
data/specs/transmem_spec_shared.rb +64 -0
data/specs/validator/aa_spec.rb +107 -0
data/specs/validator/background_spec.rb +51 -0
data/specs/validator/bias_spec.rb +146 -0
data/specs/validator/decoy_spec.rb +51 -0
data/specs/validator/fasta_helper.rb +26 -0
data/specs/validator/prot_from_pep_spec.rb +141 -0
data/specs/validator/transmem_spec.rb +145 -0
data/specs/validator/true_pos_spec.rb +58 -0
data/specs/validator_helper.rb +33 -0
data/specs/xml_spec.rb +12 -0
data/test_files/000_pepxml18_small.xml +206 -0
data/test_files/020a.mzXML.timeIndex +4710 -0
data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
data/test_files/4-03-03_small-prot.xml +321 -0
data/test_files/4-03-03_small.xml +3876 -0
data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
data/test_files/bioworks-3.3_10prots.xml +5999 -0
data/test_files/bioworks31.params +77 -0
data/test_files/bioworks32.params +62 -0
data/test_files/bioworks33.params +63 -0
data/test_files/bioworks_single_run_small.xml +7237 -0
data/test_files/bioworks_small.fasta +212 -0
data/test_files/bioworks_small.params +63 -0
data/test_files/bioworks_small.phobius +109 -0
data/test_files/bioworks_small.toppred.out +2847 -0
data/test_files/bioworks_small.xml +5610 -0
data/test_files/bioworks_with_INV_small.xml +3753 -0
data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
data/test_files/corrupted_900.srf +0 -0
data/test_files/head_of_7MIX.srf +0 -0
data/test_files/interact-opd1_mods_small-prot.xml +304 -0
data/test_files/messups.fasta +297 -0
data/test_files/opd1/000.my_answer.100lines.xml +101 -0
data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
data/test_files/opd1/000_020-prot.png +0 -0
data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
data/test_files/opd1/000_020_3prots-prot.xml +62 -0
data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
data/test_files/opd1/sequest.3.1.params +77 -0
data/test_files/opd1/sequest.3.2.params +62 -0
data/test_files/opd1/twenty_scans.mzXML +418 -0
data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
data/test_files/opd1/twenty_scans_answ.lmat +0 -0
data/test_files/opd1/twenty_scans_answ.lmata +9 -0
data/test_files/opd1_020_beginning.RAW +0 -0
data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
data/test_files/pepproph_small.xml +4691 -0
data/test_files/phobius.small.noheader.txt +50 -0
data/test_files/phobius.small.small.txt +53 -0
data/test_files/s01_anC1_ld020mM.key.txt +25 -0
data/test_files/s01_anC1_ld020mM.meth +0 -0
data/test_files/small.fasta +297 -0
data/test_files/smallraw.RAW +0 -0
data/test_files/tf_bioworks2excel.bioXML +14340 -0
data/test_files/tf_bioworks2excel.txt.actual +1035 -0
data/test_files/toppred.small.out +416 -0
data/test_files/toppred.xml.out +318 -0
data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
data/test_files/yeast_gly_small-prot.xml +265 -0
data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
data/test_files/yeast_gly_small.xml +3807 -0
data/test_files/yeast_gly_small2.parentTimes +6 -0
metadata +273 -57
data/bin/filter.rb +0 -6
data/bin/precision.rb +0 -5
data/lib/spec/mzdata/parser.rb +0 -108
data/lib/spec/mzdata.rb +0 -48
data/lib/spec/mzxml/parser.rb +0 -449
data/lib/spec/scan.rb +0 -55
data/lib/spec_id/filter.rb +0 -797
data/lib/spec_id/precision.rb +0 -421
data/lib/toppred.rb +0 -18
data/script/filter-peps.rb +0 -164
data/test/tc_aa_freqs.rb +0 -59
data/test/tc_fasta_shaker.rb +0 -149
data/test/tc_filter.rb +0 -203
data/test/tc_filter_peps.rb +0 -46
data/test/tc_gi.rb +0 -17
data/test/tc_id_class_anal.rb +0 -70
data/test/tc_id_precision.rb +0 -89
data/test/tc_msrun.rb +0 -88
data/test/tc_mzxml.rb +0 -88
data/test/tc_mzxml_to_lmat.rb +0 -36
data/test/tc_peptide_parent_times.rb +0 -27
data/test/tc_precision.rb +0 -60
data/test/tc_roc.rb +0 -166
data/test/tc_sample_enzyme.rb +0 -32
data/test/tc_scan.rb +0 -26
data/test/tc_sequest.rb +0 -336
data/test/tc_spec.rb +0 -78
data/test/tc_spec_id.rb +0 -201
data/test/tc_spec_id_xml.rb +0 -36
data/test/tc_srf.rb +0 -262

data/INSTALL CHANGED Viewed

@@ -5,6 +5,7 @@ Prerequisites
 Much of the package will work without any prerequisites at all.  Some functionality may require additional ruby packages or other converters.  These are listed in current order of importance:
 * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
+* [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
 * ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
 * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot').  Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work.  Under one-click installer for windows this package requires a little configuration.  It works with no configuration on cygwin (or linux).

data/README CHANGED Viewed

@@ -18,6 +18,31 @@ The project is currently focusing on the following:
 * ProteinProphet
 * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
+Features
+--------
+* mzXML (version 1 & 2) parsing
+* mzData parsing
+* bioworks .srf (binary files) reader
+* bioworks to PeptideProphet input (pepXML files)
+* lightweight APEX values parser
+* histogram protein probabilities
+* developed for Linux, should port easily to Windows or others
+* protein summary views with custom false ID cutoff values
+* conversion to OBI-Warp input files
+Validation by:
+  * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
+  * Amino acid (e.g., search for unblocked cysteines)
+  * Transmembrane prediction (Phobius or TopPred)
+  * Generic sample bias (e.g., low abundance/high abundance proteins)
+  * Defined sample
+Working with:
+  * Bioworks (3.2-3.3.1)
+  * Peptide/Protein Prophet
+  * Easily extensible to others
 Tutorials
 ---------

data/Rakefile CHANGED Viewed

@@ -2,9 +2,9 @@ require 'rake'
 require 'rubygems'
 require 'rake/rdoctask'
 require 'rake/gempackagetask'
-require 'rake/testtask'
 require 'rake/clean'
 require 'fileutils'
+require 'spec/rake/spectask'
 ###############################################
 # GLOBAL
@@ -13,23 +13,25 @@ FL = FileList
 NAME = "mspire"
-lib_files = FL["lib/**/*"]
-test_dir_too = FL["test/**/*"]
+$dependencies = %w(libjtp)
+$tfiles_large = 'test_files_large'
+changelog = "changelog.txt"
-little_dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "changelog.txt", "release_notes.txt", "{bin,script,tutorial}/**/*"]
-dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "{bin,script,tutorial}/**/*", test_dir_too]
+core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
+big_dist_files = core_files + FL["test_files_large/**/*"]
-dist_files = little_dist_files # comment out to include test files
+dist_files = core_files
+# dist_files = big_dist_files
 ###############################################
 # ENVIRONMENT
 ###############################################
 ENV["OS"] == "Windows_NT" ? WIN32 = true : WIN32 = false
-gemcmd = "gem"
+$gemcmd = "gem"
 if WIN32
   unless ENV["TERM"] == "cygwin"
-    gemcmd << ".cmd"
+    $gemcmd << ".cmd"
   end
 end
@@ -81,40 +83,123 @@ end
 # TESTS
 ###############################################
-desc "Run unit tests."
-Rake::TestTask.new do |t|
-  reply = `#{gemcmd} list -l #{NAME}`
+namespace :spec do
+  task :autotest do
+    require './specs/rspec_autotest'
+    RspecAutotest.run
+  end
+end
+task :ensure_dependencies do
+  $dependencies.each do |dep|
+    unless `#{$gemcmd} list -l #{dep}`.include?(dep)
+      abort "ABORTING: install #{dep} before testing!"
+    end
+  end
+end
+task :ensure_large_testfiles do
+  if !File.exist?($tfiles_large) and !ENV['SPEC_LARGE'].nil?
+    warn "Not running with large files since #{$tfiles_large} does not exist!"
+    warn "Removing SPEC_LARGE from ENV!"
+    ENV.delete('SPEC_LARGE')
+  end
+end
+task :ensure_gem_is_uninstalled do
+  reply = `#{$gemcmd} list -l #{NAME}`
   if reply.include? NAME + " ("
     puts "GOING to uninstall gem '#{NAME}' for testing"
     if WIN32
-      %x( #{gemcmd} uninstall -x #{NAME} )
+      %x( #{$gemcmd} uninstall -x #{NAME} )
     else
-      %x( sudo #{gemcmd} uninstall -x #{NAME} )
+      %x( sudo #{$gemcmd} uninstall -x #{NAME} )
     end
   end
-  #  t.libs << "lib"  ## done by default
-  t.test_files = FL["test/tc_*.rb"]
-  #t.verbose = true
 end
+desc "Run all specs"
+Spec::Rake::SpecTask.new('spec') do |t|
+  Rake::Task[:ensure_gem_is_uninstalled].invoke
+  Rake::Task[:ensure_dependencies].invoke
+  Rake::Task[:ensure_large_testfiles].invoke
+  t.libs = ['lib']
+  #t.ruby_opts = ['-I', 'lib']
+  t.spec_files = FileList['specs/**/*_spec.rb']
+end
+desc "Run all specs"
+Spec::Rake::SpecTask.new('specl') do |t|
+  Rake::Task[:ensure_gem_is_uninstalled].invoke
+  Rake::Task[:ensure_dependencies].invoke
+  Rake::Task[:ensure_large_testfiles].invoke
+  t.spec_files = FileList['specs/**/*_spec.rb']
+  t.libs = ['lib']
+  #t.ruby_opts = ['-I', 'lib']
+  t.spec_opts = ['--format', 'specdoc' ]
+end
-desc "Run unit tests individual on each test"
-task :test_ind do |t|
-  reply = `#{gemcmd} list -l #{NAME}`
-  if reply.include? NAME + " ("
-    %x( sudo #{gemcmd} uninstall -x #{NAME} )
-  end
+desc "Run all specs with RCov"
+Spec::Rake::SpecTask.new('rcov') do |t|
+  Rake::Task[:ensure_gem_is_uninstalled].invoke
+  Rake::Task[:ensure_dependencies].invoke
+  Rake::Task[:ensure_large_testfiles].invoke
+  t.spec_files = FileList['specs/**/*_spec.rb']
+  t.rcov = true
+  t.libs = ['lib']
+  #t.ruby_opts = ['-I', 'lib']
+  t.rcov_opts = ['--exclude', 'specs']
+end
-  #  t.libs << "lib"  ## done by default
-  test_files = FL["test/tc_*.rb"]
-  test_files.each do |file|
-    puts "TESTING: #{file.sub(/test\//,'')}"
-    puts `ruby -I lib #{file}`
+task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_testfiles] do
+  # files that match a key word
+  files_to_run = ENV['SPEC'] || FileList['specs/**/*_spec.rb']
+  if ENV['SPECM']
+    files_to_run = files_to_run.select do |file|
+      file.include?(ENV['SPECM'])
+    end
+  end
+  files_to_run.each do |spc|
+    puts "------ SPEC=#{spc} ------"
+    system "ruby -I lib -S spec #{spc} --format specdoc"
   end
-  #t.verbose = true
 end
+#Spec::Rake::SpecTask.new(:spec) do |t|
+#  uninstall_gem
+#  t.spec_files = FileList['spec/**/spec_*.rb']
+#  t.libs = FileList['lib']
+#  t.spec_opts = ['--format', 'specdoc']
+#end
+#desc "Run unit tests."
+#Rake::TestTask.new do |t|
+#  uninstall_gem
+#  #  t.libs << "lib"  ## done by default
+#  t.test_files = FL["test/tc_*.rb"]
+#  #t.verbose = true
+#end
+#desc "Run unit tests individual on each test"
+#task :test_ind do |t|
+#  reply = `#{$gemcmd} list -l #{NAME}`
+#  if reply.include? NAME + " ("
+#    %x( sudo #{$gemcmd} uninstall -x #{NAME} )
+#  end
+#
+#  #  t.libs << "lib"  ## done by default
+#  test_files = FL["test/tc_*.rb"]
+#  test_files.each do |file|
+#    puts "TESTING: #{file.sub(/test\//,'')}"
+#    puts `ruby -I lib #{file}`
+#  end
+#  #t.verbose = true
+#end
@@ -140,7 +225,7 @@ tm = Time.now
 spec = Gem::Specification.new do |s|
   s.platform = Gem::Platform::RUBY
   s.name = NAME
-  s.version = "0.2.4"
+  s.version = IO.readlines(changelog).grep(/##.*version/).pop.split(/\s+/).last.chomp
   s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
   s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
   s.email = "jprince@icmb.utexas.edu"
@@ -149,17 +234,19 @@ spec = Gem::Specification.new do |s|
   s.description = "mspire is for working with mass spectrometry proteomics data"
   s.has_rdoc = true
   s.authors = ["John Prince"]
-  s.files = little_dist_files
+  s.files = dist_files
   s.rdoc_options = rdoc_options
   s.extra_rdoc_files = rdoc_extra_includes
   s.executables = FL["bin/*"].map {|file| File.basename(file) }
-  s.add_dependency('libjtp', '~> 0.1.4')
-  s.requirements << '"xmlparser" is the prefered xml parser right now.  REXML and regular expressions are used as fallback in some routines.'
+  s.add_dependency('libjtp', '~> 0.2.5')
+  s.add_dependency('axml')
+  s.requirements << '"libxml" is the prefered xml parser right now.  libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
   s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
   s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
   s.requirements << '"rake" is useful for development'
   s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
-  s.test_files = FL["test/tc_*.rb"]
+  #s.test_files = FL["test/tc_*.rb"]
+  s.test_files = FL["specs/**/*_spec.rb"]
 end
 desc "Create packages."
@@ -180,20 +267,22 @@ end
 #  t.package_task
 #end
+task :remove_pkg do
+  FileUtils.rm_rf "pkg"
+end
 task :install => [:reinstall]
 desc "uninstalls the package, packages a fresh one, and installs"
-task :reinstall => [:clean, :package] do
-  reply = `#{gemcmd} list -l #{NAME}`
-  if reply.include? NAME + " ("
-    %x( #{gemcmd} uninstall -x #{NAME} )
+task :reinstall => [:remove_pkg, :clean, :package] do
+  reply = `#{$gemcmd} list -l #{NAME}`
+  if reply.include?(NAME + " (")
+    %x( #{$gemcmd} uninstall -x #{NAME} )
   end
   FileUtils.cd("pkg") do
-    %x( #{gemcmd} install #{NAME} )
+    %x( #{$gemcmd} install #{NAME}*.gem )
   end
 end
 ###############################################

data/bin/{find_aa_freq.rb → aafreqs.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/ruby -w
+require 'fasta'
 require 'spec_id/aa_freqs'
 if ARGV.size < 1
@@ -10,7 +10,7 @@ if ARGV.size < 1
 end
 ARGV.each do |file|
-  obj = SpecID::AAFreqs.new(file)
+  obj = SpecID::AAFreqs.new(Fasta.new(file))
   puts file
   obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
     puts "#{k}: #{v}"

data/bin/bioworks_to_pepxml.rb CHANGED Viewed

@@ -12,6 +12,7 @@ DEFAULT_MS_MODEL = 'LCQ'
 DEFAULT_MASS_ANALYZER = 'Ion Trap'
 ##############################################################
+require 'spec_id/sequest/pepxml'
 require 'spec_id'
 require 'optparse'
 require 'ostruct'

data/bin/fasta_shaker.rb CHANGED Viewed

@@ -1,100 +1,5 @@
 #!/usr/bin/ruby
-# This is my second attempt at writing a simple interface for messing with
-# fasta files.  Acheiving simplicity (and power) is challenging.  It usually
-# only happens on the second (or sometimes more) try.  Of course, in
-# retrospect the simple solution seems sooo obvious.  But its deceptive.
-# It takes work to acheive simplicity for complex tasks.  That's my thought
-# for the day.
-# fasta_shaker as in a salt shaker.  Shake up your fasta proteins and let them
-# season your dinner (hopefully a protein dinner).  Mmmm.  Don't they taste
-# good all mixed up?  If you want, you can think of it as a pepper shaker.
-# I don't usually comment on my scripts (in my script, anyway), but this one
-# came out so nice and clean that I feel like I have room to spare.
 require 'fasta'
-require 'optparse'
-opt = {}
-opts = OptionParser.new do |op|
-  prog = File.basename(__FILE__)
-  op.banner = "usage: #{prog} <method> [OPTIONS] <file>.fasta"
-  op.separator "   <method> = reverse | shuffle"
-  op.on("-c", "--cat", "catenates the output to copy of original") {|v| opt[:cat] = v }
-  op.on("-o", "--out <string>", "name of output file (default is descriptive)") {|v| opt[:out] = v }
-  op.on("-p", "--prefix <string>", "give a header prefix to modified prots") {|v| opt[:prefix] = v }
-  op.on("-f", "--fraction <float>", "creates some fraction of proteins") {|v| opt[:fraction] = v }
-  op.separator "        [if fraction > 1 then the tag 'f<frac#>_' prefixed to proteins"
-  op.separator "         (after any given prefix) so that proteins are unique]"
-  op.on("--tryptic_peptides", "applies method to [KR][^P] peptides") {|v| opt[:tryptic_peptides] = v }
-  op.separator "EXAMPLES: "
-  op.separator "   #{prog} reverse file.fasta -o protein_aa_sequence_reversed.fasta"
-  op.separator "   #{prog} shuffle file.fasta -o protein_aa_sequence_shuffled.fasta"
-  op.separator "   #{prog} shuffle file.fasta -c -p SH_ -o normal_cat_shuffled_with_prefix.fasta"
-  op.separator "   #{prog} reverse file.fasta --tryptic_peptides tryptic_peptides_reversed.fasta"
-end
-opts.parse!
-if ARGV.size < 2
-  puts opts
-  exit
-end
-(method, file) = ARGV
-if opt[:cat] && !opt[:prefix]
-  puts "WARNING: concatenated proteins don't have unique headers"
-  puts "[you probably wanted to use the '--prefix' option!]"
-end
-# OUT filename:
-unless opt[:out]
-  filebase = file.sub(/\..*$/,'')
-  parts = [filebase]
-  parts << 'cat' if opt[:cat]
-  parts << method
-  parts << 'prefix' << opt[:prefix] if opt[:prefix]
-  parts << 'fraction' << opt[:fraction] if opt[:fraction]
-  parts << 'tryptic_peptides' if opt[:tryptic_peptides]
-  opt[:out] = parts.join("_") << ".fasta"
-end
-## READ the file
-fasta = Fasta.new.read_file(file)
-## CAT (save an original copy)
-fasta_orig = fasta.dup if opt[:cat]
-## FRACTION the proteins
-if f = opt[:fraction]
-  prefix = nil
-  f = f.to_f
-  if f > 1.0
-    prefix = proc {|cnt| "f#{cnt}_" }
-  end
-  fasta = fasta.fraction_of_prots(f, prefix)
-end
-## PREFIX the proteins
-if pre = opt[:prefix]
-  fasta.header_prefix!(pre)
-end
-## MODIFY the proteins
-fasta.aaseq!((method + '!').to_sym, opt[:tryptic_peptides])
-## CAT (finish it up)
-if opt[:cat]
-  fasta_orig << fasta
-  fasta = fasta_orig
-end
-## WRITE out the file
-fasta.write_file(opt[:out])
+FastaShaker.shake_from_argv(ARGV)

data/bin/filter_and_validate.rb ADDED Viewed

@@ -0,0 +1,5 @@
+#!/usr/bin/ruby
+require 'spec_id/precision/filter'
+SpecID::Precision::Filter.new.filter_and_validate_cmdline(ARGV)

data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/ruby
-require 'spec/mzxml/parser'
+require 'ms/msrun'
 require 'optparse'
 require 'ostruct'
 require 'lmat'
@@ -14,7 +14,8 @@ opt[:inc_mz] = 1.0
 # get options:
 opts = OptionParser.new do |op|
-  op.banner = "usage: #{File.basename(__FILE__)} [options] file.mzXML ..."
+  op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
+  op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
   op.separator ""
   op.separator "(sums m/z values that round to the same bin)"
   op.separator ""
@@ -32,10 +33,10 @@ if ARGV.size < 1
 end
 ARGV.each do |file|
-  parser = Spec::MzXML::Parser.new
-  (start_mz, end_mz) = parser.start_and_end_mz(file)
-  (times, spectra) = parser.times_and_spectra(file)
-  times.map! do |tm| tm.to_f end
+  msrun = MS::MSRun.new(file)
+  mslevel = 1
+  (start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
+  (times, spectra) = msrun.times_and_spectra(mslevel)
   args = {
     :start_mz => start_mz,
     :end_mz => end_mz,
@@ -45,7 +46,7 @@ ARGV.each do |file|
     :inc_tm => nil,
   }
   args.merge!(opt)
-  lmat = LMat.new.from_raw_spectra(times, spectra, args)
+  lmat = LMat.new.from_times_and_spectra(times, spectra, args)
   outfile = file.sub(/\.mzXML$/, opt[:newext])
   if args[:ascii]
     outfile << "a"

data/bin/prob_validate.rb ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/ruby
+require 'spec_id/precision/prob'
+SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ARGV)

data/bin/raw_to_mzXML.rb CHANGED Viewed

@@ -21,11 +21,11 @@ if ARGV.size == 0
   exit
 end
-converter = Spec::MzXML.find_mzxml_converter
+converter = MS::MzXML.find_mzxml_converter
 if converter
   $stderr.puts "using #{converter} to convert files"
 else
-  puts "cannot find [#{Spec::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
+  puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
   puts ENV['PATH'].split(/[:;]/).join(", ")
   abort
 end

data/bin/srf_group.rb CHANGED Viewed

@@ -18,6 +18,7 @@ end
 if ARGV.size == 0
   puts opts
+  exit
 end
 obj = SRFGroup.new

data/bin/srf_to_sqt.rb ADDED Viewed

@@ -0,0 +1,40 @@
+#!/usr/bin/ruby
+require 'spec_id/srf'
+require 'optparse'
+opt = {}
+opt['db-info'] = false
+opt['db-path'] = nil
+opt['filter'] = true
+opts = OptionParser.new do |op|
+  op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
+  op.separator "outputs: <file>.sqt ..."
+  op.separator ""
+  op.separator "OPTIONS"
+  op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt['db-info'] = v }
+  op.on("-p", "--db-path <path_to_dir>", "if your database path has changed",
+                                         "and you want db-info, then give the",
+                                         "path to the new *directory*",
+                                         "e.g. /my/new/path") {|v| opt['db-path'] = v }
+  op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt['db-update'] = v }
+  op.on("-n", "--no-filter", "by default, pephit must be within",
+                             "peptide_mass_tolerance (defined in params)",
+                             "to be displayed.  Turns this off.") {|v| opt['filter'] = false}
+  op.on("-r", "--round", "round floating point values reasonably") {|v| opt['round'] = v }
+end
+opts.parse!
+if ARGV.size == 0
+  puts opts.to_s
+  exit
+end
+ARGV.each do |file|
+  abort "file #{file} must be named .srf" if file !~ /\.srf$/i
+  new_filename = file.sub(/\.srf$/i, '.sqt')
+  SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename, :db_info => opt['db-info'], :new_db_path => opt['db-path'], :update_db_path => opt['db-update'], :round => opt['round'])
+end

data/changelog.txt CHANGED Viewed

@@ -54,3 +54,71 @@ a prefix option
 in protein_summary.rb added handling for proteins with no annotation. (either
 display NA or use gi2annnot to grab them from NCBI)
+## version 0.2.5
+renamed prep_list in roc (potential breaks in code)
+## version 0.2.6
+1. Massive refactorization of filtering and validation.  Validation objects are
+created and then can be used to validate just about anything.
+2. Massive redo of the parsing of MS runs.  Can parse mzXML v1, v2.X
+(including readw broken output), and mzData (even Thermo's broken output).
+4. Moved all tests to specs (rspec).
+5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
+2.X)
+Bugfixes:
+1. The search_summary 'base_name' in pepxml output was incorrect (this did not
+appear to influence our analyses, however). Fixed.
+2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
+missed cleavages if the last amino acid was a cut point. Fixed.
+## version 0.2.7
+1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
+Now, the sample enzyme is set explicitly from the params file and the option
+is not available.  This can give more accurate pepxml files than
+previously, depending on your enzyme.
+## version 0.2.9
+1. Added support for phobius transmembrane predictions
+2. have filter_and_validate.rb working well (multiple validators allowed).
+3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
+4. Added a bias validator
+## version 0.2.10
+1. Fixed --hits_separate flag in spec_id/filter
+## version 0.2.11
+1. Added prob precision support and reorganized filter_and_validate libs
+## version 0.2.12
+1. Fixed bug in transmem for prob and others.
+2. Can use axml (XMLParser based) or libxml depending on availability
+## version 0.2.13
+1. Fixed issue with --hits_separate
+2. filter_and_validate.rb requires decoy validator if decoy proteins
+(refactored code)
+## version 0.2.14
+1. Can read PeptideProphet files (should be able to read pepxml files, too)
+2. API change: Some slight modifications to the Sequest::PepXML object
+interfaces and implementations (using ArrayClass)
+## version 0.2.15
+1. can convert srf files to sqt files
+## version 0.3.0
+1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
+2. SQT export is correct and works at least on 3.2 and 3.3.1.

data/lib/align/chams.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec/msrun'
+require 'ms/msrun'
 module Align; end
 class Align::CHAMS
@@ -8,7 +8,9 @@ class Align::CHAMS
   # Scan1	Scan2	Edge_cost	Path_cost	Edge_direction
   attr_accessor :avg_score, :time_mscans, :time_nscans, :mscans, :nscans, :edge_costs, :path_costs, :directions
-  def initialize(chams_file, timeIndex_file1, timeIndex_file2)
+  # requires an object that will respond to [<scan_num>] to give time
+  # (seconds) for each file
+  def initialize(chams_file, time_by_scan_num1, time_by_scan_num2)
     @time_mscans = []
     @time_nscans = []
     @mscans = []
@@ -17,13 +19,11 @@ class Align::CHAMS
     @path_costs = []
     @directions = []
     read_chams_file(chams_file)
-    scans_by_num1 = Spec::MSRunIndex.new(timeIndex_file1).scans_by_num
-    scans_by_num2 = Spec::MSRunIndex.new(timeIndex_file2).scans_by_num
     @mscans.each_with_index do |scan,i|
-      @time_mscans[i] = scans_by_num1[scan].time
+      @time_mscans[i] = time_by_scan_num1[scan]
     end
     @nscans.each_with_index do |scan,i|
-      @time_nscans[i] = scans_by_num2[scan].time
+      @time_nscans[i] = time_by_scan_num2[scan]
     end
   end

data/lib/align.rb CHANGED Viewed

@@ -1,6 +1,7 @@
-require 'spec/mzxml/parser'
-require 'spec/msrun'
+#require 'ms/parser'
+#require 'ms/parser/mzxml'
+require 'ms/msrun'
 require 'spec_id/proph'
 require 'vec'
@@ -18,7 +19,7 @@ class Align
     ## Create scan indices on msrun name
     if mztimes.class != Array ; mztimes = [mztimes] end
-    msrun_indices = mztimes.collect do |file| Spec::MSRunIndex.new(file) end
+    msrun_indices = mztimes.collect do |file| MS::MSRunIndex.new(file) end
     scanindex_by_basename_noext = {}
     msrun_indices.each do |runindex|
       scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num