RubyGems - mspire - Versions diffs - 0.1.7 → 0.2.0 - Mend

mspire 0.1.7 → 0.2.0

Files changed (57)

data/Rakefile +41 -14
data/bin/bioworks2excel.rb +1 -1
data/bin/bioworks_to_pepxml.rb +46 -59
data/bin/fasta_shaker.rb +1 -1
data/bin/filter.rb +6 -0
data/bin/find_aa_freq.rb +23 -0
data/bin/id_precision.rb +3 -2
data/bin/mzxml_to_lmat.rb +2 -1
data/bin/pepproph_filter.rb +1 -1
data/bin/precision.rb +1 -1
data/bin/protein_summary.rb +2 -451
data/bin/raw_to_mzXML.rb +55 -0
data/bin/srf_group.rb +26 -0
data/changelog.txt +7 -0
data/lib/align.rb +3 -3
data/lib/fasta.rb +6 -1
data/lib/gi.rb +9 -4
data/lib/roc.rb +2 -0
data/lib/sample_enzyme.rb +2 -1
data/lib/spec/mzxml/parser.rb +2 -43
data/lib/spec/mzxml.rb +65 -2
data/lib/spec_id/aa_freqs.rb +10 -7
data/lib/spec_id/bioworks.rb +67 -87
data/lib/spec_id/filter.rb +794 -0
data/lib/spec_id/precision.rb +29 -36
data/lib/spec_id/proph.rb +5 -3
data/lib/spec_id/protein_summary.rb +459 -0
data/lib/spec_id/sequest.rb +323 -271
data/lib/spec_id/srf.rb +189 -135
data/lib/spec_id.rb +276 -227
data/lib/spec_id_xml.rb +101 -0
data/lib/toppred.rb +18 -0
data/script/degenerate_peptides.rb +47 -0
data/script/filter-peps.rb +5 -1
data/test/tc_align.rb +1 -1
data/test/tc_bioworks.rb +25 -22
data/test/tc_bioworks_to_pepxml.rb +37 -4
data/test/tc_fasta.rb +3 -1
data/test/tc_fasta_shaker.rb +8 -6
data/test/tc_filter.rb +203 -0
data/test/tc_gi.rb +6 -9
data/test/tc_id_precision.rb +31 -0
data/test/tc_mzxml.rb +8 -6
data/test/tc_peptide_parent_times.rb +2 -1
data/test/tc_precision.rb +1 -1
data/test/tc_proph.rb +5 -5
data/test/tc_protein_summary.rb +36 -13
data/test/tc_sequest.rb +78 -33
data/test/tc_spec_id.rb +128 -6
data/test/tc_srf.rb +84 -38
metadata +67 -62
data/bin/fasta_cat.rb +0 -39
data/bin/fasta_cat_mod.rb +0 -59
data/bin/fasta_mod.rb +0 -57
data/bin/filter_spec_id.rb +0 -365
data/bin/raw2mzXML.rb +0 -21
data/script/gen_database_searching.rb +0 -258

data/Rakefile CHANGED Viewed

@@ -37,17 +37,11 @@ end
 # DOC
 ###############################################
-task :tutorial => [] do
-  sys "ruby ./script/gen_database_searching.rb"
-end
-tutorial_files = %w(cat_db_search two_db_search).map {|f| "doc/src/tutorial/database_searching/#{f}.page"}
-tutorial_files << 'doc/src/tutorial/database_searching/index.page'
 def move_and_add_webgen_header(file, newfile, src_dir, heading)
   string = IO.read file
   with_header = heading + string
-  sys.write_to_file(newfile, with_header)
-  sys.mv newfile, src_dir
+  File.open(newfile, 'w') {|v| v.print with_header }
+  FileUtils.mv newfile, src_dir
 end
 desc "copy top level files into doc/src"
@@ -64,13 +58,13 @@ end
 desc "create and upload docs to server"
 task :upload_docs => :html_docs do
-  sys "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
+  sh "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
 end
 desc "creates docs in doc/html"
-task :html_docs => [:cp_top_level_docs, :tutorial] do
-  sys.cd 'doc' do
-    sys "webgen"
+task :html_docs => [:cp_top_level_docs] do
+  FileUtils.cd 'doc' do
+    sh "webgen"
   end
 end
@@ -89,11 +83,42 @@ end
 desc "Run unit tests."
 Rake::TestTask.new do |t|
+  reply = `#{gemcmd} list -l #{NAME}`
+  if reply.include? NAME + " ("
+    puts "GOING to uninstall gem '#{NAME}' for testing"
+    if WIN32
+      %x( #{gemcmd} uninstall -x #{NAME} )
+    else
+      %x( sudo #{gemcmd} uninstall -x #{NAME} )
+    end
+  end
   #  t.libs << "lib"  ## done by default
   t.test_files = FL["test/tc_*.rb"]
   #t.verbose = true
 end
+desc "Run unit tests individual on each test"
+task :test_ind do |t|
+  reply = `#{gemcmd} list -l #{NAME}`
+  if reply.include? NAME + " ("
+    %x( sudo #{gemcmd} uninstall -x #{NAME} )
+  end
+  #  t.libs << "lib"  ## done by default
+  test_files = FL["test/tc_*.rb"]
+  test_files.each do |file|
+    puts "TESTING: #{file.sub(/test\//,'')}"
+    puts `ruby -I lib #{file}`
+  end
+  #t.verbose = true
+end
 #desc "Run all tests"
 #task :test_indiv do
 #  sys.cd "test" do
@@ -115,7 +140,7 @@ tm = Time.now
 spec = Gem::Specification.new do |s|
   s.platform = Gem::Platform::RUBY
   s.name = NAME
-  s.version = "0.1.7"
+  s.version = "0.2.0"
   s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
   s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
   s.email = "jprince@icmb.utexas.edu"
@@ -131,7 +156,9 @@ spec = Gem::Specification.new do |s|
   s.add_dependency('libjtp', '~> 0.1.2')
   s.requirements << '"xmlparser" is the prefered xml parser right now.  REXML and regular expressions are used as fallback in some routines.'
   s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
-  s.requirements << 'the "t2x" binary to convert .RAW files to mzXML is expected in some applications'
+  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
+  s.requirements << '"rake" is useful for development'
+  s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
   s.test_files = FL["test/tc_*.rb"]
 end

data/bin/bioworks2excel.rb CHANGED Viewed

@@ -9,6 +9,6 @@ end
 ARGV.each do |file|
   newfile = file.gsub(".xml", ".txt")
-  obj = SpecID::Bioworks.new(file)
+  obj = Bioworks.new(file)
   obj.to_excel(newfile)
 end

data/bin/bioworks_to_pepxml.rb CHANGED Viewed

@@ -4,11 +4,10 @@
 # GLOBAL CONSTANTS
 DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
-DEFAULT_MZXML_PATH = "."
+DEFAULT_MZ_PATH = "."
 DEFAULT_OUTDIR = "pepxml"
 DEFAULT_PARAMS_GLOB = "*.params"
 DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
-DEFAULT_PEPXML_VERSION = 18
 DEFAULT_MS_MODEL = 'LCQ'
 DEFAULT_MASS_ANALYZER = 'Ion Trap'
 ##############################################################
@@ -17,6 +16,7 @@ require 'spec_id'
 require 'optparse'
 require 'ostruct'
 require 'fileutils'
+require 'spec_id/srf'
 # establish the default database path after examining env vars
 def_dbpath = nil
@@ -30,13 +30,16 @@ end
 opt = OpenStruct.new
 opt_obj = OptionParser.new do |op|
-  op.banner = "\nusage: #{File.basename(__FILE__)} [options] <file>.srf ...
-usage: #{File.basename(__FILE__)} [options] bioworks.xml"
-  op.on_head "
-  Takes .srf files or the xml exported output of Bioworks multi-consensus view
-  (no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline).
-Options:"
+  progname = File.basename(__FILE__)
+  op.banner = "\nusage: #{progname} [options] <file>.srf ..."
+  op.separator "usage: #{progname} [options] <bioworks>.srg"
+  op.separator "usage: #{progname} [options] <bioworks>.xml"
+  op.separator ""
+  op.separator "Takes srf files or the xml exported output of Bioworks multi-consensus view"
+  op.separator "(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline)."
+  op.separator "Additionally, will group .srf files into an .srg file (like 'srf_group.rb')"
+  op.separator ""
+  op.separator "Options:"
   op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
   op.on('-o', '--outdir path', "output directory     d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
@@ -45,19 +48,21 @@ Options:"
   op.separator ""
   op.on('-p', '--params file', "sequest params file  d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
   op.on('-d', '--dbpath path', "path to databases    d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
-  op.on('-m', '--mspath path', "path to MS files     d: '#{DEFAULT_MZXML_PATH}'") {|v| opt.mspath = v }
-  op.on('--model <LCQ|Orbi|string>', "MS model             d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
-  op.on('--mass_analyzer <string>',  "Mass Analyzer        d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
-  op.on('-v', '--version pepxml_version', "pepxml version       d: '#{DEFAULT_PEPXML_VERSION}'") {|v| opt.pepxml_version = v.to_i }
+  op.on('-m', '--mspath path', "path to MS files     d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
+  op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
+  op.on('--model <LCQ|Orbi|string>', "MS model      (xml)  d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
+  op.on('--mass_analyzer <string>',  "Mass Analyzer (xml)  d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
 end
 more_notes = "
 Notes:
-  mspath: Directory to RAW or mzXML (version 1) files.
-          This option is not used with Bioworks 3.3 files.
+  mspath: Directory to RAW or mzXML files.
+          This option is needed to view Pep3D files
+          and is critical with Bioworks 3.2 xml export files
   outdir: Path will be created if it does not already exist.
+  (xml) : only bioworks.xml files need to include this information
   model : LCQ -> 'LCQ Deca XP Plus'
         : Orbi -> 'LTQ Orbitrap'
         : other string -> That's the string that will be used.
@@ -93,55 +98,37 @@ end
 opt.outdir ||= DEFAULT_OUTDIR
-## Create dbpath if does not exist
-if opt.outdir
-  FileUtils.mkpath(opt.outdir) unless File.exist? opt.outdir
-end
 files = ARGV.to_a
+bioworks_file = files[0]
 if files[0] =~ /\.srf/i
-  opt.dbpath ||= def_dbpath
-  files.each do |file|
-    hash = {
-      :backup_db_path => opt.dbpath || def_dbpath,
-      :out_path => opt.outdir,
-    }
-    xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, hash)
-    xml_obj.to_pepxml(xml_obj.base_name + ".xml")
-  end
-else
-  ## Ensure params file exists (unless opt given)
-  opt.params ||= DEFAULT_PARAMS_FILE
-  params_obj = SpecID::Sequest::Params.new(opt.params)
-  # Ensure the database exists!
-  unless File.exist?( params_obj.database )
-    if opt.dbpath
-      params_obj.database_path = opt.dbpath
-    else
-      params_obj.database_path = def_dbpath
-    end
+  srg_file = 'bioworks.srg'
+  if File.exist? srg_file
+    srg_file = 'bioworks.tmp.srg'
   end
+  srg = SRFGroup.new(files)
+  srg.to_srg(srg_file)
+unless File.exist? srg_file
+  abort "couldn't create #{srg_file} from: #{files.join(', ')}"
+end
+bioworks_file = srg_file
+end
-  opt.mspath ||= DEFAULT_MZXML_PATH
-  opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
-  opt.model ||= DEFAULT_MS_MODEL
-  opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
-  case opt.model
-  when "LCQ"
-    model = 'LCQ Deca XP Plus'
-  when "Orbi"
-    model = 'LTQ Orbitrap'
-  else
-    model = opt.model
-  end
+case opt.model
+when "LCQ"
+  model = 'LCQ Deca XP Plus'
+when "Orbi"
+  model = 'LTQ Orbitrap'
+else
+  model = opt.model
+end
-  bioworks = files[0]
-  xml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params_obj, bioworks, opt.mspath, opt.outdir, opt.pepxml_version, 'trypsin', 'ThermoFinnigan', model)
+opt.dbpath ||= def_dbpath
+opt.mspath ||= DEFAULT_MZ_PATH
+opt.params ||= DEFAULT_PARAMS_FILE
+opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
+opt.model ||= DEFAULT_MS_MODEL
+xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
-  xml_objs.each do |obj|
-    obj.to_pepxml(obj.base_name + ".xml")
-  end
-end

data/bin/fasta_shaker.rb CHANGED Viewed

@@ -14,7 +14,7 @@
 # came out so nice and clean that I feel like I have room to spare.
 require 'fasta'
-require 'cmdparse'
+require 'optparse'
 opt = {}

data/bin/filter.rb ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/ruby -w
+require 'spec_id/filter'
+SpecID::Filter.run_from_argv(ARGV)

data/bin/find_aa_freq.rb ADDED Viewed

@@ -0,0 +1,23 @@
+#!/usr/bin/ruby -w
+require 'spec_id/aa_freqs'
+if ARGV.size < 1
+  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
+  puts "prints the amino acid frequencies of every amino acid in each fasta file"
+  exit
+end
+ARGV.each do |file|
+  obj = SpecID::AAFreqs.new(file)
+  puts file
+  obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
+    puts "#{k}: #{v}"
+  end
+  puts ""
+end

data/bin/id_precision.rb CHANGED Viewed

@@ -35,8 +35,9 @@ file = ARGV[1]
 obj = SpecID.new(file)
 re_prefix = /^#{Regexp.escape(fp_prefix)}/o
-prc = proc {|it| it.prot.reference =~ re_prefix }
+prc = proc {|it| it.prots.first.reference =~ re_prefix }
 #(match, nomatch) = obj.classify(:peps, prc)
+obj.peps = obj.pep_prots
 (fp, tp) = obj.classify(:peps, prc)
@@ -126,7 +127,7 @@ end
 files = ARGV.to_a
 two_lists = files.collect do |file|
-  obj = SpecID::Bioworks.new(file)
+  obj = Bioworks.new(file)
   list = []
   list.push( obj.pep_probs_by_pep_prots )
   list.push( obj.pep_probs_by_seq_charge )

data/bin/mzxml_to_lmat.rb CHANGED Viewed

@@ -23,6 +23,7 @@ opts = OptionParser.new do |op|
   op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
   op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
   op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
+  op.on("-v", "--verbose") {$VERBOSE = true}
 end
 opts.parse!
@@ -52,7 +53,7 @@ ARGV.each do |file|
   else
     lmat.write(outfile)
   end
-  puts "OUTPUT: #{outfile}"
+  puts("OUTPUT: #{outfile}") if $VERBOSE
 end

data/bin/pepproph_filter.rb CHANGED Viewed

@@ -12,5 +12,5 @@ files = ARGV.to_a
 cutoff = files.shift
 files.each do |file|
   outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
-  SpecID::Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
+  Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
 end

data/bin/precision.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#!/usr/bin/ruby -w
+#!/usr/bin/ruby
 require 'spec_id'