RubyGems - mspire - Versions diffs - 0.1.7 → 0.2.0 - Mend

mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

data/Rakefile +41 -14
data/bin/bioworks2excel.rb +1 -1
data/bin/bioworks_to_pepxml.rb +46 -59
data/bin/fasta_shaker.rb +1 -1
data/bin/filter.rb +6 -0
data/bin/find_aa_freq.rb +23 -0
data/bin/id_precision.rb +3 -2
data/bin/mzxml_to_lmat.rb +2 -1
data/bin/pepproph_filter.rb +1 -1
data/bin/precision.rb +1 -1
data/bin/protein_summary.rb +2 -451
data/bin/raw_to_mzXML.rb +55 -0
data/bin/srf_group.rb +26 -0
data/changelog.txt +7 -0
data/lib/align.rb +3 -3
data/lib/fasta.rb +6 -1
data/lib/gi.rb +9 -4
data/lib/roc.rb +2 -0
data/lib/sample_enzyme.rb +2 -1
data/lib/spec/mzxml/parser.rb +2 -43
data/lib/spec/mzxml.rb +65 -2
data/lib/spec_id/aa_freqs.rb +10 -7
data/lib/spec_id/bioworks.rb +67 -87
data/lib/spec_id/filter.rb +794 -0
data/lib/spec_id/precision.rb +29 -36
data/lib/spec_id/proph.rb +5 -3
data/lib/spec_id/protein_summary.rb +459 -0
data/lib/spec_id/sequest.rb +323 -271
data/lib/spec_id/srf.rb +189 -135
data/lib/spec_id.rb +276 -227
data/lib/spec_id_xml.rb +101 -0
data/lib/toppred.rb +18 -0
data/script/degenerate_peptides.rb +47 -0
data/script/filter-peps.rb +5 -1
data/test/tc_align.rb +1 -1
data/test/tc_bioworks.rb +25 -22
data/test/tc_bioworks_to_pepxml.rb +37 -4
data/test/tc_fasta.rb +3 -1
data/test/tc_fasta_shaker.rb +8 -6
data/test/tc_filter.rb +203 -0
data/test/tc_gi.rb +6 -9
data/test/tc_id_precision.rb +31 -0
data/test/tc_mzxml.rb +8 -6
data/test/tc_peptide_parent_times.rb +2 -1
data/test/tc_precision.rb +1 -1
data/test/tc_proph.rb +5 -5
data/test/tc_protein_summary.rb +36 -13
data/test/tc_sequest.rb +78 -33
data/test/tc_spec_id.rb +128 -6
data/test/tc_srf.rb +84 -38
metadata +67 -62
data/bin/fasta_cat.rb +0 -39
data/bin/fasta_cat_mod.rb +0 -59
data/bin/fasta_mod.rb +0 -57
data/bin/filter_spec_id.rb +0 -365
data/bin/raw2mzXML.rb +0 -21
data/script/gen_database_searching.rb +0 -258

data/Rakefile CHANGED Viewed

@@ -37,17 +37,11 @@ end
 # DOC
 ###############################################
-task :tutorial => [] do
-  sys "ruby ./script/gen_database_searching.rb"
-end
-tutorial_files = %w(cat_db_search two_db_search).map {|f| "doc/src/tutorial/database_searching/#{f}.page"}
-tutorial_files << 'doc/src/tutorial/database_searching/index.page'
 def move_and_add_webgen_header(file, newfile, src_dir, heading)
   string = IO.read file
   with_header = heading + string
-  sys.write_to_file(newfile, with_header)
-  sys.mv newfile, src_dir
+  File.open(newfile, 'w') {|v| v.print with_header }
+  FileUtils.mv newfile, src_dir
 end
 desc "copy top level files into doc/src"
@@ -64,13 +58,13 @@ end
 desc "create and upload docs to server"
 task :upload_docs => :html_docs do
-  sys "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
+  sh "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
 end
 desc "creates docs in doc/html"
-task :html_docs => [:cp_top_level_docs, :tutorial] do
-  sys.cd 'doc' do
-    sys "webgen"
+task :html_docs => [:cp_top_level_docs] do
+  FileUtils.cd 'doc' do
+    sh "webgen"
   end
 end
@@ -89,11 +83,42 @@ end
 desc "Run unit tests."
 Rake::TestTask.new do |t|
+  reply = `#{gemcmd} list -l #{NAME}`
+  if reply.include? NAME + " ("
+    puts "GOING to uninstall gem '#{NAME}' for testing"
+    if WIN32
+      %x( #{gemcmd} uninstall -x #{NAME} )
+    else
+      %x( sudo #{gemcmd} uninstall -x #{NAME} )
+    end
+  end
   #  t.libs << "lib"  ## done by default
   t.test_files = FL["test/tc_*.rb"]
   #t.verbose = true
 end
+desc "Run unit tests individual on each test"
+task :test_ind do |t|
+  reply = `#{gemcmd} list -l #{NAME}`
+  if reply.include? NAME + " ("
+    %x( sudo #{gemcmd} uninstall -x #{NAME} )
+  end
+  #  t.libs << "lib"  ## done by default
+  test_files = FL["test/tc_*.rb"]
+  test_files.each do |file|
+    puts "TESTING: #{file.sub(/test\//,'')}"
+    puts `ruby -I lib #{file}`
+  end
+  #t.verbose = true
+end
 #desc "Run all tests"
 #task :test_indiv do
 #  sys.cd "test" do
@@ -115,7 +140,7 @@ tm = Time.now
 spec = Gem::Specification.new do |s|
   s.platform = Gem::Platform::RUBY
   s.name = NAME
-  s.version = "0.1.7"
+  s.version = "0.2.0"
   s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
   s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
   s.email = "jprince@icmb.utexas.edu"
@@ -131,7 +156,9 @@ spec = Gem::Specification.new do |s|
   s.add_dependency('libjtp', '~> 0.1.2')
   s.requirements << '"xmlparser" is the prefered xml parser right now.  REXML and regular expressions are used as fallback in some routines.'
   s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
-  s.requirements << 'the "t2x" binary to convert .RAW files to mzXML is expected in some applications'
+  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
+  s.requirements << '"rake" is useful for development'
+  s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
   s.test_files = FL["test/tc_*.rb"]
 end

data/bin/bioworks2excel.rb CHANGED Viewed

@@ -9,6 +9,6 @@ end
 ARGV.each do |file|
   newfile = file.gsub(".xml", ".txt")
-  obj = SpecID::Bioworks.new(file)
+  obj = Bioworks.new(file)
   obj.to_excel(newfile)
 end

data/bin/bioworks_to_pepxml.rb CHANGED Viewed

@@ -4,11 +4,10 @@
 # GLOBAL CONSTANTS
 DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
-DEFAULT_MZXML_PATH = "."
+DEFAULT_MZ_PATH = "."
 DEFAULT_OUTDIR = "pepxml"
 DEFAULT_PARAMS_GLOB = "*.params"
 DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
-DEFAULT_PEPXML_VERSION = 18
 DEFAULT_MS_MODEL = 'LCQ'
 DEFAULT_MASS_ANALYZER = 'Ion Trap'
 ##############################################################
@@ -17,6 +16,7 @@ require 'spec_id'
 require 'optparse'
 require 'ostruct'
 require 'fileutils'
+require 'spec_id/srf'
 # establish the default database path after examining env vars
 def_dbpath = nil
@@ -30,13 +30,16 @@ end
 opt = OpenStruct.new
 opt_obj = OptionParser.new do |op|
-  op.banner = "\nusage: #{File.basename(__FILE__)} [options] <file>.srf ...
-usage: #{File.basename(__FILE__)} [options] bioworks.xml"
-  op.on_head "
-  Takes .srf files or the xml exported output of Bioworks multi-consensus view
-  (no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline).
-Options:"
+  progname = File.basename(__FILE__)
+  op.banner = "\nusage: #{progname} [options] <file>.srf ..."
+  op.separator "usage: #{progname} [options] <bioworks>.srg"
+  op.separator "usage: #{progname} [options] <bioworks>.xml"
+  op.separator ""
+  op.separator "Takes srf files or the xml exported output of Bioworks multi-consensus view"
+  op.separator "(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline)."
+  op.separator "Additionally, will group .srf files into an .srg file (like 'srf_group.rb')"
+  op.separator ""
+  op.separator "Options:"
   op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
   op.on('-o', '--outdir path', "output directory     d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
@@ -45,19 +48,21 @@ Options:"
   op.separator ""
   op.on('-p', '--params file', "sequest params file  d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
   op.on('-d', '--dbpath path', "path to databases    d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
-  op.on('-m', '--mspath path', "path to MS files     d: '#{DEFAULT_MZXML_PATH}'") {|v| opt.mspath = v }
-  op.on('--model <LCQ|Orbi|string>', "MS model             d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
-  op.on('--mass_analyzer <string>',  "Mass Analyzer        d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
-  op.on('-v', '--version pepxml_version', "pepxml version       d: '#{DEFAULT_PEPXML_VERSION}'") {|v| opt.pepxml_version = v.to_i }
+  op.on('-m', '--mspath path', "path to MS files     d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
+  op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
+  op.on('--model <LCQ|Orbi|string>', "MS model      (xml)  d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
+  op.on('--mass_analyzer <string>',  "Mass Analyzer (xml)  d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
 end
 more_notes = "
 Notes:
-  mspath: Directory to RAW or mzXML (version 1) files.
-          This option is not used with Bioworks 3.3 files.
+  mspath: Directory to RAW or mzXML files.
+          This option is needed to view Pep3D files
+          and is critical with Bioworks 3.2 xml export files
   outdir: Path will be created if it does not already exist.
+  (xml) : only bioworks.xml files need to include this information
   model : LCQ -> 'LCQ Deca XP Plus'
         : Orbi -> 'LTQ Orbitrap'
         : other string -> That's the string that will be used.
@@ -93,55 +98,37 @@ end
 opt.outdir ||= DEFAULT_OUTDIR
-## Create dbpath if does not exist
-if opt.outdir
-  FileUtils.mkpath(opt.outdir) unless File.exist? opt.outdir
-end
 files = ARGV.to_a
+bioworks_file = files[0]
 if files[0] =~ /\.srf/i
-  opt.dbpath ||= def_dbpath
-  files.each do |file|
-    hash = {
-      :backup_db_path => opt.dbpath || def_dbpath,
-      :out_path => opt.outdir,
-    }
-    xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, hash)
-    xml_obj.to_pepxml(xml_obj.base_name + ".xml")
-  end
-else
-  ## Ensure params file exists (unless opt given)
-  opt.params ||= DEFAULT_PARAMS_FILE
-  params_obj = SpecID::Sequest::Params.new(opt.params)
-  # Ensure the database exists!
-  unless File.exist?( params_obj.database )
-    if opt.dbpath
-      params_obj.database_path = opt.dbpath
-    else
-      params_obj.database_path = def_dbpath
-    end
+  srg_file = 'bioworks.srg'
+  if File.exist? srg_file
+    srg_file = 'bioworks.tmp.srg'
   end
+  srg = SRFGroup.new(files)
+  srg.to_srg(srg_file)
+unless File.exist? srg_file
+  abort "couldn't create #{srg_file} from: #{files.join(', ')}"
+end
+bioworks_file = srg_file
+end
-  opt.mspath ||= DEFAULT_MZXML_PATH
-  opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
-  opt.model ||= DEFAULT_MS_MODEL
-  opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
-  case opt.model
-  when "LCQ"
-    model = 'LCQ Deca XP Plus'
-  when "Orbi"
-    model = 'LTQ Orbitrap'
-  else
-    model = opt.model
-  end
+case opt.model
+when "LCQ"
+  model = 'LCQ Deca XP Plus'
+when "Orbi"
+  model = 'LTQ Orbitrap'
+else
+  model = opt.model
+end
-  bioworks = files[0]
-  xml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params_obj, bioworks, opt.mspath, opt.outdir, opt.pepxml_version, 'trypsin', 'ThermoFinnigan', model)
+opt.dbpath ||= def_dbpath
+opt.mspath ||= DEFAULT_MZ_PATH
+opt.params ||= DEFAULT_PARAMS_FILE
+opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
+opt.model ||= DEFAULT_MS_MODEL
+xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
-  xml_objs.each do |obj|
-    obj.to_pepxml(obj.base_name + ".xml")
-  end
-end

data/bin/fasta_shaker.rb CHANGED Viewed

@@ -14,7 +14,7 @@
 # came out so nice and clean that I feel like I have room to spare.
 require 'fasta'
-require 'cmdparse'
+require 'optparse'
 opt = {}

data/bin/filter.rb ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/ruby -w
+require 'spec_id/filter'
+SpecID::Filter.run_from_argv(ARGV)

data/bin/find_aa_freq.rb ADDED Viewed

@@ -0,0 +1,23 @@
+#!/usr/bin/ruby -w
+require 'spec_id/aa_freqs'
+if ARGV.size < 1
+  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
+  puts "prints the amino acid frequencies of every amino acid in each fasta file"
+  exit
+end
+ARGV.each do |file|
+  obj = SpecID::AAFreqs.new(file)
+  puts file
+  obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
+    puts "#{k}: #{v}"
+  end
+  puts ""
+end

data/bin/id_precision.rb CHANGED Viewed

@@ -35,8 +35,9 @@ file = ARGV[1]
 obj = SpecID.new(file)
 re_prefix = /^#{Regexp.escape(fp_prefix)}/o
-prc = proc {|it| it.prot.reference =~ re_prefix }
+prc = proc {|it| it.prots.first.reference =~ re_prefix }
 #(match, nomatch) = obj.classify(:peps, prc)
+obj.peps = obj.pep_prots
 (fp, tp) = obj.classify(:peps, prc)
@@ -126,7 +127,7 @@ end
 files = ARGV.to_a
 two_lists = files.collect do |file|
-  obj = SpecID::Bioworks.new(file)
+  obj = Bioworks.new(file)
   list = []
   list.push( obj.pep_probs_by_pep_prots )
   list.push( obj.pep_probs_by_seq_charge )

data/bin/mzxml_to_lmat.rb CHANGED Viewed

@@ -23,6 +23,7 @@ opts = OptionParser.new do |op|
   op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
   op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
   op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
+  op.on("-v", "--verbose") {$VERBOSE = true}
 end
 opts.parse!
@@ -52,7 +53,7 @@ ARGV.each do |file|
   else
     lmat.write(outfile)
   end
-  puts "OUTPUT: #{outfile}"
+  puts("OUTPUT: #{outfile}") if $VERBOSE
 end

data/bin/pepproph_filter.rb CHANGED Viewed

@@ -12,5 +12,5 @@ files = ARGV.to_a
 cutoff = files.shift
 files.each do |file|
   outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
-  SpecID::Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
+  Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
 end

data/bin/precision.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#!/usr/bin/ruby -w
+#!/usr/bin/ruby
 require 'spec_id'