mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
data/Rakefile CHANGED
@@ -37,17 +37,11 @@ end
37
37
  # DOC
38
38
  ###############################################
39
39
 
40
- task :tutorial => [] do
41
- sys "ruby ./script/gen_database_searching.rb"
42
- end
43
- tutorial_files = %w(cat_db_search two_db_search).map {|f| "doc/src/tutorial/database_searching/#{f}.page"}
44
- tutorial_files << 'doc/src/tutorial/database_searching/index.page'
45
-
46
40
  def move_and_add_webgen_header(file, newfile, src_dir, heading)
47
41
  string = IO.read file
48
42
  with_header = heading + string
49
- sys.write_to_file(newfile, with_header)
50
- sys.mv newfile, src_dir
43
+ File.open(newfile, 'w') {|v| v.print with_header }
44
+ FileUtils.mv newfile, src_dir
51
45
  end
52
46
 
53
47
  desc "copy top level files into doc/src"
@@ -64,13 +58,13 @@ end
64
58
 
65
59
  desc "create and upload docs to server"
66
60
  task :upload_docs => :html_docs do
67
- sys "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
61
+ sh "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
68
62
  end
69
63
 
70
64
  desc "creates docs in doc/html"
71
- task :html_docs => [:cp_top_level_docs, :tutorial] do
72
- sys.cd 'doc' do
73
- sys "webgen"
65
+ task :html_docs => [:cp_top_level_docs] do
66
+ FileUtils.cd 'doc' do
67
+ sh "webgen"
74
68
  end
75
69
  end
76
70
 
@@ -89,11 +83,42 @@ end
89
83
 
90
84
  desc "Run unit tests."
91
85
  Rake::TestTask.new do |t|
86
+ reply = `#{gemcmd} list -l #{NAME}`
87
+ if reply.include? NAME + " ("
88
+ puts "GOING to uninstall gem '#{NAME}' for testing"
89
+ if WIN32
90
+ %x( #{gemcmd} uninstall -x #{NAME} )
91
+ else
92
+ %x( sudo #{gemcmd} uninstall -x #{NAME} )
93
+ end
94
+ end
92
95
  # t.libs << "lib" ## done by default
93
96
  t.test_files = FL["test/tc_*.rb"]
94
97
  #t.verbose = true
95
98
  end
96
99
 
100
+
101
+
102
+ desc "Run unit tests individual on each test"
103
+ task :test_ind do |t|
104
+ reply = `#{gemcmd} list -l #{NAME}`
105
+ if reply.include? NAME + " ("
106
+ %x( sudo #{gemcmd} uninstall -x #{NAME} )
107
+ end
108
+
109
+ # t.libs << "lib" ## done by default
110
+ test_files = FL["test/tc_*.rb"]
111
+ test_files.each do |file|
112
+ puts "TESTING: #{file.sub(/test\//,'')}"
113
+ puts `ruby -I lib #{file}`
114
+ end
115
+ #t.verbose = true
116
+ end
117
+
118
+
119
+
120
+
121
+
97
122
  #desc "Run all tests"
98
123
  #task :test_indiv do
99
124
  # sys.cd "test" do
@@ -115,7 +140,7 @@ tm = Time.now
115
140
  spec = Gem::Specification.new do |s|
116
141
  s.platform = Gem::Platform::RUBY
117
142
  s.name = NAME
118
- s.version = "0.1.7"
143
+ s.version = "0.2.0"
119
144
  s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
120
145
  s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
121
146
  s.email = "jprince@icmb.utexas.edu"
@@ -131,7 +156,9 @@ spec = Gem::Specification.new do |s|
131
156
  s.add_dependency('libjtp', '~> 0.1.2')
132
157
  s.requirements << '"xmlparser" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines.'
133
158
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
134
- s.requirements << 'the "t2x" binary to convert .RAW files to mzXML is expected in some applications'
159
+ s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
160
+ s.requirements << '"rake" is useful for development'
161
+ s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
135
162
  s.test_files = FL["test/tc_*.rb"]
136
163
  end
137
164
 
data/bin/bioworks2excel.rb CHANGED
@@ -9,6 +9,6 @@ end
9
9
 
10
10
  ARGV.each do |file|
11
11
  newfile = file.gsub(".xml", ".txt")
12
- obj = SpecID::Bioworks.new(file)
12
+ obj = Bioworks.new(file)
13
13
  obj.to_excel(newfile)
14
14
  end
data/bin/bioworks_to_pepxml.rb CHANGED
@@ -4,11 +4,10 @@
4
4
  # GLOBAL CONSTANTS
5
5
 
6
6
  DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
7
- DEFAULT_MZXML_PATH = "."
7
+ DEFAULT_MZ_PATH = "."
8
8
  DEFAULT_OUTDIR = "pepxml"
9
9
  DEFAULT_PARAMS_GLOB = "*.params"
10
10
  DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
11
- DEFAULT_PEPXML_VERSION = 18
12
11
  DEFAULT_MS_MODEL = 'LCQ'
13
12
  DEFAULT_MASS_ANALYZER = 'Ion Trap'
14
13
  ##############################################################
@@ -17,6 +16,7 @@ require 'spec_id'
17
16
  require 'optparse'
18
17
  require 'ostruct'
19
18
  require 'fileutils'
19
+ require 'spec_id/srf'
20
20
 
21
21
  # establish the default database path after examining env vars
22
22
  def_dbpath = nil
@@ -30,13 +30,16 @@ end
30
30
  opt = OpenStruct.new
31
31
 
32
32
  opt_obj = OptionParser.new do |op|
33
- op.banner = "\nusage: #{File.basename(__FILE__)} [options] <file>.srf ...
34
- usage: #{File.basename(__FILE__)} [options] bioworks.xml"
35
- op.on_head "
36
- Takes .srf files or the xml exported output of Bioworks multi-consensus view
37
- (no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline).
38
-
39
- Options:"
33
+ progname = File.basename(__FILE__)
34
+ op.banner = "\nusage: #{progname} [options] <file>.srf ..."
35
+ op.separator "usage: #{progname} [options] <bioworks>.srg"
36
+ op.separator "usage: #{progname} [options] <bioworks>.xml"
37
+ op.separator ""
38
+ op.separator "Takes srf files or the xml exported output of Bioworks multi-consensus view"
39
+ op.separator "(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline)."
40
+ op.separator "Additionally, will group .srf files into an .srg file (like 'srf_group.rb')"
41
+ op.separator ""
42
+ op.separator "Options:"
40
43
  op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
41
44
  op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
42
45
 
@@ -45,19 +48,21 @@ Options:"
45
48
  op.separator ""
46
49
  op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
47
50
  op.on('-d', '--dbpath path', "path to databases d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
48
- op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZXML_PATH}'") {|v| opt.mspath = v }
49
- op.on('--model <LCQ|Orbi|string>', "MS model d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
50
- op.on('--mass_analyzer <string>', "Mass Analyzer d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
51
- op.on('-v', '--version pepxml_version', "pepxml version d: '#{DEFAULT_PEPXML_VERSION}'") {|v| opt.pepxml_version = v.to_i }
51
+ op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
52
+ op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
53
+ op.on('--model <LCQ|Orbi|string>', "MS model (xml) d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
54
+ op.on('--mass_analyzer <string>', "Mass Analyzer (xml) d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
52
55
 
53
56
  end
54
57
 
55
58
  more_notes = "
56
59
  Notes:
57
60
 
58
- mspath: Directory to RAW or mzXML (version 1) files.
59
- This option is not used with Bioworks 3.3 files.
61
+ mspath: Directory to RAW or mzXML files.
62
+ This option is needed to view Pep3D files
63
+ and is critical with Bioworks 3.2 xml export files
60
64
  outdir: Path will be created if it does not already exist.
65
+ (xml) : only bioworks.xml files need to include this information
61
66
  model : LCQ -> 'LCQ Deca XP Plus'
62
67
  : Orbi -> 'LTQ Orbitrap'
63
68
  : other string -> That's the string that will be used.
@@ -93,55 +98,37 @@ end
93
98
 
94
99
  opt.outdir ||= DEFAULT_OUTDIR
95
100
 
96
- ## Create dbpath if does not exist
97
- if opt.outdir
98
- FileUtils.mkpath(opt.outdir) unless File.exist? opt.outdir
99
- end
100
101
 
101
102
  files = ARGV.to_a
102
-
103
+ bioworks_file = files[0]
103
104
  if files[0] =~ /\.srf/i
104
- opt.dbpath ||= def_dbpath
105
- files.each do |file|
106
- hash = {
107
- :backup_db_path => opt.dbpath || def_dbpath,
108
- :out_path => opt.outdir,
109
- }
110
- xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, hash)
111
- xml_obj.to_pepxml(xml_obj.base_name + ".xml")
112
- end
113
- else
114
- ## Ensure params file exists (unless opt given)
115
- opt.params ||= DEFAULT_PARAMS_FILE
116
- params_obj = SpecID::Sequest::Params.new(opt.params)
117
- # Ensure the database exists!
118
- unless File.exist?( params_obj.database )
119
- if opt.dbpath
120
- params_obj.database_path = opt.dbpath
121
- else
122
- params_obj.database_path = def_dbpath
123
- end
105
+ srg_file = 'bioworks.srg'
106
+ if File.exist? srg_file
107
+ srg_file = 'bioworks.tmp.srg'
124
108
  end
109
+ srg = SRFGroup.new(files)
110
+ srg.to_srg(srg_file)
111
+ unless File.exist? srg_file
112
+ abort "couldn't create #{srg_file} from: #{files.join(', ')}"
113
+ end
114
+ bioworks_file = srg_file
115
+ end
125
116
 
126
- opt.mspath ||= DEFAULT_MZXML_PATH
127
- opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
128
- opt.model ||= DEFAULT_MS_MODEL
129
- opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
130
-
131
- case opt.model
132
- when "LCQ"
133
- model = 'LCQ Deca XP Plus'
134
- when "Orbi"
135
- model = 'LTQ Orbitrap'
136
- else
137
- model = opt.model
138
- end
139
117
 
118
+ case opt.model
119
+ when "LCQ"
120
+ model = 'LCQ Deca XP Plus'
121
+ when "Orbi"
122
+ model = 'LTQ Orbitrap'
123
+ else
124
+ model = opt.model
125
+ end
140
126
 
141
- bioworks = files[0]
142
- xml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params_obj, bioworks, opt.mspath, opt.outdir, opt.pepxml_version, 'trypsin', 'ThermoFinnigan', model)
127
+ opt.dbpath ||= def_dbpath
128
+ opt.mspath ||= DEFAULT_MZ_PATH
129
+ opt.params ||= DEFAULT_PARAMS_FILE
130
+ opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
131
+ opt.model ||= DEFAULT_MS_MODEL
132
+
133
+ xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
143
134
 
144
- xml_objs.each do |obj|
145
- obj.to_pepxml(obj.base_name + ".xml")
146
- end
147
- end
data/bin/fasta_shaker.rb CHANGED
@@ -14,7 +14,7 @@
14
14
  # came out so nice and clean that I feel like I have room to spare.
15
15
 
16
16
  require 'fasta'
17
- require 'cmdparse'
17
+ require 'optparse'
18
18
 
19
19
  opt = {}
20
20
 
data/bin/filter.rb ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
+ require 'spec_id/filter'
5
+
6
+ SpecID::Filter.run_from_argv(ARGV)
data/bin/find_aa_freq.rb ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
+ require 'spec_id/aa_freqs'
5
+
6
+ if ARGV.size < 1
7
+ puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
8
+ puts "prints the amino acid frequencies of every amino acid in each fasta file"
9
+ exit
10
+ end
11
+
12
+ ARGV.each do |file|
13
+ obj = SpecID::AAFreqs.new(file)
14
+ puts file
15
+ obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
16
+ puts "#{k}: #{v}"
17
+ end
18
+ puts ""
19
+ end
20
+
21
+
22
+
23
+
data/bin/id_precision.rb CHANGED
@@ -35,8 +35,9 @@ file = ARGV[1]
35
35
 
36
36
  obj = SpecID.new(file)
37
37
  re_prefix = /^#{Regexp.escape(fp_prefix)}/o
38
- prc = proc {|it| it.prot.reference =~ re_prefix }
38
+ prc = proc {|it| it.prots.first.reference =~ re_prefix }
39
39
  #(match, nomatch) = obj.classify(:peps, prc)
40
+ obj.peps = obj.pep_prots
40
41
  (fp, tp) = obj.classify(:peps, prc)
41
42
 
42
43
 
@@ -126,7 +127,7 @@ end
126
127
  files = ARGV.to_a
127
128
 
128
129
  two_lists = files.collect do |file|
129
- obj = SpecID::Bioworks.new(file)
130
+ obj = Bioworks.new(file)
130
131
  list = []
131
132
  list.push( obj.pep_probs_by_pep_prots )
132
133
  list.push( obj.pep_probs_by_seq_charge )
data/bin/mzxml_to_lmat.rb CHANGED
@@ -23,6 +23,7 @@ opts = OptionParser.new do |op|
23
23
  op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
24
24
  op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
25
25
  op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
26
+ op.on("-v", "--verbose") {$VERBOSE = true}
26
27
  end
27
28
  opts.parse!
28
29
 
@@ -52,7 +53,7 @@ ARGV.each do |file|
52
53
  else
53
54
  lmat.write(outfile)
54
55
  end
55
- puts "OUTPUT: #{outfile}"
56
+ puts("OUTPUT: #{outfile}") if $VERBOSE
56
57
  end
57
58
 
58
59
 
data/bin/pepproph_filter.rb CHANGED
@@ -12,5 +12,5 @@ files = ARGV.to_a
12
12
  cutoff = files.shift
13
13
  files.each do |file|
14
14
  outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
15
- SpecID::Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
15
+ Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
16
16
  end
data/bin/precision.rb CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby -w
1
+ #!/usr/bin/ruby
2
2
 
3
3
  require 'spec_id'
4
4