mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
data/Rakefile CHANGED
@@ -37,17 +37,11 @@ end
37
37
  # DOC
38
38
  ###############################################
39
39
 
40
- task :tutorial => [] do
41
- sys "ruby ./script/gen_database_searching.rb"
42
- end
43
- tutorial_files = %w(cat_db_search two_db_search).map {|f| "doc/src/tutorial/database_searching/#{f}.page"}
44
- tutorial_files << 'doc/src/tutorial/database_searching/index.page'
45
-
46
40
  def move_and_add_webgen_header(file, newfile, src_dir, heading)
47
41
  string = IO.read file
48
42
  with_header = heading + string
49
- sys.write_to_file(newfile, with_header)
50
- sys.mv newfile, src_dir
43
+ File.open(newfile, 'w') {|v| v.print with_header }
44
+ FileUtils.mv newfile, src_dir
51
45
  end
52
46
 
53
47
  desc "copy top level files into doc/src"
@@ -64,13 +58,13 @@ end
64
58
 
65
59
  desc "create and upload docs to server"
66
60
  task :upload_docs => :html_docs do
67
- sys "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
61
+ sh "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
68
62
  end
69
63
 
70
64
  desc "creates docs in doc/html"
71
- task :html_docs => [:cp_top_level_docs, :tutorial] do
72
- sys.cd 'doc' do
73
- sys "webgen"
65
+ task :html_docs => [:cp_top_level_docs] do
66
+ FileUtils.cd 'doc' do
67
+ sh "webgen"
74
68
  end
75
69
  end
76
70
 
@@ -89,11 +83,42 @@ end
89
83
 
90
84
  desc "Run unit tests."
91
85
  Rake::TestTask.new do |t|
86
+ reply = `#{gemcmd} list -l #{NAME}`
87
+ if reply.include? NAME + " ("
88
+ puts "GOING to uninstall gem '#{NAME}' for testing"
89
+ if WIN32
90
+ %x( #{gemcmd} uninstall -x #{NAME} )
91
+ else
92
+ %x( sudo #{gemcmd} uninstall -x #{NAME} )
93
+ end
94
+ end
92
95
  # t.libs << "lib" ## done by default
93
96
  t.test_files = FL["test/tc_*.rb"]
94
97
  #t.verbose = true
95
98
  end
96
99
 
100
+
101
+
102
+ desc "Run unit tests individual on each test"
103
+ task :test_ind do |t|
104
+ reply = `#{gemcmd} list -l #{NAME}`
105
+ if reply.include? NAME + " ("
106
+ %x( sudo #{gemcmd} uninstall -x #{NAME} )
107
+ end
108
+
109
+ # t.libs << "lib" ## done by default
110
+ test_files = FL["test/tc_*.rb"]
111
+ test_files.each do |file|
112
+ puts "TESTING: #{file.sub(/test\//,'')}"
113
+ puts `ruby -I lib #{file}`
114
+ end
115
+ #t.verbose = true
116
+ end
117
+
118
+
119
+
120
+
121
+
97
122
  #desc "Run all tests"
98
123
  #task :test_indiv do
99
124
  # sys.cd "test" do
@@ -115,7 +140,7 @@ tm = Time.now
115
140
  spec = Gem::Specification.new do |s|
116
141
  s.platform = Gem::Platform::RUBY
117
142
  s.name = NAME
118
- s.version = "0.1.7"
143
+ s.version = "0.2.0"
119
144
  s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
120
145
  s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
121
146
  s.email = "jprince@icmb.utexas.edu"
@@ -131,7 +156,9 @@ spec = Gem::Specification.new do |s|
131
156
  s.add_dependency('libjtp', '~> 0.1.2')
132
157
  s.requirements << '"xmlparser" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines.'
133
158
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
134
- s.requirements << 'the "t2x" binary to convert .RAW files to mzXML is expected in some applications'
159
+ s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
160
+ s.requirements << '"rake" is useful for development'
161
+ s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
135
162
  s.test_files = FL["test/tc_*.rb"]
136
163
  end
137
164
 
@@ -9,6 +9,6 @@ end
9
9
 
10
10
  ARGV.each do |file|
11
11
  newfile = file.gsub(".xml", ".txt")
12
- obj = SpecID::Bioworks.new(file)
12
+ obj = Bioworks.new(file)
13
13
  obj.to_excel(newfile)
14
14
  end
@@ -4,11 +4,10 @@
4
4
  # GLOBAL CONSTANTS
5
5
 
6
6
  DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
7
- DEFAULT_MZXML_PATH = "."
7
+ DEFAULT_MZ_PATH = "."
8
8
  DEFAULT_OUTDIR = "pepxml"
9
9
  DEFAULT_PARAMS_GLOB = "*.params"
10
10
  DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
11
- DEFAULT_PEPXML_VERSION = 18
12
11
  DEFAULT_MS_MODEL = 'LCQ'
13
12
  DEFAULT_MASS_ANALYZER = 'Ion Trap'
14
13
  ##############################################################
@@ -17,6 +16,7 @@ require 'spec_id'
17
16
  require 'optparse'
18
17
  require 'ostruct'
19
18
  require 'fileutils'
19
+ require 'spec_id/srf'
20
20
 
21
21
  # establish the default database path after examining env vars
22
22
  def_dbpath = nil
@@ -30,13 +30,16 @@ end
30
30
  opt = OpenStruct.new
31
31
 
32
32
  opt_obj = OptionParser.new do |op|
33
- op.banner = "\nusage: #{File.basename(__FILE__)} [options] <file>.srf ...
34
- usage: #{File.basename(__FILE__)} [options] bioworks.xml"
35
- op.on_head "
36
- Takes .srf files or the xml exported output of Bioworks multi-consensus view
37
- (no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline).
38
-
39
- Options:"
33
+ progname = File.basename(__FILE__)
34
+ op.banner = "\nusage: #{progname} [options] <file>.srf ..."
35
+ op.separator "usage: #{progname} [options] <bioworks>.srg"
36
+ op.separator "usage: #{progname} [options] <bioworks>.xml"
37
+ op.separator ""
38
+ op.separator "Takes srf files or the xml exported output of Bioworks multi-consensus view"
39
+ op.separator "(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline)."
40
+ op.separator "Additionally, will group .srf files into an .srg file (like 'srf_group.rb')"
41
+ op.separator ""
42
+ op.separator "Options:"
40
43
  op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
41
44
  op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
42
45
 
@@ -45,19 +48,21 @@ Options:"
45
48
  op.separator ""
46
49
  op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
47
50
  op.on('-d', '--dbpath path', "path to databases d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
48
- op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZXML_PATH}'") {|v| opt.mspath = v }
49
- op.on('--model <LCQ|Orbi|string>', "MS model d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
50
- op.on('--mass_analyzer <string>', "Mass Analyzer d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
51
- op.on('-v', '--version pepxml_version', "pepxml version d: '#{DEFAULT_PEPXML_VERSION}'") {|v| opt.pepxml_version = v.to_i }
51
+ op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
52
+ op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
53
+ op.on('--model <LCQ|Orbi|string>', "MS model (xml) d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
54
+ op.on('--mass_analyzer <string>', "Mass Analyzer (xml) d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
52
55
 
53
56
  end
54
57
 
55
58
  more_notes = "
56
59
  Notes:
57
60
 
58
- mspath: Directory to RAW or mzXML (version 1) files.
59
- This option is not used with Bioworks 3.3 files.
61
+ mspath: Directory to RAW or mzXML files.
62
+ This option is needed to view Pep3D files
63
+ and is critical with Bioworks 3.2 xml export files
60
64
  outdir: Path will be created if it does not already exist.
65
+ (xml) : only bioworks.xml files need to include this information
61
66
  model : LCQ -> 'LCQ Deca XP Plus'
62
67
  : Orbi -> 'LTQ Orbitrap'
63
68
  : other string -> That's the string that will be used.
@@ -93,55 +98,37 @@ end
93
98
 
94
99
  opt.outdir ||= DEFAULT_OUTDIR
95
100
 
96
- ## Create dbpath if does not exist
97
- if opt.outdir
98
- FileUtils.mkpath(opt.outdir) unless File.exist? opt.outdir
99
- end
100
101
 
101
102
  files = ARGV.to_a
102
-
103
+ bioworks_file = files[0]
103
104
  if files[0] =~ /\.srf/i
104
- opt.dbpath ||= def_dbpath
105
- files.each do |file|
106
- hash = {
107
- :backup_db_path => opt.dbpath || def_dbpath,
108
- :out_path => opt.outdir,
109
- }
110
- xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, hash)
111
- xml_obj.to_pepxml(xml_obj.base_name + ".xml")
112
- end
113
- else
114
- ## Ensure params file exists (unless opt given)
115
- opt.params ||= DEFAULT_PARAMS_FILE
116
- params_obj = SpecID::Sequest::Params.new(opt.params)
117
- # Ensure the database exists!
118
- unless File.exist?( params_obj.database )
119
- if opt.dbpath
120
- params_obj.database_path = opt.dbpath
121
- else
122
- params_obj.database_path = def_dbpath
123
- end
105
+ srg_file = 'bioworks.srg'
106
+ if File.exist? srg_file
107
+ srg_file = 'bioworks.tmp.srg'
124
108
  end
109
+ srg = SRFGroup.new(files)
110
+ srg.to_srg(srg_file)
111
+ unless File.exist? srg_file
112
+ abort "couldn't create #{srg_file} from: #{files.join(', ')}"
113
+ end
114
+ bioworks_file = srg_file
115
+ end
125
116
 
126
- opt.mspath ||= DEFAULT_MZXML_PATH
127
- opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
128
- opt.model ||= DEFAULT_MS_MODEL
129
- opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
130
-
131
- case opt.model
132
- when "LCQ"
133
- model = 'LCQ Deca XP Plus'
134
- when "Orbi"
135
- model = 'LTQ Orbitrap'
136
- else
137
- model = opt.model
138
- end
139
117
 
118
+ case opt.model
119
+ when "LCQ"
120
+ model = 'LCQ Deca XP Plus'
121
+ when "Orbi"
122
+ model = 'LTQ Orbitrap'
123
+ else
124
+ model = opt.model
125
+ end
140
126
 
141
- bioworks = files[0]
142
- xml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params_obj, bioworks, opt.mspath, opt.outdir, opt.pepxml_version, 'trypsin', 'ThermoFinnigan', model)
127
+ opt.dbpath ||= def_dbpath
128
+ opt.mspath ||= DEFAULT_MZ_PATH
129
+ opt.params ||= DEFAULT_PARAMS_FILE
130
+ opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
131
+ opt.model ||= DEFAULT_MS_MODEL
132
+
133
+ xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
143
134
 
144
- xml_objs.each do |obj|
145
- obj.to_pepxml(obj.base_name + ".xml")
146
- end
147
- end
data/bin/fasta_shaker.rb CHANGED
@@ -14,7 +14,7 @@
14
14
  # came out so nice and clean that I feel like I have room to spare.
15
15
 
16
16
  require 'fasta'
17
- require 'cmdparse'
17
+ require 'optparse'
18
18
 
19
19
  opt = {}
20
20
 
data/bin/filter.rb ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
+ require 'spec_id/filter'
5
+
6
+ SpecID::Filter.run_from_argv(ARGV)
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
+ require 'spec_id/aa_freqs'
5
+
6
+ if ARGV.size < 1
7
+ puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
8
+ puts "prints the amino acid frequencies of every amino acid in each fasta file"
9
+ exit
10
+ end
11
+
12
+ ARGV.each do |file|
13
+ obj = SpecID::AAFreqs.new(file)
14
+ puts file
15
+ obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
16
+ puts "#{k}: #{v}"
17
+ end
18
+ puts ""
19
+ end
20
+
21
+
22
+
23
+
data/bin/id_precision.rb CHANGED
@@ -35,8 +35,9 @@ file = ARGV[1]
35
35
 
36
36
  obj = SpecID.new(file)
37
37
  re_prefix = /^#{Regexp.escape(fp_prefix)}/o
38
- prc = proc {|it| it.prot.reference =~ re_prefix }
38
+ prc = proc {|it| it.prots.first.reference =~ re_prefix }
39
39
  #(match, nomatch) = obj.classify(:peps, prc)
40
+ obj.peps = obj.pep_prots
40
41
  (fp, tp) = obj.classify(:peps, prc)
41
42
 
42
43
 
@@ -126,7 +127,7 @@ end
126
127
  files = ARGV.to_a
127
128
 
128
129
  two_lists = files.collect do |file|
129
- obj = SpecID::Bioworks.new(file)
130
+ obj = Bioworks.new(file)
130
131
  list = []
131
132
  list.push( obj.pep_probs_by_pep_prots )
132
133
  list.push( obj.pep_probs_by_seq_charge )
data/bin/mzxml_to_lmat.rb CHANGED
@@ -23,6 +23,7 @@ opts = OptionParser.new do |op|
23
23
  op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
24
24
  op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
25
25
  op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
26
+ op.on("-v", "--verbose") {$VERBOSE = true}
26
27
  end
27
28
  opts.parse!
28
29
 
@@ -52,7 +53,7 @@ ARGV.each do |file|
52
53
  else
53
54
  lmat.write(outfile)
54
55
  end
55
- puts "OUTPUT: #{outfile}"
56
+ puts("OUTPUT: #{outfile}") if $VERBOSE
56
57
  end
57
58
 
58
59
 
@@ -12,5 +12,5 @@ files = ARGV.to_a
12
12
  cutoff = files.shift
13
13
  files.each do |file|
14
14
  outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
15
- SpecID::Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
15
+ Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
16
16
  end
data/bin/precision.rb CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby -w
1
+ #!/usr/bin/ruby
2
2
 
3
3
  require 'spec_id'
4
4