mspire 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/Rakefile
CHANGED
@@ -37,17 +37,11 @@ end
|
|
37
37
|
# DOC
|
38
38
|
###############################################
|
39
39
|
|
40
|
-
task :tutorial => [] do
|
41
|
-
sys "ruby ./script/gen_database_searching.rb"
|
42
|
-
end
|
43
|
-
tutorial_files = %w(cat_db_search two_db_search).map {|f| "doc/src/tutorial/database_searching/#{f}.page"}
|
44
|
-
tutorial_files << 'doc/src/tutorial/database_searching/index.page'
|
45
|
-
|
46
40
|
def move_and_add_webgen_header(file, newfile, src_dir, heading)
|
47
41
|
string = IO.read file
|
48
42
|
with_header = heading + string
|
49
|
-
|
50
|
-
|
43
|
+
File.open(newfile, 'w') {|v| v.print with_header }
|
44
|
+
FileUtils.mv newfile, src_dir
|
51
45
|
end
|
52
46
|
|
53
47
|
desc "copy top level files into doc/src"
|
@@ -64,13 +58,13 @@ end
|
|
64
58
|
|
65
59
|
desc "create and upload docs to server"
|
66
60
|
task :upload_docs => :html_docs do
|
67
|
-
|
61
|
+
sh "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
|
68
62
|
end
|
69
63
|
|
70
64
|
desc "creates docs in doc/html"
|
71
|
-
task :html_docs => [:cp_top_level_docs
|
72
|
-
|
73
|
-
|
65
|
+
task :html_docs => [:cp_top_level_docs] do
|
66
|
+
FileUtils.cd 'doc' do
|
67
|
+
sh "webgen"
|
74
68
|
end
|
75
69
|
end
|
76
70
|
|
@@ -89,11 +83,42 @@ end
|
|
89
83
|
|
90
84
|
desc "Run unit tests."
|
91
85
|
Rake::TestTask.new do |t|
|
86
|
+
reply = `#{gemcmd} list -l #{NAME}`
|
87
|
+
if reply.include? NAME + " ("
|
88
|
+
puts "GOING to uninstall gem '#{NAME}' for testing"
|
89
|
+
if WIN32
|
90
|
+
%x( #{gemcmd} uninstall -x #{NAME} )
|
91
|
+
else
|
92
|
+
%x( sudo #{gemcmd} uninstall -x #{NAME} )
|
93
|
+
end
|
94
|
+
end
|
92
95
|
# t.libs << "lib" ## done by default
|
93
96
|
t.test_files = FL["test/tc_*.rb"]
|
94
97
|
#t.verbose = true
|
95
98
|
end
|
96
99
|
|
100
|
+
|
101
|
+
|
102
|
+
desc "Run unit tests individual on each test"
|
103
|
+
task :test_ind do |t|
|
104
|
+
reply = `#{gemcmd} list -l #{NAME}`
|
105
|
+
if reply.include? NAME + " ("
|
106
|
+
%x( sudo #{gemcmd} uninstall -x #{NAME} )
|
107
|
+
end
|
108
|
+
|
109
|
+
# t.libs << "lib" ## done by default
|
110
|
+
test_files = FL["test/tc_*.rb"]
|
111
|
+
test_files.each do |file|
|
112
|
+
puts "TESTING: #{file.sub(/test\//,'')}"
|
113
|
+
puts `ruby -I lib #{file}`
|
114
|
+
end
|
115
|
+
#t.verbose = true
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
97
122
|
#desc "Run all tests"
|
98
123
|
#task :test_indiv do
|
99
124
|
# sys.cd "test" do
|
@@ -115,7 +140,7 @@ tm = Time.now
|
|
115
140
|
spec = Gem::Specification.new do |s|
|
116
141
|
s.platform = Gem::Platform::RUBY
|
117
142
|
s.name = NAME
|
118
|
-
s.version = "0.
|
143
|
+
s.version = "0.2.0"
|
119
144
|
s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
|
120
145
|
s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
|
121
146
|
s.email = "jprince@icmb.utexas.edu"
|
@@ -131,7 +156,9 @@ spec = Gem::Specification.new do |s|
|
|
131
156
|
s.add_dependency('libjtp', '~> 0.1.2')
|
132
157
|
s.requirements << '"xmlparser" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines.'
|
133
158
|
s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
|
134
|
-
s.requirements << 'the "t2x" binary to convert .RAW files to mzXML
|
159
|
+
s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
|
160
|
+
s.requirements << '"rake" is useful for development'
|
161
|
+
s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
|
135
162
|
s.test_files = FL["test/tc_*.rb"]
|
136
163
|
end
|
137
164
|
|
data/bin/bioworks2excel.rb
CHANGED
data/bin/bioworks_to_pepxml.rb
CHANGED
@@ -4,11 +4,10 @@
|
|
4
4
|
# GLOBAL CONSTANTS
|
5
5
|
|
6
6
|
DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
|
7
|
-
|
7
|
+
DEFAULT_MZ_PATH = "."
|
8
8
|
DEFAULT_OUTDIR = "pepxml"
|
9
9
|
DEFAULT_PARAMS_GLOB = "*.params"
|
10
10
|
DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
|
11
|
-
DEFAULT_PEPXML_VERSION = 18
|
12
11
|
DEFAULT_MS_MODEL = 'LCQ'
|
13
12
|
DEFAULT_MASS_ANALYZER = 'Ion Trap'
|
14
13
|
##############################################################
|
@@ -17,6 +16,7 @@ require 'spec_id'
|
|
17
16
|
require 'optparse'
|
18
17
|
require 'ostruct'
|
19
18
|
require 'fileutils'
|
19
|
+
require 'spec_id/srf'
|
20
20
|
|
21
21
|
# establish the default database path after examining env vars
|
22
22
|
def_dbpath = nil
|
@@ -30,13 +30,16 @@ end
|
|
30
30
|
opt = OpenStruct.new
|
31
31
|
|
32
32
|
opt_obj = OptionParser.new do |op|
|
33
|
-
|
34
|
-
|
35
|
-
op.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
progname = File.basename(__FILE__)
|
34
|
+
op.banner = "\nusage: #{progname} [options] <file>.srf ..."
|
35
|
+
op.separator "usage: #{progname} [options] <bioworks>.srg"
|
36
|
+
op.separator "usage: #{progname} [options] <bioworks>.xml"
|
37
|
+
op.separator ""
|
38
|
+
op.separator "Takes srf files or the xml exported output of Bioworks multi-consensus view"
|
39
|
+
op.separator "(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline)."
|
40
|
+
op.separator "Additionally, will group .srf files into an .srg file (like 'srf_group.rb')"
|
41
|
+
op.separator ""
|
42
|
+
op.separator "Options:"
|
40
43
|
op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
|
41
44
|
op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
|
42
45
|
|
@@ -45,19 +48,21 @@ Options:"
|
|
45
48
|
op.separator ""
|
46
49
|
op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
|
47
50
|
op.on('-d', '--dbpath path', "path to databases d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
|
48
|
-
op.on('-m', '--mspath path', "path to MS files d: '#{
|
49
|
-
op.on('--
|
50
|
-
op.on('--
|
51
|
-
op.on('
|
51
|
+
op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
|
52
|
+
op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
|
53
|
+
op.on('--model <LCQ|Orbi|string>', "MS model (xml) d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
|
54
|
+
op.on('--mass_analyzer <string>', "Mass Analyzer (xml) d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
|
52
55
|
|
53
56
|
end
|
54
57
|
|
55
58
|
more_notes = "
|
56
59
|
Notes:
|
57
60
|
|
58
|
-
mspath: Directory to RAW or mzXML
|
59
|
-
This option is
|
61
|
+
mspath: Directory to RAW or mzXML files.
|
62
|
+
This option is needed to view Pep3D files
|
63
|
+
and is critical with Bioworks 3.2 xml export files
|
60
64
|
outdir: Path will be created if it does not already exist.
|
65
|
+
(xml) : only bioworks.xml files need to include this information
|
61
66
|
model : LCQ -> 'LCQ Deca XP Plus'
|
62
67
|
: Orbi -> 'LTQ Orbitrap'
|
63
68
|
: other string -> That's the string that will be used.
|
@@ -93,55 +98,37 @@ end
|
|
93
98
|
|
94
99
|
opt.outdir ||= DEFAULT_OUTDIR
|
95
100
|
|
96
|
-
## Create dbpath if does not exist
|
97
|
-
if opt.outdir
|
98
|
-
FileUtils.mkpath(opt.outdir) unless File.exist? opt.outdir
|
99
|
-
end
|
100
101
|
|
101
102
|
files = ARGV.to_a
|
102
|
-
|
103
|
+
bioworks_file = files[0]
|
103
104
|
if files[0] =~ /\.srf/i
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
:backup_db_path => opt.dbpath || def_dbpath,
|
108
|
-
:out_path => opt.outdir,
|
109
|
-
}
|
110
|
-
xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, hash)
|
111
|
-
xml_obj.to_pepxml(xml_obj.base_name + ".xml")
|
112
|
-
end
|
113
|
-
else
|
114
|
-
## Ensure params file exists (unless opt given)
|
115
|
-
opt.params ||= DEFAULT_PARAMS_FILE
|
116
|
-
params_obj = SpecID::Sequest::Params.new(opt.params)
|
117
|
-
# Ensure the database exists!
|
118
|
-
unless File.exist?( params_obj.database )
|
119
|
-
if opt.dbpath
|
120
|
-
params_obj.database_path = opt.dbpath
|
121
|
-
else
|
122
|
-
params_obj.database_path = def_dbpath
|
123
|
-
end
|
105
|
+
srg_file = 'bioworks.srg'
|
106
|
+
if File.exist? srg_file
|
107
|
+
srg_file = 'bioworks.tmp.srg'
|
124
108
|
end
|
109
|
+
srg = SRFGroup.new(files)
|
110
|
+
srg.to_srg(srg_file)
|
111
|
+
unless File.exist? srg_file
|
112
|
+
abort "couldn't create #{srg_file} from: #{files.join(', ')}"
|
113
|
+
end
|
114
|
+
bioworks_file = srg_file
|
115
|
+
end
|
125
116
|
|
126
|
-
opt.mspath ||= DEFAULT_MZXML_PATH
|
127
|
-
opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
|
128
|
-
opt.model ||= DEFAULT_MS_MODEL
|
129
|
-
opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
|
130
|
-
|
131
|
-
case opt.model
|
132
|
-
when "LCQ"
|
133
|
-
model = 'LCQ Deca XP Plus'
|
134
|
-
when "Orbi"
|
135
|
-
model = 'LTQ Orbitrap'
|
136
|
-
else
|
137
|
-
model = opt.model
|
138
|
-
end
|
139
117
|
|
118
|
+
case opt.model
|
119
|
+
when "LCQ"
|
120
|
+
model = 'LCQ Deca XP Plus'
|
121
|
+
when "Orbi"
|
122
|
+
model = 'LTQ Orbitrap'
|
123
|
+
else
|
124
|
+
model = opt.model
|
125
|
+
end
|
140
126
|
|
141
|
-
|
142
|
-
|
127
|
+
opt.dbpath ||= def_dbpath
|
128
|
+
opt.mspath ||= DEFAULT_MZ_PATH
|
129
|
+
opt.params ||= DEFAULT_PARAMS_FILE
|
130
|
+
opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
|
131
|
+
opt.model ||= DEFAULT_MS_MODEL
|
132
|
+
|
133
|
+
xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
|
143
134
|
|
144
|
-
xml_objs.each do |obj|
|
145
|
-
obj.to_pepxml(obj.base_name + ".xml")
|
146
|
-
end
|
147
|
-
end
|
data/bin/fasta_shaker.rb
CHANGED
data/bin/filter.rb
ADDED
data/bin/find_aa_freq.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
|
4
|
+
require 'spec_id/aa_freqs'
|
5
|
+
|
6
|
+
if ARGV.size < 1
|
7
|
+
puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
|
8
|
+
puts "prints the amino acid frequencies of every amino acid in each fasta file"
|
9
|
+
exit
|
10
|
+
end
|
11
|
+
|
12
|
+
ARGV.each do |file|
|
13
|
+
obj = SpecID::AAFreqs.new(file)
|
14
|
+
puts file
|
15
|
+
obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
|
16
|
+
puts "#{k}: #{v}"
|
17
|
+
end
|
18
|
+
puts ""
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
data/bin/id_precision.rb
CHANGED
@@ -35,8 +35,9 @@ file = ARGV[1]
|
|
35
35
|
|
36
36
|
obj = SpecID.new(file)
|
37
37
|
re_prefix = /^#{Regexp.escape(fp_prefix)}/o
|
38
|
-
prc = proc {|it| it.
|
38
|
+
prc = proc {|it| it.prots.first.reference =~ re_prefix }
|
39
39
|
#(match, nomatch) = obj.classify(:peps, prc)
|
40
|
+
obj.peps = obj.pep_prots
|
40
41
|
(fp, tp) = obj.classify(:peps, prc)
|
41
42
|
|
42
43
|
|
@@ -126,7 +127,7 @@ end
|
|
126
127
|
files = ARGV.to_a
|
127
128
|
|
128
129
|
two_lists = files.collect do |file|
|
129
|
-
obj =
|
130
|
+
obj = Bioworks.new(file)
|
130
131
|
list = []
|
131
132
|
list.push( obj.pep_probs_by_pep_prots )
|
132
133
|
list.push( obj.pep_probs_by_seq_charge )
|
data/bin/mzxml_to_lmat.rb
CHANGED
@@ -23,6 +23,7 @@ opts = OptionParser.new do |op|
|
|
23
23
|
op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
|
24
24
|
op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
|
25
25
|
op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
|
26
|
+
op.on("-v", "--verbose") {$VERBOSE = true}
|
26
27
|
end
|
27
28
|
opts.parse!
|
28
29
|
|
@@ -52,7 +53,7 @@ ARGV.each do |file|
|
|
52
53
|
else
|
53
54
|
lmat.write(outfile)
|
54
55
|
end
|
55
|
-
puts
|
56
|
+
puts("OUTPUT: #{outfile}") if $VERBOSE
|
56
57
|
end
|
57
58
|
|
58
59
|
|
data/bin/pepproph_filter.rb
CHANGED
@@ -12,5 +12,5 @@ files = ARGV.to_a
|
|
12
12
|
cutoff = files.shift
|
13
13
|
files.each do |file|
|
14
14
|
outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
|
15
|
-
|
15
|
+
Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
|
16
16
|
end
|
data/bin/precision.rb
CHANGED