mspire 0.1.7 → 0.2.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/Rakefile
CHANGED
@@ -37,17 +37,11 @@ end
|
|
37
37
|
# DOC
|
38
38
|
###############################################
|
39
39
|
|
40
|
-
task :tutorial => [] do
|
41
|
-
sys "ruby ./script/gen_database_searching.rb"
|
42
|
-
end
|
43
|
-
tutorial_files = %w(cat_db_search two_db_search).map {|f| "doc/src/tutorial/database_searching/#{f}.page"}
|
44
|
-
tutorial_files << 'doc/src/tutorial/database_searching/index.page'
|
45
|
-
|
46
40
|
def move_and_add_webgen_header(file, newfile, src_dir, heading)
|
47
41
|
string = IO.read file
|
48
42
|
with_header = heading + string
|
49
|
-
|
50
|
-
|
43
|
+
File.open(newfile, 'w') {|v| v.print with_header }
|
44
|
+
FileUtils.mv newfile, src_dir
|
51
45
|
end
|
52
46
|
|
53
47
|
desc "copy top level files into doc/src"
|
@@ -64,13 +58,13 @@ end
|
|
64
58
|
|
65
59
|
desc "create and upload docs to server"
|
66
60
|
task :upload_docs => :html_docs do
|
67
|
-
|
61
|
+
sh "scp -i ~/.ssh/id_dsa_rubyforge -r doc/output/* jtprince@rubyforge.org:/var/www/gforge-projects/mspire/"
|
68
62
|
end
|
69
63
|
|
70
64
|
desc "creates docs in doc/html"
|
71
|
-
task :html_docs => [:cp_top_level_docs
|
72
|
-
|
73
|
-
|
65
|
+
task :html_docs => [:cp_top_level_docs] do
|
66
|
+
FileUtils.cd 'doc' do
|
67
|
+
sh "webgen"
|
74
68
|
end
|
75
69
|
end
|
76
70
|
|
@@ -89,11 +83,42 @@ end
|
|
89
83
|
|
90
84
|
desc "Run unit tests."
|
91
85
|
Rake::TestTask.new do |t|
|
86
|
+
reply = `#{gemcmd} list -l #{NAME}`
|
87
|
+
if reply.include? NAME + " ("
|
88
|
+
puts "GOING to uninstall gem '#{NAME}' for testing"
|
89
|
+
if WIN32
|
90
|
+
%x( #{gemcmd} uninstall -x #{NAME} )
|
91
|
+
else
|
92
|
+
%x( sudo #{gemcmd} uninstall -x #{NAME} )
|
93
|
+
end
|
94
|
+
end
|
92
95
|
# t.libs << "lib" ## done by default
|
93
96
|
t.test_files = FL["test/tc_*.rb"]
|
94
97
|
#t.verbose = true
|
95
98
|
end
|
96
99
|
|
100
|
+
|
101
|
+
|
102
|
+
desc "Run unit tests individual on each test"
|
103
|
+
task :test_ind do |t|
|
104
|
+
reply = `#{gemcmd} list -l #{NAME}`
|
105
|
+
if reply.include? NAME + " ("
|
106
|
+
%x( sudo #{gemcmd} uninstall -x #{NAME} )
|
107
|
+
end
|
108
|
+
|
109
|
+
# t.libs << "lib" ## done by default
|
110
|
+
test_files = FL["test/tc_*.rb"]
|
111
|
+
test_files.each do |file|
|
112
|
+
puts "TESTING: #{file.sub(/test\//,'')}"
|
113
|
+
puts `ruby -I lib #{file}`
|
114
|
+
end
|
115
|
+
#t.verbose = true
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
97
122
|
#desc "Run all tests"
|
98
123
|
#task :test_indiv do
|
99
124
|
# sys.cd "test" do
|
@@ -115,7 +140,7 @@ tm = Time.now
|
|
115
140
|
spec = Gem::Specification.new do |s|
|
116
141
|
s.platform = Gem::Platform::RUBY
|
117
142
|
s.name = NAME
|
118
|
-
s.version = "0.
|
143
|
+
s.version = "0.2.0"
|
119
144
|
s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
|
120
145
|
s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
|
121
146
|
s.email = "jprince@icmb.utexas.edu"
|
@@ -131,7 +156,9 @@ spec = Gem::Specification.new do |s|
|
|
131
156
|
s.add_dependency('libjtp', '~> 0.1.2')
|
132
157
|
s.requirements << '"xmlparser" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines.'
|
133
158
|
s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
|
134
|
-
s.requirements << 'the "t2x" binary to convert .RAW files to mzXML
|
159
|
+
s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
|
160
|
+
s.requirements << '"rake" is useful for development'
|
161
|
+
s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
|
135
162
|
s.test_files = FL["test/tc_*.rb"]
|
136
163
|
end
|
137
164
|
|
data/bin/bioworks2excel.rb
CHANGED
data/bin/bioworks_to_pepxml.rb
CHANGED
@@ -4,11 +4,10 @@
|
|
4
4
|
# GLOBAL CONSTANTS
|
5
5
|
|
6
6
|
DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
|
7
|
-
|
7
|
+
DEFAULT_MZ_PATH = "."
|
8
8
|
DEFAULT_OUTDIR = "pepxml"
|
9
9
|
DEFAULT_PARAMS_GLOB = "*.params"
|
10
10
|
DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
|
11
|
-
DEFAULT_PEPXML_VERSION = 18
|
12
11
|
DEFAULT_MS_MODEL = 'LCQ'
|
13
12
|
DEFAULT_MASS_ANALYZER = 'Ion Trap'
|
14
13
|
##############################################################
|
@@ -17,6 +16,7 @@ require 'spec_id'
|
|
17
16
|
require 'optparse'
|
18
17
|
require 'ostruct'
|
19
18
|
require 'fileutils'
|
19
|
+
require 'spec_id/srf'
|
20
20
|
|
21
21
|
# establish the default database path after examining env vars
|
22
22
|
def_dbpath = nil
|
@@ -30,13 +30,16 @@ end
|
|
30
30
|
opt = OpenStruct.new
|
31
31
|
|
32
32
|
opt_obj = OptionParser.new do |op|
|
33
|
-
|
34
|
-
|
35
|
-
op.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
progname = File.basename(__FILE__)
|
34
|
+
op.banner = "\nusage: #{progname} [options] <file>.srf ..."
|
35
|
+
op.separator "usage: #{progname} [options] <bioworks>.srg"
|
36
|
+
op.separator "usage: #{progname} [options] <bioworks>.xml"
|
37
|
+
op.separator ""
|
38
|
+
op.separator "Takes srf files or the xml exported output of Bioworks multi-consensus view"
|
39
|
+
op.separator "(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline)."
|
40
|
+
op.separator "Additionally, will group .srf files into an .srg file (like 'srf_group.rb')"
|
41
|
+
op.separator ""
|
42
|
+
op.separator "Options:"
|
40
43
|
op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
|
41
44
|
op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
|
42
45
|
|
@@ -45,19 +48,21 @@ Options:"
|
|
45
48
|
op.separator ""
|
46
49
|
op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
|
47
50
|
op.on('-d', '--dbpath path', "path to databases d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
|
48
|
-
op.on('-m', '--mspath path', "path to MS files d: '#{
|
49
|
-
op.on('--
|
50
|
-
op.on('--
|
51
|
-
op.on('
|
51
|
+
op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
|
52
|
+
op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
|
53
|
+
op.on('--model <LCQ|Orbi|string>', "MS model (xml) d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
|
54
|
+
op.on('--mass_analyzer <string>', "Mass Analyzer (xml) d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
|
52
55
|
|
53
56
|
end
|
54
57
|
|
55
58
|
more_notes = "
|
56
59
|
Notes:
|
57
60
|
|
58
|
-
mspath: Directory to RAW or mzXML
|
59
|
-
This option is
|
61
|
+
mspath: Directory to RAW or mzXML files.
|
62
|
+
This option is needed to view Pep3D files
|
63
|
+
and is critical with Bioworks 3.2 xml export files
|
60
64
|
outdir: Path will be created if it does not already exist.
|
65
|
+
(xml) : only bioworks.xml files need to include this information
|
61
66
|
model : LCQ -> 'LCQ Deca XP Plus'
|
62
67
|
: Orbi -> 'LTQ Orbitrap'
|
63
68
|
: other string -> That's the string that will be used.
|
@@ -93,55 +98,37 @@ end
|
|
93
98
|
|
94
99
|
opt.outdir ||= DEFAULT_OUTDIR
|
95
100
|
|
96
|
-
## Create dbpath if does not exist
|
97
|
-
if opt.outdir
|
98
|
-
FileUtils.mkpath(opt.outdir) unless File.exist? opt.outdir
|
99
|
-
end
|
100
101
|
|
101
102
|
files = ARGV.to_a
|
102
|
-
|
103
|
+
bioworks_file = files[0]
|
103
104
|
if files[0] =~ /\.srf/i
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
:backup_db_path => opt.dbpath || def_dbpath,
|
108
|
-
:out_path => opt.outdir,
|
109
|
-
}
|
110
|
-
xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, hash)
|
111
|
-
xml_obj.to_pepxml(xml_obj.base_name + ".xml")
|
112
|
-
end
|
113
|
-
else
|
114
|
-
## Ensure params file exists (unless opt given)
|
115
|
-
opt.params ||= DEFAULT_PARAMS_FILE
|
116
|
-
params_obj = SpecID::Sequest::Params.new(opt.params)
|
117
|
-
# Ensure the database exists!
|
118
|
-
unless File.exist?( params_obj.database )
|
119
|
-
if opt.dbpath
|
120
|
-
params_obj.database_path = opt.dbpath
|
121
|
-
else
|
122
|
-
params_obj.database_path = def_dbpath
|
123
|
-
end
|
105
|
+
srg_file = 'bioworks.srg'
|
106
|
+
if File.exist? srg_file
|
107
|
+
srg_file = 'bioworks.tmp.srg'
|
124
108
|
end
|
109
|
+
srg = SRFGroup.new(files)
|
110
|
+
srg.to_srg(srg_file)
|
111
|
+
unless File.exist? srg_file
|
112
|
+
abort "couldn't create #{srg_file} from: #{files.join(', ')}"
|
113
|
+
end
|
114
|
+
bioworks_file = srg_file
|
115
|
+
end
|
125
116
|
|
126
|
-
opt.mspath ||= DEFAULT_MZXML_PATH
|
127
|
-
opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
|
128
|
-
opt.model ||= DEFAULT_MS_MODEL
|
129
|
-
opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
|
130
|
-
|
131
|
-
case opt.model
|
132
|
-
when "LCQ"
|
133
|
-
model = 'LCQ Deca XP Plus'
|
134
|
-
when "Orbi"
|
135
|
-
model = 'LTQ Orbitrap'
|
136
|
-
else
|
137
|
-
model = opt.model
|
138
|
-
end
|
139
117
|
|
118
|
+
case opt.model
|
119
|
+
when "LCQ"
|
120
|
+
model = 'LCQ Deca XP Plus'
|
121
|
+
when "Orbi"
|
122
|
+
model = 'LTQ Orbitrap'
|
123
|
+
else
|
124
|
+
model = opt.model
|
125
|
+
end
|
140
126
|
|
141
|
-
|
142
|
-
|
127
|
+
opt.dbpath ||= def_dbpath
|
128
|
+
opt.mspath ||= DEFAULT_MZ_PATH
|
129
|
+
opt.params ||= DEFAULT_PARAMS_FILE
|
130
|
+
opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
|
131
|
+
opt.model ||= DEFAULT_MS_MODEL
|
132
|
+
|
133
|
+
xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
|
143
134
|
|
144
|
-
xml_objs.each do |obj|
|
145
|
-
obj.to_pepxml(obj.base_name + ".xml")
|
146
|
-
end
|
147
|
-
end
|
data/bin/fasta_shaker.rb
CHANGED
data/bin/filter.rb
ADDED
data/bin/find_aa_freq.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
|
4
|
+
require 'spec_id/aa_freqs'
|
5
|
+
|
6
|
+
if ARGV.size < 1
|
7
|
+
puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
|
8
|
+
puts "prints the amino acid frequencies of every amino acid in each fasta file"
|
9
|
+
exit
|
10
|
+
end
|
11
|
+
|
12
|
+
ARGV.each do |file|
|
13
|
+
obj = SpecID::AAFreqs.new(file)
|
14
|
+
puts file
|
15
|
+
obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
|
16
|
+
puts "#{k}: #{v}"
|
17
|
+
end
|
18
|
+
puts ""
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
data/bin/id_precision.rb
CHANGED
@@ -35,8 +35,9 @@ file = ARGV[1]
|
|
35
35
|
|
36
36
|
obj = SpecID.new(file)
|
37
37
|
re_prefix = /^#{Regexp.escape(fp_prefix)}/o
|
38
|
-
prc = proc {|it| it.
|
38
|
+
prc = proc {|it| it.prots.first.reference =~ re_prefix }
|
39
39
|
#(match, nomatch) = obj.classify(:peps, prc)
|
40
|
+
obj.peps = obj.pep_prots
|
40
41
|
(fp, tp) = obj.classify(:peps, prc)
|
41
42
|
|
42
43
|
|
@@ -126,7 +127,7 @@ end
|
|
126
127
|
files = ARGV.to_a
|
127
128
|
|
128
129
|
two_lists = files.collect do |file|
|
129
|
-
obj =
|
130
|
+
obj = Bioworks.new(file)
|
130
131
|
list = []
|
131
132
|
list.push( obj.pep_probs_by_pep_prots )
|
132
133
|
list.push( obj.pep_probs_by_seq_charge )
|
data/bin/mzxml_to_lmat.rb
CHANGED
@@ -23,6 +23,7 @@ opts = OptionParser.new do |op|
|
|
23
23
|
op.on("--mz_end N", Float, "m/z end (def: end of 1st full scan)") {|n| opt[:end_mz] = n.to_f}
|
24
24
|
op.on("--baseline N", Float, "value for missing indices (def: #{opt[:baseline]})") {|n| opt[:baseline] = n.to_f}
|
25
25
|
op.on("--ascii", "generates an lmata file instead") {opt[:ascii] = true}
|
26
|
+
op.on("-v", "--verbose") {$VERBOSE = true}
|
26
27
|
end
|
27
28
|
opts.parse!
|
28
29
|
|
@@ -52,7 +53,7 @@ ARGV.each do |file|
|
|
52
53
|
else
|
53
54
|
lmat.write(outfile)
|
54
55
|
end
|
55
|
-
puts
|
56
|
+
puts("OUTPUT: #{outfile}") if $VERBOSE
|
56
57
|
end
|
57
58
|
|
58
59
|
|
data/bin/pepproph_filter.rb
CHANGED
@@ -12,5 +12,5 @@ files = ARGV.to_a
|
|
12
12
|
cutoff = files.shift
|
13
13
|
files.each do |file|
|
14
14
|
outfile = file.gsub(/\.xml/, "_min#{cutoff}.xml")
|
15
|
-
|
15
|
+
Proph::Pep::Parser.new.filter_by_min_pep_prob(file, outfile, cutoff.to_f)
|
16
16
|
end
|
data/bin/precision.rb
CHANGED