protk 1.2.6.pre5 → 1.3.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +84 -45
- data/bin/add_retention_times.rb +9 -5
- data/bin/augustus_to_proteindb.rb +7 -11
- data/bin/interprophet.rb +28 -46
- data/bin/make_decoy.rb +16 -48
- data/bin/mascot_search.rb +57 -71
- data/bin/mascot_to_pepxml.rb +13 -26
- data/bin/msgfplus_search.rb +70 -107
- data/bin/omssa_search.rb +52 -109
- data/bin/peptide_prophet.rb +44 -119
- data/bin/pepxml_to_table.rb +24 -27
- data/bin/protein_prophet.rb +22 -82
- data/bin/protxml_to_gff.rb +22 -519
- data/bin/protxml_to_table.rb +2 -16
- data/bin/sixframe.rb +10 -32
- data/bin/tandem_search.rb +30 -403
- data/bin/tandem_to_pepxml.rb +43 -0
- data/bin/unimod_to_loc.rb +1 -1
- data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
- data/ext/decoymaker/extconf.rb +3 -0
- data/lib/protk/constants.rb +16 -2
- data/lib/protk/data/default_config.yml +2 -1
- data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
- data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
- data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
- data/lib/protk/data/tandem_params.xml +17 -54
- data/lib/protk/fastadb.rb +2 -2
- data/lib/protk/prophet_tool.rb +1 -1
- data/lib/protk/protxml_to_gff_tool.rb +474 -0
- data/lib/protk/search_tool.rb +58 -103
- data/lib/protk/setup_rakefile.rake +9 -5
- data/lib/protk/tandem_search_tool.rb +256 -0
- data/lib/protk/tool.rb +85 -104
- data/lib/protk.rb +1 -6
- metadata +24 -103
- data/bin/annotate_ids.rb +0 -59
- data/bin/asapratio.rb +0 -27
- data/bin/blastxml_to_table.rb +0 -119
- data/bin/correct_omssa_retention_times.rb +0 -27
- data/bin/feature_finder.rb +0 -95
- data/bin/file_convert.rb +0 -164
- data/bin/generate_omssa_loc.rb +0 -42
- data/bin/gffmerge.rb +0 -208
- data/bin/libra.rb +0 -70
- data/bin/toppas_pipeline.rb +0 -84
- data/bin/uniprot_annotation.rb +0 -141
- data/bin/xls_to_table.rb +0 -52
- data/bin/xpress.rb +0 -27
- data/ext/protk/decoymaker/extconf.rb +0 -3
- data/ext/protk/simplealign/extconf.rb +0 -3
- data/lib/protk/biotools_excel_converter.rb +0 -60
- data/lib/protk/eupathdb_gene_information_table.rb +0 -158
- data/lib/protk/gapped_aligner.rb +0 -264
- data/lib/protk/protein_annotator.rb +0 -646
- data/lib/protk/spreadsheet_extensions.rb +0 -79
- data/lib/protk/xtandem_defaults.rb +0 -11
data/lib/protk/search_tool.rb
CHANGED
@@ -13,6 +13,15 @@ require 'optparse'
|
|
13
13
|
require 'pathname'
|
14
14
|
require 'protk/tool'
|
15
15
|
|
16
|
+
class FastaDatabase
|
17
|
+
attr :name
|
18
|
+
attr :path
|
19
|
+
def initialize(name,path)
|
20
|
+
@name=name
|
21
|
+
@path=path
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
16
25
|
class SearchTool < Tool
|
17
26
|
|
18
27
|
# Initializes commandline options common to all search tools.
|
@@ -21,158 +30,104 @@ class SearchTool < Tool
|
|
21
30
|
def initialize(option_support=[])
|
22
31
|
super(option_support)
|
23
32
|
|
24
|
-
if (option_support.include? :database)
|
25
|
-
|
26
|
-
@option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
|
27
|
-
options.database = dbname
|
28
|
-
end
|
29
|
-
|
33
|
+
if (option_support.include? :database)
|
34
|
+
add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
|
30
35
|
end
|
31
36
|
|
32
37
|
if ( option_support.include? :enzyme )
|
33
|
-
|
34
|
-
@option_parser.on('--enzyme enz', 'Enzyme') do |enz|
|
35
|
-
@options.enzyme=enz
|
36
|
-
end
|
38
|
+
add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
|
37
39
|
end
|
38
40
|
|
39
41
|
if ( option_support.include? :modifications )
|
40
|
-
|
41
|
-
|
42
|
-
@options.var_mods = vm
|
43
|
-
end
|
44
|
-
|
45
|
-
@options.fix_mods = ""
|
46
|
-
@option_parser.on('--fix-mods fm', 'Fixed modifications' ) do |fm|
|
47
|
-
@options.fix_mods = fm
|
48
|
-
end
|
42
|
+
add_value_option(:var_mods,"",['--var-mods vm','Variable modifications. These should be provided in a comma separated list'])
|
43
|
+
add_value_option(:fix_mods,"",['--fix-mods fm','Fixed modifications. These should be provided in a comma separated list'])
|
49
44
|
end
|
50
45
|
|
51
46
|
if ( option_support.include? :instrument )
|
52
|
-
|
53
|
-
@option_parser.on('--instrument instrument', 'Instrument') do |instrument|
|
54
|
-
@options.instrument=instrument
|
55
|
-
end
|
47
|
+
add_value_option(:instrument,"ESI-QUAD-TOF",['--instrument instrument', 'Instrument'])
|
56
48
|
end
|
57
49
|
|
58
50
|
if ( option_support.include? :mass_tolerance_units )
|
59
|
-
|
60
|
-
|
61
|
-
@options.fragment_tolu = tolu
|
62
|
-
end
|
63
|
-
|
64
|
-
@options.precursor_tolu="ppm"
|
65
|
-
@option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
|
66
|
-
@options.precursor_tolu = tolu
|
67
|
-
end
|
51
|
+
add_value_option(:fragment_tolu,"Da",['--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da'])
|
52
|
+
add_value_option(:precursor_tolu,"ppm",['--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm'])
|
68
53
|
end
|
69
54
|
|
70
55
|
if ( option_support.include? :mass_tolerance )
|
71
|
-
|
72
|
-
|
73
|
-
@options.fragment_tol = tol
|
74
|
-
end
|
75
|
-
|
76
|
-
@options.precursor_tol=200
|
77
|
-
@option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
|
78
|
-
@options.precursor_tol = tol.to_f
|
79
|
-
end
|
56
|
+
add_value_option(:fragment_tol,0.65,['-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65'])
|
57
|
+
add_value_option(:precursor_tol,200,['-p','--precursor-ion-tol tol', 'Precursor ion mass tolerance. Default=200'])
|
80
58
|
end
|
81
59
|
|
82
60
|
if ( option_support.include? :precursor_search_type )
|
83
|
-
|
84
|
-
@option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
|
85
|
-
@options.precursor_search_type = type
|
86
|
-
end
|
61
|
+
add_value_option(:precursor_search_type,"monoisotopic",['-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)'])
|
87
62
|
end
|
88
63
|
|
89
64
|
if ( option_support.include? :strict_monoisotopic_mass )
|
90
|
-
|
91
|
-
@option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
|
92
|
-
@options.strict_monoisotopic_mass=true
|
93
|
-
end
|
65
|
+
add_boolean_option(:strict_monoisotopic_mass,false,['-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak'])
|
94
66
|
end
|
95
67
|
|
96
68
|
if ( option_support.include? :missed_cleavages )
|
97
|
-
|
98
|
-
@option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
|
99
|
-
@options.missed_cleavages = num
|
100
|
-
end
|
69
|
+
add_value_option(:missed_cleavages,2,['-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed'])
|
101
70
|
end
|
102
71
|
|
103
72
|
if ( option_support.include? :cleavage_semi )
|
104
|
-
|
105
|
-
@option_parser.on( '--no-cleavage-semi', 'Dont allow up to 1 non tryptic terminus on peptides' ) do
|
106
|
-
@options.cleavage_semi=false
|
107
|
-
end
|
73
|
+
add_boolean_option(:cleavage_semi,false,['--cleavage-semi', 'Search for peptides with up to 1 non-enzymatic cleavage site'])
|
108
74
|
end
|
109
75
|
|
110
76
|
if ( option_support.include? :respect_precursor_charges )
|
111
|
-
|
112
|
-
@option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
|
113
|
-
@options.respect_precursor_charges=true
|
114
|
-
end
|
77
|
+
add_boolean_option(:respect_precursor_charges,false,['-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options'])
|
115
78
|
end
|
116
79
|
|
117
80
|
if ( option_support.include? :searched_ions )
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
81
|
+
add_value_option(:searched_ions,"",['--searched-ions si', 'Ion series to search (default=b,y)'])
|
82
|
+
end
|
83
|
+
|
84
|
+
if ( option_support.include? :multi_isotope_search )
|
85
|
+
add_boolean_option(:multi_isotope_search,false,["--multi-isotope-search","Expand parent mass window to include windows around neighbouring isotopic peaks"])
|
122
86
|
end
|
123
87
|
|
124
88
|
if ( option_support.include? :num_peaks_for_multi_isotope_search )
|
125
|
-
|
126
|
-
@option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
|
127
|
-
@options.num_peaks_for_multi_isotope_search=np
|
128
|
-
end
|
89
|
+
add_value_option(:num_peaks_for_multi_isotope_search,0,["--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search"])
|
129
90
|
end
|
130
91
|
|
131
92
|
if ( option_support.include? :glyco)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
93
|
+
add_boolean_option(:glyco,false,['-g','--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models'])
|
94
|
+
end
|
95
|
+
|
96
|
+
if ( option_support.include? :acetyl_nterm)
|
97
|
+
add_boolean_option(:acetyl_nterm,false,['-y','--acetyl-nterm', 'Expect N-terminal acetylation as a variable mod in a search or as a parameter when building statistical models'])
|
136
98
|
end
|
137
99
|
|
138
100
|
if ( option_support.include? :methionine_oxidation)
|
139
|
-
|
140
|
-
@option_parser.on( '-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search' ) do
|
141
|
-
@options.methionine_oxidation = true
|
142
|
-
end
|
101
|
+
add_boolean_option(:methionine_oxidation,false,['-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search'])
|
143
102
|
end
|
144
103
|
|
145
104
|
if ( option_support.include? :carbamidomethyl)
|
146
|
-
|
147
|
-
@option_parser.on( '-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search' ) do
|
148
|
-
@options.carbamidomethyl = true
|
149
|
-
end
|
105
|
+
add_boolean_option(:carbamidomethyl,false,['-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search'])
|
150
106
|
end
|
151
107
|
|
152
108
|
if ( option_support.include? :maldi)
|
153
|
-
|
154
|
-
@option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
|
155
|
-
@options.maldi=true
|
156
|
-
end
|
109
|
+
add_boolean_option(:maldi,false,['-l', '--maldi', 'Run a search on MALDI data'])
|
157
110
|
end
|
111
|
+
|
112
|
+
@option_parser.summary_width=40
|
113
|
+
|
158
114
|
|
159
115
|
end
|
160
|
-
|
161
|
-
|
162
|
-
def
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
# This function returns the name of the database with an extension appropriate to the database type
|
173
|
-
#
|
174
|
-
def current_database(db_type,db=@options.database)
|
175
|
-
return Constants.new.current_database_for_name(db)
|
116
|
+
|
117
|
+
|
118
|
+
def database_info
|
119
|
+
case
|
120
|
+
when Pathname.new(@options.database).exist? # It's an explicitly named db
|
121
|
+
db_path=Pathname.new(@options.database).expand_path.to_s
|
122
|
+
db_name=Pathname.new(@options.database).basename.to_s
|
123
|
+
else
|
124
|
+
db_path=Constants.new.current_database_for_name @options.database
|
125
|
+
db_name=@options.database
|
126
|
+
end
|
127
|
+
FastaDatabase.new(db_name,db_path)
|
176
128
|
end
|
177
|
-
|
178
|
-
end
|
129
|
+
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
|
@@ -143,9 +143,10 @@ tpp_download_file = download_task tpp_url, tpp_packagefile
|
|
143
143
|
# Build
|
144
144
|
file tpp_installed_file => [@build_dir,tpp_download_file] do
|
145
145
|
sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
|
146
|
+
# use_perl_locallib_cmd="echo hi"
|
146
147
|
use_perl_locallib_cmd="eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
|
147
|
-
sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
|
148
|
-
sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
|
148
|
+
# sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
|
149
|
+
# sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
|
149
150
|
|
150
151
|
sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
|
151
152
|
|
@@ -173,6 +174,8 @@ file tpp_installed_file => [@build_dir,tpp_download_file] do
|
|
173
174
|
f.write subs_text
|
174
175
|
end
|
175
176
|
end
|
177
|
+
sh %{cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src;echo '' > ../perl/tpp_models.pl;echo '' > ../perl/exporTPP.pl;echo '' > ../CGI/show_nspbin.pl;echo '' > ../CGI/tpp_gui/tpp_gui.pl}
|
178
|
+
|
176
179
|
build_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make -s"
|
177
180
|
install_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make install"
|
178
181
|
env.log build_cmd, :info
|
@@ -354,9 +357,9 @@ file tandem_installed_file => [@build_dir,"#{@download_dir}/#{tandem_packagefile
|
|
354
357
|
tandem_dirname = "#{tandem_packagefile.chomp('.zip')}"
|
355
358
|
|
356
359
|
if ( tandem_platform=="linux") #Must compile
|
357
|
-
tandem_src_dir = "#{@build_dir}/#{tandem_dirname}
|
360
|
+
tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/src/"
|
358
361
|
sh %{cd #{tandem_src_dir}; make}
|
359
|
-
sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}
|
362
|
+
sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/bin #{env.tandem_root}/}
|
360
363
|
else
|
361
364
|
sh %{cd #{@build_dir}; cp -r ./#{tandem_packagefile.chomp('.zip')}/* #{env.tandem_root}/}
|
362
365
|
sh %{chmod u+x #{env.gpmtandem}}
|
@@ -402,8 +405,9 @@ task :galaxyenv => protk_galaxy_envfile
|
|
402
405
|
|
403
406
|
# multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
|
404
407
|
|
408
|
+
task :base => [:perl_locallib]
|
405
409
|
|
406
|
-
task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms
|
410
|
+
task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms]
|
407
411
|
|
408
412
|
# Special task when installing via toolshed
|
409
413
|
#
|
@@ -0,0 +1,256 @@
|
|
1
|
+
require 'protk/search_tool'
|
2
|
+
|
3
|
+
class String
|
4
|
+
def xtandem_modification_motif?
|
5
|
+
# 124@[ is not a modification motif, it is a residue (N-term) modification,
|
6
|
+
# so when checking if modification is a motif look for paired square brackets.
|
7
|
+
ismotif=false
|
8
|
+
case self
|
9
|
+
when /[\(\)\{\}\!]/,/\[.*\]/
|
10
|
+
ismotif=true
|
11
|
+
end
|
12
|
+
ismotif
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class TandemSearchTool < SearchTool
|
17
|
+
attr :defaults_path
|
18
|
+
attr :taxonomy_path
|
19
|
+
attr :default_data_path
|
20
|
+
|
21
|
+
attr :supported_xtandem_keys
|
22
|
+
|
23
|
+
def initialize
|
24
|
+
|
25
|
+
super([
|
26
|
+
:database,
|
27
|
+
:explicit_output,
|
28
|
+
:over_write,
|
29
|
+
:enzyme,
|
30
|
+
:modifications,
|
31
|
+
:mass_tolerance_units,
|
32
|
+
:mass_tolerance,
|
33
|
+
:multi_isotope_search,
|
34
|
+
:missed_cleavages,
|
35
|
+
:cleavage_semi,
|
36
|
+
:methionine_oxidation,
|
37
|
+
:glyco,
|
38
|
+
:acetyl_nterm,
|
39
|
+
:threads
|
40
|
+
])
|
41
|
+
|
42
|
+
@xtandem_keys_with_single_multiplicity = {
|
43
|
+
:fragment_tol => "spectrum, fragment monoisotopic mass error",
|
44
|
+
:missed_cleavages => "scoring, maximum missed cleavage sites",
|
45
|
+
:cleavage_semi => "protein, cleavage semi",
|
46
|
+
:precursor_tolu => "spectrum, parent monoisotopic mass error units",
|
47
|
+
:multi_isotope_search => "spectrum, parent monoisotopic mass isotope error",
|
48
|
+
:fragment_tolu => "spectrum, fragment monoisotopic mass error units",
|
49
|
+
:acetyl_nterm => "protein, quick acetyl",
|
50
|
+
:output_spectra => "output, spectra",
|
51
|
+
:threads => "spectrum, threads"
|
52
|
+
}
|
53
|
+
|
54
|
+
@xtandem_keys_for_precursor_tol = {
|
55
|
+
:precursor_tol => ["spectrum, parent monoisotopic mass error minus", "spectrum, parent monoisotopic mass error plus"]
|
56
|
+
}
|
57
|
+
|
58
|
+
@defaults_path="#{File.dirname(__FILE__)}/data/tandem_params.xml"
|
59
|
+
@taxonomy_path="#{File.dirname(__FILE__)}/data/taxonomy_template.xml"
|
60
|
+
@default_data_path="#{File.dirname(__FILE__)}/data/"
|
61
|
+
|
62
|
+
@option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
|
63
|
+
@options.output_suffix="_tandem"
|
64
|
+
|
65
|
+
add_value_option(:tandem_params,"isb_native",['-T', '--tandem-params tandem', 'Either the full path to an xml file containing a complete set of default parameters, or one of the following (isb_native,isb_kscore,gpm). Default is isb_native'])
|
66
|
+
add_boolean_option(:keep_params_files,false,['-K', '--keep-params-files', 'Keep X!Tandem parameter files'])
|
67
|
+
add_boolean_option(:output_spectra,false,['--output-spectra', 'Include spectra in the output file'])
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
# Galaxy changes things like @ to __at__ we need to change it back
|
73
|
+
#
|
74
|
+
def decode_modification_string(mstring)
|
75
|
+
mstring.gsub!("__at__","@")
|
76
|
+
mstring.gsub!("__oc__","{")
|
77
|
+
mstring.gsub!("__cc__","}")
|
78
|
+
mstring.gsub!("__ob__","[")
|
79
|
+
mstring.gsub!("__cb__","]")
|
80
|
+
mstring
|
81
|
+
end
|
82
|
+
|
83
|
+
def set_option(std_params, tandem_key, value)
|
84
|
+
notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
|
85
|
+
throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
|
86
|
+
notes[0].content=value.to_s
|
87
|
+
end
|
88
|
+
|
89
|
+
def append_option(std_params, tandem_key, value)
|
90
|
+
notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
|
91
|
+
if notes.length == 0
|
92
|
+
node = XML::Node.new('note')
|
93
|
+
node["type"] = "input"
|
94
|
+
node["label"] = tandem_key
|
95
|
+
node.content = value
|
96
|
+
std_params.find('/bioml')[0] << node
|
97
|
+
else
|
98
|
+
throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
|
99
|
+
notes[0].content = append_string(notes[0].content, value)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def tandem_keys_in_params_file(default_params_path)
|
104
|
+
params_parser=XML::Parser.file(default_params_path)
|
105
|
+
default_params=params_parser.parse
|
106
|
+
input_nodes=default_params.find('/bioml/note[@type="input"]')
|
107
|
+
defined_keys=[]
|
108
|
+
input_nodes.each do |node|
|
109
|
+
defined_keys << node.attributes['label']
|
110
|
+
end
|
111
|
+
defined_keys
|
112
|
+
end
|
113
|
+
|
114
|
+
def taxon_from_taxonomy_file(taxo_path)
|
115
|
+
taxo_parser=XML::Parser.file(taxo_path)
|
116
|
+
taxo_doc=taxo_parser.parse
|
117
|
+
taxon_nodes=taxo_doc.find('/bioml/taxon')
|
118
|
+
throw "Exactly one taxon entry allowed in taxonomy file but found #{taxon_nodes.length}" unless taxon_nodes.length==1
|
119
|
+
taxon_nodes[0].attributes['label']
|
120
|
+
end
|
121
|
+
|
122
|
+
def generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
|
123
|
+
|
124
|
+
#
|
125
|
+
# The TandemSearchTool class has a special defaults system
|
126
|
+
# Defaults are read from (a) The commandline (b) A defaults file (c) commandline defaults.
|
127
|
+
# The ideal priority order is a -> b -> c
|
128
|
+
#
|
129
|
+
# In order to support this we need to read the defaults file and check options defined there
|
130
|
+
# against those defined on the commandline
|
131
|
+
#
|
132
|
+
# In addition, we support some default parameter files built-in to protk. These are treated the same
|
133
|
+
# but are specified if the user provides a keyword rather than a path
|
134
|
+
#
|
135
|
+
default_params_notes=std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
|
136
|
+
throw "Exactly one list path, default parameters note is required in the parameter file" unless default_params_notes.length==1
|
137
|
+
|
138
|
+
is_file=File.exists?(self.tandem_params)
|
139
|
+
if is_file
|
140
|
+
default_params_notes[0].content="#{self.tandem_params}"
|
141
|
+
else
|
142
|
+
default_params_notes[0].content="#{@default_data_path}tandem_#{self.tandem_params}_defaults.xml"
|
143
|
+
end
|
144
|
+
|
145
|
+
|
146
|
+
keys_in_params_file=tandem_keys_in_params_file(default_params_notes[0].content)
|
147
|
+
keys_on_commandline=@options_defined_by_user.keys
|
148
|
+
|
149
|
+
# Set the input and output paths
|
150
|
+
#
|
151
|
+
set_option(std_params,"spectrum, path",input_path)
|
152
|
+
set_option(std_params,"output, path",output_path)
|
153
|
+
|
154
|
+
# Taxonomy and Database
|
155
|
+
#
|
156
|
+
set_option(std_params,"list path, taxonomy information",taxo_path)
|
157
|
+
set_option(std_params,"protein, taxon",db_info.name)
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
# set_option(std_params, "protein, cleavage semi", self.cleavage_semi ? "yes" : "no")
|
162
|
+
|
163
|
+
# Simple options (unique with a 1:1 mapping to parameters from this tool)
|
164
|
+
#
|
165
|
+
@xtandem_keys_with_single_multiplicity.each_pair do |commandline_option_key, xtandem_key|
|
166
|
+
if (!keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(commandline_option_key))
|
167
|
+
opt_val=self.send(commandline_option_key)
|
168
|
+
if opt_val.is_a?(TrueClass) || opt_val.is_a?(FalseClass)
|
169
|
+
opt_val = opt_val ? "yes" : "no"
|
170
|
+
end
|
171
|
+
append_option(std_params,xtandem_key,opt_val.to_s)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# Precursor mass tolerance is a special case as it requires two xtandem options
|
176
|
+
#
|
177
|
+
@xtandem_keys_for_precursor_tol.each_pair do |commandline_option_key, xtandem_keys|
|
178
|
+
xtandem_keys.each do |xtandem_key|
|
179
|
+
if (!keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(commandline_option_key))
|
180
|
+
append_option(std_params,xtandem_key,(self.precursor_tol.to_f*0.5).to_s)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
# Per residue Fixed and Variable Modifications
|
186
|
+
#
|
187
|
+
# These can be added using a variety of methods in xtandem
|
188
|
+
#
|
189
|
+
# residue, potential modification mass
|
190
|
+
# residue, modification mass
|
191
|
+
# residue, potential modification motif
|
192
|
+
#
|
193
|
+
# We support these primarily via the var_mods and fix_mods commandline params
|
194
|
+
# Modification masses and/or motifs can be entered via these arguments
|
195
|
+
#
|
196
|
+
|
197
|
+
var_mods = self.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
|
198
|
+
var_mods=var_mods.collect {|mod| decode_modification_string(mod) }
|
199
|
+
|
200
|
+
# var_mods allows motif's as well as standard mods. These should be in a separate array
|
201
|
+
var_motifs = [].replace(var_mods)
|
202
|
+
var_mods.delete_if {|mod| mod.xtandem_modification_motif? }
|
203
|
+
var_motifs.keep_if {|mod| mod.xtandem_modification_motif? }
|
204
|
+
|
205
|
+
fix_mods = self.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
|
206
|
+
fix_mods=fix_mods.collect {|mod| decode_modification_string(mod)}
|
207
|
+
|
208
|
+
# We also support the --glyco and --methionineo shortcuts.
|
209
|
+
# Add these here. No check is made for duplication
|
210
|
+
#
|
211
|
+
var_motifs << "0.998@N!{P}[ST]" if self.glyco
|
212
|
+
var_mods << "15.994915@M" if self.methionine_oxidation
|
213
|
+
|
214
|
+
append_option(std_params,"residue, modification mass",fix_mods.join(",")) unless fix_mods.length==0
|
215
|
+
append_option(std_params,"residue, potential modification mass",var_mods.join(",")) unless var_mods.length==0
|
216
|
+
append_option(std_params,"residue, potential modification motif",var_motifs.join(",")) unless var_motifs.length==0
|
217
|
+
|
218
|
+
std_params
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
public
|
223
|
+
def taxonomy_doc(db_info)
|
224
|
+
throw "Invalid input db_info must be a FastaDatabase object" unless db_info.class==FastaDatabase
|
225
|
+
database_path=db_info.path
|
226
|
+
taxon=db_info.name
|
227
|
+
# Parse taxonomy template file
|
228
|
+
#
|
229
|
+
taxo_parser=XML::Parser.file(@taxonomy_path)
|
230
|
+
taxo_doc=taxo_parser.parse
|
231
|
+
|
232
|
+
taxon_label=taxo_doc.find('/bioml/taxon')
|
233
|
+
throw "Exactly one taxon label is required in the taxonomy_template file" unless taxon_label.length==1
|
234
|
+
taxon_label[0].attributes['label']=taxon
|
235
|
+
|
236
|
+
db_file=taxo_doc.find('/bioml/taxon/file')
|
237
|
+
throw "Exactly one database file is required in the taxonomy_template file" unless db_file.length==1
|
238
|
+
db_file[0].attributes['URL']=database_path
|
239
|
+
|
240
|
+
taxo_doc
|
241
|
+
end
|
242
|
+
|
243
|
+
|
244
|
+
def params_doc(db_info,taxo_path,input_path,output_path)
|
245
|
+
params_parser=XML::Parser.file(@defaults_path)
|
246
|
+
std_params=params_parser.parse
|
247
|
+
|
248
|
+
|
249
|
+
throw "Invalid input db_info must be a FastaDatabase object" unless db_info.class==FastaDatabase
|
250
|
+
|
251
|
+
generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
|
252
|
+
end
|
253
|
+
|
254
|
+
|
255
|
+
|
256
|
+
end
|