protk 1.2.6.pre5 → 1.3.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +84 -45
- data/bin/add_retention_times.rb +9 -5
- data/bin/augustus_to_proteindb.rb +7 -11
- data/bin/interprophet.rb +28 -46
- data/bin/make_decoy.rb +16 -48
- data/bin/mascot_search.rb +57 -71
- data/bin/mascot_to_pepxml.rb +13 -26
- data/bin/msgfplus_search.rb +70 -107
- data/bin/omssa_search.rb +52 -109
- data/bin/peptide_prophet.rb +44 -119
- data/bin/pepxml_to_table.rb +24 -27
- data/bin/protein_prophet.rb +22 -82
- data/bin/protxml_to_gff.rb +22 -519
- data/bin/protxml_to_table.rb +2 -16
- data/bin/sixframe.rb +10 -32
- data/bin/tandem_search.rb +30 -403
- data/bin/tandem_to_pepxml.rb +43 -0
- data/bin/unimod_to_loc.rb +1 -1
- data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
- data/ext/decoymaker/extconf.rb +3 -0
- data/lib/protk/constants.rb +16 -2
- data/lib/protk/data/default_config.yml +2 -1
- data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
- data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
- data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
- data/lib/protk/data/tandem_params.xml +17 -54
- data/lib/protk/fastadb.rb +2 -2
- data/lib/protk/prophet_tool.rb +1 -1
- data/lib/protk/protxml_to_gff_tool.rb +474 -0
- data/lib/protk/search_tool.rb +58 -103
- data/lib/protk/setup_rakefile.rake +9 -5
- data/lib/protk/tandem_search_tool.rb +256 -0
- data/lib/protk/tool.rb +85 -104
- data/lib/protk.rb +1 -6
- metadata +24 -103
- data/bin/annotate_ids.rb +0 -59
- data/bin/asapratio.rb +0 -27
- data/bin/blastxml_to_table.rb +0 -119
- data/bin/correct_omssa_retention_times.rb +0 -27
- data/bin/feature_finder.rb +0 -95
- data/bin/file_convert.rb +0 -164
- data/bin/generate_omssa_loc.rb +0 -42
- data/bin/gffmerge.rb +0 -208
- data/bin/libra.rb +0 -70
- data/bin/toppas_pipeline.rb +0 -84
- data/bin/uniprot_annotation.rb +0 -141
- data/bin/xls_to_table.rb +0 -52
- data/bin/xpress.rb +0 -27
- data/ext/protk/decoymaker/extconf.rb +0 -3
- data/ext/protk/simplealign/extconf.rb +0 -3
- data/lib/protk/biotools_excel_converter.rb +0 -60
- data/lib/protk/eupathdb_gene_information_table.rb +0 -158
- data/lib/protk/gapped_aligner.rb +0 -264
- data/lib/protk/protein_annotator.rb +0 -646
- data/lib/protk/spreadsheet_extensions.rb +0 -79
- data/lib/protk/xtandem_defaults.rb +0 -11
data/lib/protk/search_tool.rb
CHANGED
@@ -13,6 +13,15 @@ require 'optparse'
|
|
13
13
|
require 'pathname'
|
14
14
|
require 'protk/tool'
|
15
15
|
|
16
|
+
class FastaDatabase
|
17
|
+
attr :name
|
18
|
+
attr :path
|
19
|
+
def initialize(name,path)
|
20
|
+
@name=name
|
21
|
+
@path=path
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
16
25
|
class SearchTool < Tool
|
17
26
|
|
18
27
|
# Initializes commandline options common to all search tools.
|
@@ -21,158 +30,104 @@ class SearchTool < Tool
|
|
21
30
|
def initialize(option_support=[])
|
22
31
|
super(option_support)
|
23
32
|
|
24
|
-
if (option_support.include? :database)
|
25
|
-
|
26
|
-
@option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
|
27
|
-
options.database = dbname
|
28
|
-
end
|
29
|
-
|
33
|
+
if (option_support.include? :database)
|
34
|
+
add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
|
30
35
|
end
|
31
36
|
|
32
37
|
if ( option_support.include? :enzyme )
|
33
|
-
|
34
|
-
@option_parser.on('--enzyme enz', 'Enzyme') do |enz|
|
35
|
-
@options.enzyme=enz
|
36
|
-
end
|
38
|
+
add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
|
37
39
|
end
|
38
40
|
|
39
41
|
if ( option_support.include? :modifications )
|
40
|
-
|
41
|
-
|
42
|
-
@options.var_mods = vm
|
43
|
-
end
|
44
|
-
|
45
|
-
@options.fix_mods = ""
|
46
|
-
@option_parser.on('--fix-mods fm', 'Fixed modifications' ) do |fm|
|
47
|
-
@options.fix_mods = fm
|
48
|
-
end
|
42
|
+
add_value_option(:var_mods,"",['--var-mods vm','Variable modifications. These should be provided in a comma separated list'])
|
43
|
+
add_value_option(:fix_mods,"",['--fix-mods fm','Fixed modifications. These should be provided in a comma separated list'])
|
49
44
|
end
|
50
45
|
|
51
46
|
if ( option_support.include? :instrument )
|
52
|
-
|
53
|
-
@option_parser.on('--instrument instrument', 'Instrument') do |instrument|
|
54
|
-
@options.instrument=instrument
|
55
|
-
end
|
47
|
+
add_value_option(:instrument,"ESI-QUAD-TOF",['--instrument instrument', 'Instrument'])
|
56
48
|
end
|
57
49
|
|
58
50
|
if ( option_support.include? :mass_tolerance_units )
|
59
|
-
|
60
|
-
|
61
|
-
@options.fragment_tolu = tolu
|
62
|
-
end
|
63
|
-
|
64
|
-
@options.precursor_tolu="ppm"
|
65
|
-
@option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
|
66
|
-
@options.precursor_tolu = tolu
|
67
|
-
end
|
51
|
+
add_value_option(:fragment_tolu,"Da",['--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da'])
|
52
|
+
add_value_option(:precursor_tolu,"ppm",['--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm'])
|
68
53
|
end
|
69
54
|
|
70
55
|
if ( option_support.include? :mass_tolerance )
|
71
|
-
|
72
|
-
|
73
|
-
@options.fragment_tol = tol
|
74
|
-
end
|
75
|
-
|
76
|
-
@options.precursor_tol=200
|
77
|
-
@option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
|
78
|
-
@options.precursor_tol = tol.to_f
|
79
|
-
end
|
56
|
+
add_value_option(:fragment_tol,0.65,['-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65'])
|
57
|
+
add_value_option(:precursor_tol,200,['-p','--precursor-ion-tol tol', 'Precursor ion mass tolerance. Default=200'])
|
80
58
|
end
|
81
59
|
|
82
60
|
if ( option_support.include? :precursor_search_type )
|
83
|
-
|
84
|
-
@option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
|
85
|
-
@options.precursor_search_type = type
|
86
|
-
end
|
61
|
+
add_value_option(:precursor_search_type,"monoisotopic",['-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)'])
|
87
62
|
end
|
88
63
|
|
89
64
|
if ( option_support.include? :strict_monoisotopic_mass )
|
90
|
-
|
91
|
-
@option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
|
92
|
-
@options.strict_monoisotopic_mass=true
|
93
|
-
end
|
65
|
+
add_boolean_option(:strict_monoisotopic_mass,false,['-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak'])
|
94
66
|
end
|
95
67
|
|
96
68
|
if ( option_support.include? :missed_cleavages )
|
97
|
-
|
98
|
-
@option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
|
99
|
-
@options.missed_cleavages = num
|
100
|
-
end
|
69
|
+
add_value_option(:missed_cleavages,2,['-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed'])
|
101
70
|
end
|
102
71
|
|
103
72
|
if ( option_support.include? :cleavage_semi )
|
104
|
-
|
105
|
-
@option_parser.on( '--no-cleavage-semi', 'Dont allow up to 1 non tryptic terminus on peptides' ) do
|
106
|
-
@options.cleavage_semi=false
|
107
|
-
end
|
73
|
+
add_boolean_option(:cleavage_semi,false,['--cleavage-semi', 'Search for peptides with up to 1 non-enzymatic cleavage site'])
|
108
74
|
end
|
109
75
|
|
110
76
|
if ( option_support.include? :respect_precursor_charges )
|
111
|
-
|
112
|
-
@option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
|
113
|
-
@options.respect_precursor_charges=true
|
114
|
-
end
|
77
|
+
add_boolean_option(:respect_precursor_charges,false,['-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options'])
|
115
78
|
end
|
116
79
|
|
117
80
|
if ( option_support.include? :searched_ions )
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
81
|
+
add_value_option(:searched_ions,"",['--searched-ions si', 'Ion series to search (default=b,y)'])
|
82
|
+
end
|
83
|
+
|
84
|
+
if ( option_support.include? :multi_isotope_search )
|
85
|
+
add_boolean_option(:multi_isotope_search,false,["--multi-isotope-search","Expand parent mass window to include windows around neighbouring isotopic peaks"])
|
122
86
|
end
|
123
87
|
|
124
88
|
if ( option_support.include? :num_peaks_for_multi_isotope_search )
|
125
|
-
|
126
|
-
@option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
|
127
|
-
@options.num_peaks_for_multi_isotope_search=np
|
128
|
-
end
|
89
|
+
add_value_option(:num_peaks_for_multi_isotope_search,0,["--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search"])
|
129
90
|
end
|
130
91
|
|
131
92
|
if ( option_support.include? :glyco)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
93
|
+
add_boolean_option(:glyco,false,['-g','--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models'])
|
94
|
+
end
|
95
|
+
|
96
|
+
if ( option_support.include? :acetyl_nterm)
|
97
|
+
add_boolean_option(:acetyl_nterm,false,['-y','--acetyl-nterm', 'Expect N-terminal acetylation as a variable mod in a search or as a parameter when building statistical models'])
|
136
98
|
end
|
137
99
|
|
138
100
|
if ( option_support.include? :methionine_oxidation)
|
139
|
-
|
140
|
-
@option_parser.on( '-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search' ) do
|
141
|
-
@options.methionine_oxidation = true
|
142
|
-
end
|
101
|
+
add_boolean_option(:methionine_oxidation,false,['-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search'])
|
143
102
|
end
|
144
103
|
|
145
104
|
if ( option_support.include? :carbamidomethyl)
|
146
|
-
|
147
|
-
@option_parser.on( '-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search' ) do
|
148
|
-
@options.carbamidomethyl = true
|
149
|
-
end
|
105
|
+
add_boolean_option(:carbamidomethyl,false,['-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search'])
|
150
106
|
end
|
151
107
|
|
152
108
|
if ( option_support.include? :maldi)
|
153
|
-
|
154
|
-
@option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
|
155
|
-
@options.maldi=true
|
156
|
-
end
|
109
|
+
add_boolean_option(:maldi,false,['-l', '--maldi', 'Run a search on MALDI data'])
|
157
110
|
end
|
111
|
+
|
112
|
+
@option_parser.summary_width=40
|
113
|
+
|
158
114
|
|
159
115
|
end
|
160
|
-
|
161
|
-
|
162
|
-
def
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
# This function returns the name of the database with an extension appropriate to the database type
|
173
|
-
#
|
174
|
-
def current_database(db_type,db=@options.database)
|
175
|
-
return Constants.new.current_database_for_name(db)
|
116
|
+
|
117
|
+
|
118
|
+
def database_info
|
119
|
+
case
|
120
|
+
when Pathname.new(@options.database).exist? # It's an explicitly named db
|
121
|
+
db_path=Pathname.new(@options.database).expand_path.to_s
|
122
|
+
db_name=Pathname.new(@options.database).basename.to_s
|
123
|
+
else
|
124
|
+
db_path=Constants.new.current_database_for_name @options.database
|
125
|
+
db_name=@options.database
|
126
|
+
end
|
127
|
+
FastaDatabase.new(db_name,db_path)
|
176
128
|
end
|
177
|
-
|
178
|
-
end
|
129
|
+
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
|
@@ -143,9 +143,10 @@ tpp_download_file = download_task tpp_url, tpp_packagefile
|
|
143
143
|
# Build
|
144
144
|
file tpp_installed_file => [@build_dir,tpp_download_file] do
|
145
145
|
sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
|
146
|
+
# use_perl_locallib_cmd="echo hi"
|
146
147
|
use_perl_locallib_cmd="eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
|
147
|
-
sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
|
148
|
-
sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
|
148
|
+
# sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
|
149
|
+
# sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
|
149
150
|
|
150
151
|
sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
|
151
152
|
|
@@ -173,6 +174,8 @@ file tpp_installed_file => [@build_dir,tpp_download_file] do
|
|
173
174
|
f.write subs_text
|
174
175
|
end
|
175
176
|
end
|
177
|
+
sh %{cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src;echo '' > ../perl/tpp_models.pl;echo '' > ../perl/exporTPP.pl;echo '' > ../CGI/show_nspbin.pl;echo '' > ../CGI/tpp_gui/tpp_gui.pl}
|
178
|
+
|
176
179
|
build_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make -s"
|
177
180
|
install_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make install"
|
178
181
|
env.log build_cmd, :info
|
@@ -354,9 +357,9 @@ file tandem_installed_file => [@build_dir,"#{@download_dir}/#{tandem_packagefile
|
|
354
357
|
tandem_dirname = "#{tandem_packagefile.chomp('.zip')}"
|
355
358
|
|
356
359
|
if ( tandem_platform=="linux") #Must compile
|
357
|
-
tandem_src_dir = "#{@build_dir}/#{tandem_dirname}
|
360
|
+
tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/src/"
|
358
361
|
sh %{cd #{tandem_src_dir}; make}
|
359
|
-
sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}
|
362
|
+
sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/bin #{env.tandem_root}/}
|
360
363
|
else
|
361
364
|
sh %{cd #{@build_dir}; cp -r ./#{tandem_packagefile.chomp('.zip')}/* #{env.tandem_root}/}
|
362
365
|
sh %{chmod u+x #{env.gpmtandem}}
|
@@ -402,8 +405,9 @@ task :galaxyenv => protk_galaxy_envfile
|
|
402
405
|
|
403
406
|
# multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
|
404
407
|
|
408
|
+
task :base => [:perl_locallib]
|
405
409
|
|
406
|
-
task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms
|
410
|
+
task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms]
|
407
411
|
|
408
412
|
# Special task when installing via toolshed
|
409
413
|
#
|
@@ -0,0 +1,256 @@
|
|
1
|
+
require 'protk/search_tool'
|
2
|
+
|
3
|
+
class String
|
4
|
+
def xtandem_modification_motif?
|
5
|
+
# 124@[ is not a modification motif, it is a residue (N-term) modification,
|
6
|
+
# so when checking if modification is a motif look for paired square brackets.
|
7
|
+
ismotif=false
|
8
|
+
case self
|
9
|
+
when /[\(\)\{\}\!]/,/\[.*\]/
|
10
|
+
ismotif=true
|
11
|
+
end
|
12
|
+
ismotif
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class TandemSearchTool < SearchTool
|
17
|
+
attr :defaults_path
|
18
|
+
attr :taxonomy_path
|
19
|
+
attr :default_data_path
|
20
|
+
|
21
|
+
attr :supported_xtandem_keys
|
22
|
+
|
23
|
+
def initialize
|
24
|
+
|
25
|
+
super([
|
26
|
+
:database,
|
27
|
+
:explicit_output,
|
28
|
+
:over_write,
|
29
|
+
:enzyme,
|
30
|
+
:modifications,
|
31
|
+
:mass_tolerance_units,
|
32
|
+
:mass_tolerance,
|
33
|
+
:multi_isotope_search,
|
34
|
+
:missed_cleavages,
|
35
|
+
:cleavage_semi,
|
36
|
+
:methionine_oxidation,
|
37
|
+
:glyco,
|
38
|
+
:acetyl_nterm,
|
39
|
+
:threads
|
40
|
+
])
|
41
|
+
|
42
|
+
@xtandem_keys_with_single_multiplicity = {
|
43
|
+
:fragment_tol => "spectrum, fragment monoisotopic mass error",
|
44
|
+
:missed_cleavages => "scoring, maximum missed cleavage sites",
|
45
|
+
:cleavage_semi => "protein, cleavage semi",
|
46
|
+
:precursor_tolu => "spectrum, parent monoisotopic mass error units",
|
47
|
+
:multi_isotope_search => "spectrum, parent monoisotopic mass isotope error",
|
48
|
+
:fragment_tolu => "spectrum, fragment monoisotopic mass error units",
|
49
|
+
:acetyl_nterm => "protein, quick acetyl",
|
50
|
+
:output_spectra => "output, spectra",
|
51
|
+
:threads => "spectrum, threads"
|
52
|
+
}
|
53
|
+
|
54
|
+
@xtandem_keys_for_precursor_tol = {
|
55
|
+
:precursor_tol => ["spectrum, parent monoisotopic mass error minus", "spectrum, parent monoisotopic mass error plus"]
|
56
|
+
}
|
57
|
+
|
58
|
+
@defaults_path="#{File.dirname(__FILE__)}/data/tandem_params.xml"
|
59
|
+
@taxonomy_path="#{File.dirname(__FILE__)}/data/taxonomy_template.xml"
|
60
|
+
@default_data_path="#{File.dirname(__FILE__)}/data/"
|
61
|
+
|
62
|
+
@option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
|
63
|
+
@options.output_suffix="_tandem"
|
64
|
+
|
65
|
+
add_value_option(:tandem_params,"isb_native",['-T', '--tandem-params tandem', 'Either the full path to an xml file containing a complete set of default parameters, or one of the following (isb_native,isb_kscore,gpm). Default is isb_native'])
|
66
|
+
add_boolean_option(:keep_params_files,false,['-K', '--keep-params-files', 'Keep X!Tandem parameter files'])
|
67
|
+
add_boolean_option(:output_spectra,false,['--output-spectra', 'Include spectra in the output file'])
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
# Galaxy changes things like @ to __at__; we need to change them back
|
73
|
+
#
|
74
|
+
def decode_modification_string(mstring)
|
75
|
+
mstring.gsub!("__at__","@")
|
76
|
+
mstring.gsub!("__oc__","{")
|
77
|
+
mstring.gsub!("__cc__","}")
|
78
|
+
mstring.gsub!("__ob__","[")
|
79
|
+
mstring.gsub!("__cb__","]")
|
80
|
+
mstring
|
81
|
+
end
|
82
|
+
|
83
|
+
def set_option(std_params, tandem_key, value)
|
84
|
+
notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
|
85
|
+
throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
|
86
|
+
notes[0].content=value.to_s
|
87
|
+
end
|
88
|
+
|
89
|
+
def append_option(std_params, tandem_key, value)
|
90
|
+
notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
|
91
|
+
if notes.length == 0
|
92
|
+
node = XML::Node.new('note')
|
93
|
+
node["type"] = "input"
|
94
|
+
node["label"] = tandem_key
|
95
|
+
node.content = value
|
96
|
+
std_params.find('/bioml')[0] << node
|
97
|
+
else
|
98
|
+
throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
|
99
|
+
notes[0].content = append_string(notes[0].content, value)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def tandem_keys_in_params_file(default_params_path)
|
104
|
+
params_parser=XML::Parser.file(default_params_path)
|
105
|
+
default_params=params_parser.parse
|
106
|
+
input_nodes=default_params.find('/bioml/note[@type="input"]')
|
107
|
+
defined_keys=[]
|
108
|
+
input_nodes.each do |node|
|
109
|
+
defined_keys << node.attributes['label']
|
110
|
+
end
|
111
|
+
defined_keys
|
112
|
+
end
|
113
|
+
|
114
|
+
def taxon_from_taxonomy_file(taxo_path)
|
115
|
+
taxo_parser=XML::Parser.file(taxo_path)
|
116
|
+
taxo_doc=taxo_parser.parse
|
117
|
+
taxon_nodes=taxo_doc.find('/bioml/taxon')
|
118
|
+
throw "Exactly one taxon entry allowed in taxonomy file but found #{taxon_nodes.length}" unless taxon_nodes.length==1
|
119
|
+
taxon_nodes[0].attributes['label']
|
120
|
+
end
|
121
|
+
|
122
|
+
def generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
|
123
|
+
|
124
|
+
#
|
125
|
+
# The TandemSearchTool class has a special defaults system
|
126
|
+
# Option values are read from (a) the commandline, (b) a defaults file, (c) commandline defaults.
|
127
|
+
# The ideal priority order is a -> b -> c
|
128
|
+
#
|
129
|
+
# In order to support this we need to read the defaults file and check options defined there
|
130
|
+
# against those defined on the commandline
|
131
|
+
#
|
132
|
+
# In addition, we support some default parameter files built-in to protk. These are treated the same
|
133
|
+
# but are specified if the user provides a keyword rather than a path
|
134
|
+
#
|
135
|
+
default_params_notes=std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
|
136
|
+
throw "Exactly one list path, default parameters note is required in the parameter file" unless default_params_notes.length==1
|
137
|
+
|
138
|
+
is_file=File.exists?(self.tandem_params)
|
139
|
+
if is_file
|
140
|
+
default_params_notes[0].content="#{self.tandem_params}"
|
141
|
+
else
|
142
|
+
default_params_notes[0].content="#{@default_data_path}tandem_#{self.tandem_params}_defaults.xml"
|
143
|
+
end
|
144
|
+
|
145
|
+
|
146
|
+
keys_in_params_file=tandem_keys_in_params_file(default_params_notes[0].content)
|
147
|
+
keys_on_commandline=@options_defined_by_user.keys
|
148
|
+
|
149
|
+
# Set the input and output paths
|
150
|
+
#
|
151
|
+
set_option(std_params,"spectrum, path",input_path)
|
152
|
+
set_option(std_params,"output, path",output_path)
|
153
|
+
|
154
|
+
# Taxonomy and Database
|
155
|
+
#
|
156
|
+
set_option(std_params,"list path, taxonomy information",taxo_path)
|
157
|
+
set_option(std_params,"protein, taxon",db_info.name)
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
# set_option(std_params, "protein, cleavage semi", self.cleavage_semi ? "yes" : "no")
|
162
|
+
|
163
|
+
# Simple options (unique with a 1:1 mapping to parameters from this tool)
|
164
|
+
#
|
165
|
+
@xtandem_keys_with_single_multiplicity.each_pair do |commandline_option_key, xtandem_key|
|
166
|
+
if (!keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(commandline_option_key))
|
167
|
+
opt_val=self.send(commandline_option_key)
|
168
|
+
if opt_val.is_a?(TrueClass) || opt_val.is_a?(FalseClass)
|
169
|
+
opt_val = opt_val ? "yes" : "no"
|
170
|
+
end
|
171
|
+
append_option(std_params,xtandem_key,opt_val.to_s)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# Precursor mass tolerance is a special case as it requires two xtandem options
|
176
|
+
#
|
177
|
+
@xtandem_keys_for_precursor_tol.each_pair do |commandline_option_key, xtandem_keys|
|
178
|
+
xtandem_keys.each do |xtandem_key|
|
179
|
+
if (!keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(commandline_option_key))
|
180
|
+
append_option(std_params,xtandem_key,(self.precursor_tol.to_f*0.5).to_s)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
# Per residue Fixed and Variable Modifications
|
186
|
+
#
|
187
|
+
# These can be added using a variety of methods in xtandem
|
188
|
+
#
|
189
|
+
# residue, potential modification mass
|
190
|
+
# residue, modification mass
|
191
|
+
# residue, potential modification motif
|
192
|
+
#
|
193
|
+
# We support these primarily via the var_mods and fix_mods commandline params
|
194
|
+
# Modification masses and/or motifs can be entered via these arguments
|
195
|
+
#
|
196
|
+
|
197
|
+
var_mods = self.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
|
198
|
+
var_mods=var_mods.collect {|mod| decode_modification_string(mod) }
|
199
|
+
|
200
|
+
# var_mods allows motifs as well as standard mods. These should be in a separate array
|
201
|
+
var_motifs = [].replace(var_mods)
|
202
|
+
var_mods.delete_if {|mod| mod.xtandem_modification_motif? }
|
203
|
+
var_motifs.keep_if {|mod| mod.xtandem_modification_motif? }
|
204
|
+
|
205
|
+
fix_mods = self.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
|
206
|
+
fix_mods=fix_mods.collect {|mod| decode_modification_string(mod)}
|
207
|
+
|
208
|
+
# We also support the --glyco and --methionineo shortcuts.
|
209
|
+
# Add these here. No check is made for duplication
|
210
|
+
#
|
211
|
+
var_motifs << "0.998@N!{P}[ST]" if self.glyco
|
212
|
+
var_mods << "15.994915@M" if self.methionine_oxidation
|
213
|
+
|
214
|
+
append_option(std_params,"residue, modification mass",fix_mods.join(",")) unless fix_mods.length==0
|
215
|
+
append_option(std_params,"residue, potential modification mass",var_mods.join(",")) unless var_mods.length==0
|
216
|
+
append_option(std_params,"residue, potential modification motif",var_motifs.join(",")) unless var_motifs.length==0
|
217
|
+
|
218
|
+
std_params
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
public
|
223
|
+
def taxonomy_doc(db_info)
|
224
|
+
throw "Invalid input db_info must be a FastaDatabase object" unless db_info.class==FastaDatabase
|
225
|
+
database_path=db_info.path
|
226
|
+
taxon=db_info.name
|
227
|
+
# Parse taxonomy template file
|
228
|
+
#
|
229
|
+
taxo_parser=XML::Parser.file(@taxonomy_path)
|
230
|
+
taxo_doc=taxo_parser.parse
|
231
|
+
|
232
|
+
taxon_label=taxo_doc.find('/bioml/taxon')
|
233
|
+
throw "Exactly one taxon label is required in the taxonomy_template file" unless taxon_label.length==1
|
234
|
+
taxon_label[0].attributes['label']=taxon
|
235
|
+
|
236
|
+
db_file=taxo_doc.find('/bioml/taxon/file')
|
237
|
+
throw "Exactly one database file is required in the taxonomy_template file" unless db_file.length==1
|
238
|
+
db_file[0].attributes['URL']=database_path
|
239
|
+
|
240
|
+
taxo_doc
|
241
|
+
end
|
242
|
+
|
243
|
+
|
244
|
+
def params_doc(db_info,taxo_path,input_path,output_path)
|
245
|
+
params_parser=XML::Parser.file(@defaults_path)
|
246
|
+
std_params=params_parser.parse
|
247
|
+
|
248
|
+
|
249
|
+
throw "Invalid input db_info must be a FastaDatabase object" unless db_info.class==FastaDatabase
|
250
|
+
|
251
|
+
generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
|
252
|
+
end
|
253
|
+
|
254
|
+
|
255
|
+
|
256
|
+
end
|