protk 1.2.6.pre5 → 1.3.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +84 -45
  3. data/bin/add_retention_times.rb +9 -5
  4. data/bin/augustus_to_proteindb.rb +7 -11
  5. data/bin/interprophet.rb +28 -46
  6. data/bin/make_decoy.rb +16 -48
  7. data/bin/mascot_search.rb +57 -71
  8. data/bin/mascot_to_pepxml.rb +13 -26
  9. data/bin/msgfplus_search.rb +70 -107
  10. data/bin/omssa_search.rb +52 -109
  11. data/bin/peptide_prophet.rb +44 -119
  12. data/bin/pepxml_to_table.rb +24 -27
  13. data/bin/protein_prophet.rb +22 -82
  14. data/bin/protxml_to_gff.rb +22 -519
  15. data/bin/protxml_to_table.rb +2 -16
  16. data/bin/sixframe.rb +10 -32
  17. data/bin/tandem_search.rb +30 -403
  18. data/bin/tandem_to_pepxml.rb +43 -0
  19. data/bin/unimod_to_loc.rb +1 -1
  20. data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
  21. data/ext/decoymaker/extconf.rb +3 -0
  22. data/lib/protk/constants.rb +16 -2
  23. data/lib/protk/data/default_config.yml +2 -1
  24. data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
  25. data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
  26. data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
  27. data/lib/protk/data/tandem_params.xml +17 -54
  28. data/lib/protk/fastadb.rb +2 -2
  29. data/lib/protk/prophet_tool.rb +1 -1
  30. data/lib/protk/protxml_to_gff_tool.rb +474 -0
  31. data/lib/protk/search_tool.rb +58 -103
  32. data/lib/protk/setup_rakefile.rake +9 -5
  33. data/lib/protk/tandem_search_tool.rb +256 -0
  34. data/lib/protk/tool.rb +85 -104
  35. data/lib/protk.rb +1 -6
  36. metadata +24 -103
  37. data/bin/annotate_ids.rb +0 -59
  38. data/bin/asapratio.rb +0 -27
  39. data/bin/blastxml_to_table.rb +0 -119
  40. data/bin/correct_omssa_retention_times.rb +0 -27
  41. data/bin/feature_finder.rb +0 -95
  42. data/bin/file_convert.rb +0 -164
  43. data/bin/generate_omssa_loc.rb +0 -42
  44. data/bin/gffmerge.rb +0 -208
  45. data/bin/libra.rb +0 -70
  46. data/bin/toppas_pipeline.rb +0 -84
  47. data/bin/uniprot_annotation.rb +0 -141
  48. data/bin/xls_to_table.rb +0 -52
  49. data/bin/xpress.rb +0 -27
  50. data/ext/protk/decoymaker/extconf.rb +0 -3
  51. data/ext/protk/simplealign/extconf.rb +0 -3
  52. data/lib/protk/biotools_excel_converter.rb +0 -60
  53. data/lib/protk/eupathdb_gene_information_table.rb +0 -158
  54. data/lib/protk/gapped_aligner.rb +0 -264
  55. data/lib/protk/protein_annotator.rb +0 -646
  56. data/lib/protk/spreadsheet_extensions.rb +0 -79
  57. data/lib/protk/xtandem_defaults.rb +0 -11
@@ -13,6 +13,15 @@ require 'optparse'
13
13
  require 'pathname'
14
14
  require 'protk/tool'
15
15
 
16
# Simple value object pairing a database name with the location of the database.
#
# name - the value passed as the first constructor argument
# path - the value passed as the second constructor argument
class FastaDatabase
  # Read-only accessors. `attr_reader` is the explicit form of the legacy
  # single-argument `attr`, and generates the same reader methods.
  attr_reader :name, :path

  def initialize(name, path)
    @name = name
    @path = path
  end
end
24
+
16
25
  class SearchTool < Tool
17
26
 
18
27
  # Initializes commandline options common to all search tools.
@@ -21,158 +30,104 @@ class SearchTool < Tool
21
30
  def initialize(option_support=[])
22
31
  super(option_support)
23
32
 
24
- if (option_support.include? :database)
25
- @options.database = "sphuman"
26
- @option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
27
- options.database = dbname
28
- end
29
-
33
+ if (option_support.include? :database)
34
+ add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
30
35
  end
31
36
 
32
37
  if ( option_support.include? :enzyme )
33
- @options.enzyme = "Trypsin"
34
- @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
35
- @options.enzyme=enz
36
- end
38
+ add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
37
39
  end
38
40
 
39
41
  if ( option_support.include? :modifications )
40
- @options.var_mods = ""
41
- @option_parser.on('--var-mods vm', 'Variable modifications' ) do |vm|
42
- @options.var_mods = vm
43
- end
44
-
45
- @options.fix_mods = ""
46
- @option_parser.on('--fix-mods fm', 'Fixed modifications' ) do |fm|
47
- @options.fix_mods = fm
48
- end
42
+ add_value_option(:var_mods,"",['--var-mods vm','Variable modifications. These should be provided in a comma separated list'])
43
+ add_value_option(:fix_mods,"",['--fix-mods fm','Fixed modifications. These should be provided in a comma separated list'])
49
44
  end
50
45
 
51
46
  if ( option_support.include? :instrument )
52
- @options.instrument = "ESI-QUAD-TOF"
53
- @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
54
- @options.instrument=instrument
55
- end
47
+ add_value_option(:instrument,"ESI-QUAD-TOF",['--instrument instrument', 'Instrument'])
56
48
  end
57
49
 
58
50
  if ( option_support.include? :mass_tolerance_units )
59
- @options.fragment_tolu="Da"
60
- @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da' ) do |tolu|
61
- @options.fragment_tolu = tolu
62
- end
63
-
64
- @options.precursor_tolu="ppm"
65
- @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
66
- @options.precursor_tolu = tolu
67
- end
51
+ add_value_option(:fragment_tolu,"Da",['--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da'])
52
+ add_value_option(:precursor_tolu,"ppm",['--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm'])
68
53
  end
69
54
 
70
55
  if ( option_support.include? :mass_tolerance )
71
- @options.fragment_tol=0.65
72
- @option_parser.on( '-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65' ) do |tol|
73
- @options.fragment_tol = tol
74
- end
75
-
76
- @options.precursor_tol=200
77
- @option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
78
- @options.precursor_tol = tol.to_f
79
- end
56
+ add_value_option(:fragment_tol,0.65,['-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65'])
57
+ add_value_option(:precursor_tol,200,['-p','--precursor-ion-tol tol', 'Precursor ion mass tolerance. Default=200'])
80
58
  end
81
59
 
82
60
  if ( option_support.include? :precursor_search_type )
83
- @options.precursor_search_type="monoisotopic"
84
- @option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
85
- @options.precursor_search_type = type
86
- end
61
+ add_value_option(:precursor_search_type,"monoisotopic",['-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)'])
87
62
  end
88
63
 
89
64
  if ( option_support.include? :strict_monoisotopic_mass )
90
- @options.strict_monoisotopic_mass=false
91
- @option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
92
- @options.strict_monoisotopic_mass=true
93
- end
65
+ add_boolean_option(:strict_monoisotopic_mass,false,['-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak'])
94
66
  end
95
67
 
96
68
  if ( option_support.include? :missed_cleavages )
97
- @options.missed_cleavages=2
98
- @option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
99
- @options.missed_cleavages = num
100
- end
69
+ add_value_option(:missed_cleavages,2,['-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed'])
101
70
  end
102
71
 
103
72
  if ( option_support.include? :cleavage_semi )
104
- @options.cleavage_semi=true
105
- @option_parser.on( '--no-cleavage-semi', 'Dont allow up to 1 non tryptic terminus on peptides' ) do
106
- @options.cleavage_semi=false
107
- end
73
+ add_boolean_option(:cleavage_semi,false,['--cleavage-semi', 'Search for peptides with up to 1 non-enzymatic cleavage site'])
108
74
  end
109
75
 
110
76
  if ( option_support.include? :respect_precursor_charges )
111
- @options.respect_precursor_charges=false
112
- @option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
113
- @options.respect_precursor_charges=true
114
- end
77
+ add_boolean_option(:respect_precursor_charges,false,['-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options'])
115
78
  end
116
79
 
117
80
  if ( option_support.include? :searched_ions )
118
- @options.searched_ions = ""
119
- @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)' ) do |si|
120
- @options.searched_ions = si
121
- end
81
+ add_value_option(:searched_ions,"",['--searched-ions si', 'Ion series to search (default=b,y)'])
82
+ end
83
+
84
+ if ( option_support.include? :multi_isotope_search )
85
+ add_boolean_option(:multi_isotope_search,false,["--multi-isotope-search","Expand parent mass window to include windows around neighbouring isotopic peaks"])
122
86
  end
123
87
 
124
88
  if ( option_support.include? :num_peaks_for_multi_isotope_search )
125
- @options.num_peaks_for_multi_isotope_search="0"
126
- @option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
127
- @options.num_peaks_for_multi_isotope_search=np
128
- end
89
+ add_value_option(:num_peaks_for_multi_isotope_search,0,["--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search"])
129
90
  end
130
91
 
131
92
  if ( option_support.include? :glyco)
132
- @options.glyco = false
133
- @option_parser.on( '-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models' ) do
134
- @options.glyco = true
135
- end
93
+ add_boolean_option(:glyco,false,['-g','--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models'])
94
+ end
95
+
96
+ if ( option_support.include? :acetyl_nterm)
97
+ add_boolean_option(:acetyl_nterm,false,['-y','--acetyl-nterm', 'Expect N-terminal acetylation as a variable mod in a search or as a parameter when building statistical models'])
136
98
  end
137
99
 
138
100
  if ( option_support.include? :methionine_oxidation)
139
- @options.methionine_oxidation = false
140
- @option_parser.on( '-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search' ) do
141
- @options.methionine_oxidation = true
142
- end
101
+ add_boolean_option(:methionine_oxidation,false,['-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search'])
143
102
  end
144
103
 
145
104
  if ( option_support.include? :carbamidomethyl)
146
- @options.carbamidomethyl = false
147
- @option_parser.on( '-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search' ) do
148
- @options.carbamidomethyl = true
149
- end
105
+ add_boolean_option(:carbamidomethyl,false,['-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search'])
150
106
  end
151
107
 
152
108
  if ( option_support.include? :maldi)
153
- @options.maldi=false
154
- @option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
155
- @options.maldi=true
156
- end
109
+ add_boolean_option(:maldi,false,['-l', '--maldi', 'Run a search on MALDI data'])
157
110
  end
111
+
112
+ @option_parser.summary_width=40
113
+
158
114
 
159
115
  end
160
-
161
-
162
- def jobid_from_filename(filename)
163
- jobid="protk"
164
- jobnum_match=filename.match(/(.{1,10}).*?\./)
165
- if (jobnum_match!=nil)
166
- jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
167
- end
168
- return jobid
169
- end
170
-
171
- # Based on the database setting and global database path, find the most current version of the required database
172
- # This function returns the name of the database with an extension appropriate to the database type
173
- #
174
- def current_database(db_type,db=@options.database)
175
- return Constants.new.current_database_for_name(db)
116
+
117
+
118
# Resolves the database option into a FastaDatabase.
#
# When the option value is an existing filesystem path it is used directly:
# the database is named after the path's basename and located at its expanded
# path. Otherwise the value is treated as a database name and its path is
# looked up with Constants#current_database_for_name.
#
# Returns a FastaDatabase.
def database_info
  requested = @options.database
  # Built once; the checks below all refer to the same path.
  candidate = Pathname.new(requested)

  if candidate.exist?
    FastaDatabase.new(candidate.basename.to_s, candidate.expand_path.to_s)
  else
    FastaDatabase.new(requested, Constants.new.current_database_for_name(requested))
  end
end
177
-
178
- end
129
+
130
+ end
131
+
132
+
133
+
@@ -143,9 +143,10 @@ tpp_download_file = download_task tpp_url, tpp_packagefile
143
143
  # Build
144
144
  file tpp_installed_file => [@build_dir,tpp_download_file] do
145
145
  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
146
+ # use_perl_locallib_cmd="echo hi"
146
147
  use_perl_locallib_cmd="eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
147
- sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
148
- sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
148
+ # sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
149
+ # sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
149
150
 
150
151
  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
151
152
 
@@ -173,6 +174,8 @@ file tpp_installed_file => [@build_dir,tpp_download_file] do
173
174
  f.write subs_text
174
175
  end
175
176
  end
177
+ sh %{cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src;echo '' > ../perl/tpp_models.pl;echo '' > ../perl/exporTPP.pl;echo '' > ../CGI/show_nspbin.pl;echo '' > ../CGI/tpp_gui/tpp_gui.pl}
178
+
176
179
  build_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make -s"
177
180
  install_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make install"
178
181
  env.log build_cmd, :info
@@ -354,9 +357,9 @@ file tandem_installed_file => [@build_dir,"#{@download_dir}/#{tandem_packagefile
354
357
  tandem_dirname = "#{tandem_packagefile.chomp('.zip')}"
355
358
 
356
359
  if ( tandem_platform=="linux") #Must compile
357
- tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/#{tandem_dirname}/src/"
360
+ tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/src/"
358
361
  sh %{cd #{tandem_src_dir}; make}
359
- sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/#{tandem_dirname}/bin #{env.tandem_root}/}
362
+ sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/bin #{env.tandem_root}/}
360
363
  else
361
364
  sh %{cd #{@build_dir}; cp -r ./#{tandem_packagefile.chomp('.zip')}/* #{env.tandem_root}/}
362
365
  sh %{chmod u+x #{env.gpmtandem}}
@@ -402,8 +405,9 @@ task :galaxyenv => protk_galaxy_envfile
402
405
 
403
406
  # multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
404
407
 
408
+ task :base => [:perl_locallib]
405
409
 
406
- task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxyenv]
410
+ task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms]
407
411
 
408
412
  # Special task when installing via toolshed
409
413
  #
@@ -0,0 +1,256 @@
1
+ require 'protk/search_tool'
2
+
3
class String
  # True when this X!Tandem modification specification is a motif rather than
  # a plain residue modification.
  #
  # A specification counts as a motif when it contains any of ( ) { } ! or a
  # paired set of square brackets. A lone bracket, as in "124@[" (an N-term
  # residue modification), does not make it a motif.
  #
  # Returns true or false.
  def xtandem_modification_motif?
    !!(self =~ /[\(\)\{\}\!]/ || self =~ /\[.*\]/)
  end
end
15
+
16
+ class TandemSearchTool < SearchTool
17
+ attr :defaults_path
18
+ attr :taxonomy_path
19
+ attr :default_data_path
20
+
21
+ attr :supported_xtandem_keys
22
+
23
# Sets up the option groups, X!Tandem key mappings, data file locations and
# tool-specific command-line options for the X!Tandem search tool.
def initialize
  option_groups = [
    :database,
    :explicit_output,
    :over_write,
    :enzyme,
    :modifications,
    :mass_tolerance_units,
    :mass_tolerance,
    :multi_isotope_search,
    :missed_cleavages,
    :cleavage_semi,
    :methionine_oxidation,
    :glyco,
    :acetyl_nterm,
    :threads
  ]
  super(option_groups)

  # Command-line option key => label of the single X!Tandem input note that
  # carries its value.
  @xtandem_keys_with_single_multiplicity = {
    :fragment_tol => "spectrum, fragment monoisotopic mass error",
    :missed_cleavages => "scoring, maximum missed cleavage sites",
    :cleavage_semi => "protein, cleavage semi",
    :precursor_tolu => "spectrum, parent monoisotopic mass error units",
    :multi_isotope_search => "spectrum, parent monoisotopic mass isotope error",
    :fragment_tolu => "spectrum, fragment monoisotopic mass error units",
    :acetyl_nterm => "protein, quick acetyl",
    :output_spectra => "output, spectra",
    :threads => "spectrum, threads"
  }

  # The precursor tolerance is the one option that maps onto two X!Tandem notes.
  @xtandem_keys_for_precursor_tol = {
    :precursor_tol => [
      "spectrum, parent monoisotopic mass error minus",
      "spectrum, parent monoisotopic mass error plus"
    ]
  }

  # Bundled data files live in the data/ directory next to this source file.
  # The trailing slash on the directory is relied upon when paths are built
  # by string concatenation.
  data_dir = "#{File.dirname(__FILE__)}/data/"
  @defaults_path = "#{data_dir}tandem_params.xml"
  @taxonomy_path = "#{data_dir}taxonomy_template.xml"
  @default_data_path = data_dir

  @option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
  @options.output_suffix = "_tandem"

  add_value_option(:tandem_params, "isb_native", ['-T', '--tandem-params tandem', 'Either the full path to an xml file containing a complete set of default parameters, or one of the following (isb_native,isb_kscore,gpm). Default is isb_native'])
  add_boolean_option(:keep_params_files, false, ['-K', '--keep-params-files', 'Keep X!Tandem parameter files'])
  add_boolean_option(:output_spectra, false, ['--output-spectra', 'Include spectra in the output file'])
end
70
+
71
+ private
72
+ # Galaxy changes things like @ to __at__ we need to change it back
73
+ #
74
# Reverses the escaping applied to a modification string, turning tokens such
# as __at__ back into the character they stand for.
#
# mstring - the escaped modification string; it is NOT modified, so frozen
#           strings and strings shared with the caller are safe to pass.
#
# Returns a new String with every token replaced.
def decode_modification_string(mstring)
  # Tokens are substituted one after another in this fixed order.
  replacements = [
    ["__at__", "@"],
    ["__oc__", "{"],
    ["__cc__", "}"],
    ["__ob__", "["],
    ["__cb__", "]"]
  ]
  replacements.reduce(mstring) { |decoded, (token, char)| decoded.gsub(token, char) }
end
82
+
83
# Overwrites the content of the single input note labelled tandem_key.
#
# std_params - parameter document; must respond to find(xpath)
# tandem_key - label of the X!Tandem input note to set
# value      - new content; converted with to_s
#
# Returns the String that was written.
# Raises ArgumentError unless exactly one matching note exists. (An error is
# raised rather than thrown: throw is for non-local jumps paired with catch.
# ArgumentError is the class the previous uncaught throw surfaced as, directly
# or via its subclass UncaughtThrowError, so existing rescues keep working.)
def set_option(std_params, tandem_key, value)
  matches = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
  unless matches.length == 1
    raise ArgumentError, "Exactly one parameter named (#{tandem_key}) is required in parameter file"
  end
  matches[0].content = value.to_s
end
88
+
89
# Adds value to the input note labelled tandem_key, creating the note when the
# document does not have one yet.
#
# std_params - parameter document; must respond to find(xpath)
# tandem_key - label of the X!Tandem input note
# value      - content to add
#
# With no existing note, a new <note type="input" label=tandem_key> holding
# value is appended to the /bioml root. With exactly one existing note, its
# content is replaced by append_string(existing_content, value); append_string
# is not defined in this part of the file, so how the two values are combined
# cannot be stated here.
#
# Raises ArgumentError when more than one matching note exists. (An error is
# raised rather than thrown: throw is for non-local jumps paired with catch.
# ArgumentError is the class the previous uncaught throw surfaced as, directly
# or via its subclass UncaughtThrowError, so existing rescues keep working.)
def append_option(std_params, tandem_key, value)
  notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")

  case notes.length
  when 0
    node = XML::Node.new('note')
    node["type"] = "input"
    node["label"] = tandem_key
    node.content = value
    std_params.find('/bioml')[0] << node
  when 1
    notes[0].content = append_string(notes[0].content, value)
  else
    raise ArgumentError, "Exactly one parameter named (#{tandem_key}) is required in parameter file"
  end
end
102
+
103
# Lists the labels of all input notes found in an X!Tandem parameter file.
#
# default_params_path - path of the XML parameter file to parse
#
# Returns an Array with the 'label' attribute of every /bioml/note element
# whose type is "input", in document order.
def tandem_keys_in_params_file(default_params_path)
  defaults_doc = XML::Parser.file(default_params_path).parse
  defaults_doc.find('/bioml/note[@type="input"]').map { |note| note.attributes['label'] }
end
113
+
114
# Reads the taxon label from a taxonomy file.
#
# taxo_path - path of the taxonomy XML file to parse
#
# Returns the 'label' attribute of the file's single /bioml/taxon element.
# Raises ArgumentError unless the file contains exactly one taxon element.
# (An error is raised rather than thrown: throw is for non-local jumps paired
# with catch. ArgumentError is the class the previous uncaught throw surfaced
# as, directly or via its subclass UncaughtThrowError.)
def taxon_from_taxonomy_file(taxo_path)
  taxon_nodes = XML::Parser.file(taxo_path).parse.find('/bioml/taxon')
  unless taxon_nodes.length == 1
    raise ArgumentError, "Exactly one taxon entry allowed in taxonomy file but found #{taxon_nodes.length}"
  end
  taxon_nodes[0].attributes['label']
end
121
+
122
# Fills in a parsed X!Tandem parameter document for a single search.
#
# std_params  - parsed parameter document; must respond to find(xpath)
# output_path - value written to the "output, path" note
# input_path  - value written to the "spectrum, path" note
# db_info     - object whose name is written to the "protein, taxon" note
# taxo_path   - value written to the "list path, taxonomy information" note
#
# Option values can come from (a) the commandline, (b) a defaults file or
# (c) commandline defaults, and the ideal priority order is a -> b -> c. To
# support this, the keys present in the defaults file are compared with the
# options the user supplied: a value is written here only when the defaults
# file does not define the key, or the user gave the option explicitly.
#
# The defaults file is the path given via tandem_params when such a file
# exists; otherwise tandem_params is treated as a keyword naming the bundled
# file tandem_<keyword>_defaults.xml under @default_data_path.
#
# Returns std_params.
# Raises ArgumentError when the document does not contain exactly one
# "list path, default parameters" note. (An error is raised rather than
# thrown: throw is for non-local jumps paired with catch. ArgumentError is the
# class the previous uncaught throw surfaced as, directly or via its subclass
# UncaughtThrowError.)
def generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
  default_params_notes = std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
  unless default_params_notes.length == 1
    raise ArgumentError, "Exactly one list path, default parameters note is required in the parameter file"
  end

  # File.exist? is used because File.exists? was deprecated and later removed.
  default_params_notes[0].content =
    if File.exist?(self.tandem_params)
      self.tandem_params.to_s
    else
      "#{@default_data_path}tandem_#{self.tandem_params}_defaults.xml"
    end

  keys_in_params_file = tandem_keys_in_params_file(default_params_notes[0].content)
  keys_on_commandline = @options_defined_by_user.keys

  # Input and output paths
  set_option(std_params, "spectrum, path", input_path)
  set_option(std_params, "output, path", output_path)

  # Taxonomy and database
  set_option(std_params, "list path, taxonomy information", taxo_path)
  set_option(std_params, "protein, taxon", db_info.name)

  # Simple options: one command-line option maps to one X!Tandem note.
  @xtandem_keys_with_single_multiplicity.each_pair do |commandline_option_key, xtandem_key|
    # Leave the value from the defaults file alone unless the user overrode it.
    next if keys_in_params_file.include?(xtandem_key) && !keys_on_commandline.include?(commandline_option_key)

    opt_val = self.send(commandline_option_key)
    # Booleans are written as the words "yes" / "no".
    if opt_val.is_a?(TrueClass) || opt_val.is_a?(FalseClass)
      opt_val = opt_val ? "yes" : "no"
    end
    append_option(std_params, xtandem_key, opt_val.to_s)
  end

  # Precursor mass tolerance is a special case: it is written to two X!Tandem
  # notes (minus and plus), each receiving half of the requested value.
  @xtandem_keys_for_precursor_tol.each_pair do |commandline_option_key, xtandem_keys|
    xtandem_keys.each do |xtandem_key|
      next if keys_in_params_file.include?(xtandem_key) && !keys_on_commandline.include?(commandline_option_key)

      append_option(std_params, xtandem_key, (self.precursor_tol.to_f*0.5).to_s)
    end
  end

  # Per-residue fixed and variable modifications. X!Tandem accepts these via
  #
  #   residue, potential modification mass
  #   residue, modification mass
  #   residue, potential modification motif
  #
  # They are supplied mainly through the var_mods and fix_mods options, each of
  # which may hold modification masses and/or motifs.
  #
  # var_mods may mix motifs with standard mods; they go to different notes, so
  # split them into two arrays.
  var_motifs, var_mods = decoded_modification_list(self.var_mods).partition { |mod| mod.xtandem_modification_motif? }
  fix_mods = decoded_modification_list(self.fix_mods)

  # The --glyco and --methionineo shortcuts are added here.
  # No check is made for duplication.
  var_motifs << "0.998@N!{P}[ST]" if self.glyco
  var_mods << "15.994915@M" if self.methionine_oxidation

  append_option(std_params, "residue, modification mass", fix_mods.join(",")) unless fix_mods.empty?
  append_option(std_params, "residue, potential modification mass", var_mods.join(",")) unless var_mods.empty?
  append_option(std_params, "residue, potential modification motif", var_motifs.join(",")) unless var_motifs.empty?

  std_params
end

# Splits a comma separated modification list into its entries: surrounding
# whitespace is removed, blank entries are dropped, and each remaining entry is
# passed through decode_modification_string.
def decoded_modification_list(raw_list)
  raw_list.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.map { |mod| decode_modification_string(mod) }
end
221
+
222
+ public
223
# Builds a taxonomy document for the given database from the taxonomy template.
#
# db_info - a FastaDatabase (or subclass); its name becomes the taxon label and
#           its path becomes the URL of the taxon's file entry
#
# The template at @taxonomy_path is parsed, the 'label' attribute of its single
# /bioml/taxon element is set to db_info.name, and the 'URL' attribute of its
# single /bioml/taxon/file element is set to db_info.path.
#
# Returns the modified document.
# Raises ArgumentError when db_info is not a FastaDatabase, or when the
# template does not contain exactly one taxon / file element. (An error is
# raised rather than thrown: throw is for non-local jumps paired with catch.
# ArgumentError is the class the previous uncaught throw surfaced as, directly
# or via its subclass UncaughtThrowError.)
def taxonomy_doc(db_info)
  unless db_info.is_a?(FastaDatabase)
    raise ArgumentError, "Invalid input db_info must be a FastaDatabase object"
  end

  taxo_doc = XML::Parser.file(@taxonomy_path).parse

  taxon_nodes = taxo_doc.find('/bioml/taxon')
  unless taxon_nodes.length == 1
    raise ArgumentError, "Exactly one taxon label is required in the taxonomy_template file"
  end
  taxon_nodes[0].attributes['label'] = db_info.name

  file_nodes = taxo_doc.find('/bioml/taxon/file')
  unless file_nodes.length == 1
    raise ArgumentError, "Exactly one database file is required in the taxonomy_template file"
  end
  file_nodes[0].attributes['URL'] = db_info.path

  taxo_doc
end
242
+
243
+
244
# Builds the X!Tandem parameter document for one search.
#
# db_info     - a FastaDatabase (or subclass) describing the search database
# taxo_path   - path of the taxonomy file the parameters should refer to
# input_path  - path of the spectrum input file
# output_path - path the search output should be written to
#
# The argument is validated before the defaults file at @defaults_path is
# parsed, so an invalid call fails without reading any file. The parsed
# document is then completed by generate_parameter_doc.
#
# Returns whatever generate_parameter_doc returns.
# Raises ArgumentError when db_info is not a FastaDatabase. (An error is
# raised rather than thrown: throw is for non-local jumps paired with catch.
# ArgumentError is the class the previous uncaught throw surfaced as, directly
# or via its subclass UncaughtThrowError.)
def params_doc(db_info,taxo_path,input_path,output_path)
  unless db_info.is_a?(FastaDatabase)
    raise ArgumentError, "Invalid input db_info must be a FastaDatabase object"
  end

  std_params = XML::Parser.file(@defaults_path).parse

  # Note the different argument order expected by generate_parameter_doc.
  generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
end
253
+
254
+
255
+
256
+ end