protk 1.2.6.pre5 → 1.3.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/README.md +84 -45
  3. data/bin/add_retention_times.rb +9 -5
  4. data/bin/augustus_to_proteindb.rb +7 -11
  5. data/bin/interprophet.rb +28 -46
  6. data/bin/make_decoy.rb +16 -48
  7. data/bin/mascot_search.rb +57 -71
  8. data/bin/mascot_to_pepxml.rb +13 -26
  9. data/bin/msgfplus_search.rb +70 -107
  10. data/bin/omssa_search.rb +52 -109
  11. data/bin/peptide_prophet.rb +44 -119
  12. data/bin/pepxml_to_table.rb +24 -27
  13. data/bin/protein_prophet.rb +22 -82
  14. data/bin/protxml_to_gff.rb +22 -519
  15. data/bin/protxml_to_table.rb +2 -16
  16. data/bin/sixframe.rb +10 -32
  17. data/bin/tandem_search.rb +30 -403
  18. data/bin/tandem_to_pepxml.rb +43 -0
  19. data/bin/unimod_to_loc.rb +1 -1
  20. data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
  21. data/ext/decoymaker/extconf.rb +3 -0
  22. data/lib/protk/constants.rb +16 -2
  23. data/lib/protk/data/default_config.yml +2 -1
  24. data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
  25. data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
  26. data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
  27. data/lib/protk/data/tandem_params.xml +17 -54
  28. data/lib/protk/fastadb.rb +2 -2
  29. data/lib/protk/prophet_tool.rb +1 -1
  30. data/lib/protk/protxml_to_gff_tool.rb +474 -0
  31. data/lib/protk/search_tool.rb +58 -103
  32. data/lib/protk/setup_rakefile.rake +9 -5
  33. data/lib/protk/tandem_search_tool.rb +256 -0
  34. data/lib/protk/tool.rb +85 -104
  35. data/lib/protk.rb +1 -6
  36. metadata +24 -103
  37. data/bin/annotate_ids.rb +0 -59
  38. data/bin/asapratio.rb +0 -27
  39. data/bin/blastxml_to_table.rb +0 -119
  40. data/bin/correct_omssa_retention_times.rb +0 -27
  41. data/bin/feature_finder.rb +0 -95
  42. data/bin/file_convert.rb +0 -164
  43. data/bin/generate_omssa_loc.rb +0 -42
  44. data/bin/gffmerge.rb +0 -208
  45. data/bin/libra.rb +0 -70
  46. data/bin/toppas_pipeline.rb +0 -84
  47. data/bin/uniprot_annotation.rb +0 -141
  48. data/bin/xls_to_table.rb +0 -52
  49. data/bin/xpress.rb +0 -27
  50. data/ext/protk/decoymaker/extconf.rb +0 -3
  51. data/ext/protk/simplealign/extconf.rb +0 -3
  52. data/lib/protk/biotools_excel_converter.rb +0 -60
  53. data/lib/protk/eupathdb_gene_information_table.rb +0 -158
  54. data/lib/protk/gapped_aligner.rb +0 -264
  55. data/lib/protk/protein_annotator.rb +0 -646
  56. data/lib/protk/spreadsheet_extensions.rb +0 -79
  57. data/lib/protk/xtandem_defaults.rb +0 -11
@@ -13,6 +13,15 @@ require 'optparse'
13
13
  require 'pathname'
14
14
  require 'protk/tool'
15
15
 
16
+ class FastaDatabase
17
+ attr :name
18
+ attr :path
19
+ def initialize(name,path)
20
+ @name=name
21
+ @path=path
22
+ end
23
+ end
24
+
16
25
  class SearchTool < Tool
17
26
 
18
27
  # Initializes commandline options common to all search tools.
@@ -21,158 +30,104 @@ class SearchTool < Tool
21
30
  def initialize(option_support=[])
22
31
  super(option_support)
23
32
 
24
- if (option_support.include? :database)
25
- @options.database = "sphuman"
26
- @option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
27
- options.database = dbname
28
- end
29
-
33
+ if (option_support.include? :database)
34
+ add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
30
35
  end
31
36
 
32
37
  if ( option_support.include? :enzyme )
33
- @options.enzyme = "Trypsin"
34
- @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
35
- @options.enzyme=enz
36
- end
38
+ add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
37
39
  end
38
40
 
39
41
  if ( option_support.include? :modifications )
40
- @options.var_mods = ""
41
- @option_parser.on('--var-mods vm', 'Variable modifications' ) do |vm|
42
- @options.var_mods = vm
43
- end
44
-
45
- @options.fix_mods = ""
46
- @option_parser.on('--fix-mods fm', 'Fixed modifications' ) do |fm|
47
- @options.fix_mods = fm
48
- end
42
+ add_value_option(:var_mods,"",['--var-mods vm','Variable modifications. These should be provided in a comma separated list'])
43
+ add_value_option(:fix_mods,"",['--fix-mods fm','Fixed modifications. These should be provided in a comma separated list'])
49
44
  end
50
45
 
51
46
  if ( option_support.include? :instrument )
52
- @options.instrument = "ESI-QUAD-TOF"
53
- @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
54
- @options.instrument=instrument
55
- end
47
+ add_value_option(:instrument,"ESI-QUAD-TOF",['--instrument instrument', 'Instrument'])
56
48
  end
57
49
 
58
50
  if ( option_support.include? :mass_tolerance_units )
59
- @options.fragment_tolu="Da"
60
- @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da' ) do |tolu|
61
- @options.fragment_tolu = tolu
62
- end
63
-
64
- @options.precursor_tolu="ppm"
65
- @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
66
- @options.precursor_tolu = tolu
67
- end
51
+ add_value_option(:fragment_tolu,"Da",['--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da'])
52
+ add_value_option(:precursor_tolu,"ppm",['--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm'])
68
53
  end
69
54
 
70
55
  if ( option_support.include? :mass_tolerance )
71
- @options.fragment_tol=0.65
72
- @option_parser.on( '-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65' ) do |tol|
73
- @options.fragment_tol = tol
74
- end
75
-
76
- @options.precursor_tol=200
77
- @option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
78
- @options.precursor_tol = tol.to_f
79
- end
56
+ add_value_option(:fragment_tol,0.65,['-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65'])
57
+ add_value_option(:precursor_tol,200,['-p','--precursor-ion-tol tol', 'Precursor ion mass tolerance. Default=200'])
80
58
  end
81
59
 
82
60
  if ( option_support.include? :precursor_search_type )
83
- @options.precursor_search_type="monoisotopic"
84
- @option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
85
- @options.precursor_search_type = type
86
- end
61
+ add_value_option(:precursor_search_type,"monoisotopic",['-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)'])
87
62
  end
88
63
 
89
64
  if ( option_support.include? :strict_monoisotopic_mass )
90
- @options.strict_monoisotopic_mass=false
91
- @option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
92
- @options.strict_monoisotopic_mass=true
93
- end
65
+ add_boolean_option(:strict_monoisotopic_mass,false,['-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak'])
94
66
  end
95
67
 
96
68
  if ( option_support.include? :missed_cleavages )
97
- @options.missed_cleavages=2
98
- @option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
99
- @options.missed_cleavages = num
100
- end
69
+ add_value_option(:missed_cleavages,2,['-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed'])
101
70
  end
102
71
 
103
72
  if ( option_support.include? :cleavage_semi )
104
- @options.cleavage_semi=true
105
- @option_parser.on( '--no-cleavage-semi', 'Dont allow up to 1 non tryptic terminus on peptides' ) do
106
- @options.cleavage_semi=false
107
- end
73
+ add_boolean_option(:cleavage_semi,false,['--cleavage-semi', 'Search for peptides with up to 1 non-enzymatic cleavage site'])
108
74
  end
109
75
 
110
76
  if ( option_support.include? :respect_precursor_charges )
111
- @options.respect_precursor_charges=false
112
- @option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
113
- @options.respect_precursor_charges=true
114
- end
77
+ add_boolean_option(:respect_precursor_charges,false,['-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options'])
115
78
  end
116
79
 
117
80
  if ( option_support.include? :searched_ions )
118
- @options.searched_ions = ""
119
- @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)' ) do |si|
120
- @options.searched_ions = si
121
- end
81
+ add_value_option(:searched_ions,"",['--searched-ions si', 'Ion series to search (default=b,y)'])
82
+ end
83
+
84
+ if ( option_support.include? :multi_isotope_search )
85
+ add_boolean_option(:multi_isotope_search,false,["--multi-isotope-search","Expand parent mass window to include windows around neighbouring isotopic peaks"])
122
86
  end
123
87
 
124
88
  if ( option_support.include? :num_peaks_for_multi_isotope_search )
125
- @options.num_peaks_for_multi_isotope_search="0"
126
- @option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
127
- @options.num_peaks_for_multi_isotope_search=np
128
- end
89
+ add_value_option(:num_peaks_for_multi_isotope_search,0,["--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search"])
129
90
  end
130
91
 
131
92
  if ( option_support.include? :glyco)
132
- @options.glyco = false
133
- @option_parser.on( '-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models' ) do
134
- @options.glyco = true
135
- end
93
+ add_boolean_option(:glyco,false,['-g','--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models'])
94
+ end
95
+
96
+ if ( option_support.include? :acetyl_nterm)
97
+ add_boolean_option(:acetyl_nterm,false,['-y','--acetyl-nterm', 'Expect N-terminal acetylation as a variable mod in a search or as a parameter when building statistical models'])
136
98
  end
137
99
 
138
100
  if ( option_support.include? :methionine_oxidation)
139
- @options.methionine_oxidation = false
140
- @option_parser.on( '-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search' ) do
141
- @options.methionine_oxidation = true
142
- end
101
+ add_boolean_option(:methionine_oxidation,false,['-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search'])
143
102
  end
144
103
 
145
104
  if ( option_support.include? :carbamidomethyl)
146
- @options.carbamidomethyl = false
147
- @option_parser.on( '-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search' ) do
148
- @options.carbamidomethyl = true
149
- end
105
+ add_boolean_option(:carbamidomethyl,false,['-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search'])
150
106
  end
151
107
 
152
108
  if ( option_support.include? :maldi)
153
- @options.maldi=false
154
- @option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
155
- @options.maldi=true
156
- end
109
+ add_boolean_option(:maldi,false,['-l', '--maldi', 'Run a search on MALDI data'])
157
110
  end
111
+
112
+ @option_parser.summary_width=40
113
+
158
114
 
159
115
  end
160
-
161
-
162
- def jobid_from_filename(filename)
163
- jobid="protk"
164
- jobnum_match=filename.match(/(.{1,10}).*?\./)
165
- if (jobnum_match!=nil)
166
- jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
167
- end
168
- return jobid
169
- end
170
-
171
- # Based on the database setting and global database path, find the most current version of the required database
172
- # This function returns the name of the database with an extension appropriate to the database type
173
- #
174
- def current_database(db_type,db=@options.database)
175
- return Constants.new.current_database_for_name(db)
116
+
117
+
118
+ def database_info
119
+ case
120
+ when Pathname.new(@options.database).exist? # It's an explicitly named db
121
+ db_path=Pathname.new(@options.database).expand_path.to_s
122
+ db_name=Pathname.new(@options.database).basename.to_s
123
+ else
124
+ db_path=Constants.new.current_database_for_name @options.database
125
+ db_name=@options.database
126
+ end
127
+ FastaDatabase.new(db_name,db_path)
176
128
  end
177
-
178
- end
129
+
130
+ end
131
+
132
+
133
+
@@ -143,9 +143,10 @@ tpp_download_file = download_task tpp_url, tpp_packagefile
143
143
  # Build
144
144
  file tpp_installed_file => [@build_dir,tpp_download_file] do
145
145
  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
146
+ # use_perl_locallib_cmd="echo hi"
146
147
  use_perl_locallib_cmd="eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
147
- sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
148
- sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
148
+ # sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
149
+ # sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
149
150
 
150
151
  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
151
152
 
@@ -173,6 +174,8 @@ file tpp_installed_file => [@build_dir,tpp_download_file] do
173
174
  f.write subs_text
174
175
  end
175
176
  end
177
+ sh %{cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src;echo '' > ../perl/tpp_models.pl;echo '' > ../perl/exporTPP.pl;echo '' > ../CGI/show_nspbin.pl;echo '' > ../CGI/tpp_gui/tpp_gui.pl}
178
+
176
179
  build_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make -s"
177
180
  install_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make install"
178
181
  env.log build_cmd, :info
@@ -354,9 +357,9 @@ file tandem_installed_file => [@build_dir,"#{@download_dir}/#{tandem_packagefile
354
357
  tandem_dirname = "#{tandem_packagefile.chomp('.zip')}"
355
358
 
356
359
  if ( tandem_platform=="linux") #Must compile
357
- tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/#{tandem_dirname}/src/"
360
+ tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/src/"
358
361
  sh %{cd #{tandem_src_dir}; make}
359
- sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/#{tandem_dirname}/bin #{env.tandem_root}/}
362
+ sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/bin #{env.tandem_root}/}
360
363
  else
361
364
  sh %{cd #{@build_dir}; cp -r ./#{tandem_packagefile.chomp('.zip')}/* #{env.tandem_root}/}
362
365
  sh %{chmod u+x #{env.gpmtandem}}
@@ -402,8 +405,9 @@ task :galaxyenv => protk_galaxy_envfile
402
405
 
403
406
  # multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
404
407
 
408
+ task :base => [:perl_locallib]
405
409
 
406
- task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxyenv]
410
+ task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms]
407
411
 
408
412
  # Special task when installing via toolshed
409
413
  #
@@ -0,0 +1,256 @@
1
+ require 'protk/search_tool'
2
+
3
+ class String
4
+ def xtandem_modification_motif?
5
+ # 124@[ is not a modification motif, it is a residue (N-term) modification,
6
+ # so when checking if modification is a motif look for paired square brackets.
7
+ ismotif=false
8
+ case self
9
+ when /[\(\)\{\}\!]/,/\[.*\]/
10
+ ismotif=true
11
+ end
12
+ ismotif
13
+ end
14
+ end
15
+
16
+ class TandemSearchTool < SearchTool
17
+ attr :defaults_path
18
+ attr :taxonomy_path
19
+ attr :default_data_path
20
+
21
+ attr :supported_xtandem_keys
22
+
23
+ def initialize
24
+
25
+ super([
26
+ :database,
27
+ :explicit_output,
28
+ :over_write,
29
+ :enzyme,
30
+ :modifications,
31
+ :mass_tolerance_units,
32
+ :mass_tolerance,
33
+ :multi_isotope_search,
34
+ :missed_cleavages,
35
+ :cleavage_semi,
36
+ :methionine_oxidation,
37
+ :glyco,
38
+ :acetyl_nterm,
39
+ :threads
40
+ ])
41
+
42
+ @xtandem_keys_with_single_multiplicity = {
43
+ :fragment_tol => "spectrum, fragment monoisotopic mass error",
44
+ :missed_cleavages => "scoring, maximum missed cleavage sites",
45
+ :cleavage_semi => "protein, cleavage semi",
46
+ :precursor_tolu => "spectrum, parent monoisotopic mass error units",
47
+ :multi_isotope_search => "spectrum, parent monoisotopic mass isotope error",
48
+ :fragment_tolu => "spectrum, fragment monoisotopic mass error units",
49
+ :acetyl_nterm => "protein, quick acetyl",
50
+ :output_spectra => "output, spectra",
51
+ :threads => "spectrum, threads"
52
+ }
53
+
54
+ @xtandem_keys_for_precursor_tol = {
55
+ :precursor_tol => ["spectrum, parent monoisotopic mass error minus", "spectrum, parent monoisotopic mass error plus"]
56
+ }
57
+
58
+ @defaults_path="#{File.dirname(__FILE__)}/data/tandem_params.xml"
59
+ @taxonomy_path="#{File.dirname(__FILE__)}/data/taxonomy_template.xml"
60
+ @default_data_path="#{File.dirname(__FILE__)}/data/"
61
+
62
+ @option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
63
+ @options.output_suffix="_tandem"
64
+
65
+ add_value_option(:tandem_params,"isb_native",['-T', '--tandem-params tandem', 'Either the full path to an xml file containing a complete set of default parameters, or one of the following (isb_native,isb_kscore,gpm). Default is isb_native'])
66
+ add_boolean_option(:keep_params_files,false,['-K', '--keep-params-files', 'Keep X!Tandem parameter files'])
67
+ add_boolean_option(:output_spectra,false,['--output-spectra', 'Include spectra in the output file'])
68
+
69
+ end
70
+
71
+ private
72
+ # Galaxy changes things like @ to __at__ we need to change it back
73
+ #
74
+ def decode_modification_string(mstring)
75
+ mstring.gsub!("__at__","@")
76
+ mstring.gsub!("__oc__","{")
77
+ mstring.gsub!("__cc__","}")
78
+ mstring.gsub!("__ob__","[")
79
+ mstring.gsub!("__cb__","]")
80
+ mstring
81
+ end
82
+
83
+ def set_option(std_params, tandem_key, value)
84
+ notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
85
+ throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
86
+ notes[0].content=value.to_s
87
+ end
88
+
89
+ def append_option(std_params, tandem_key, value)
90
+ notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
91
+ if notes.length == 0
92
+ node = XML::Node.new('note')
93
+ node["type"] = "input"
94
+ node["label"] = tandem_key
95
+ node.content = value
96
+ std_params.find('/bioml')[0] << node
97
+ else
98
+ throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
99
+ notes[0].content = append_string(notes[0].content, value)
100
+ end
101
+ end
102
+
103
+ def tandem_keys_in_params_file(default_params_path)
104
+ params_parser=XML::Parser.file(default_params_path)
105
+ default_params=params_parser.parse
106
+ input_nodes=default_params.find('/bioml/note[@type="input"]')
107
+ defined_keys=[]
108
+ input_nodes.each do |node|
109
+ defined_keys << node.attributes['label']
110
+ end
111
+ defined_keys
112
+ end
113
+
114
+ def taxon_from_taxonomy_file(taxo_path)
115
+ taxo_parser=XML::Parser.file(taxo_path)
116
+ taxo_doc=taxo_parser.parse
117
+ taxon_nodes=taxo_doc.find('/bioml/taxon')
118
+ throw "Exactly one taxon entry allowed in taxonomy file but found #{taxon_nodes.length}" unless taxon_nodes.length==1
119
+ taxon_nodes[0].attributes['label']
120
+ end
121
+
122
+ def generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
123
+
124
+ #
125
+ # The TandemSearchTool class has a special defaults system
126
+ # Defaults are read from (a) The commandline (b) A defaults file (c) commandline defaults.
127
+ # The ideal priority order is a -> b -> c
128
+ #
129
+ # In order to support this we need to read the defaults file and check options defined there
130
+ # against those defined on the commandline
131
+ #
132
+ # In addition, we support some default parameter files built-in to protk. These are treated the same
133
+ # but are specified if the user provides a keyword rather than a path
134
+ #
135
+ default_params_notes=std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
136
+ throw "Exactly one list path, default parameters note is required in the parameter file" unless default_params_notes.length==1
137
+
138
+ is_file=File.exists?(self.tandem_params)
139
+ if is_file
140
+ default_params_notes[0].content="#{self.tandem_params}"
141
+ else
142
+ default_params_notes[0].content="#{@default_data_path}tandem_#{self.tandem_params}_defaults.xml"
143
+ end
144
+
145
+
146
+ keys_in_params_file=tandem_keys_in_params_file(default_params_notes[0].content)
147
+ keys_on_commandline=@options_defined_by_user.keys
148
+
149
+ # Set the input and output paths
150
+ #
151
+ set_option(std_params,"spectrum, path",input_path)
152
+ set_option(std_params,"output, path",output_path)
153
+
154
+ # Taxonomy and Database
155
+ #
156
+ set_option(std_params,"list path, taxonomy information",taxo_path)
157
+ set_option(std_params,"protein, taxon",db_info.name)
158
+
159
+
160
+
161
+ # set_option(std_params, "protein, cleavage semi", self.cleavage_semi ? "yes" : "no")
162
+
163
+ # Simple options (unique with a 1:1 mapping to parameters from this tool)
164
+ #
165
+ @xtandem_keys_with_single_multiplicity.each_pair do |commandline_option_key, xtandem_key|
166
+ if (!keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(commandline_option_key))
167
+ opt_val=self.send(commandline_option_key)
168
+ if opt_val.is_a?(TrueClass) || opt_val.is_a?(FalseClass)
169
+ opt_val = opt_val ? "yes" : "no"
170
+ end
171
+ append_option(std_params,xtandem_key,opt_val.to_s)
172
+ end
173
+ end
174
+
175
+ # Precursor mass tolerance is a special case as it requires two xtandem options
176
+ #
177
+ @xtandem_keys_for_precursor_tol.each_pair do |commandline_option_key, xtandem_keys|
178
+ xtandem_keys.each do |xtandem_key|
179
+ if (!keys_in_params_file.include?(xtandem_key) || keys_on_commandline.include?(commandline_option_key))
180
+ append_option(std_params,xtandem_key,(self.precursor_tol.to_f*0.5).to_s)
181
+ end
182
+ end
183
+ end
184
+
185
+ # Per residue Fixed and Variable Modifications
186
+ #
187
+ # These can be added using a variety of methods in xtandem
188
+ #
189
+ # residue, potential modification mass
190
+ # residue, modification mass
191
+ # residue, potential modification motif
192
+ #
193
+ # We support these primarily via the var_mods and fix_mods commandline params
194
+ # Modification masses and/or motifs can be entered via these arguments
195
+ #
196
+
197
+ var_mods = self.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
198
+ var_mods=var_mods.collect {|mod| decode_modification_string(mod) }
199
+
200
+ # var_mods allows motif's as well as standard mods. These should be in a separate array
201
+ var_motifs = [].replace(var_mods)
202
+ var_mods.delete_if {|mod| mod.xtandem_modification_motif? }
203
+ var_motifs.keep_if {|mod| mod.xtandem_modification_motif? }
204
+
205
+ fix_mods = self.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
206
+ fix_mods=fix_mods.collect {|mod| decode_modification_string(mod)}
207
+
208
+ # We also support the --glyco and --methionineo shortcuts.
209
+ # Add these here. No check is made for duplication
210
+ #
211
+ var_motifs << "0.998@N!{P}[ST]" if self.glyco
212
+ var_mods << "15.994915@M" if self.methionine_oxidation
213
+
214
+ append_option(std_params,"residue, modification mass",fix_mods.join(",")) unless fix_mods.length==0
215
+ append_option(std_params,"residue, potential modification mass",var_mods.join(",")) unless var_mods.length==0
216
+ append_option(std_params,"residue, potential modification motif",var_motifs.join(",")) unless var_motifs.length==0
217
+
218
+ std_params
219
+
220
+ end
221
+
222
+ public
223
+ def taxonomy_doc(db_info)
224
+ throw "Invalid input db_info must be a FastaDatabase object" unless db_info.class==FastaDatabase
225
+ database_path=db_info.path
226
+ taxon=db_info.name
227
+ # Parse taxonomy template file
228
+ #
229
+ taxo_parser=XML::Parser.file(@taxonomy_path)
230
+ taxo_doc=taxo_parser.parse
231
+
232
+ taxon_label=taxo_doc.find('/bioml/taxon')
233
+ throw "Exactly one taxon label is required in the taxonomy_template file" unless taxon_label.length==1
234
+ taxon_label[0].attributes['label']=taxon
235
+
236
+ db_file=taxo_doc.find('/bioml/taxon/file')
237
+ throw "Exactly one database file is required in the taxonomy_template file" unless db_file.length==1
238
+ db_file[0].attributes['URL']=database_path
239
+
240
+ taxo_doc
241
+ end
242
+
243
+
244
+ def params_doc(db_info,taxo_path,input_path,output_path)
245
+ params_parser=XML::Parser.file(@defaults_path)
246
+ std_params=params_parser.parse
247
+
248
+
249
+ throw "Invalid input db_info must be a FastaDatabase object" unless db_info.class==FastaDatabase
250
+
251
+ generate_parameter_doc(std_params,output_path,input_path,db_info,taxo_path)
252
+ end
253
+
254
+
255
+
256
+ end