protk 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,10 @@ genv=Constants.new
20
20
 
21
21
  # Setup specific command-line options for this tool. Other options are inherited from SearchTool
22
22
  #
23
- search_tool=SearchTool.new({:msms_search=>true,:background=>true,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
23
+ search_tool=SearchTool.new([:background,:glyco,:database,:explicit_output,:over_write,
24
+ :enzyme,:modifications,:mass_tolerance_units,:mass_tolerance,:strict_monoisotopic_mass,
25
+ :missed_cleavages,:carbamidomethyl,:methionine_oxidation
26
+ ])
24
27
  search_tool.jobid_prefix="x"
25
28
  search_tool.option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
26
29
  search_tool.options.output_suffix="_tandem"
@@ -94,7 +97,13 @@ search_tool.option_parser.on('--cleavage-c-terminal-mod-mass mass') do |mass|
94
97
  search_tool.options.cleavage_c_terminal_mod_mass = mass
95
98
  end
96
99
 
97
- search_tool.option_parser.parse!
100
+ exit unless search_tool.check_options
101
+
102
+ if ( ARGV[0].nil? )
103
+ puts "You must supply an input file"
104
+ puts search_tool.option_parser
105
+ exit
106
+ end
98
107
 
99
108
 
100
109
  # Set search engine specific parameters on the SearchTool object
@@ -15,7 +15,7 @@ require 'libxml'
15
15
 
16
16
  include LibXML
17
17
 
18
- tool=Tool.new({:explicit_output=>false, :background=>true,:over_write=>false})
18
+ tool=Tool.new([:explicit_output, :background,:over_write])
19
19
  tool.option_parser.banner = "Execute a toppas pipeline with a single inputs node\n\nUsage: toppas_pipeline.rb [options] input1 input2 ..."
20
20
 
21
21
  tool.options.outdir = ""
@@ -28,7 +28,13 @@ tool.option_parser.on( '--toppas-file f',"the toppas file to run" ) do |file|
28
28
  tool.options.toppas_file = file
29
29
  end
30
30
 
31
- tool.option_parser.parse!
31
+ exit unless tool.check_options
32
+
33
+ if ( ARGV[0].nil? )
34
+ puts "You must supply an input file"
35
+ puts tool.option_parser
36
+ exit
37
+ end
32
38
 
33
39
  # Obtain a global environment object
34
40
  genv=Constants.new
@@ -15,7 +15,7 @@ require 'protk/bio_sptr_extensions'
15
15
 
16
16
  # Setup specific command-line options for this tool. Other options are inherited from Tool
17
17
  #
18
- tool=Tool.new({:explicit_output=>true})
18
+ tool=Tool.new([:explicit_output])
19
19
  tool.option_parser.banner = "Retrieve information from the Uniprot database given a list of ID's.\n\n\
20
20
  Usage: uniprot_annotation.rb [options] input.tsv"
21
21
 
@@ -29,7 +29,13 @@ tool.option_parser.on( '--fields flds', 'A comma separated list of fields to ex
29
29
  tool.options.fields=flds
30
30
  end
31
31
 
32
- tool.option_parser.parse!
32
+ exit unless tool.check_options
33
+
34
+ if ( ARGV[0].nil? )
35
+ puts "You must supply an input file"
36
+ puts tool.option_parser
37
+ exit
38
+ end
33
39
 
34
40
  # Obtain a global environment object
35
41
  genv=Constants.new
@@ -13,7 +13,7 @@ require 'protk/uniprot_mapper'
13
13
 
14
14
  # Setup specific command-line options for this tool. Other options are inherited from Tool
15
15
  #
16
- tool=Tool.new({:explicit_output=>true})
16
+ tool=Tool.new([:explicit_output])
17
17
 
18
18
  tool.options.id_column=1
19
19
  tool.option_parser.on( '--id-column num', 'Specify a column for ids (default is column 1)' ) do |col|
@@ -27,7 +27,13 @@ end
27
27
 
28
28
  tool.option_parser.banner = "Given a set of IDs convert them to a different type of ID\n\nUsage: uniprot_mapper.rb input_file.txt fromID_type"
29
29
 
30
- tool.option_parser.parse!
30
+ exit unless tool.check_options
31
+
32
+ if ( ARGV[0].nil? )
33
+ puts "You must supply an input file"
34
+ puts tool.option_parser
35
+ exit
36
+ end
31
37
 
32
38
 
33
39
  from_file=ARGV.shift
@@ -14,10 +14,16 @@ require 'spreadsheet'
14
14
 
15
15
  # Setup command-line options for this tool.
16
16
  #
17
- tool=Tool.new({:explicit_output=>true})
17
+ tool=Tool.new([:explicit_output])
18
18
  tool.option_parser.banner = "Convert an xls file to a tab delimited table.\n\nUsage: xls_to_table.rb [options] file1.xls"
19
19
 
20
- tool.option_parser.parse!
20
+ exit unless tool.check_options
21
+
22
+ if ( ARGV[0].nil? )
23
+ puts "You must supply an input file"
24
+ puts tool.option_parser
25
+ exit
26
+ end
21
27
 
22
28
  input_file=ARGV[0]
23
29
 
@@ -29,6 +29,7 @@ class Constants
29
29
 
30
30
  attr :info_level
31
31
  attr :protk_dir
32
+ attr :data_lib_dir
32
33
 
33
34
  # Provides direct access to constants through methods of the same name
34
35
  # This will be used for all constants other than paths
@@ -227,6 +228,7 @@ class Constants
227
228
  #
228
229
  def initialize
229
230
 
231
+ @data_lib_dir="#{File.dirname(__FILE__)}/data"
230
232
  @protk_dir="#{Dir.home}/.protk"
231
233
 
232
234
  if ( ENV['PROTK_INSTALL_DIR']!=nil )
@@ -0,0 +1,29 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
3
+ <msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
4
+ <msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
5
+ <sample_enzyme name="trypsin">
6
+ <specificity cut="KR" no_cut="P" sense="C"/>
7
+ </sample_enzyme>
8
+ <search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
9
+ <search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
10
+ <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
11
+ <aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
12
+ <parameter name="" value=""/>
13
+ </search_summary>
14
+ <spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
15
+ <search_result>
16
+ <search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
17
+ <modification_info>
18
+ <mod_aminoacid_mass position="" mass=""/>
19
+ </modification_info>
20
+ <search_score name="ionscore" value=""/>
21
+ <search_score name="identityscore" value=""/>
22
+ <search_score name="star" value="0"/>
23
+ <search_score name="homologyscore" value=""/>
24
+ <search_score name="expect" value=""/>
25
+ </search_hit>
26
+ </search_result>
27
+ </spectrum_query>
28
+ </msms_run_summary>
29
+ </msms_pipeline_analysis>
@@ -23,6 +23,7 @@ class MascotUtil
23
23
  end
24
24
 
25
25
  def self.remove_charge_from_title_string(tstring)
26
+
26
27
  if ( tstring=~/(.*)\..*?\..*?\.$/)
27
28
  return tstring.chop
28
29
  end
@@ -30,6 +31,10 @@ class MascotUtil
30
31
  if ( tstring=~/(.*)\..*?\..*?\.\d$/)
31
32
  return tstring.chop!.chop
32
33
  end
34
+
35
+ if ( tstring=~/(.*)\..*?\..*?$/)
36
+ return tstring
37
+ end
33
38
 
34
39
  throw "Unrecognised title string format #{tstring}"
35
40
 
@@ -17,9 +17,7 @@ class ProphetTool < SearchTool
17
17
 
18
18
 
19
19
  # Initializes the commandline options
20
- def initialize(option_support={})
21
- option_support[:prefix_suffix]=true;
22
- option_support[:over_write]=true;
20
+ def initialize(option_support=[:prefix_suffix,:over_write])
23
21
 
24
22
  super(option_support)
25
23
 
@@ -18,11 +18,10 @@ class SearchTool < Tool
18
18
  # Initializes commandline options common to all search tools.
19
19
  # Individual search tools can add their own options, but should use Capital letters to avoid conflicts
20
20
  #
21
- def initialize(option_support={})
21
+ def initialize(option_support=[])
22
22
  super(option_support)
23
23
 
24
- if (option_support[:database]==true)
25
-
24
+ if (option_support.include? :database)
26
25
  @options.database = "sphuman"
27
26
  @option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
28
27
  options.database = dbname
@@ -30,78 +29,45 @@ class SearchTool < Tool
30
29
 
31
30
  end
32
31
 
33
- if ( option_support[:msms_search_detailed_options]==true)
34
- @options.allowed_charges="1+,2+,3+"
35
- @option_parser.on( '--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+' ) do |ac|
36
- @options.allowed_charges = ac
37
- end
38
-
32
+ if ( option_support.include? :enzyme )
39
33
  @options.enzyme = "Trypsin"
40
34
  @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
41
35
  @options.enzyme=enz
42
36
  end
43
-
44
- @options.instrument = "ESI-QUAD-TOF"
45
- @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
46
- @options.instrument=instrument
47
- end
48
-
49
-
50
- @options.var_mods = ""
51
- @option_parser.on('--var-mods vm', 'Variable modifications (Overrides -g)' ) do |vm|
52
- @options.var_mods = vm
53
- end
54
-
55
- @options.fix_mods = ""
56
- @option_parser.on('--fix-mods fm', 'Fixed modifications (Overrides -c and -m options)' ) do |fm|
57
- @options.fix_mods = fm
58
- end
59
-
60
- @options.searched_ions = ""
61
- @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)' ) do |si|
62
- @options.searched_ions = si
63
- end
64
-
65
-
66
- @options.fragment_tolu="Da"
67
- @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da' ) do |tolu|
68
- @options.fragment_tolu = tolu
69
- end
70
-
71
- @options.precursor_tolu="ppm"
72
- @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
73
- @options.precursor_tolu = tolu
74
- end
75
-
76
- @options.email=""
77
- @option_parser.on('--email em', 'User email.') do |em|
78
- @options.email = em
79
- end
37
+ end
80
38
 
81
- @options.username=""
82
- @option_parser.on('--username un', 'Username.') do |un|
83
- @options.username = un
84
- end
39
+ if ( option_support.include? :modifications )
40
+ @options.var_mods = ""
41
+ @option_parser.on('--var-mods vm', 'Variable modifications' ) do |vm|
42
+ @options.var_mods = vm
43
+ end
85
44
 
86
- @options.mascot_server="www.matrixscience.com"
87
- @option_parser.on( '-S', '--server url', 'The url to the cgi directory of the mascot server' ) do |url|
88
- @options.mascot_server=url
89
- end
45
+ @options.fix_mods = ""
46
+ @option_parser.on('--fix-mods fm', 'Fixed modifications' ) do |fm|
47
+ @options.fix_mods = fm
48
+ end
49
+ end
90
50
 
91
- @options.mascot_server=""
92
- @option_parser.on('--username un', 'Username.') do |un|
93
- @options.username = un
94
- end
51
+ if ( option_support.include? :instrument )
52
+ @options.instrument = "ESI-QUAD-TOF"
53
+ @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
54
+ @options.instrument=instrument
55
+ end
56
+ end
95
57
 
96
- @options.num_peaks_for_multi_isotope_search="0"
97
- @option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
98
- @options.num_peaks_for_multi_isotope_search=np
99
- end
58
+ if ( option_support.include? :mass_tolerance_units )
59
+ @options.fragment_tolu="Da"
60
+ @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da' ) do |tolu|
61
+ @options.fragment_tolu = tolu
62
+ end
63
+
64
+ @options.precursor_tolu="ppm"
65
+ @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
66
+ @options.precursor_tolu = tolu
67
+ end
68
+ end
100
69
 
101
-
102
- end
103
-
104
- if ( option_support[:msms_search]==true)
70
+ if ( option_support.include? :mass_tolerance )
105
71
  @options.fragment_tol=0.65
106
72
  @option_parser.on( '-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65' ) do |tol|
107
73
  @options.fragment_tol = tol
@@ -111,49 +77,72 @@ class SearchTool < Tool
111
77
  @option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
112
78
  @options.precursor_tol = tol.to_f
113
79
  end
114
-
115
- @options.respect_precursor_charges=false
116
- @option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
117
- @options.respect_precursor_charges=true
118
- end
119
-
80
+ end
81
+
82
+ if ( option_support.include? :precursor_search_type )
120
83
  @options.precursor_search_type="monoisotopic"
121
84
  @option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
122
85
  @options.precursor_search_type = type
123
86
  end
124
-
87
+ end
88
+
89
+ if ( option_support.include? :strict_monoisotopic_mass )
125
90
  @options.strict_monoisotopic_mass=false
126
91
  @option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
127
92
  @options.strict_monoisotopic_mass=true
128
93
  end
129
-
94
+ end
95
+
96
+ if ( option_support.include? :missed_cleavages )
130
97
  @options.missed_cleavages=2
131
98
  @option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
132
99
  @options.missed_cleavages = num
133
100
  end
134
-
135
- @options.carbamidomethyl=true
136
- @option_parser.on( '-c', '--no-carbamidomethyl', 'Run a search without a carbamidomethyl fixed modification' ) do
137
- @options.carbamidomethyl = false
138
- end
101
+ end
139
102
 
140
- @options.methionine_oxidation=false
141
- @option_parser.on( '-m', '--methionine-oxidation', 'Run a search with oxidised methionines as a variable modification' ) do
142
- @options.methionine_oxidation = true
103
+ if ( option_support.include? :respect_precursor_charges )
104
+ @options.respect_precursor_charges=false
105
+ @option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
106
+ @options.respect_precursor_charges=true
143
107
  end
144
-
145
108
  end
146
-
147
- if ( option_support[:glyco]==true)
148
109
 
110
+ if ( option_support.include? :searched_ions )
111
+ @options.searched_ions = ""
112
+ @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)' ) do |si|
113
+ @options.searched_ions = si
114
+ end
115
+ end
116
+
117
+ if ( option_support.include? :num_peaks_for_multi_isotope_search )
118
+ @options.num_peaks_for_multi_isotope_search="0"
119
+ @option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
120
+ @options.num_peaks_for_multi_isotope_search=np
121
+ end
122
+ end
123
+
124
+ if ( option_support.include? :glyco)
149
125
  @options.glyco = false
150
126
  @option_parser.on( '-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models' ) do
151
127
  @options.glyco = true
152
128
  end
129
+ end
153
130
 
131
+ if ( option_support.include? :methionine_oxidation)
132
+ @options.methionine_oxidation = false
133
+ @option_parser.on( '-m', '--methionineo', 'Expect Oxidised Methionine modifications as variable mod in a search' ) do
134
+ @options.methionine_oxidation = true
135
+ end
136
+ end
137
+
138
+ if ( option_support.include? :carbamidomethyl)
139
+ @options.carbamidomethyl = false
140
+ @option_parser.on( '-c', '--carbamidomethyl', 'Expect Carbamidomethyl C modifications as fixed mod in a search' ) do
141
+ @options.carbamidomethyl = true
142
+ end
154
143
  end
155
144
 
156
- if ( option_support[:maldi]==true)
145
+ if ( option_support.include? :maldi)
157
146
  @options.maldi=false
158
147
  @option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
159
148
  @options.maldi=true
@@ -143,8 +143,9 @@ tpp_download_file = download_task tpp_url, tpp_packagefile
143
143
  # Build
144
144
  file tpp_installed_file => [@build_dir,tpp_download_file] do
145
145
  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
146
- sh %{eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir});cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
147
- sh %{eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir});cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
146
+ use_perl_locallib_cmd="eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
147
+ sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
148
+ sh %{#{use_perl_locallib_cmd};cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
148
149
 
149
150
  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
150
151
 
@@ -172,8 +173,14 @@ file tpp_installed_file => [@build_dir,tpp_download_file] do
172
173
  f.write subs_text
173
174
  end
174
175
  end
175
- sh %{eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir});cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make; make install}
176
-
176
+ build_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make"
177
+ install_cmd = "#{use_perl_locallib_cmd};cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make install"
178
+ env.log build_cmd, :info
179
+ sh %{#{build_cmd}}
180
+ env.log "Done Building", :info
181
+ env.log install_cmd, :info
182
+ sh %{#{install_cmd}}
183
+ env.log "Done Installing", :info
177
184
  end
178
185
 
179
186
  task :tpp => tpp_installed_file
@@ -241,7 +248,7 @@ task :blast => blast_installed_file
241
248
  #
242
249
  # MSGFPlus
243
250
  #
244
- msgfplus_version="20130227"
251
+ msgfplus_version="20130410"
245
252
  msgfplus_packagefile="MSGFPlus.#{msgfplus_version}.zip"
246
253
  msgfplus_url="http://proteomics.ucsd.edu/Downloads/MSGFPlus.#{msgfplus_version}.zip"
247
254
  msgfplus_installed_file="#{env.msgfplusjar}"