protk 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -23,6 +23,8 @@ On OSX
23
23
  rvm install 1.9.3 --with-gcc=clang
24
24
  rvm use 1.9.3
25
25
  gem install protk
26
+ protk_setup.rb package_manager
27
+ protk_setup.rb system_packages
26
28
  protk_setup.rb all
27
29
 
28
30
  On Linux
@@ -30,7 +32,7 @@ On Linux
30
32
  rvm install 1.9.3
31
33
  rvm use 1.9.3
32
34
  gem install protk
33
- sudo protk_setup.rb system_dependencies
35
+ sudo protk_setup.rb system_packages
34
36
  protk_setup.rb all
35
37
 
36
38
 
@@ -63,25 +65,19 @@ Although all the protk tools can be run directly from the command-line a nicer w
63
65
  2. Make the protk tools available to galaxy.
64
66
  - Create a directory for galaxy tool dependencies. It's best if this directory is outside the galaxy-dist directory. I usually create a directory called `tool_depends` alongside `galaxy-dist`.
65
67
  - Open the file `universe_wsgi.ini` in the `galaxy-dist` directory and set the configuration option `tool_dependency_dir` to point to the directory you just created
66
- - Create a symbolic link from the protk directory to the appropriate subdirectory of `<tool_dependency_dir>`. In the instructions below substitute 1.0.0 for the version number of [the protk galaxy tools](https://bitbucket.org/iracooke/protk-toolshed "protk galaxy tools") you are using.
68
+ - Create a protkgem directory inside `<tool_dependency_dir>`.
67
69
 
68
70
  cd <tool_dependency_dir>
69
- mkdir protk
70
- cd protk
71
- mkdir 1.0.0
72
- ln -s 1.0.0 default
73
- ln -s <path_where_protk_was_installed> 1.0.0/bin
71
+ mkdir protkgem
72
+ cd protkgem
73
+ mkdir rvm193
74
+ ln -s rvm193 default
75
+ cd default
76
+ ln -s ~/.protk/galaxy/env.sh env.sh
74
77
 
75
- 3. Configure the shell in which galaxy tools will run.
76
- - Create a symlink to the `env.sh` file so it will be sourced by galaxy as it runs each tool. This file should have been autogenerated by `setup.sh`
78
+ 3. Install any of the Proteomics tools that depend on protk from the galaxy toolshed.
77
79
 
78
- ln -s <path_where_protk_was_installed>/env.sh 1.0.0/env.sh
79
-
80
- 4. Install the protk galaxy wrapper tools from the galaxy toolshed. You will need to restart galaxy after doing so for the new datatype sniffers to be activated.
81
-
82
- 5. After installing the protk wrapper tools from the toolshed it will be necessary to tell those tools about databases you have installed. Use the manage_db.rb tool to do this. To do this, first edit config.yml to make sure the `galaxy_root` setting points to the root directory of your galaxy installation (this will allow `manage_db.rb` to update the `pepxml_databases.loc` file inside `galaxy_root/tool-data`). The run the following command and then restart the galaxy server;
83
-
84
- manage_db.rb list -G
80
+ 4. After installing the protk wrapper tools from the toolshed it will be necessary to tell those tools about databases you have installed. Use the manage_db.rb tool to do this.
85
81
 
86
82
 
87
83
 
@@ -1,37 +1,32 @@
1
+ #!/usr/bin/env ruby
1
2
  #
2
3
  # This file is part of protk
3
4
  # Created by Ira Cooke 21/3/2012
4
5
  #
5
6
  # A wrapper for the OpenMS FeatureFinder tools (FeatureFinderCentroided and FeatureFinderIsotopeWavelet)
6
- #
7
- #
8
- #!/bin/sh
9
- if [ -z "$PROTK_RUBY_PATH" ] ; then
10
- PROTK_RUBY_PATH=`which ruby`
11
- fi
12
-
13
- eval 'exec "$PROTK_RUBY_PATH" $PROTK_RUBY_FLAGS -rubygems -x -S $0 ${1+"$@"}'
14
- echo "The 'exec \"$PROTK_RUBY_PATH\" -x -S ...' failed!" >&2
15
- exit 1
16
- #! ruby
17
- #
18
7
 
19
- $LOAD_PATH.unshift("#{File.dirname(__FILE__)}/lib/")
8
+ require 'protk/constants'
9
+ require 'protk/command_runner'
10
+ require 'protk/tool'
11
+ require 'protk/openms_defaults'
12
+ require 'libxml'
20
13
 
21
- require 'constants'
22
- require 'command_runner'
23
- require 'tool'
14
+ include LibXML
24
15
 
25
- # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
26
- #
27
- tool=Tool.new({:explicit_output=>true, :background=>true,:over_write=>true})
16
+ tool=Tool.new({:explicit_output=>true, :background=>true,:over_write=>true,:prefix_suffix=>true})
28
17
  tool.option_parser.banner = "Find molecular features on a set of input files.\n\nUsage: feature_finder.rb [options] file1.mzML file2.mzML ..."
29
18
 
30
- tool.options.profile = false
31
- tool.option_parser.on( '--profile',"Input files are profile data" ) do
32
- tool.options.profile = true
19
+ tool.options.intensity_type = "ref"
20
+ tool.option_parser.on( '--intensity-type type',"method used to calculate intensities (ref,trans,corrected). Default = ref. See OpenMS documentation for details" ) do |type|
21
+ tool.options.intensity_type = type
33
22
  end
34
23
 
24
+ tool.options.intensity_threshold = "3"
25
+ tool.option_parser.on( '--intensity-threshold thresh',"discard features below this intensity (Default=3). Set to -1 to retain all detected features" ) do |thresh|
26
+ tool.options.intensity_threshold = thresh
27
+ end
28
+
29
+
35
30
  tool.option_parser.parse!
36
31
 
37
32
  # Obtain a global environment object
@@ -42,32 +37,44 @@ def run_ff(genv,tool,cmd,output_path,jobid)
42
37
  genv.log("Skipping analysis on existing file #{output_path}",:warn)
43
38
  else
44
39
  jobscript_path="#{output_path}.pbs.sh"
45
- job_params={:jobid=>jobid, :vmem=>"12Gb", :queue => "sixteen"}
40
+ job_params={:jobid=>jobid, :vmem=>"14Gb", :queue => "sixteen"}
46
41
  code=tool.run(cmd,genv,job_params,jobscript_path)
47
42
  throw "Command failed with exit code #{code}" unless code==0
48
43
  end
49
44
  end
50
45
 
46
+ def generate_ini(tool,out_path)
47
+ base_ini_file=OpenMSDefaults.new.featurefinderisotopewavelet
48
+ parser = XML::Parser.file(base_ini_file)
49
+ doc = parser.parse
50
+ intensity_threshold_node = doc.find('//ITEM[@name="intensity_threshold"]')[0]
51
+ intensity_type_node = doc.find('//ITEM[@name="intensity_type"]')[0]
52
+ intensity_threshold_node['value']=tool.intensity_threshold
53
+ intensity_type_node['value']=tool.intensity_type
54
+ doc.save(out_path)
55
+ end
51
56
 
52
57
  throw "Cannot use explicit output in combination with multiple input files" if ( tool.explicit_output && ARGV.length>1)
53
- throw "The profile option is not yet implemented" if ( tool.profile )
54
58
 
55
- ini_file="#{File.dirname(__FILE__)}/params/FeatureFinderCentroided.ini"
59
+ ini_file="#{Pathname.new(ARGV[0]).dirname.realpath.to_s}/feature_finder.ini"
60
+
61
+ generate_ini(tool,ini_file)
56
62
 
57
63
  ARGV.each do |filen|
58
64
  input_file=filen.chomp
59
65
  throw "Input must be an mzML file" unless input_file=~/\.mzML$/
60
66
 
61
67
  input_basename=input_file.gsub(/\.mzML$/,'')
62
- output_filename=tool.explicit_output
63
- output_file="#{input_basename}.featureXML" if output_filename==nil
64
-
68
+ output_dir=Pathname.new(input_basename).dirname.realpath.to_s
69
+ output_base=Pathname.new(input_basename).basename.to_s
70
+ output_file = "#{output_dir}/#{tool.output_prefix}#{output_base}#{tool.output_suffix}.featureXML"
71
+
65
72
  if ( tool.over_write || !Pathname.new(output_file).exist? )
66
- output_dir=Pathname.new(output_file).dirname.realpath.to_s
67
73
  output_base_filename=Pathname.new(output_file).basename.to_s
68
74
  cmd=""
69
- cmd<<"#{genv.openms_root}/FeatureFinderCentroided -in #{Pathname.new(input_file).realpath.to_s} -out #{output_dir}/#{output_base_filename} -ini #{ini_file}"
70
-
75
+ cmd<<"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.protk/tools/openms/lib;
76
+ #{genv.featurefinderisotopewavelet} -in #{Pathname.new(input_file).realpath.to_s} -out #{output_dir}/#{output_base_filename} -ini #{ini_file}"
77
+
71
78
  run_ff(genv,tool,cmd,output_file,tool.jobid_from_filename(input_basename))
72
79
 
73
80
  else
data/bin/gffmerge.rb ADDED
@@ -0,0 +1,199 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Original python version created by Max Grant
5
+ # Translated to ruby by Ira Cooke 29/1/2013
6
+ #
7
+ #
8
+
9
+ require 'protk/constants'
10
+ require 'protk/tool'
11
+ require 'protk/fastadb'
12
+ require 'libxml'
13
+ require 'bio'
14
+
15
+ include LibXML
16
+
17
+ tool=Tool.new(:explicit_output=>true)
18
+ tool.option_parser.banner = "Create a gff containing peptide observations.\n\nUsage: gffmerge.rb "
19
+
20
+
21
+ tool.options.gff_predicted=nil
22
+ tool.option_parser.on( '-g filename','--gff filename', 'Predicted Data (GFF3 Format)' ) do |file|
23
+ tool.options.gff_predicted=file
24
+ end
25
+
26
+ tool.options.protxml=nil
27
+ tool.option_parser.on( '-p filename','--protxml filename', 'Observed Data (ProtXML Format)' ) do |file|
28
+ tool.options.protxml=file
29
+ end
30
+
31
+ tool.options.sixframe=nil
32
+ tool.option_parser.on( '-t filename','--sixframe filename', 'Sixframe Translations (Fasta Format)' ) do |file|
33
+ tool.options.sixframe=file
34
+ end
35
+
36
+ tool.options.skip_fasta_indexing=false
37
+ tool.option_parser.on('--skip-index','Don\'t index sixframe translations (Index should already exist)') do
38
+ tool.options.skip_fasta_indexing=true
39
+ end
40
+
41
+ tool.options.peptide_probability_threshold=0.95
42
+ tool.option_parser.on('--threshold prob','Peptide Probability Threshold (Default 0.95)') do |thresh|
43
+ tool.options.peptide_probability_threshold=thresh.to_f
44
+ end
45
+
46
+ # Checking for required options
47
+ begin
48
+ tool.option_parser.parse!
49
+ mandatory = [:protxml,:sixframe]
50
+ missing = mandatory.select{ |param| tool.send(param).nil? }
51
+ if not missing.empty?
52
+ puts "Missing options: #{missing.join(', ')}"
53
+ puts tool.option_parser
54
+ exit
55
+ end
56
+ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
57
+ puts $!.to_s
58
+ puts tool.option_parser
59
+ exit
60
+ end
61
+
62
+ gff_out_file="merged.gff"
63
+ if ( tool.explicit_output != nil)
64
+ gff_out_file=tool.explicit_output
65
+ end
66
+
67
+ gff_db = Bio::GFF.new()
68
+ if ( tool.gff_predicted !=nil)
69
+ p "Reading source gff file"
70
+ gff_db = Bio::GFF::GFF3.new(File.open(tool.gff_predicted))
71
+ # p gff_db.records[1].attributes
72
+ # exit
73
+ end
74
+
75
+ f = open(gff_out_file,'w+')
76
+ gff_db.records.each { |rec|
77
+ f.write(rec.to_s)
78
+ }
79
+
80
+ p "Parsing proteins from protxml"
81
+ protxml_parser=XML::Parser.file(tool.protxml)
82
+ protxml_doc=protxml_parser.parse
83
+ proteins = protxml_doc.find('.//protxml:protein','protxml:http://regis-web.systemsbiology.net/protXML')
84
+
85
+ p "Indexing sixframe translations"
86
+ db_filename = Pathname.new(tool.sixframe).realpath.to_s
87
+
88
+ if tool.skip_fasta_indexing
89
+ orf_lookup = FastaDB.new(db_filename)
90
+ else
91
+ orf_lookup = FastaDB.create(db_filename,db_filename,'prot')
92
+ end
93
+
94
+ p "Aligning peptides and writing GFF data..."
95
+ low_prob = 0
96
+ skipped = 0
97
+ peptide_count = 0
98
+ protein_count = 0
99
+ total_peptides = 0
100
+ for prot in proteins
101
+ prot_prob = prot['probability']
102
+ indis_proteins = prot.find('protxml:indistinguishable_protein','protxml:http://regis-web.systemsbiology.net/protXML')
103
+ prot_names = [prot['protein_name']]
104
+ for protein in indis_proteins
105
+ prot_names += [protein['protein_name']]
106
+ end
107
+
108
+ peptides = prot.find('protxml:peptide','protxml:http://regis-web.systemsbiology.net/protXML')
109
+
110
+ for protein_name in prot_names
111
+ protein_count += 1
112
+ prot_qualifiers = {"source" => "OBSERVATION", "score" => prot_prob, "ID" => 'pr' + protein_count.to_s}
113
+ begin
114
+ p "Looking up #{protein_name}"
115
+ orf = orf_lookup.get_by_id protein_name
116
+ if ( orf == nil)
117
+ raise KeyError
118
+ end
119
+
120
+
121
+ position = orf.identifiers.description.split('|').collect { |pos| pos.to_i }
122
+
123
+ if ( position.length != 2 )
124
+ raise EncodingError
125
+ end
126
+ orf_name = orf.entry_id.scan(/lcl\|(.*)/)[0][0]
127
+ frame=orf_name.scan(/frame_(\d)/)[0][0]
128
+ scaffold_name = orf_name.scan(/(scaffold_\d+)/)[0][0]
129
+
130
+ # strand = frame > 3 ? -1 : 1
131
+ strand = +1
132
+
133
+ prot_id = "pr#{protein_count.to_s}"
134
+ prot_attributes = [["ID",prot_id]]
135
+ prot_gff_line = Bio::GFF::GFF3::Record.new(seqid = scaffold_name,source="OBSERVATION",feature_type="protein",
136
+ start_position=position[0],end_position=position[1],score=prot_prob,strand=strand,frame=frame,attributes=prot_attributes)
137
+ gff_db.records += [prot_gff_line]
138
+
139
+ prot_seq = orf.aaseq.to_s
140
+ throw "Not amino_acids" if prot_seq != orf.seq.to_s
141
+
142
+
143
+ for peptide in peptides
144
+ pprob = peptide['nsp_adjusted_probability'].to_f
145
+ if ( pprob >= tool.peptide_probability_threshold )
146
+ total_peptides += 1
147
+ pep_seq = peptide['peptide_sequence']
148
+
149
+
150
+ start_indexes = [0]
151
+ prot_seq.scan /#{pep_seq}/ do |match|
152
+ start_indexes << prot_seq.index(match,start_indexes.last)
153
+ end
154
+ start_indexes.delete_at(0)
155
+
156
+ # Now convert peptide coordinates to genome coordinates
157
+ # And create gff lines for each match
158
+ start_indexes.collect do |si|
159
+ pep_genomic_start = position[0] + 3*si
160
+ pep_genomic_end = pep_genomic_start + 3*pep_seq.length
161
+ peptide_count+=1
162
+ pep_attributes = [["ID","p#{peptide_count.to_s}"],["Parent",prot_id]]
163
+ pep_gff_line = Bio::GFF::GFF3::Record.new(seqid = scaffold_name,source="OBSERVATION",
164
+ feature_type="peptide",start_position=pep_genomic_start,end_position=pep_genomic_end,score=pprob,
165
+ strand=strand,frame=frame,attributes=pep_attributes)
166
+ gff_db.records += [pep_gff_line]
167
+ # p pep_gff_line
168
+
169
+ end
170
+
171
+
172
+ end
173
+ end
174
+
175
+ rescue KeyError,EncodingError
176
+ skipped+=0
177
+ p "Lookup failed for #{protein_name}"
178
+ end
179
+
180
+ # p orf_name
181
+ # p prot_gff_line
182
+ # exit
183
+ end
184
+
185
+ end
186
+
187
+ f = open(gff_out_file,'w+')
188
+ gff_db.records.each { |rec|
189
+ f.write(rec.to_s)
190
+ }
191
+ f.close
192
+
193
+ p "Finished."
194
+ p "Proteins: #{protein_count}"
195
+ p "Skipped Decoys: #{skipped}"
196
+ p "Total Peptides: #{total_peptides}"
197
+ p "Peptides Written: #{total_peptides - low_prob}"
198
+ p "Peptides Culled: #{low_prob}"
199
+ exit(0)
data/bin/protk_setup.rb CHANGED
@@ -16,16 +16,38 @@ require 'pp'
16
16
  # Setup specific command-line options for this tool. Other options are inherited from Tool
17
17
  #
18
18
  tool=SetupTool.new
19
- if ( tool.option_parser.banner=="")
20
- tool.option_parser.banner = "Post install tasks for protk.\nUsage: protk_setup.rb [options] toolname"
21
- end
19
+ tool.option_parser.banner = "Post install tasks for protk.\nUsage: protk_setup.rb [options] toolname"
22
20
 
23
21
  tool.option_parser.parse!
24
22
 
23
+ if ( ARGV.length < 1)
24
+ p "You must supply a setup task [all,system_packages]"
25
+ p tool.option_parser
26
+ exit
27
+ end
28
+
29
+ # Checking for required options
30
+ # begin
31
+ # tool.option_parser.parse!
32
+ # mandatory = [:gff_predicted, :protxml,:sixframe]
33
+ # missing = mandatory.select{ |param| tool.send(param).nil? }
34
+ # if not missing.empty?
35
+ # puts "Missing options: #{missing.join(', ')}"
36
+ # puts tool.option_parser
37
+ # exit
38
+ # end
39
+ # rescue OptionParser::InvalidOption, OptionParser::MissingArgument
40
+ # puts $!.to_s
41
+ # puts tool.option_parser
42
+ # exit
43
+ # end
44
+
45
+
25
46
  # Create install directory if it doesn't already exist
26
47
  #
27
48
  env=Constants.new
28
49
 
29
50
  ARGV.each do |toolname|
30
- tool.install toolname
51
+ p toolname
52
+ tool.install toolname
31
53
  end
data/bin/sixframe.rb ADDED
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Original python version created by Max Grant
5
+ # Translated to ruby by Ira Cooke 7/2/2013
6
+ #
7
+ #
8
+
9
+ require 'protk/constants'
10
+ require 'protk/tool'
11
+ require 'bio'
12
+
13
+ tool=Tool.new(:explicit_output=>true)
14
+ tool.option_parser.banner = "Create a sixframe translation of a genome.\n\nUsage: sixframe.rb [options] genome.fasta"
15
+
16
+ tool.option_parser.parse!
17
+
18
+ inname=ARGV.shift
19
+
20
+ outfile=File.open("#{inname}.translated.fasta",'w')
21
+ if ( tool.explicit_output != nil)
22
+ outfile=File.open(tool.explicit_output,'w')
23
+ end
24
+
25
+
26
+ file = Bio::FastaFormat.open(inname)
27
+
28
+ file.each do |entry|
29
+ length = entry.naseq.length
30
+ (1...7).each do |frame|
31
+ translated_seq= entry.naseq.translate(frame)
32
+ orfs=translated_seq.split("*")
33
+ orf_index = 0
34
+ position = ((frame - 1) % 3) + 1
35
+
36
+ oi=0
37
+ orfs.each do |orf|
38
+ oi+=1
39
+ if ( orf.length > 20 )
40
+ position_start = position
41
+ position_end = position_start + orf.length*3 -1
42
+
43
+ if ( frame > 3)
44
+ position_start = length - (position - 1)
45
+ position_end = position_start - orf.length * 3 + 1
46
+ end
47
+
48
+
49
+ # Create accession compliant with NCBI naming standard
50
+ # See http://www.ncbi.nlm.nih.gov/books/NBK7183/?rendertype=table&id=ch_demo.T5
51
+ ncbi_scaffold_id = entry.entry_id.gsub('|','_').gsub(' ','_')
52
+ ncbi_accession = "lcl|#{ncbi_scaffold_id}_frame_#{frame}_orf_#{oi}"
53
+
54
+ # Output in fasta format
55
+ outfile.write(">#{ncbi_accession} #{position_start}|#{position_end}\n#{orf}\n")
56
+
57
+ end
58
+ position += orf.length*3+3
59
+ end
60
+
61
+ end
62
+ end
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 30/01/13
5
+ #
6
+ # A wrapper for the OpenMS tool ExecutePipeline.
7
+ # Executes simple toppas pipelines, automatically creating the trf file.
8
+
9
+ require 'protk/constants'
10
+ require 'protk/command_runner'
11
+ require 'protk/tool'
12
+ require 'protk/openms_defaults'
13
+ require 'tempfile'
14
+ require 'libxml'
15
+
16
+ include LibXML
17
+
18
+ tool=Tool.new({:explicit_output=>false, :background=>true,:over_write=>false})
19
+ tool.option_parser.banner = "Execute a toppas pipeline with a single inputs node\n\nUsage: toppas_pipeline.rb [options] input1 input2 ..."
20
+
21
+ tool.options.outdir = ""
22
+ tool.option_parser.on( '--outdir dir',"save outputs to dir" ) do |dir|
23
+ tool.options.outdir = dir
24
+ end
25
+
26
+ tool.options.toppas_file = ""
27
+ tool.option_parser.on( '--toppas-file f',"the toppas file to run" ) do |file|
28
+ tool.options.toppas_file = file
29
+ end
30
+
31
+ tool.option_parser.parse!
32
+
33
+ # Obtain a global environment object
34
+ genv=Constants.new
35
+
36
+ def run_pipeline(genv,tool,cmd,output_path,jobid)
37
+ jobscript_path="#{output_path}.pbs.sh"
38
+ job_params={:jobid=>jobid, :vmem=>"14Gb", :queue => "sixteen"}
39
+ code=tool.run(cmd,genv,job_params,jobscript_path)
40
+ throw "Command failed with exit code #{code}" unless code==0
41
+ end
42
+
43
+ def generate_trf(input_files,out_path)
44
+ p OpenMSDefaults.new.trf_path
45
+ parser=XML::Parser.file(OpenMSDefaults.new.trf_path)
46
+ doc=parser.parse
47
+ itemlist_node=doc.find('/PARAMETERS/NODE/ITEMLIST')[0]
48
+
49
+ input_files.each do |f|
50
+
51
+ mnode=XML::Node.new('LISTITEM')
52
+ mnode["value"]="file://#{Pathname.new(f).realpath.to_s}"
53
+
54
+ itemlist_node << mnode
55
+ end
56
+ p out_path
57
+ doc.save(out_path)
58
+ end
59
+
60
+ throw "outdir is a required parameter" if tool.outdir==""
61
+ throw "toppas-file is a required parameter" if tool.toppas_file==""
62
+ throw "outdir must exist" unless Dir.exist?(tool.outdir)
63
+
64
+ trf_path = "#{tool.toppas_file}.trf"
65
+
66
+ generate_trf(ARGV,trf_path)
67
+
68
+ cmd=""
69
+ cmd<<"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.protk/tools/openms/lib;
70
+ #{genv.executepipeline} -in #{Pathname.new(tool.toppas_file).realpath.to_s} -out_dir #{Pathname.new(tool.outdir).realpath.to_s} -resource_file #{Pathname.new(trf_path).realpath.to_s}"
71
+
72
+ run_pipeline(genv,tool,cmd,tool.outdir,tool.jobid_from_filename(tool.toppas_file))
73
+
@@ -121,15 +121,6 @@ class Constants
121
121
  def omssa2pepxml
122
122
  return "#{self.omssa_root}/omssa2pepXML"
123
123
  end
124
-
125
- def openms_root
126
- path=@env['openms_root']
127
- if ( path =~ /^\// )
128
- return path
129
- else
130
- return "#{@protk_dir}/#{@env['openms_root']}"
131
- end
132
- end
133
124
 
134
125
  def msgfplus_root
135
126
  path=@env['msgfplus_root']
@@ -161,6 +152,22 @@ class Constants
161
152
  return "#{self.pwiz_root}/msconvert"
162
153
  end
163
154
 
155
+ def openms_root
156
+ path=@env['openms_root']
157
+ if ( path =~ /^\//)
158
+ return path
159
+ else
160
+ return "#{@protk_dir}/#{@env['openms_root']}"
161
+ end
162
+ end
163
+
164
+ def featurefinderisotopewavelet
165
+ return "#{self.openms_root}/bin/FeatureFinderIsotopeWavelet"
166
+ end
167
+
168
+ def executepipeline
169
+ return "#{self.openms_root}/bin/ExecutePipeline"
170
+ end
164
171
 
165
172
  def protein_database_root
166
173
  path=@env['protein_database_root']
@@ -187,6 +194,10 @@ class Constants
187
194
  def makeblastdb
188
195
  return "#{self.blast_root}/bin/makeblastdb"
189
196
  end
197
+
198
+ def searchblastdb
199
+ return "#{self.blast_root}/bin/blastdbcmd"
200
+ end
190
201
 
191
202
  def log_file
192
203
  path=@env['log_file']
@@ -209,10 +220,18 @@ class Constants
209
220
  default_config_yml = YAML.load_file "#{File.dirname(__FILE__)}/data/default_config.yml"
210
221
  throw "Unable to read the config file at #{File.dirname(__FILE__)}/data/default_config.yml" unless default_config_yml!=nil
211
222
 
212
- @env=default_config_yml
223
+ user_config_yml = nil
224
+ user_config_yml = YAML.load_file "#{@protk_dir}/config.yml" if File.exist? "#{@protk_dir}/config.yml"
225
+ if ( user_config_yml !=nil )
226
+ @env = default_config_yml.merge user_config_yml
227
+ else
228
+ @env=default_config_yml
229
+ end
230
+
213
231
  throw "No data found in config file" unless @env!=nil
214
232
  @info_level=default_config_yml['message_level']
215
233
 
234
+
216
235
 
217
236
  end
218
237
 
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="ISO-8859-1"?>
2
+ <PARAMETERS version="1.3" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
+ <NODE name="1" description="">
4
+ <ITEMLIST name="url_list" type="string" description="">
5
+ </ITEMLIST>
6
+ </NODE>
7
+ </PARAMETERS>
@@ -0,0 +1,26 @@
1
+ <?xml version="1.0" encoding="ISO-8859-1"?>
2
+ <PARAMETERS version="1.3" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
+ <NODE name="FeatureFinderIsotopeWavelet" description="Detects two-dimensional features in LC-MS data.">
4
+ <ITEM name="version" value="1.9.0" type="string" description="Version of the tool that generated this parameters file." tags="advanced" />
5
+ <NODE name="1" description="Instance &apos;1&apos; section for &apos;FeatureFinderIsotopeWavelet&apos;">
6
+ <ITEM name="in" value="" type="string" description="input file" tags="input file,required" restrictions="*.mzML" />
7
+ <ITEM name="out" value="" type="string" description="output file" tags="output file,required" restrictions="*.featureXML" />
8
+ <ITEM name="log" value="" type="string" description="Name of log file (created only when specified)" tags="advanced" />
9
+ <ITEM name="debug" value="0" type="int" description="Sets the debug level" tags="advanced" />
10
+ <ITEM name="threads" value="1" type="int" description="Sets the number of threads allowed to be used by the TOPP tool" />
11
+ <ITEM name="no_progress" value="false" type="string" description="Disables progress logging to command line" tags="advanced" restrictions="true,false" />
12
+ <ITEM name="test" value="false" type="string" description="Enables the test mode (needed for internal use only)" tags="advanced" restrictions="true,false" />
13
+ <NODE name="algorithm" description="Algorithm section">
14
+ <ITEM name="max_charge" value="3" type="int" description="The maximal charge state to be considered." restrictions="1:" />
15
+ <ITEM name="intensity_threshold" value="3" type="float" description="The final threshold t&apos; is build upon the formula: t&apos; = av+t*sd, where t is the intensity_threshold, av the average intensity within the wavelet transformed signal and sd the standard deviation of the transform. If you set intensity_threshold=-1, t&apos; will be zero.#br#As the &apos;optimal&apos; value for this parameter is highly data dependent, we would recommend to start with -1, which will also extract features with very low signal-to-noise ratio. Subsequently, one might increase the threshold to find an optimized trade-off between false positives and true positives. Depending on the dynamic range of your spectra, suitable value ranges include: -1, [0:10], and if your data features even very high intensity values, t can also adopt values up to around 30. Please note that this parameter is not of an integer type, s.t. you can also use t:=0.1, e.g." />
16
+ <ITEM name="intensity_type" value="ref" type="string" description="Determines the intensity type returned for the identified features. &apos;ref&apos; (default) returns the sum of the intensities of each isotopic peak within an isotope pattern. &apos;trans&apos; refers to the intensity of the monoisotopic peak within the wavelet transform. &apos;corrected&apos; refers also to the transformed intensity with an attempt to remove the effects of the convolution. While the latter ones might be preferable for qualitative analyses, &apos;ref&apos; might be the best option to obtain quantitative results. Please note that intensity values might be spoiled (in particular for the option &apos;ref&apos;), as soon as patterns overlap (see also the explanations given in the class documentation of FeatureFinderAlgorihtmIsotopeWavelet)." tags="advanced" restrictions="ref,trans,corrected" />
17
+ <ITEM name="check_ppm" value="true" type="string" description="Enables/disables a ppm test vs. the averagine model, i.e. potential peptide masses are checked for plausibility. In addition, a heuristic correcting potential mass shifts induced by the wavelet is applied." tags="advanced" restrictions="true,false" />
18
+ <ITEM name="hr_data" value="false" type="string" description="Must be true in case of high-resolution data, i.e. for spectra featuring large m/z-gaps (present in FTICR and Orbitrap data, e.g.). Please check a single MS scan out of your recording, if you are unsure." restrictions="true,false" />
19
+ <NODE name="sweep_line" description="">
20
+ <ITEM name="rt_votes_cutoff" value="5" type="int" description="Defines the minimum number of subsequent scans where a pattern must occur to be considered as a feature." tags="advanced" restrictions="0:" />
21
+ <ITEM name="rt_interleave" value="1" type="int" description="Defines the maximum number of scans (w.r.t. rt_votes_cutoff) where an expected pattern is missing. There is usually no reason to change the default value." tags="advanced" restrictions="0:" />
22
+ </NODE>
23
+ </NODE>
24
+ </NODE>
25
+ </NODE>
26
+ </PARAMETERS>
@@ -4,7 +4,13 @@
4
4
 
5
5
  common:
6
6
  - wget
7
+ - cpanm
8
+ - libxml2
7
9
  - gd
8
10
  - libpng12
9
- - cpanm
10
- - libxml2
11
+
12
+ openms:
13
+ - autoconf
14
+ - automake
15
+ - libtool
16
+ - cmake
@@ -0,0 +1,17 @@
1
+ temp_file=`mktemp /tmp/protkXXX`
2
+ export temp_file
3
+
4
+ bash << %%%
5
+
6
+ [[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm"
7
+
8
+
9
+ rvm 1.9.3
10
+
11
+ export | grep 'declare -x' | sed 's/declare -x/export/g' > $temp_file
12
+
13
+ %%%
14
+
15
+ . $temp_file
16
+
17
+ rm $temp_file
@@ -0,0 +1,48 @@
1
+ require 'protk/constants'
2
+ require 'bio'
3
+
4
+ #
5
+ # Warning: Uses Bio::Command which is a private API of the Bio package
6
+ #
7
+
8
+ class FastaDB
9
+
10
+ def initialize(blast_database_file_path)
11
+ env = Constants.new
12
+ @database = blast_database_file_path
13
+ @makedbcmd = env.makeblastdb
14
+ @searchdbcmd = env.searchblastdb
15
+ end
16
+
17
+ def self.create(blast_database_file_path,input_fasta_filepath,type='nucl')
18
+ db = FastaDB.new(blast_database_file_path)
19
+ db.make_index(input_fasta_filepath,type)
20
+ db
21
+ end
22
+
23
+ def get_by_id(entry_id)
24
+ fetch(entry_id).shift
25
+ end
26
+
27
+ def make_index(input_fasta,dbtype)
28
+ cmd = [ @makedbcmd, '-in', input_fasta, '-parse_seqids','-out',@database,'-dbtype',dbtype]
29
+ res = Bio::Command.call_command(cmd) do |io|
30
+ puts io.read
31
+ end
32
+ end
33
+
34
+ def fetch(list)
35
+ if list.respond_to?(:join)
36
+ entry_id = list.join(",")
37
+ else
38
+ entry_id = list
39
+ end
40
+
41
+ cmd = [ @searchdbcmd, '-db', @database, '-entry', entry_id ]
42
+ Bio::Command.call_command(cmd) do |io|
43
+ io.close_write
44
+ Bio::FlatFile.new(Bio::FastaFormat, io).to_a
45
+ end
46
+ end
47
+
48
+ end
@@ -1,3 +1,5 @@
1
+ $VERBOSE=nil
2
+
1
3
  require 'pathname'
2
4
 
3
5
  class GalaxyStager
@@ -0,0 +1,11 @@
1
+ require 'libxml'
2
+ include LibXML
3
+
4
+ class OpenMSDefaults
5
+ attr :featurefinderisotopewavelet
6
+ attr :trf_path
7
+ def initialize
8
+ @featurefinderisotopewavelet="#{File.dirname(__FILE__)}/data/FeatureFinderIsotopeWavelet.ini"
9
+ @trf_path = "#{File.dirname(__FILE__)}/data/ExecutePipeline.trf"
10
+ end
11
+ end
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  def supports_package_manager name
24
24
  res = %x[which #{name}]
25
- (res == "")
25
+ (res != "")
26
26
  end
27
27
 
28
28
  def clean_build_dir
@@ -53,9 +53,10 @@ task :package_manager do
53
53
  end
54
54
 
55
55
  if needs_homebrew
56
- sh { "ruby -e \"$(curl -fsSkL raw.github.com/mxcl/homebrew/go)" }
57
- sh { "brew update"}
58
- sh { "brew tap homebrew/versions"}
56
+ puts "Installing Homebrew"
57
+ sh %{ ruby -e \"$(curl -fsSkL raw.github.com/mxcl/homebrew/go)\" }
58
+ sh %{ brew update}
59
+ sh %{ brew tap homebrew/versions}
59
60
  end
60
61
 
61
62
  end
@@ -265,7 +266,7 @@ end
265
266
 
266
267
  def platform_bunzip
267
268
  if RbConfig::CONFIG['host_os'] =~ /darwin/
268
- return 'pbunzip2'
269
+ return 'bunzip2'
269
270
  end
270
271
  'bunzip2'
271
272
  end
@@ -287,6 +288,50 @@ end
287
288
 
288
289
  task :pwiz => pwiz_installed_file
289
290
 
290
- task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz]
291
+ #
292
+ # openms
293
+ #
294
+
295
+ def platform_cmake_args
296
+ if RbConfig::CONFIG['host_os'] =~ /darwin/
297
+ return '-D CMAKE_CXX_COMPILER=/usr/bin/g++ -D CMAKE_C_COMPILER=/usr/bin/gcc '
298
+ end
299
+ ''
300
+ end
301
+
302
+ openms_version="1.9.0"
303
+ openms_packagefile="OpenMS-#{openms_version}.tar.gz"
304
+ openms_url="https://dl.dropbox.com/u/226794/#{openms_packagefile}"
305
+ openms_installed_file="#{env.featurefinderisotopewavelet}"
306
+
307
+ download_task openms_url, openms_packagefile
308
+
309
+ file openms_installed_file => [@build_dir,"#{@download_dir}/#{openms_packagefile}"] do
310
+ sh %{cp #{@download_dir}/#{openms_packagefile} #{@build_dir}}
311
+ sh %{cd #{@build_dir}; gunzip -f #{openms_packagefile}}
312
+ sh %{cd #{@build_dir}; tar -xvf #{openms_packagefile.chomp('.gz')}}
313
+ sh %{mkdir -p #{env.openms_root}}
314
+ sh %{cd #{@build_dir}/OpenMS-#{openms_version}/contrib; cmake #{platform_cmake_args} .}
315
+ sh %{cd #{@build_dir}/OpenMS-#{openms_version}; cmake -D INSTALL_PREFIX=#{env.openms_root} .}
316
+ sh %{cd #{@build_dir}/OpenMS-#{openms_version}; make install}
317
+ end
318
+
319
+ task :openms => openms_installed_file
320
+
321
+ #
322
+ # Galaxy Environment
323
+ #
324
+
325
+ protk_galaxy_envfile = "#{env.protk_dir}/galaxy/env.sh"
326
+
327
+ file protk_galaxy_envfile do
328
+ sh %{mkdir -p #{env.protk_dir}/galaxy}
329
+ this_dir=File.dirname(__FILE__)
330
+ sh %{cp #{this_dir}/data/galaxyenv.sh #{protk_galaxy_envfile}}
331
+ end
332
+
333
+ task :galaxy => protk_galaxy_envfile
334
+
335
+ task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxy]
291
336
 
292
337
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-19 00:00:00.000000000 Z
12
+ date: 2013-01-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ftools
@@ -166,6 +166,10 @@ executables:
166
166
  - unimod_to_loc.rb
167
167
  - generate_omssa_loc.rb
168
168
  - uniprot_mapper.rb
169
+ - feature_finder.rb
170
+ - toppas_pipeline.rb
171
+ - gffmerge.rb
172
+ - sixframe.rb
169
173
  extensions:
170
174
  - ext/protk/extconf.rb
171
175
  extra_rdoc_files: []
@@ -178,11 +182,13 @@ files:
178
182
  - lib/protk/convert_util.rb
179
183
  - lib/protk/data/make_uniprot_table.rb
180
184
  - lib/protk/eupathdb_gene_information_table.rb
185
+ - lib/protk/fastadb.rb
181
186
  - lib/protk/galaxy_stager.rb
182
187
  - lib/protk/galaxy_util.rb
183
188
  - lib/protk/manage_db_tool.rb
184
189
  - lib/protk/mascot_util.rb
185
190
  - lib/protk/omssa_util.rb
191
+ - lib/protk/openms_defaults.rb
186
192
  - lib/protk/pepxml.rb
187
193
  - lib/protk/plasmodb.rb
188
194
  - lib/protk/prophet_tool.rb
@@ -207,6 +213,7 @@ files:
207
213
  - bin/feature_finder.rb
208
214
  - bin/file_convert.rb
209
215
  - bin/generate_omssa_loc.rb
216
+ - bin/gffmerge.rb
210
217
  - bin/interprophet.rb
211
218
  - bin/libra.rb
212
219
  - bin/make_decoy.rb
@@ -220,8 +227,10 @@ files:
220
227
  - bin/protein_prophet.rb
221
228
  - bin/protk_setup.rb
222
229
  - bin/repair_run_summary.rb
230
+ - bin/sixframe.rb
223
231
  - bin/tandem_search.rb
224
232
  - bin/template_search.rb
233
+ - bin/toppas_pipeline.rb
225
234
  - bin/unimod_to_loc.rb
226
235
  - bin/uniprot_mapper.rb
227
236
  - bin/xls_to_table.rb
@@ -230,7 +239,10 @@ files:
230
239
  - lib/protk/data/apt-get_packages.yaml
231
240
  - lib/protk/data/brew_packages.yaml
232
241
  - lib/protk/data/default_config.yml
242
+ - lib/protk/data/ExecutePipeline.trf
233
243
  - lib/protk/data/FeatureFinderCentroided.ini
244
+ - lib/protk/data/FeatureFinderIsotopeWavelet.ini
245
+ - lib/protk/data/galaxyenv.sh
234
246
  - lib/protk/data/predefined_db.crap.yaml
235
247
  - lib/protk/data/predefined_db.sphuman.yaml
236
248
  - lib/protk/data/predefined_db.swissprot_annotation.yaml