protk 1.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +85 -0
- data/bin/annotate_ids.rb +59 -0
- data/bin/big_search.rb +41 -0
- data/bin/correct_omssa_retention_times.rb +27 -0
- data/bin/feature_finder.rb +76 -0
- data/bin/file_convert.rb +157 -0
- data/bin/generate_omssa_loc.rb +42 -0
- data/bin/interprophet.rb +91 -0
- data/bin/make_decoy.rb +64 -0
- data/bin/manage_db.rb +123 -0
- data/bin/mascot_search.rb +187 -0
- data/bin/mascot_to_pepxml.rb +44 -0
- data/bin/msgfplus_search.rb +191 -0
- data/bin/omssa_search.rb +205 -0
- data/bin/peptide_prophet.rb +245 -0
- data/bin/pepxml_to_table.rb +78 -0
- data/bin/protein_prophet.rb +140 -0
- data/bin/protk_setup.rb +31 -0
- data/bin/repair_run_summary.rb +113 -0
- data/bin/tandem_search.rb +292 -0
- data/bin/template_search.rb +144 -0
- data/bin/unimod_to_loc.rb +118 -0
- data/bin/xls_to_table.rb +46 -0
- data/ext/protk/extconf.rb +3 -0
- data/ext/protk/protk.c +235 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +210 -0
- data/lib/protk/biotools_excel_converter.rb +60 -0
- data/lib/protk/command_runner.rb +84 -0
- data/lib/protk/constants.rb +296 -0
- data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
- data/lib/protk/data/apt-get_packages.yaml +47 -0
- data/lib/protk/data/brew_packages.yaml +10 -0
- data/lib/protk/data/default_config.yml +20 -0
- data/lib/protk/data/predefined_db.crap.yaml +19 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
- data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
- data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
- data/lib/protk/data/tandem_params.xml +56 -0
- data/lib/protk/data/taxonomy_template.xml +9 -0
- data/lib/protk/data/unimod.xml +16780 -0
- data/lib/protk/eupathdb_gene_information_table.rb +158 -0
- data/lib/protk/galaxy_stager.rb +24 -0
- data/lib/protk/galaxy_util.rb +9 -0
- data/lib/protk/manage_db_rakefile.rake +484 -0
- data/lib/protk/manage_db_tool.rb +181 -0
- data/lib/protk/mascot_util.rb +63 -0
- data/lib/protk/omssa_util.rb +57 -0
- data/lib/protk/plasmodb.rb +50 -0
- data/lib/protk/prophet_tool.rb +85 -0
- data/lib/protk/protein_annotator.rb +646 -0
- data/lib/protk/protxml.rb +137 -0
- data/lib/protk/randomize.rb +7 -0
- data/lib/protk/search_tool.rb +182 -0
- data/lib/protk/setup_rakefile.rake +245 -0
- data/lib/protk/setup_tool.rb +19 -0
- data/lib/protk/spreadsheet_extensions.rb +78 -0
- data/lib/protk/swissprot_database.rb +38 -0
- data/lib/protk/tool.rb +182 -0
- data/lib/protk/xtandem_defaults.rb +11 -0
- data/lib/protk.rb +18 -0
- metadata +256 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'rexml/xpath'
|
4
|
+
|
5
|
+
|
6
|
+
# Loads a prot.xml document and exposes its protein groups as plain Ruby
# hashes, together with a tabular (array of rows) summary.
#
# After construction +groups+ is an Array of Hashes of the form
#   { :group_probability => Float, :proteins => [protein, ...] }
# where every protein Hash holds the XML attributes of its <protein> element
# (symbol keys, String values) plus :peptides and :indistinguishable_prots.
class ProtXML

  attr_accessor :groups

  # Parses the prot.xml file at +file_name+ and populates +groups+.
  # The file is opened in block form so the handle is closed as soon as the
  # document has been parsed.
  def initialize(file_name)
    @doc = File.open(file_name) { |io| REXML::Document.new(io) }
    @groups = init_groups
  end

  # Returns the protein_name attribute of every <indistinguishable_protein>
  # child of +protein_element+ (nil for children lacking that attribute).
  def indistinguishable_proteins_from_protein(protein_element)
    names = []
    REXML::XPath.each(protein_element, "./indistinguishable_protein") do |ipel|
      names.push(attributes_to_hash(ipel)[:protein_name])
    end
    names
  end

  # Returns one Hash per <peptide> child of +protein_element+. Each Hash holds
  # the element's attributes plus :modifications, an Array with the
  # modified_peptide attribute of every <modification_info> child.
  def peptides_from_protein(protein_element)
    peptides = []
    REXML::XPath.each(protein_element, "./peptide") do |pel|
      peptide = attributes_to_hash(pel)
      peptide[:modifications] = pel.get_elements("./modification_info").map do |mod_info|
        mod_info.attribute("modified_peptide").value
      end
      peptides.push(peptide)
    end
    peptides
  end

  # Returns one Hash per <protein> child of +group_element+, holding the
  # element's attributes plus :peptides and :indistinguishable_prots.
  def proteins_from_group(group_element)
    proteins = []
    REXML::XPath.each(group_element, "./protein") do |pel|
      protein = attributes_to_hash(pel)
      protein[:peptides] = peptides_from_protein(pel)
      protein[:indistinguishable_prots] = indistinguishable_proteins_from_protein(pel)
      proteins.push(protein)
    end
    proteins
  end

  # (Re)builds +groups+ from every <protein_group> element in the document
  # and returns it.
  def init_groups
    @groups = []
    REXML::XPath.each(@doc.root, "//protein_group") do |gel|
      @groups.push(
        :group_probability => gel.attributes["probability"].to_f,
        :proteins => proteins_from_group(gel)
      )
    end
    @groups
  end

  # Returns the displayable sequences for a protein Hash: for a peptide with
  # modifications every modified form is listed, otherwise the plain
  # :peptide_sequence is used.
  def peptide_sequences_from_protein(prot)
    prot[:peptides].flat_map do |pep|
      pep[:modifications].empty? ? [pep[:peptide_sequence]] : pep[:modifications]
    end
  end

  # Converts a protein Hash to a row:
  #   [name, probability, "name;indistinguishable;...", peptide count, "pep;pep;..."]
  def protein_to_row(prot)
    indistinguishable = ([prot[:protein_name]] + prot[:indistinguishable_prots]).join(";")
    [
      prot[:protein_name],
      prot[:probability],
      indistinguishable,
      prot[:peptides].length,
      peptide_sequences_from_protein(prot).join(";")
    ]
  end

  # Convert the entire prot.xml document to row format.
  # Returns an array of arrays. Each of the sub-arrays is a row.
  # Each row contains a simple summary of the protein.
  # A separate row is produced for every protein (including indistinguishable
  # ones, which repeat the primary protein's row with their own accession).
  # The first row is the header.
  #
  # Proteins whose probability is below +threshold_probability+ are excluded.
  #
  def as_rows(threshold_probability)
    rows = [["Accession", "Probability", "Indistinguishable Proteins", "Num Peptides", "Peptides"]]

    selected = @groups.flat_map do |grp|
      grp[:proteins].select { |prot| prot[:probability].to_f >= threshold_probability }
    end

    selected.each do |prot|
      protein_row = protein_to_row(prot)
      rows.push(protein_row)

      prot[:indistinguishable_prots].each do |indist|
        indist_row = protein_row.clone
        indist_row[0] = indist
        rows.push(indist_row)
      end
    end

    rows
  end

  private

  # Copies the attributes of a REXML element into a Hash keyed by the
  # attribute's expanded name (as a Symbol).
  def attributes_to_hash(element)
    result = {}
    element.attributes.each_attribute { |att| result[att.expanded_name.to_sym] = att.value }
    result
  end

end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
#
|
2
|
+
# This file is part of protk
|
3
|
+
# Created by Ira Cooke 15/12/2010
|
4
|
+
#
|
5
|
+
# Provides common functionality used by all msms search tools.
|
6
|
+
#
|
7
|
+
# It allows:
|
8
|
+
# 1. Specification of the search database using a simple name ... this class provides the necessary search for the actual file
|
9
|
+
# 2. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'optparse'
|
13
|
+
require 'pathname'
|
14
|
+
require 'protk/tool'
|
15
|
+
|
16
|
+
# Common base for msms search tools. Depending on the keys set to +true+ in
# the +option_support+ Hash it installs default values on @options and
# registers the matching command-line switches on @option_parser (both are
# expected to be set up by Tool#initialize).
class SearchTool < Tool

  # Initializes commandline options common to all search tools.
  # Individual search tools can add their own options, but should use Capital letters to avoid conflicts
  #
  # Recognised +option_support+ keys (each must be exactly +true+ to enable):
  # :database, :msms_search_detailed_options, :msms_search, :glyco, :maldi.
  #
  def initialize(option_support={})
    super(option_support)

    if option_support[:database] == true
      @options.database = "sphuman"
      @option_parser.on('-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman') do |dbname|
        @options.database = dbname
      end
    end

    if option_support[:msms_search_detailed_options] == true
      @options.allowed_charges = "1+,2+,3+"
      @option_parser.on('--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+') do |ac|
        @options.allowed_charges = ac
      end

      @options.enzyme = "Trypsin"
      @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
        @options.enzyme = enz
      end

      @options.instrument = "ESI-QUAD-TOF"
      @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
        @options.instrument = instrument
      end

      @options.var_mods = ""
      @option_parser.on('--var-mods vm', 'Variable modifications (Overrides -g)') do |vm|
        @options.var_mods = vm
      end

      @options.fix_mods = ""
      @option_parser.on('--fix-mods fm', 'Fixed modifications (Overrides -c and -m options)') do |fm|
        @options.fix_mods = fm
      end

      @options.searched_ions = ""
      @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)') do |si|
        @options.searched_ions = si
      end

      @options.fragment_tolu = "Da"
      @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da') do |tolu|
        @options.fragment_tolu = tolu
      end

      @options.precursor_tolu = "ppm"
      @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm') do |tolu|
        @options.precursor_tolu = tolu
      end

      @options.email = ""
      @option_parser.on('--email em', 'User email.') do |em|
        @options.email = em
      end

      # Registered once only; the original registered an identical
      # '--username' switch a second time further down.
      @options.username = ""
      @option_parser.on('--username un', 'Username.') do |un|
        @options.username = un
      end

      # NOTE(review): the original assigned "www.matrixscience.com" here and
      # then overwrote it with "" before any option parsing took place, so the
      # effective default has always been the empty string. That runtime
      # default is preserved; confirm which value was actually intended.
      @options.mascot_server = ""
      @option_parser.on('-S', '--server url', 'The url to the cgi directory of the mascot server') do |url|
        @options.mascot_server = url
      end

      @options.num_peaks_for_multi_isotope_search = "0"
      @option_parser.on("--num-peaks-for-multi-isotope-search np", "Number of peaks to include in multi-isotope search") do |np|
        @options.num_peaks_for_multi_isotope_search = np
      end
    end

    if option_support[:msms_search] == true
      # Left as the raw String when supplied on the command line.
      @options.fragment_tol = 0.65
      @option_parser.on('-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65') do |tol|
        @options.fragment_tol = tol
      end

      @options.precursor_tol = 200
      @option_parser.on('-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200') do |tol|
        @options.precursor_tol = tol.to_f
      end

      @options.respect_precursor_charges = false
      @option_parser.on('-q', '--respect-charges', 'Dont respect charges in the input file. Instead impute them by trying various options') do
        @options.respect_precursor_charges = true
      end

      @options.precursor_search_type = "monoisotopic"
      @option_parser.on('-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)') do |type|
        @options.precursor_search_type = type
      end

      @options.strict_monoisotopic_mass = false
      @option_parser.on('-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
        @options.strict_monoisotopic_mass = true
      end

      # Left as the raw String when supplied on the command line.
      @options.missed_cleavages = 2
      @option_parser.on('-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed') do |num|
        @options.missed_cleavages = num
      end

      @options.carbamidomethyl = true
      @option_parser.on('-c', '--no-carbamidomethyl', 'Run a search without a carbamidomethyl fixed modification') do
        @options.carbamidomethyl = false
      end

      @options.methionine_oxidation = false
      @option_parser.on('-m', '--methionine-oxidation', 'Run a search with oxidised methionines as a variable modification') do
        @options.methionine_oxidation = true
      end
    end

    if option_support[:glyco] == true
      @options.glyco = false
      @option_parser.on('-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models') do
        @options.glyco = true
      end
    end

    if option_support[:maldi] == true
      @options.maldi = false
      @option_parser.on('-l', '--maldi', 'Run a search on MALDI data') do
        @options.maldi = true
      end
    end
  end

  # Derives a job id from +filename+: the (up to ten) characters immediately
  # preceding the first ".d" are appended to +jobid_prefix+. Returns "protk"
  # when the filename contains no such match.
  def jobid_from_filename(filename)
    jobnum_match = filename.match(/(.{1,10})\.d/)
    return "protk" if jobnum_match.nil?

    "#{self.jobid_prefix}#{jobnum_match[1]}"
  end

  # Based on the database setting and global database path, find the most current version of the required database
  # This function returns the name of the database with an extension appropriate to the database type
  #
  # +db_type+ is accepted for interface compatibility but is not used here;
  # +db+ defaults to the database chosen on the command line.
  #
  def current_database(db_type, db=@options.database)
    Constants.new.current_database_for_name(db)
  end

end
|
@@ -0,0 +1,245 @@
|
|
1
|
+
|
2
|
+
require 'protk/constants.rb'
|
3
|
+
require 'rbconfig'
|
4
|
+
|
5
|
+
# Shared protk environment, used by the tasks in this rakefile.
env = Constants.new

# Scratch locations for unpacking/building packages and for raw downloads.
@build_dir = "#{env.protk_dir}/tmp/build"
@download_dir = "#{env.protk_dir}/tmp/download"

# Rake directory tasks so other tasks can depend on these paths existing.
[@build_dir, @download_dir].each { |dir| directory dir }
|
12
|
+
|
13
|
+
# Name of the system package manager: 'brew' when the host OS string
# contains "darwin", 'apt-get' otherwise.
def package_manager_name
  RbConfig::CONFIG['host_os'] =~ /darwin/ ? 'brew' : 'apt-get'
end
|
19
|
+
|
20
|
+
# Removes everything inside @build_dir.
# The commands are chained with && so that the rm never runs in the current
# working directory when the cd fails (e.g. the build dir does not exist).
def clean_build_dir
  sh %{cd #{@build_dir} && rm -rf ./*}
end
|
23
|
+
|
24
|
+
# Downloads +url+ into @download_dir using wget.
# +file+ is accepted for interface compatibility but is not used: wget picks
# the local file name from the URL.
# The commands are chained with && so a failed cd aborts the download instead
# of silently fetching into the current working directory.
def download_buildfile url, file
  sh %{cd #{@download_dir} && wget #{url}}
end
|
27
|
+
|
28
|
+
# Declares a rake file task for "<download dir>/<packagefile>" which depends
# on the download directory and fetches +url+ when the file is missing.
def download_task url, packagefile
  target = "#{@download_dir}/#{packagefile}"
  file(target => @download_dir) do
    download_buildfile(url.to_s, packagefile.to_s)
  end
end
|
33
|
+
|
34
|
+
#
|
35
|
+
# Package manager
|
36
|
+
#
|
37
|
+
# Verifies that the platform package manager is on the PATH.
# A missing apt-get is a hard error; a missing brew is bootstrapped by running
# the homebrew installer, then updated and tapped for homebrew/versions.
task :package_manager do
  pmname = package_manager_name
  needs_homebrew = false

  # The block form of sh hands us the exit status instead of raising.
  sh "which #{pmname}" do |ok, _res|
    unless ok
      raise "Missing package manager #{pmname}" unless pmname == 'brew'
      needs_homebrew = true
    end
  end

  if needs_homebrew
    # These must be passed to sh as command strings. The original wrapped
    # them in blocks (sh { "..." }), so no command was handed to sh at all.
    sh %{ruby -e "$(curl -fsSkL raw.github.com/mxcl/homebrew/go)"}
    sh "brew update"
    sh "brew tap homebrew/versions"
  end
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# System packages
|
57
|
+
#
|
58
|
+
# Installs every system package listed in data/<package manager>_packages.yaml.
# The YAML data is iterated as [application, packages] pairs; the package
# lists are merged into one duplicate-free list before installing.
task :system_packages => :package_manager do
  # Gather package requirements (block form closes the file after loading)
  packages_file = "#{File.dirname(__FILE__)}/data/#{package_manager_name}_packages.yaml"
  pkgs = File.open(packages_file) { |io| YAML::load(io) }

  unique_packages = []
  pkgs.each do |_app, packages|
    unique_packages = packages | unique_packages
  end

  # Install all packages. The block form of sh keeps a failed install from
  # aborting the run; the exit status is printed instead.
  unique_packages.each do |pk|
    sh "#{package_manager_name} install #{pk}" do |_ok, res|
      p res
    end
  end
end
|
80
|
+
|
81
|
+
#
|
82
|
+
# Perl local::lib
|
83
|
+
#
|
84
|
+
perl_locallib_version="1.008004"
perl_locallib_packagefile="local-lib-#{perl_locallib_version}.tar.gz"
perl_locallib_installed_file = "#{env.protk_dir}/perl5/lib/perl5/local/lib.pm"
perl_locallib_url = "http://search.cpan.org/CPAN/authors/id/A/AP/APEIRON/local-lib-#{perl_locallib_version}.tar.gz"

download_task perl_locallib_url, perl_locallib_packagefile

# Builds and bootstraps local::lib into <protk dir>/perl5, then makes sure the
# user's bash startup files activate it.
file perl_locallib_installed_file => [@build_dir,"#{@download_dir}/#{perl_locallib_packagefile}"] do
  sh %{cp #{@download_dir}/#{perl_locallib_packagefile} #{@build_dir}}
  perl_dir = "#{env.protk_dir}/perl5"

  sh %{cd #{@build_dir}; gunzip local-lib-#{perl_locallib_version}.tar.gz }
  sh %{cd #{@build_dir}; tar -xf local-lib-#{perl_locallib_version}.tar }
  # The (empty) block makes a failing build non-fatal, as in the original.
  sh "cd #{@build_dir}/local-lib-#{perl_locallib_version}; perl Makefile.PL --bootstrap=#{perl_dir}; make install" do |ok,res|
    # clean_build_dir if ok
  end

  # Append the activation line to each startup file unless it is already
  # there. Ruby's File API does not expand "~", so the path is expanded
  # explicitly before testing/reading; the shell expands it for the echo.
  %w[.bashrc .bash_profile].each do |rc_name|
    rc_path = File.expand_path("~/#{rc_name}")
    next if File.exist?(rc_path) && File.read(rc_path) =~ /Mlocal::lib/

    sh "echo 'eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})' >>~/#{rc_name}"
  end
end

task :perl_locallib => [perl_locallib_installed_file]
|
111
|
+
|
112
|
+
|
113
|
+
#
|
114
|
+
# Top Level Packages.
|
115
|
+
#
|
116
|
+
|
117
|
+
|
118
|
+
#
|
119
|
+
# TPP
|
120
|
+
#
|
121
|
+
tpp_version="4.6.1"
tpp_packagefile="TPP-#{tpp_version}.tgz"
tpp_installed_file = "#{env.xinteract}"
tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.1.tgz"

download_task tpp_url, tpp_packagefile

# Build: unpack the TPP sources, write the local Makefile configuration,
# patch the makefiles when building with homebrew, then make and install.
file tpp_installed_file => [:perl_locallib,@build_dir,"#{@download_dir}/#{tpp_packagefile}"] do
  pipeline_dir = "#{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline"

  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::CGI --force}

  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}

  File.open("#{pipeline_dir}/src/Makefile.config.incl","wb") do |config|
    config.write "TPP_ROOT=#{env.tpp_root}/\nTPP_WEB=/tpp/\nXSLT_PROC=/usr/bin/xsltproc\nCGI_USERS_DIR=${TPP_ROOT}cgi-bin/"
  end

  makefile_incl_path = "#{pipeline_dir}/src/Makefile.incl"
  makefile_incl_text = File.read(makefile_incl_path)

  # Homebrew specific modifications to makefiles
  #
  if package_manager_name == 'brew'
    # We're using homebrew not fink or macports
    patched_incl = makefile_incl_text
      .gsub(/GD_LIB= \/opt\/local\/lib\/libgd.a \/opt\/local\/lib\/libpng.a/,"GD_LIB= /usr/local/lib/libgd.a /usr/local/opt/libpng12/lib/libpng.a")
      .gsub(/GD_INCL= -I \/opt\/local\/include\//,"GD_INCL= -I /usr/local/include/ -I /usr/local/opt/libpng12/include")
    File.open(makefile_incl_path,"w+") { |out| out.write patched_incl }

    cgi_makefile_path = "#{pipeline_dir}/CGI/Makefile"
    cgi_makefile_text = File.read(cgi_makefile_path)
    File.open(cgi_makefile_path,"w+") { |out| out.write cgi_makefile_text.gsub("cp -rfu","cp -rf") }
  end

  sh %{cd #{pipeline_dir}/src ; make; make install}
end

task :tpp => tpp_installed_file
|
165
|
+
|
166
|
+
|
167
|
+
|
168
|
+
#
|
169
|
+
# omssa
|
170
|
+
#
|
171
|
+
# Platform tag used in omssa package names: 'macos' when the host OS string
# contains "darwin", 'linux' otherwise.
def omssa_platform
  RbConfig::CONFIG['host_os'] =~ /darwin/ ? 'macos' : 'linux'
end
|
177
|
+
|
178
|
+
omssa_packagefile = "omssa-#{omssa_platform}.tar.gz"
omssa_installed_file = env.omssacl.to_s
omssa_url = "ftp://ftp.ncbi.nih.gov/pub/lewisg/omssa/CURRENT/#{omssa_packagefile}"

download_task omssa_url, omssa_packagefile

# Install: unpack the downloaded archive in the build dir and copy its
# contents into the omssa root.
file omssa_installed_file => [@build_dir, "#{@download_dir}/#{omssa_packagefile}"] do
  omssa_tarball = "omssa-#{omssa_platform}.tar"

  sh %{cp #{@download_dir}/#{omssa_packagefile} #{@build_dir}}
  sh %{cd #{@build_dir}; gunzip #{omssa_tarball}.gz}
  sh %{cd #{@build_dir}; tar -xvf #{omssa_tarball}}
  sh %{mkdir -p #{env.omssa_root}}
  sh %{cd #{@build_dir}; cp -r omssa-*.#{omssa_platform}/* #{env.omssa_root}/}
end

task :omssa => omssa_installed_file
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
#
|
198
|
+
# blast
|
199
|
+
#
|
200
|
+
# Platform tag used in blast package names: 'universal-macosx' when the host
# OS string contains "darwin", 'x64-linux' otherwise.
def blast_platform
  RbConfig::CONFIG['host_os'] =~ /darwin/ ? 'universal-macosx' : 'x64-linux'
end
|
206
|
+
|
207
|
+
blast_version = "2.2.27+"
blast_packagefile = "ncbi-blast-#{blast_version}-#{blast_platform}.tar.gz"
# The download directory on the server is named after the version without
# its trailing '+'.
blast_url = "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/#{blast_version.chomp('+')}/#{blast_packagefile}"
blast_installed_file = env.makeblastdb.to_s

download_task blast_url, blast_packagefile

# Install: unpack the downloaded archive in the build dir and copy its
# contents into the blast root.
file blast_installed_file => [@build_dir, "#{@download_dir}/#{blast_packagefile}"] do
  blast_tarball = blast_packagefile.chomp('.gz')

  sh %{cp #{@download_dir}/#{blast_packagefile} #{@build_dir}}
  sh %{cd #{@build_dir}; gunzip #{blast_packagefile}}
  sh %{cd #{@build_dir}; tar -xvf #{blast_tarball}}
  sh %{mkdir -p #{env.blast_root}}
  sh %{cd #{@build_dir}; cp -r ncbi-blast-#{blast_version}/* #{env.blast_root}/}
end

task :blast => blast_installed_file
|
224
|
+
|
225
|
+
|
226
|
+
#
|
227
|
+
# MSGFPlus
|
228
|
+
#
|
229
|
+
msgfplus_version = "20120823"
msgfplus_packagefile = "MSGFPlus.#{msgfplus_version}.zip"
msgfplus_url = "http://proteomics.ucsd.edu/Downloads/#{msgfplus_packagefile}"
msgfplus_installed_file = env.msgfplusjar.to_s

download_task msgfplus_url, msgfplus_packagefile

# Install: unzip the download in the build dir and copy the jar into the
# msgfplus root.
file msgfplus_installed_file => [@build_dir, "#{@download_dir}/#{msgfplus_packagefile}"] do
  sh %{cp #{@download_dir}/#{msgfplus_packagefile} #{@build_dir}}
  sh %{cd #{@build_dir}; unzip #{msgfplus_packagefile}}
  sh %{mkdir -p #{env.msgfplus_root}}
  sh %{cd #{@build_dir}; cp MSGFPlus.jar #{env.msgfplus_root}/}
end

task :msgfplus => msgfplus_installed_file

# Aggregate task; note that it covers tpp, omssa and blast only.
task :all => %i[tpp omssa blast]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'pathname'
|
3
|
+
require 'protk/tool'
|
4
|
+
|
5
|
+
require 'rake'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
# Make the rake DSL (task, file, sh, ...) available at the top level.
include Rake::DSL

# Initialise an in-process rake application and load the setup rakefile.
Rake.application.init('protk_setup')
Rake.application.rake_require('protk/setup_rakefile')

# Tool that installs protk's external dependencies by invoking the rake
# tasks loaded above.
class SetupTool < Tool

  # Invokes the rake task named +toolname+.
  def install(toolname)
    Rake.application.invoke_task(toolname)
  end

end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Add a method to the Spreadsheet::Worksheet class to insert a column
|
2
|
+
class Spreadsheet::Worksheet < Object
  # Inserts +col+ (an Array holding one value per row) as a new column at
  # position +index+.
  #
  # If the sheet has no rows yet, every value of +col+ becomes a new
  # single-cell row. Otherwise +col+ must contain exactly one value per
  # existing row.
  #
  # Raises RuntimeError when the arguments have the wrong type or when the
  # column length does not match the number of rows.
  def insert_column(col,index)
    # Validate the argument types first so that a non-Array +col+ is reported
    # as such. Integer is used because Fixnum no longer exists in current Ruby.
    unless col.is_a?(Array) && index.is_a?(Integer)
      raise "Wrong arguments. Requires a column array and an integer index"
    end

    # Then check that the length of the column equals the number of rows
    if col.length != self.rows.length && self.rows.length != 0
      raise "The length of column #{col.length} does not equal the number of rows #{self.rows.length}"
    end

    # Check for special case where there are no rows yet and if so then insert as new rows
    if self.rows.length == 0
      col.each_with_index { |value, i| self.insert_row(i, [value]) }
    else
      # Insert the column row by row. Probably inefficient but it works
      rowi = 0
      self.each do |row|
        row.insert(index, col[rowi])
        rowi += 1
      end
    end
  end
end
|
27
|
+
|
28
|
+
class Spreadsheet::Workbook < Object

  # Creates a new workbook (returned) containing a single worksheet with the
  # first +numrows+ rows of this workbook's first worksheet. nil cells are
  # replaced by empty strings in the copy.
  #
  # +numrows+ of 0 or nil, or a value larger than the sheet's row count plus
  # one, means "copy all rows".
  #
  # Only the first worksheet is copied. If the workbook does not contain
  # exactly one worksheet a message is printed; no error is raised.
  #
  def copyBook(numrows=0)
    numrows ||= 0

    # Create a new workbook from scratch for writing
    outputBook = Spreadsheet::Workbook.new
    outputSheet = outputBook.create_worksheet

    # There should only be one worksheet in the input workbook
    if self.worksheets.length != 1
      puts "More than one worksheet in this excel file. This script only operates on single worksheets"
    end

    # Get the worksheet
    inputSheet = self.worksheet 0

    # Figure out how many rows to convert if not specified
    if numrows == 0 || numrows > (inputSheet.row_count + 1)
      numrows = inputSheet.row_count
    end

    # Transcribe everything from the old worksheet to the new one
    puts "Creating new spreadsheet with #{numrows} rows"
    rows_to_copy = [numrows, inputSheet.row_count].min
    (0...rows_to_copy).each do |r|
      outputSheet.insert_row(r, inputSheet.row(r))

      # After inserting the row make sure it doesn't contain any nil values
      newRow = outputSheet.row(r)
      newRow.each_index do |ci|
        newRow[ci] = "" if newRow[ci] == nil
      end
    end

    outputBook
  end

end
|