protk 1.1.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +85 -0
- data/bin/annotate_ids.rb +59 -0
- data/bin/big_search.rb +41 -0
- data/bin/correct_omssa_retention_times.rb +27 -0
- data/bin/feature_finder.rb +76 -0
- data/bin/file_convert.rb +157 -0
- data/bin/generate_omssa_loc.rb +42 -0
- data/bin/interprophet.rb +91 -0
- data/bin/make_decoy.rb +64 -0
- data/bin/manage_db.rb +123 -0
- data/bin/mascot_search.rb +187 -0
- data/bin/mascot_to_pepxml.rb +44 -0
- data/bin/msgfplus_search.rb +191 -0
- data/bin/omssa_search.rb +205 -0
- data/bin/peptide_prophet.rb +245 -0
- data/bin/pepxml_to_table.rb +78 -0
- data/bin/protein_prophet.rb +140 -0
- data/bin/protk_setup.rb +31 -0
- data/bin/repair_run_summary.rb +113 -0
- data/bin/tandem_search.rb +292 -0
- data/bin/template_search.rb +144 -0
- data/bin/unimod_to_loc.rb +118 -0
- data/bin/xls_to_table.rb +46 -0
- data/ext/protk/extconf.rb +3 -0
- data/ext/protk/protk.c +235 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +210 -0
- data/lib/protk/biotools_excel_converter.rb +60 -0
- data/lib/protk/command_runner.rb +84 -0
- data/lib/protk/constants.rb +296 -0
- data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
- data/lib/protk/data/apt-get_packages.yaml +47 -0
- data/lib/protk/data/brew_packages.yaml +10 -0
- data/lib/protk/data/default_config.yml +20 -0
- data/lib/protk/data/predefined_db.crap.yaml +19 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
- data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
- data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
- data/lib/protk/data/tandem_params.xml +56 -0
- data/lib/protk/data/taxonomy_template.xml +9 -0
- data/lib/protk/data/unimod.xml +16780 -0
- data/lib/protk/eupathdb_gene_information_table.rb +158 -0
- data/lib/protk/galaxy_stager.rb +24 -0
- data/lib/protk/galaxy_util.rb +9 -0
- data/lib/protk/manage_db_rakefile.rake +484 -0
- data/lib/protk/manage_db_tool.rb +181 -0
- data/lib/protk/mascot_util.rb +63 -0
- data/lib/protk/omssa_util.rb +57 -0
- data/lib/protk/plasmodb.rb +50 -0
- data/lib/protk/prophet_tool.rb +85 -0
- data/lib/protk/protein_annotator.rb +646 -0
- data/lib/protk/protxml.rb +137 -0
- data/lib/protk/randomize.rb +7 -0
- data/lib/protk/search_tool.rb +182 -0
- data/lib/protk/setup_rakefile.rake +245 -0
- data/lib/protk/setup_tool.rb +19 -0
- data/lib/protk/spreadsheet_extensions.rb +78 -0
- data/lib/protk/swissprot_database.rb +38 -0
- data/lib/protk/tool.rb +182 -0
- data/lib/protk/xtandem_defaults.rb +11 -0
- data/lib/protk.rb +18 -0
- metadata +256 -0
data/lib/protk/protxml.rb
@@ -0,0 +1,137 @@
+require 'rubygems'
+require 'rexml/document'
+require 'rexml/xpath'
+
+
+class ProtXML
+
+  attr_accessor :groups
+
+
+  def indistinguishable_proteins_from_protein(protein_element)
+    iprots=[]
+    REXML::XPath.each(protein_element,"./indistinguishable_protein") do |ipel|
+      ipel_attributes={}
+      ipel.attributes.each_attribute { |att| ipel_attributes[att.expanded_name.to_sym]=att.value }
+      iprots.push(ipel_attributes[:protein_name])
+    end
+    iprots
+  end
+
+  def peptides_from_protein(protein_element)
+    peptides=[]
+    REXML::XPath.each(protein_element,"./peptide") do |pel|
+      peptide={}
+
+      pel.attributes.each_attribute { |att| peptide[att.expanded_name.to_sym]=att.value }
+      modifications=pel.get_elements("./modification_info")
+      mods=modifications.collect {|mp| mp.attribute("modified_peptide").value }
+      peptide[:modifications] = mods
+      peptides.push(peptide)
+    end
+    peptides
+  end
+
+  def proteins_from_group(group_element)
+    proteins=[]
+    REXML::XPath.each(group_element,"./protein") do |pel|
+      protein={}
+      pel.attributes.each_attribute { |att| protein[att.expanded_name.to_sym]=att.value }
+      protein[:peptides]=peptides_from_protein(pel)
+      protein[:indistinguishable_prots]=indistinguishable_proteins_from_protein(pel)
+      proteins.push(protein)
+    end
+    proteins
+  end
+
+  def init_groups
+    @groups=[]
+    REXML::XPath.each(@doc.root,"//protein_group") do |gel|
+      group={}
+      group[:group_probability]=gel.attributes["probability"].to_f
+      group[:proteins]=proteins_from_group(gel)
+      groups.push group
+    end
+    @groups
+  end
+
+
+  def initialize(file_name)
+    @doc=REXML::Document.new(File.new(file_name))
+    @groups=self.init_groups
+  end
+
+  def peptide_sequences_from_protein(prot)
+    peptides=prot[:peptides]
+    sequences=[]
+    peptides.each do |pep|
+      if ( pep[:modifications].length > 0 )
+        pep[:modifications].each {|pmod|
+          sequences.push(pmod) }
+      else
+        sequences.push(pep[:peptide_sequence])
+      end
+    end
+    sequences
+  end
+
+  def protein_to_row(prot)
+    protein_row=[]
+    protein_row.push(prot[:protein_name])
+    protein_row.push(prot[:probability])
+
+    indistinct=prot[:indistinguishable_prots]
+    indist_string="#{prot[:protein_name]};"
+    indistinct.each { |pr| indist_string<<"#{pr};"}
+    indist_string.chop!
+    protein_row.push(indist_string)
+
+    protein_row.push(prot[:peptides].length)
+
+    peptide_string=""
+    peptide_sequences_from_protein(prot).each {|pep| peptide_string<<"#{pep};" }
+    peptide_string.chop!
+
+    protein_row.push(peptide_string)
+    protein_row
+  end
+
+  # Convert the entire prot.xml document to row format
+  # Returns an array of arrays. Each of the sub-arrays is a row.
+  # Each row should contain a simple summary of the protein.
+  # A separate row should be provided for every protein (including indistinguishable ones)
+  # The first row will be the header
+  #
+  # Proteins with probabilities below a threshold are excluded
+  #
+  def as_rows(threshold_probability)
+
+    rows=[]
+    rows.push(["Accession","Probability","Indistinguishable Proteins","Num Peptides","Peptides"])
+
+    proteins=[]
+    @groups.each do |grp|
+      grp[:proteins].each {|prot|
+        if ( prot[:probability].to_f >= threshold_probability)
+          proteins.push(prot)
+        end
+      }
+    end
+
+    proteins.each do |prot|
+      protein_row=protein_to_row(prot)
+      rows.push(protein_row)
+
+      indistinguishables=prot[:indistinguishable_prots]
+      indistinguishables.each do |indist|
+        indist_row=protein_row.clone
+        indist_row[0]=indist
+        rows.push(indist_row)
+      end
+
+    end
+
+    rows
+  end
+
+end
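For orientation, a minimal usage sketch for the `ProtXML` class above; the input path and the 0.9 probability threshold are illustrative placeholders, not values shipped with the gem:

```ruby
require 'protk/protxml'

# Parse a ProteinProphet result file and print a tab-separated protein summary.
# "interact.prot.xml" is a placeholder path; 0.9 is an arbitrary probability cutoff.
protxml = ProtXML.new("interact.prot.xml")
protxml.as_rows(0.9).each do |row|
  puts row.join("\t")
end
```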
data/lib/protk/search_tool.rb
@@ -0,0 +1,182 @@
+#
+# This file is part of protk
+# Created by Ira Cooke 15/12/2010
+#
+# Provides common functionality used by all msms search tools.
+#
+# It allows;
+# 1. Specification of the search database using a simple name ... this class provides the necessary search for the actual file
+# 2. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
+#
+
+require 'optparse'
+require 'pathname'
+require 'protk/tool'
+
+class SearchTool < Tool
+
+  # Initializes commandline options common to all search tools.
+  # Individual search tools can add their own options, but should use Capital letters to avoid conflicts
+  #
+  def initialize(option_support={})
+    super(option_support)
+
+    if (option_support[:database]==true)
+
+      @options.database = "sphuman"
+      @option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
+        options.database = dbname
+      end
+
+    end
+
+    if ( option_support[:msms_search_detailed_options]==true)
+      @options.allowed_charges="1+,2+,3+"
+      @option_parser.on( '--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+' ) do |ac|
+        @options.allowed_charges = ac
+      end
+
+      @options.enzyme = "Trypsin"
+      @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
+        @options.enzyme=enz
+      end
+
+      @options.instrument = "ESI-QUAD-TOF"
+      @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
+        @options.instrument=instrument
+      end
+
+
+      @options.var_mods = ""
+      @option_parser.on('--var-mods vm', 'Variable modifications (Overrides -g)' ) do |vm|
+        @options.var_mods = vm
+      end
+
+      @options.fix_mods = ""
+      @option_parser.on('--fix-mods fm', 'Fixed modifications (Overrides -c and -m options)' ) do |fm|
+        @options.fix_mods = fm
+      end
+
+      @options.searched_ions = ""
+      @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)' ) do |si|
+        @options.searched_ions = si
+      end
+
+
+      @options.fragment_tolu="Da"
+      @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da' ) do |tolu|
+        @options.fragment_tolu = tolu
+      end
+
+      @options.precursor_tolu="ppm"
+      @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
+        @options.precursor_tolu = tolu
+      end
+
+      @options.email=""
+      @option_parser.on('--email em', 'User email.') do |em|
+        @options.email = em
+      end
+
+      @options.username=""
+      @option_parser.on('--username un', 'Username.') do |un|
+        @options.username = un
+      end
+
+      @options.mascot_server="www.matrixscience.com"
+      @option_parser.on( '-S', '--server url', 'The url to the cgi directory of the mascot server' ) do |url|
+        @options.mascot_server=url
+      end
+
+      @options.mascot_server=""
+      @option_parser.on('--username un', 'Username.') do |un|
+        @options.username = un
+      end
+
+      @options.num_peaks_for_multi_isotope_search="0"
+      @option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
+        @options.num_peaks_for_multi_isotope_search=np
+      end
+
+
+    end
+
+    if ( option_support[:msms_search]==true)
+      @options.fragment_tol=0.65
+      @option_parser.on( '-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65' ) do |tol|
+        @options.fragment_tol = tol
+      end
+
+      @options.precursor_tol=200
+      @option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
+        @options.precursor_tol = tol.to_f
+      end
+
+      @options.respect_precursor_charges=false
+      @option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
+        @options.respect_precursor_charges=true
+      end
+
+      @options.precursor_search_type="monoisotopic"
+      @option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
+        @options.precursor_search_type = type
+      end
+
+      @options.strict_monoisotopic_mass=false
+      @option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
+        @options.strict_monoisotopic_mass=true
+      end
+
+      @options.missed_cleavages=2
+      @option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
+        @options.missed_cleavages = num
+      end
+
+      @options.carbamidomethyl=true
+      @option_parser.on( '-c', '--no-carbamidomethyl', 'Run a search without a carbamidomethyl fixed modification' ) do
+        @options.carbamidomethyl = false
+      end
+
+      @options.methionine_oxidation=false
+      @option_parser.on( '-m', '--methionine-oxidation', 'Run a search with oxidised methionines as a variable modification' ) do
+        @options.methionine_oxidation = true
+      end
+
+    end
+
+    if ( option_support[:glyco]==true)
+
+      @options.glyco = false
+      @option_parser.on( '-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models' ) do
+        @options.glyco = true
+      end
+
+    end
+
+    if ( option_support[:maldi]==true)
+      @options.maldi=false
+      @option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
+        @options.maldi=true
+      end
+    end
+
+  end
+
+
+  def jobid_from_filename(filename)
+    jobid="protk"
+    jobnum_match=filename.match(/(.{1,10})\.d/)
+    if (jobnum_match!=nil)
+      jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
+    end
+    return jobid
+  end
+
+  # Based on the database setting and global database path, find the most current version of the required database
+  # This function returns the name of the database with an extension appropriate to the database type
+  #
+  def current_database(db_type,db=@options.database)
+    return Constants.new.current_database_for_name(db)
+  end
+
+end
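A rough sketch of how one of the bundled `*_search.rb` wrappers might build on `SearchTool`. The `options` and `option_parser` readers are assumed to be exposed by the parent `Tool` class (the bare `options.database` call above suggests an `options` reader exists); the enabled option groups and printed fields are illustrative:

```ruby
require 'protk/search_tool'

# Enable the shared :database, :msms_search and :glyco option groups, then parse ARGV.
# Accessor names (options, option_parser) are assumed to come from the parent Tool class.
search_tool = SearchTool.new(:database => true, :msms_search => true, :glyco => true)
search_tool.option_parser.parse!

puts "database:         #{search_tool.options.database}"
puts "missed cleavages: #{search_tool.options.missed_cleavages}"
puts "database file:    #{search_tool.current_database(:fasta)}"
```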
data/lib/protk/setup_rakefile.rake
@@ -0,0 +1,245 @@
+
+require 'protk/constants.rb'
+require 'rbconfig'
+
+env=Constants.new
+
+@build_dir = "#{env.protk_dir}/tmp/build"
+@download_dir = "#{env.protk_dir}/tmp/download"
+
+directory @build_dir
+directory @download_dir
+
+def package_manager_name
+  if RbConfig::CONFIG['host_os'] =~ /darwin/
+    return 'brew'
+  end
+  'apt-get'
+end
+
+def clean_build_dir
+  sh %{cd #{@build_dir}; rm -rf ./*}
+end
+
+def download_buildfile url, file
+  sh %{cd #{@download_dir}; wget #{url}}
+end
+
+def download_task url, packagefile
+  file "#{@download_dir}/#{packagefile}" => @download_dir do
+    download_buildfile "#{url}", "#{packagefile}"
+  end
+end
+
+#
+# Package manager
+#
+task :package_manager do
+  pmname = package_manager_name
+  needs_homebrew=false
+  sh "which #{pmname}" do |ok,res|
+    unless ok
+      throw "Missing package manager #{pmname}" unless pmname=='brew'
+      needs_homebrew=true
+    end
+  end
+
+  if needs_homebrew
+    sh { "ruby -e \"$(curl -fsSkL raw.github.com/mxcl/homebrew/go)" }
+    sh { "brew update"}
+    sh { "brew tap homebrew/versions"}
+  end
+
+end
+
+#
+# System packages
+#
+task :system_packages => :package_manager do
+  # Gather package requirements
+  pkgs=YAML::load(File.open("#{File.dirname(__FILE__)}/data/#{package_manager_name}_packages.yaml"))
+
+  unique_packages=[]
+  apps=[]
+  installed_packages=[]
+  for pk in pkgs
+    unique_packages = pk[1] | unique_packages
+    apps = apps.push pk[0]
+  end
+
+  # Install all packages
+  #
+  unique_packages.each { |pk|
+    sh "#{package_manager_name} install #{pk}" do |ok,res|
+      p res
+      installed_packages.push pk if ok
+    end
+  }
+
+end
+
+#
+# Perl local::lib
+#
+perl_locallib_version="1.008004"
+perl_locallib_packagefile="local-lib-#{perl_locallib_version}.tar.gz"
+perl_locallib_installed_file = "#{env.protk_dir}/perl5/lib/perl5/local/lib.pm"
+perl_locallib_url = "http://search.cpan.org/CPAN/authors/id/A/AP/APEIRON/local-lib-#{perl_locallib_version}.tar.gz"
+
+download_task perl_locallib_url, perl_locallib_packagefile
+
+file perl_locallib_installed_file => [@build_dir,"#{@download_dir}/#{perl_locallib_packagefile}"] do
+  sh %{cp #{@download_dir}/#{perl_locallib_packagefile} #{@build_dir}}
+  perl_dir = "#{env.protk_dir}/perl5"
+
+  sh %{cd #{@build_dir}; gunzip local-lib-#{perl_locallib_version}.tar.gz }
+  sh %{cd #{@build_dir}; tar -xf local-lib-#{perl_locallib_version}.tar }
+  sh "cd #{@build_dir}/local-lib-#{perl_locallib_version}; perl Makefile.PL --bootstrap=#{perl_dir}; make install" do |ok,res|
+    # clean_build_dir if ok
+  end
+
+  if !Pathname.new("~/.bashrc").exist? || File.read("~/.bashrc") =~ /Mlocal::lib/
+    sh "echo 'eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})' >>~/.bashrc"
+  end
+
+  if !Pathname.new("~/.bash_profile").exist? || File.read("~/.bash_profile") =~ /Mlocal::lib/
+    sh "echo 'eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})' >>~/.bash_profile"
+  end
+end
+
+task :perl_locallib => [perl_locallib_installed_file]
+
+
+#
+# Top Level Packages.
+#
+
+
+#
+# TPP
+#
+tpp_version="4.6.1"
+tpp_packagefile="TPP-#{tpp_version}.tgz"
+tpp_installed_file = "#{env.xinteract}"
+tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.1.tgz"
+
+download_task tpp_url, tpp_packagefile
+
+# Build
+file tpp_installed_file => [:perl_locallib,@build_dir,"#{@download_dir}/#{tpp_packagefile}"] do
+  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
+  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
+  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::CGI --force}
+
+  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
+
+  File.open("#{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src/Makefile.config.incl","wb") do |f|
+    f.write "TPP_ROOT=#{env.tpp_root}/\nTPP_WEB=/tpp/\nXSLT_PROC=/usr/bin/xsltproc\nCGI_USERS_DIR=${TPP_ROOT}cgi-bin/"
+  end
+
+  makefile_incl_path="#{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src/Makefile.incl"
+  makefile_incl_text=File.read("#{makefile_incl_path}")
+
+  # Homebrew specific modifications to makefiles
+  #
+  if ( package_manager_name=='brew')
+    File.open("#{makefile_incl_path}","w+") do |f|
+      subs_text = makefile_incl_text.gsub(/GD_LIB= \/opt\/local\/lib\/libgd.a \/opt\/local\/lib\/libpng.a/,"GD_LIB= /usr/local/lib/libgd.a /usr/local/opt/libpng12/lib/libpng.a") #We're using homebrew not fink or macports
+      subs_text = subs_text.gsub(/GD_INCL= -I \/opt\/local\/include\//,"GD_INCL= -I /usr/local/include/ -I /usr/local/opt/libpng12/include")
+      f.write subs_text
+    end
+
+    makefile_path="#{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/CGI/Makefile"
+    makefile_text = File.read("#{makefile_path}")
+
+    File.open("#{makefile_path}","w+") do |f|
+      subs_text = makefile_text.gsub("cp -rfu","cp -rf")
+      f.write subs_text
+    end
+  end
+  sh %{cd #{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline/src ; make; make install}
+
+end
+
+task :tpp => tpp_installed_file
+
+
+
+#
+# omssa
+#
+def omssa_platform
+  if RbConfig::CONFIG['host_os'] =~ /darwin/
+    return 'macos'
+  end
+  'linux'
+end
+
+omssa_packagefile="omssa-#{omssa_platform}.tar.gz"
+omssa_installed_file = "#{env.omssacl}"
+omssa_url = "ftp://ftp.ncbi.nih.gov/pub/lewisg/omssa/CURRENT/omssa-#{omssa_platform}.tar.gz"
+
+download_task omssa_url, omssa_packagefile
+
+# Install
+file omssa_installed_file => [@build_dir,"#{@download_dir}/omssa-#{omssa_platform}.tar.gz"] do
+  sh %{cp #{@download_dir}/#{omssa_packagefile} #{@build_dir}}
+  sh %{cd #{@build_dir}; gunzip omssa-#{omssa_platform}.tar.gz}
+  sh %{cd #{@build_dir}; tar -xvf omssa-#{omssa_platform}.tar}
+  sh %{mkdir -p #{env.omssa_root}}
+  sh %{cd #{@build_dir}; cp -r omssa-*.#{omssa_platform}/* #{env.omssa_root}/}
+end
+
+task :omssa => omssa_installed_file
+
+
+
+#
+# blast
+#
+def blast_platform
+  if RbConfig::CONFIG['host_os'] =~ /darwin/
+    return 'universal-macosx'
+  end
+  'x64-linux'
+end
+
+blast_version="2.2.27+"
+blast_packagefile="ncbi-blast-#{blast_version}-#{blast_platform}.tar.gz"
+blast_url="ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/#{blast_version.chomp('+')}/#{blast_packagefile}"
+blast_installed_file="#{env.makeblastdb}"
+
+download_task blast_url, blast_packagefile
+
+# Install
+file blast_installed_file => [@build_dir,"#{@download_dir}/#{blast_packagefile}"] do
+  sh %{cp #{@download_dir}/#{blast_packagefile} #{@build_dir}}
+  sh %{cd #{@build_dir}; gunzip #{blast_packagefile}}
+  sh %{cd #{@build_dir}; tar -xvf #{blast_packagefile.chomp('.gz')}}
+  sh %{mkdir -p #{env.blast_root}}
+  sh %{cd #{@build_dir}; cp -r ncbi-blast-#{blast_version}/* #{env.blast_root}/}
+end
+
+task :blast => blast_installed_file
+
+
+#
+# MSGFPlus
+#
+msgfplus_version="20120823"
+msgfplus_packagefile="MSGFPlus.20120823.zip"
+msgfplus_url="http://proteomics.ucsd.edu/Downloads/MSGFPlus.20120823.zip"
+msgfplus_installed_file="#{env.msgfplusjar}"
+
+download_task msgfplus_url, msgfplus_packagefile
+
+file msgfplus_installed_file => [@build_dir,"#{@download_dir}/#{msgfplus_packagefile}"] do
+  sh %{cp #{@download_dir}/#{msgfplus_packagefile} #{@build_dir}}
+  sh %{cd #{@build_dir}; unzip #{msgfplus_packagefile}}
+  sh %{mkdir -p #{env.msgfplus_root}}
+  sh %{cd #{@build_dir}; cp MSGFPlus.jar #{env.msgfplus_root}/}
+end
+
+task :msgfplus => msgfplus_installed_file
+
+task :all => [:tpp,:omssa,:blast]
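Each tool in this rakefile is wired up the same way: a `download_task` for the archive, a `file` task keyed on the installed artifact, and a named task that depends on it. A hypothetical extra tool would follow the same pattern; the tool name, URL, version and install directory below are placeholders, not part of protk:

```ruby
# Hypothetical additional tool following the download_task / file / task pattern above.
# "mytool", its URL and install location are placeholders.
mytool_version = "1.0"
mytool_packagefile = "mytool-#{mytool_version}.tar.gz"
mytool_url = "http://example.org/downloads/#{mytool_packagefile}"
mytool_installed_file = "#{env.protk_dir}/mytool/bin/mytool"

download_task mytool_url, mytool_packagefile

file mytool_installed_file => [@build_dir, "#{@download_dir}/#{mytool_packagefile}"] do
  sh %{cp #{@download_dir}/#{mytool_packagefile} #{@build_dir}}
  sh %{cd #{@build_dir}; tar -xzf #{mytool_packagefile}}
  sh %{mkdir -p #{env.protk_dir}/mytool}
  sh %{cd #{@build_dir}; cp -r mytool-#{mytool_version}/* #{env.protk_dir}/mytool/}
end

task :mytool => mytool_installed_file
```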
data/lib/protk/setup_tool.rb
@@ -0,0 +1,19 @@
+require 'optparse'
+require 'pathname'
+require 'protk/tool'
+
+require 'rake'
+require 'pp'
+
+include Rake::DSL
+
+Rake.application.init 'protk_setup'
+Rake.application.rake_require 'protk/setup_rakefile'
+
+class SetupTool < Tool
+
+  def install toolname
+    Rake.application.invoke_task toolname
+  end
+
+end
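A sketch of how `SetupTool` is intended to be driven, roughly what `data/bin/protk_setup.rb` (listed above, contents not shown in this hunk) would do; it assumes the inherited `Tool#initialize` can be called without arguments:

```ruby
require 'protk/setup_tool'

# Invoke each setup rake task named on the command line, e.g.
#   protk_setup.rb system_packages tpp omssa blast
# Assumes Tool#initialize accepts no required arguments.
setup_tool = SetupTool.new
ARGV.each { |toolname| setup_tool.install toolname }
```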
data/lib/protk/spreadsheet_extensions.rb
@@ -0,0 +1,78 @@
+# Add a method to the Spreadsheet::Worksheet class to insert a column
+class Spreadsheet::Worksheet < Object
+  def insert_column(col,index)
+    # First check to see if the length of the column equals the number of rows
+    if ( col.length!=self.rows.length && self.rows.length!=0)
+      raise "The length of column #{col.length} does not equal the number of rows #{self.rows.length}"
+    end
+    if ( col.class!=Array || index.class!=Fixnum)
+      raise "Wrong arguments. Requires a column array and an integer index"
+    end
+
+    # Check for special case where there are no rows yet and if so then insert as new rows
+    if ( self.rows.length==0)
+      col.each_index { |i|
+        self.insert_row(i,[col[i]])
+      }
+    else
+      # Insert the column row by row. Probably inefficient but it works
+      rowi=0
+      self.each {|row|
+        row.insert(index,col[rowi])
+        rowi+=1
+      }
+    end
+  end
+end
+
+class Spreadsheet::Workbook < Object
+
+
+  # creates an output excel file (returning the workbook object), transcribing all original content up to the given number of rows
+  # Throws an error if the input contains more than 1 worksheet
+  #
+  def copyBook(numrows=0)
+
+    if ( !numrows )
+      numrows=0
+    end
+
+    # Create a new workbook from scratch for writing
+    outputBook = Spreadsheet::Workbook.new
+    outputSheet = outputBook.create_worksheet
+
+    # There should only be one worksheet in the input workbook
+    worksheets=self.worksheets
+    if ( self.worksheets.length != 1 )
+      puts "More than one worksheet in this excel file. This script only operates on single worksheets"
+    end
+
+    # Get the worksheet
+    inputSheet=self.worksheet 0
+
+    # Figure out how many rows to convert if not specified
+    if ( numrows==0 || numrows > (inputSheet.row_count+1))
+      numrows=inputSheet.row_count
+    end
+
+
+    # Transcribe everything from the old worksheet to the new one
+    puts "Creating new spreadsheet with #{numrows} rows"
+    (0...[numrows,inputSheet.row_count].min).each { |r|
+
+      outputSheet.insert_row(r,inputSheet.row(r))
+
+      newRow=outputSheet.row(r)
+
+      # After inserting the row make sure it doesn't contain any nil values
+      newRow.each_index { |ci|
+        if ( newRow[ci]==nil)
+          newRow[ci]=""
+        end
+      }
+    }
+    outputBook
+  end
+
+
+end
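A short usage sketch for the two extensions above. It assumes the `spreadsheet` gem is loaded first (this file reopens its classes rather than requiring it) and uses placeholder file names:

```ruby
require 'spreadsheet'
require 'protk/spreadsheet_extensions'

# Copy the first 100 rows of a single-sheet workbook, then append an extra column.
# input.xls / output.xls are placeholder file names.
book = Spreadsheet.open("input.xls")
copy = book.copyBook(100)

sheet = copy.worksheet 0
extra_column = Array.new(sheet.row_count, "annotation")   # one value per row
sheet.insert_column(extra_column, sheet.column_count)

copy.write("output.xls")
```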