protk 1.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,137 @@
1
+ require 'rubygems'
2
+ require 'rexml/document'
3
+ require 'rexml/xpath'
4
+
5
+
6
# In-memory view of a prot.xml document. The file is parsed once on
# construction and its protein groups are exposed as plain hashes and arrays,
# together with a tabular summary (see #as_rows).
class ProtXML

  # Array of group hashes built by #init_groups. Each group has
  # :group_probability (Float) and :proteins (Array of protein hashes).
  attr_accessor :groups

  # Parses the document stored at file_name and populates #groups.
  # The file handle is closed as soon as the document has been parsed.
  def initialize(file_name)
    @doc = File.open(file_name) { |io| REXML::Document.new(io) }
    init_groups
  end

  # Returns the protein_name attribute of every indistinguishable_protein
  # child of protein_element (nil for a child lacking that attribute).
  def indistinguishable_proteins_from_protein(protein_element)
    names = []
    REXML::XPath.each(protein_element, "./indistinguishable_protein") do |ipel|
      names.push(attributes_of(ipel)[:protein_name])
    end
    names
  end

  # Returns one hash per peptide child of protein_element. Each hash holds
  # the peptide's XML attributes (symbol keys) plus :modifications, the list
  # of modified_peptide strings found on its modification_info children.
  def peptides_from_protein(protein_element)
    peptides = []
    REXML::XPath.each(protein_element, "./peptide") do |pel|
      peptide = attributes_of(pel)
      # A modification_info element without a modified_peptide attribute is
      # skipped rather than raising NoMethodError on nil.
      peptide[:modifications] = pel.get_elements("./modification_info").map do |info|
        info.attributes["modified_peptide"]
      end.compact
      peptides.push(peptide)
    end
    peptides
  end

  # Returns one hash per protein child of group_element: the protein's XML
  # attributes plus :peptides and :indistinguishable_prots.
  def proteins_from_group(group_element)
    proteins = []
    REXML::XPath.each(group_element, "./protein") do |pel|
      protein = attributes_of(pel)
      protein[:peptides] = peptides_from_protein(pel)
      protein[:indistinguishable_prots] = indistinguishable_proteins_from_protein(pel)
      proteins.push(protein)
    end
    proteins
  end

  # (Re)builds @groups from every protein_group element in the document and
  # returns it.
  def init_groups
    @groups = []
    REXML::XPath.each(@doc.root, "//protein_group") do |gel|
      @groups.push(
        :group_probability => gel.attributes["probability"].to_f,
        :proteins => proteins_from_group(gel)
      )
    end
    @groups
  end

  # Returns the sequences to report for a protein hash: for each peptide,
  # all of its modified forms when it has any, otherwise its plain
  # :peptide_sequence.
  def peptide_sequences_from_protein(prot)
    prot[:peptides].flat_map do |pep|
      pep[:modifications].empty? ? [pep[:peptide_sequence]] : pep[:modifications]
    end
  end

  # Converts a protein hash to a row:
  # [name, probability, "name;indistinguishable...", peptide count, "seq;seq..."]
  def protein_to_row(prot)
    all_names = [prot[:protein_name]] + prot[:indistinguishable_prots]
    [
      prot[:protein_name],
      prot[:probability],
      all_names.join(";"),
      prot[:peptides].length,
      peptide_sequences_from_protein(prot).join(";")
    ]
  end

  # Convert the entire prot.xml document to row format
  # Returns an array of arrays. Each of the sub-arrays is a row.
  # Each row contains a simple summary of the protein.
  # A separate row is provided for every protein (including indistinguishable ones)
  # The first row is the header
  #
  # Proteins with probabilities below threshold_probability are excluded
  #
  def as_rows(threshold_probability)
    rows = [["Accession","Probability","Indistinguishable Proteins","Num Peptides","Peptides"]]

    @groups.each do |grp|
      grp[:proteins].each do |prot|
        next if prot[:probability].to_f < threshold_probability

        protein_row = protein_to_row(prot)
        rows.push(protein_row)

        # Indistinguishable proteins repeat the row under their own accession.
        prot[:indistinguishable_prots].each do |indist|
          indist_row = protein_row.clone
          indist_row[0] = indist
          rows.push(indist_row)
        end
      end
    end

    rows
  end

  private

  # Copies the attributes of an XML element into a hash keyed by the
  # attribute's expanded name as a symbol.
  def attributes_of(element)
    attrs = {}
    element.attributes.each_attribute { |att| attrs[att.expanded_name.to_sym] = att.value }
    attrs
  end

end
@@ -0,0 +1,7 @@
1
+ require 'protk/protk'
2
+
3
# Ruby-level entry point for decoy generation; the actual work is delegated
# to Protk.make_decoys.
class Randomize
  # Coerces the arguments (paths and prefix via to_s, db_len via to_i) and
  # forwards them, in the same order, to Protk.make_decoys. Returns whatever
  # that call returns.
  def self.make_decoys(input_path, db_len, output_path, prefix)
    source = input_path.to_s
    length = db_len.to_i
    target = output_path.to_s
    label = prefix.to_s
    Protk.make_decoys(source, length, target, label)
  end
end
@@ -0,0 +1,182 @@
1
+ #
2
+ # This file is part of protk
3
+ # Created by Ira Cooke 15/12/2010
4
+ #
5
+ # Provides common functionality used by all msms search tools.
6
+ #
7
+ # It allows;
8
+ # 1. Specification of the search database using a simple name ... this class provides the necessary search for the actual file
9
+ # 2. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
10
+ #
11
+
12
+ require 'optparse'
13
+ require 'pathname'
14
+ require 'protk/tool'
15
+
16
# Base class for the msms search tools. On top of whatever Tool sets up, it
# registers the command-line options the search tools have in common and
# offers helpers for deriving a job id and locating the search database.
class SearchTool < Tool

  # Initializes commandline options common to all search tools.
  # Individual search tools can add their own options, but should use Capital letters to avoid conflicts
  #
  # option_support is passed on to Tool#initialize. A group of options is
  # registered only when its key is exactly true:
  #   :database, :msms_search_detailed_options, :msms_search, :glyco, :maldi
  # Every option gets its default assigned on @options before being registered.
  #
  def initialize(option_support={})
    super(option_support)

    if option_support[:database] == true
      @options.database = "sphuman"
      @option_parser.on('-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman') do |dbname|
        @options.database = dbname
      end
    end

    if option_support[:msms_search_detailed_options] == true
      @options.allowed_charges = "1+,2+,3+"
      @option_parser.on('--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+') do |ac|
        @options.allowed_charges = ac
      end

      @options.enzyme = "Trypsin"
      @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
        @options.enzyme = enz
      end

      @options.instrument = "ESI-QUAD-TOF"
      @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
        @options.instrument = instrument
      end

      @options.var_mods = ""
      @option_parser.on('--var-mods vm', 'Variable modifications (Overrides -g)') do |vm|
        @options.var_mods = vm
      end

      @options.fix_mods = ""
      @option_parser.on('--fix-mods fm', 'Fixed modifications (Overrides -c and -m options)') do |fm|
        @options.fix_mods = fm
      end

      @options.searched_ions = ""
      @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)') do |si|
        @options.searched_ions = si
      end

      @options.fragment_tolu = "Da"
      @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da') do |tolu|
        @options.fragment_tolu = tolu
      end

      @options.precursor_tolu = "ppm"
      @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm') do |tolu|
        @options.precursor_tolu = tolu
      end

      @options.email = ""
      @option_parser.on('--email em', 'User email.') do |em|
        @options.email = em
      end

      @options.username = ""
      @option_parser.on('--username un', 'Username.') do |un|
        @options.username = un
      end

      # NOTE(review): this method used to assign "www.matrixscience.com" here
      # and then overwrite it with "" a few lines later while registering
      # --username a second time. The effective default ("") is kept so
      # callers see no change; confirm which default was actually intended.
      @options.mascot_server = ""
      @option_parser.on('-S', '--server url', 'The url to the cgi directory of the mascot server') do |url|
        @options.mascot_server = url
      end

      @options.num_peaks_for_multi_isotope_search = "0"
      @option_parser.on("--num-peaks-for-multi-isotope-search np", "Number of peaks to include in multi-isotope search") do |np|
        @options.num_peaks_for_multi_isotope_search = np
      end
    end

    if option_support[:msms_search] == true
      # NOTE(review): a user-supplied fragment tolerance stays a String while
      # the precursor tolerance below is converted with to_f; left as is.
      @options.fragment_tol = 0.65
      @option_parser.on('-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65') do |tol|
        @options.fragment_tol = tol
      end

      @options.precursor_tol = 200
      @option_parser.on('-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200') do |tol|
        @options.precursor_tol = tol.to_f
      end

      # The flag switches respect_precursor_charges ON, so the help text now
      # says so (it previously described the opposite).
      @options.respect_precursor_charges = false
      @option_parser.on('-q', '--respect-charges', 'Respect charges in the input file instead of imputing them by trying various options') do
        @options.respect_precursor_charges = true
      end

      @options.precursor_search_type = "monoisotopic"
      @option_parser.on('-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)') do |type|
        @options.precursor_search_type = type
      end

      @options.strict_monoisotopic_mass = false
      @option_parser.on('-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
        @options.strict_monoisotopic_mass = true
      end

      @options.missed_cleavages = 2
      @option_parser.on('-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed') do |num|
        @options.missed_cleavages = num
      end

      @options.carbamidomethyl = true
      @option_parser.on('-c', '--no-carbamidomethyl', 'Run a search without a carbamidomethyl fixed modification') do
        @options.carbamidomethyl = false
      end

      @options.methionine_oxidation = false
      @option_parser.on('-m', '--methionine-oxidation', 'Run a search with oxidised methionines as a variable modification') do
        @options.methionine_oxidation = true
      end
    end

    if option_support[:glyco] == true
      @options.glyco = false
      @option_parser.on('-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models') do
        @options.glyco = true
      end
    end

    if option_support[:maldi] == true
      @options.maldi = false
      @option_parser.on('-l', '--maldi', 'Run a search on MALDI data') do
        @options.maldi = true
      end
    end
  end

  # Derives a job id from filename: jobid_prefix followed by the (up to ten)
  # characters that precede a ".d" in the name. Returns "protk" when the
  # name contains no such match.
  def jobid_from_filename(filename)
    jobnum_match = filename.match(/(.{1,10})\.d/)
    return "protk" if jobnum_match.nil?

    "#{self.jobid_prefix}#{jobnum_match[1]}"
  end

  # Based on the database setting and global database path, find the most current version of the required database
  #
  # The lookup is delegated to Constants#current_database_for_name(db).
  # db defaults to the --database option; db_type is accepted but not used
  # by this implementation.
  #
  def current_database(db_type, db=@options.database)
    Constants.new.current_database_for_name(db)
  end

end
@@ -0,0 +1,245 @@
1
+
2
+ require 'protk/constants.rb'
3
+ require 'rbconfig'
4
+
5
# protk configuration object; the rest of this rakefile reads install
# locations from it.
env = Constants.new

# Working directories below <protk_dir>/tmp: one for unpacking/building and
# one for downloaded archives.
@build_dir, @download_dir = %w[build download].map { |leaf| "#{env.protk_dir}/tmp/#{leaf}" }

# Declare a directory task for each so other tasks can depend on them.
[@build_dir, @download_dir].each { |dir| directory dir }
12
+
13
# Name of the system package manager to drive: 'brew' when the host OS
# string contains "darwin", 'apt-get' otherwise.
def package_manager_name
  RbConfig::CONFIG['host_os'] =~ /darwin/ ? 'brew' : 'apt-get'
end
19
+
20
# Removes everything inside @build_dir.
#
# The rm only runs if the cd succeeded (&&), and an unset build directory is
# refused outright: with "cd <dir>; rm -rf ./*" a failed or empty cd would
# wipe whatever directory the command happened to run in.
#
# Raises RuntimeError when @build_dir is nil or empty.
def clean_build_dir
  raise "Build directory is not set" if @build_dir.to_s.empty?

  sh %{cd #{@build_dir} && rm -rf ./*}
end
23
+
24
# Downloads url into @download_dir using wget.
#
# url  - location to fetch.
# file - accepted for symmetry with download_task but not used here; wget
#        stores the download under the name taken from the url.
#
# The wget only runs if the cd succeeded (&&), so a missing download
# directory no longer drops the archive into the current directory.
#
# Raises RuntimeError when @download_dir is nil or empty.
def download_buildfile url, file
  raise "Download directory is not set" if @download_dir.to_s.empty?

  sh %{cd #{@download_dir} && wget #{url}}
end
27
+
28
# Declares a file task for <@download_dir>/<packagefile> that depends on the
# download directory and, when run, fetches url via download_buildfile.
def download_task url, packagefile
  target = "#{@download_dir}/#{packagefile}"
  file(target => @download_dir) do
    download_buildfile url.to_s, packagefile.to_s
  end
end
33
+
34
+ #
35
+ # Package manager
36
+ #
37
# Verifies that the platform's package manager is on the PATH.
# A missing 'brew' is installed on the spot; any other missing package
# manager aborts the task with a RuntimeError.
task :package_manager do
  pmname = package_manager_name
  needs_homebrew = false

  # The block form keeps rake from aborting on a non-zero exit status so the
  # result can be inspected here.
  sh "which #{pmname}" do |ok, _res|
    unless ok
      # raise (not throw): throw is for non-local jumps, not error reporting.
      raise "Missing package manager #{pmname}" unless pmname == 'brew'
      needs_homebrew = true
    end
  end

  if needs_homebrew
    # These commands were previously wrapped in blocks (sh { "..." }), which
    # hands sh no command at all, and the first one had an unbalanced quote.
    # NOTE(review): curl's -k flag disables certificate verification for a
    # script that is then executed; consider dropping it.
    sh %{ruby -e "$(curl -fsSkL raw.github.com/mxcl/homebrew/go)"}
    sh "brew update"
    sh "brew tap homebrew/versions"
  end
end
54
+
55
+ #
56
+ # System packages
57
+ #
58
# Installs the system packages listed in
# data/<package manager>_packages.yaml (next to this rakefile).
# Each entry of that file is iterated as a [name, package list] pair; the
# package lists are merged without duplicates and installed one by one.
task :system_packages => :package_manager do
  # Gather package requirements (load_file closes the file when done)
  pkgs = YAML.load_file("#{File.dirname(__FILE__)}/data/#{package_manager_name}_packages.yaml")

  # Same merge order as before: packages of later entries come first.
  unique_packages = pkgs.reduce([]) { |merged, (_app, packages)| packages | merged }

  # Install all packages. The block form keeps one failed install from
  # aborting the rest; the command status is printed for each package.
  unique_packages.each do |pk|
    sh "#{package_manager_name} install #{pk}" do |_ok, res|
      p res
    end
  end
end
80
+
81
+ #
82
+ # Perl local::lib
83
+ #
84
perl_locallib_version = "1.008004"
perl_locallib_packagefile = "local-lib-#{perl_locallib_version}.tar.gz"
# Presence of this file is what marks local::lib as installed.
perl_locallib_installed_file = "#{env.protk_dir}/perl5/lib/perl5/local/lib.pm"
perl_locallib_url = "http://search.cpan.org/CPAN/authors/id/A/AP/APEIRON/local-lib-#{perl_locallib_version}.tar.gz"

download_task perl_locallib_url, perl_locallib_packagefile

# Unpacks the downloaded archive in the build directory, bootstraps
# local::lib into <protk_dir>/perl5 and makes sure the shell startup files
# activate it.
file perl_locallib_installed_file => [@build_dir, "#{@download_dir}/#{perl_locallib_packagefile}"] do
  sh %{cp #{@download_dir}/#{perl_locallib_packagefile} #{@build_dir}}
  perl_dir = "#{env.protk_dir}/perl5"

  sh %{cd #{@build_dir}; gunzip local-lib-#{perl_locallib_version}.tar.gz }
  sh %{cd #{@build_dir}; tar -xf local-lib-#{perl_locallib_version}.tar }
  # The (empty) block keeps a failing build from aborting the task.
  sh "cd #{@build_dir}/local-lib-#{perl_locallib_version}; perl Makefile.PL --bootstrap=#{perl_dir}; make install" do |ok, res|
    # clean_build_dir if ok
  end

  # Append the local::lib eval line to each startup file unless it is already
  # there. Ruby does not expand "~" on its own, so the path is expanded
  # before it is inspected; the shell expands it in the echo redirect.
  # (The earlier check was inverted and never saw the real files, so the
  # line was appended on every run.)
  ["~/.bashrc", "~/.bash_profile"].each do |rc|
    rc_path = File.expand_path(rc)
    next if File.exist?(rc_path) && File.read(rc_path) =~ /Mlocal::lib/

    sh "echo 'eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})' >>#{rc}"
  end
end

task :perl_locallib => [perl_locallib_installed_file]
111
+
112
+
113
+ #
114
+ # Top Level Packages.
115
+ #
116
+
117
+
118
+ #
119
+ # TPP
120
+ #
121
tpp_version = "4.6.1"
tpp_packagefile = "TPP-#{tpp_version}.tgz"
# The file task below is keyed on this path (env.xinteract).
tpp_installed_file = env.xinteract.to_s
tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.1.tgz"

download_task tpp_url, tpp_packagefile

# Build: unpack the TPP sources, write the local Makefile configuration,
# patch the makefiles when building with Homebrew, then make and install.
file tpp_installed_file => [:perl_locallib, @build_dir, "#{@download_dir}/#{tpp_packagefile}"] do
  tpp_src_root = "#{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline"

  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
  # Perl modules installed into the protk-local perl5 tree.
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::CGI --force}

  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}

  # Site-specific settings picked up by the TPP makefiles.
  config_body = "TPP_ROOT=#{env.tpp_root}/\nTPP_WEB=/tpp/\nXSLT_PROC=/usr/bin/xsltproc\nCGI_USERS_DIR=${TPP_ROOT}cgi-bin/"
  File.open("#{tpp_src_root}/src/Makefile.config.incl", "wb") { |out| out.write config_body }

  # Read unconditionally, exactly as before, even though only the Homebrew
  # branch uses the text.
  makefile_incl_path = "#{tpp_src_root}/src/Makefile.incl"
  makefile_incl_text = File.read(makefile_incl_path.to_s)

  # Homebrew specific modifications to makefiles
  #
  if package_manager_name == 'brew'
    # We're using homebrew not fink or macports: point GD at /usr/local.
    patched_incl = makefile_incl_text.gsub(/GD_LIB= \/opt\/local\/lib\/libgd.a \/opt\/local\/lib\/libpng.a/,"GD_LIB= /usr/local/lib/libgd.a /usr/local/opt/libpng12/lib/libpng.a")
    patched_incl = patched_incl.gsub(/GD_INCL= -I \/opt\/local\/include\//,"GD_INCL= -I /usr/local/include/ -I /usr/local/opt/libpng12/include")
    File.open(makefile_incl_path.to_s, "w+") { |out| out.write patched_incl }

    # Replace "cp -rfu" with "cp -rf" in the CGI makefile.
    cgi_makefile_path = "#{tpp_src_root}/CGI/Makefile"
    cgi_makefile_text = File.read(cgi_makefile_path.to_s)
    patched_cgi = cgi_makefile_text.gsub("cp -rfu","cp -rf")
    File.open(cgi_makefile_path.to_s, "w+") { |out| out.write patched_cgi }
  end

  sh %{cd #{tpp_src_root}/src ; make; make install}
end

task :tpp => tpp_installed_file
165
+
166
+
167
+
168
+ #
169
+ # omssa
170
+ #
171
# Platform tag used in OMSSA archive names: 'macos' when the host OS string
# contains "darwin", 'linux' otherwise.
def omssa_platform
  RbConfig::CONFIG['host_os'] =~ /darwin/ ? 'macos' : 'linux'
end
177
+
178
omssa_packagefile = "omssa-#{omssa_platform}.tar.gz"
# The file task below is keyed on this path (env.omssacl).
omssa_installed_file = env.omssacl.to_s
omssa_url = "ftp://ftp.ncbi.nih.gov/pub/lewisg/omssa/CURRENT/omssa-#{omssa_platform}.tar.gz"

download_task omssa_url, omssa_packagefile

# Install: unpack the downloaded archive in the build directory and copy its
# contents into env.omssa_root.
file omssa_installed_file => [@build_dir, "#{@download_dir}/omssa-#{omssa_platform}.tar.gz"] do
  [
    %{cp #{@download_dir}/#{omssa_packagefile} #{@build_dir}},
    %{cd #{@build_dir}; gunzip omssa-#{omssa_platform}.tar.gz},
    %{cd #{@build_dir}; tar -xvf omssa-#{omssa_platform}.tar},
    %{mkdir -p #{env.omssa_root}},
    %{cd #{@build_dir}; cp -r omssa-*.#{omssa_platform}/* #{env.omssa_root}/}
  ].each { |command| sh command }
end

task :omssa => omssa_installed_file
194
+
195
+
196
+
197
+ #
198
+ # blast
199
+ #
200
# Platform tag used in BLAST+ archive names: 'universal-macosx' when the host
# OS string contains "darwin", 'x64-linux' otherwise.
def blast_platform
  RbConfig::CONFIG['host_os'] =~ /darwin/ ? 'universal-macosx' : 'x64-linux'
end
206
+
207
blast_version = "2.2.27+"
blast_packagefile = "ncbi-blast-#{blast_version}-#{blast_platform}.tar.gz"
# The download directory name is the version without its trailing '+'.
blast_url = "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/#{blast_version.chomp('+')}/#{blast_packagefile}"
# The file task below is keyed on this path (env.makeblastdb).
blast_installed_file = env.makeblastdb.to_s

download_task blast_url, blast_packagefile

# Install: unpack the downloaded archive in the build directory and copy its
# contents into env.blast_root.
file blast_installed_file => [@build_dir, "#{@download_dir}/#{blast_packagefile}"] do
  blast_tarball = blast_packagefile.chomp('.gz')
  [
    %{cp #{@download_dir}/#{blast_packagefile} #{@build_dir}},
    %{cd #{@build_dir}; gunzip #{blast_packagefile}},
    %{cd #{@build_dir}; tar -xvf #{blast_tarball}},
    %{mkdir -p #{env.blast_root}},
    %{cd #{@build_dir}; cp -r ncbi-blast-#{blast_version}/* #{env.blast_root}/}
  ].each { |command| sh command }
end

task :blast => blast_installed_file
224
+
225
+
226
+ #
227
+ # MSGFPlus
228
+ #
229
msgfplus_version = "20120823"
msgfplus_packagefile = "MSGFPlus.#{msgfplus_version}.zip"
msgfplus_url = "http://proteomics.ucsd.edu/Downloads/MSGFPlus.#{msgfplus_version}.zip"
# The file task below is keyed on this path (env.msgfplusjar).
msgfplus_installed_file = env.msgfplusjar.to_s

download_task msgfplus_url, msgfplus_packagefile

# Install: unzip the downloaded archive in the build directory and copy
# MSGFPlus.jar into env.msgfplus_root.
file msgfplus_installed_file => [@build_dir, "#{@download_dir}/#{msgfplus_packagefile}"] do
  [
    %{cp #{@download_dir}/#{msgfplus_packagefile} #{@build_dir}},
    %{cd #{@build_dir}; unzip #{msgfplus_packagefile}},
    %{mkdir -p #{env.msgfplus_root}},
    %{cd #{@build_dir}; cp MSGFPlus.jar #{env.msgfplus_root}/}
  ].each { |command| sh command }
end

task :msgfplus => msgfplus_installed_file

# NOTE(review): :msgfplus is not part of :all - confirm whether that is intended.
task :all => [:tpp, :omssa, :blast]
@@ -0,0 +1,19 @@
1
+ require 'optparse'
2
+ require 'pathname'
3
+ require 'protk/tool'
4
+
5
+ require 'rake'
6
+ require 'pp'
7
+
8
include Rake::DSL

# Initialise the Rake application under the name 'protk_setup' and load the
# setup rakefile so that its tasks can be invoked from SetupTool.
rake_app = Rake.application
rake_app.init('protk_setup')
rake_app.rake_require('protk/setup_rakefile')

# Tool that installs a component by invoking the Rake task of that name.
class SetupTool < Tool

  # Invokes the Rake task called toolname on the Rake application.
  def install(toolname)
    Rake.application.invoke_task(toolname)
  end

end
@@ -0,0 +1,78 @@
1
# Add a method to the Spreadsheet::Worksheet class to insert a column
class Spreadsheet::Worksheet < Object
  # Inserts the values of col as a new column at position index.
  #
  # col   - Array with one value per existing row (any length when the sheet
  #         has no rows yet).
  # index - Integer column position handed to each row's insert.
  #
  # On an empty sheet every value of col becomes a new single-cell row and
  # index is not used.
  #
  # Raises RuntimeError when the arguments have the wrong type or when col
  # does not have exactly one value per existing row.
  def insert_column(col,index)
    # Validate the types first so a non-array col gets the descriptive error
    # below instead of a NoMethodError from col.length. Integer replaces
    # Fixnum, which no longer exists in current Ruby versions.
    unless col.is_a?(Array) && index.is_a?(Integer)
      raise "Wrong arguments. Requires a column array and an integer index"
    end

    row_total = self.rows.length
    if row_total != 0 && col.length != row_total
      raise "The length of column #{col.length} does not equal the number of rows #{row_total}"
    end

    if row_total == 0
      # Special case: there are no rows yet, so insert the values as new rows
      col.each_with_index { |value, i| self.insert_row(i, [value]) }
    else
      # Insert the column row by row. Probably inefficient but it works
      rowi = 0
      self.each do |row|
        row.insert(index, col[rowi])
        rowi += 1
      end
    end
  end
end
27
+
28
class Spreadsheet::Workbook < Object

  # Creates a new workbook (returning the workbook object), transcribing the
  # content of this workbook's first worksheet up to the given number of rows.
  #
  # numrows - maximum number of rows to copy; 0, nil or false (or a value
  #           larger than the sheet) means "all rows".
  #
  # Prints a warning (it does not raise) when this workbook does not contain
  # exactly one worksheet; only worksheet 0 is ever copied. nil cells are
  # replaced by empty strings in the copy.
  #
  def copyBook(numrows=0)
    numrows = 0 unless numrows

    # Create a new workbook from scratch for writing
    outputBook = Spreadsheet::Workbook.new
    outputSheet = outputBook.create_worksheet

    # There should only be one worksheet in the input workbook
    if self.worksheets.length != 1
      puts "More than one worksheet in this excel file. This script only operates on single worksheets"
    end

    # Get the worksheet
    inputSheet = self.worksheet 0

    # Figure out how many rows to convert. Clamping to the real row count
    # keeps the message below accurate (a request for row_count+1 rows used
    # to be reported as-is although only row_count rows were copied).
    available_rows = inputSheet.row_count
    numrows = available_rows if numrows == 0 || numrows > available_rows

    # Transcribe everything from the old worksheet to the new one
    puts "Creating new spreadsheet with #{numrows} rows"
    (0...numrows).each do |r|
      outputSheet.insert_row(r, inputSheet.row(r))

      # After inserting the row make sure it doesn't contain any nil values
      newRow = outputSheet.row(r)
      newRow.each_index { |ci| newRow[ci] = "" if newRow[ci].nil? }
    end

    outputBook
  end

end