protk 1.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,137 @@
1
+ require 'rubygems'
2
+ require 'rexml/document'
3
+ require 'rexml/xpath'
4
+
5
+
6
# Reads a prot.xml document and exposes its protein groups as plain Ruby
# hashes and arrays, plus a tabular (array of rows) summary.
#
# The whole document is parsed once in #initialize; afterwards every method
# works on the in-memory structures only.
class ProtXML

  # Array of group hashes built by #init_groups. Each group has
  #   :group_probability - Float taken from the group's "probability" attribute
  #   :proteins          - Array of protein hashes (see #proteins_from_group)
  attr_accessor :groups

  # Parses the file at +file_name+ and populates #groups.
  # The file is opened in block form so the handle is closed as soon as REXML
  # has finished building the document tree.
  def initialize(file_name)
    @doc = File.open(file_name) { |io| REXML::Document.new(io) }
    @groups = init_groups
  end

  # Returns the "protein_name" attribute of every <indistinguishable_protein>
  # child of +protein_element+ (nil for a child without that attribute).
  def indistinguishable_proteins_from_protein(protein_element)
    REXML::XPath.match(protein_element, "./indistinguishable_protein").map do |ipel|
      attributes_of(ipel)[:protein_name]
    end
  end

  # Returns one hash per <peptide> child of +protein_element+. Each hash holds
  # the element's attributes (symbol keys) plus :modifications, the list of
  # "modified_peptide" values found on its <modification_info> children.
  # <modification_info> elements without that attribute are ignored instead of
  # raising on a nil attribute.
  def peptides_from_protein(protein_element)
    REXML::XPath.match(protein_element, "./peptide").map do |pel|
      peptide = attributes_of(pel)
      peptide[:modifications] =
        pel.get_elements("./modification_info").map { |mp| mp.attributes["modified_peptide"] }.compact
      peptide
    end
  end

  # Returns one hash per <protein> child of +group_element+: the element's
  # attributes (symbol keys) plus :peptides and :indistinguishable_prots.
  def proteins_from_group(group_element)
    REXML::XPath.match(group_element, "./protein").map do |pel|
      protein = attributes_of(pel)
      protein[:peptides] = peptides_from_protein(pel)
      protein[:indistinguishable_prots] = indistinguishable_proteins_from_protein(pel)
      protein
    end
  end

  # (Re)builds @groups from every <protein_group> in the document and returns it.
  def init_groups
    @groups = REXML::XPath.match(@doc.root, "//protein_group").map do |gel|
      {
        :group_probability => gel.attributes["probability"].to_f,
        :proteins => proteins_from_group(gel)
      }
    end
  end

  # Flattens the peptides of +prot+ into a list of sequence strings: a peptide
  # with recorded modifications contributes each modified form, otherwise it
  # contributes its plain :peptide_sequence.
  def peptide_sequences_from_protein(prot)
    prot[:peptides].flat_map do |pep|
      pep[:modifications].empty? ? [pep[:peptide_sequence]] : pep[:modifications]
    end
  end

  # Builds the summary row for +prot+:
  #   [accession, probability, "acc;indistinguishable;...", peptide count, "pep;pep;..."]
  def protein_to_row(prot)
    accessions = [prot[:protein_name]] + prot[:indistinguishable_prots]
    [
      prot[:protein_name],
      prot[:probability],
      accessions.join(";"),
      prot[:peptides].length,
      peptide_sequences_from_protein(prot).join(";")
    ]
  end

  # Convert the entire prot.xml document to row format
  # Returns an array of arrays. Each of the sub-arrays is a row.
  # The first row is the header. Every protein whose probability is at least
  # +threshold_probability+ gets a row, immediately followed by one row per
  # indistinguishable protein (same content, different accession).
  def as_rows(threshold_probability)
    rows = [["Accession","Probability","Indistinguishable Proteins","Num Peptides","Peptides"]]

    @groups.each do |grp|
      grp[:proteins].each do |prot|
        next unless prot[:probability].to_f >= threshold_probability

        protein_row = protein_to_row(prot)
        rows.push(protein_row)

        prot[:indistinguishable_prots].each do |indist|
          indist_row = protein_row.clone
          indist_row[0] = indist
          rows.push(indist_row)
        end
      end
    end

    rows
  end

  private

  # Copies the attributes of a REXML element into a hash keyed by the
  # attribute's expanded name as a symbol.
  def attributes_of(element)
    collected = {}
    element.attributes.each_attribute { |att| collected[att.expanded_name.to_sym] = att.value }
    collected
  end

end
@@ -0,0 +1,7 @@
1
+ require 'protk/protk'
2
+
3
# Ruby-side entry point for decoy generation.
#
# Protk.make_decoys is not defined here; it is expected to be provided by the
# 'protk/protk' require at the top of this file.
class Randomize
  class << self
    # Normalises the argument types (paths and prefix to String, db_len to
    # Integer) and forwards them, in the same order, to Protk.make_decoys.
    # Returns whatever Protk.make_decoys returns.
    def make_decoys(input_path, db_len, output_path, prefix)
      Protk.make_decoys(input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s)
    end
  end
end
@@ -0,0 +1,182 @@
1
+ #
2
+ # This file is part of protk
3
+ # Created by Ira Cooke 15/12/2010
4
+ #
5
+ # Provides common functionality used by all msms search tools.
6
+ #
7
+ # It allows;
8
+ # 1. Specification of the search database using a simple name ... this class provides the necessary search for the actual file
9
+ # 2. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
10
+ #
11
+
12
+ require 'optparse'
13
+ require 'pathname'
14
+ require 'protk/tool'
15
+
16
# Base class for the msms search tools. On top of whatever Tool sets up, it
# registers the command line options the search engines have in common.
#
# NOTE(review): @options and @option_parser are used here but never created
# in this class; presumably Tool#initialize provides them - confirm.
class SearchTool < Tool

  # Initializes commandline options common to all search tools.
  # Individual search tools can add their own options, but should use Capital letters to avoid conflicts
  #
  # option_support is a hash of feature switches. A group of options is only
  # registered when its switch is exactly +true+:
  #   :database                      -> -d
  #   :msms_search_detailed_options  -> charges, enzyme, instrument, mods, ions, tolerance units, email, username, server, isotope peaks
  #   :msms_search                   -> -f -p -q -a -s -v -c -m
  #   :glyco                         -> -g
  #   :maldi                         -> -l
  #
  def initialize(option_support={})
    super(option_support)

    if option_support[:database] == true
      @options.database = "sphuman"
      @option_parser.on( '-d', '--database dbname', 'Specify the database to use for this search. Default=sphuman' ) do |dbname|
        # Written through @options like every other handler in this method.
        @options.database = dbname
      end
    end

    if option_support[:msms_search_detailed_options] == true
      @options.allowed_charges = "1+,2+,3+"
      @option_parser.on( '--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+' ) do |ac|
        @options.allowed_charges = ac
      end

      @options.enzyme = "Trypsin"
      @option_parser.on('--enzyme enz', 'Enzyme') do |enz|
        @options.enzyme = enz
      end

      @options.instrument = "ESI-QUAD-TOF"
      @option_parser.on('--instrument instrument', 'Instrument') do |instrument|
        @options.instrument = instrument
      end

      @options.var_mods = ""
      @option_parser.on('--var-mods vm', 'Variable modifications (Overrides -g)' ) do |vm|
        @options.var_mods = vm
      end

      @options.fix_mods = ""
      @option_parser.on('--fix-mods fm', 'Fixed modifications (Overrides -c and -m options)' ) do |fm|
        @options.fix_mods = fm
      end

      @options.searched_ions = ""
      @option_parser.on('--searched-ions si', 'Ion series to search (default=b,y)' ) do |si|
        @options.searched_ions = si
      end

      @options.fragment_tolu = "Da"
      @option_parser.on('--fragment-ion-tol-units tolu', 'Fragment ion mass tolerance units (Da or mmu). Default=Da' ) do |tolu|
        @options.fragment_tolu = tolu
      end

      @options.precursor_tolu = "ppm"
      @option_parser.on('--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm' ) do |tolu|
        @options.precursor_tolu = tolu
      end

      @options.email = ""
      @option_parser.on('--email em', 'User email.') do |em|
        @options.email = em
      end

      @options.username = ""
      @option_parser.on('--username un', 'Username.') do |un|
        @options.username = un
      end

      # A pasted copy of the --username block used to follow this one; it
      # registered --username a second time and reset mascot_server to "",
      # discarding the default below. It has been removed.
      @options.mascot_server = "www.matrixscience.com"
      @option_parser.on( '-S', '--server url', 'The url to the cgi directory of the mascot server' ) do |url|
        @options.mascot_server = url
      end

      @options.num_peaks_for_multi_isotope_search = "0"
      @option_parser.on("--num-peaks-for-multi-isotope-search np","Number of peaks to include in multi-isotope search") do |np|
        @options.num_peaks_for_multi_isotope_search = np
      end
    end

    if option_support[:msms_search] == true
      # The default is a Float; a value given on the command line is stored as
      # the raw String.
      @options.fragment_tol = 0.65
      @option_parser.on( '-f', '--fragment-ion-tol tol', 'Fragment ion mass tolerance (unit dependent). Default=0.65' ) do |tol|
        @options.fragment_tol = tol
      end

      @options.precursor_tol = 200
      @option_parser.on( '-p', '--precursor-ion-tol tol', 'Precursor ion mass tolerance in (ppm if precursor search type is monoisotopic or Da if it is average). Default=200' ) do |tol|
        @options.precursor_tol = tol.to_f
      end

      # NOTE(review): the help text says "Dont respect charges" while the flag
      # switches respect_precursor_charges ON - confirm which one is intended.
      @options.respect_precursor_charges = false
      @option_parser.on( '-q', '--respect-charges','Dont respect charges in the input file. Instead impute them by trying various options') do
        @options.respect_precursor_charges = true
      end

      @options.precursor_search_type = "monoisotopic"
      @option_parser.on( '-a', '--search-type type', 'Use monoisotopic or average precursor masses. (monoisotopic or average)' ) do |type|
        @options.precursor_search_type = type
      end

      @options.strict_monoisotopic_mass = false
      @option_parser.on( '-s', '--strict-monoisotopic-mass', 'Dont allow for misassignment of monoisotopic mass to another isotopic peak') do
        @options.strict_monoisotopic_mass = true
      end

      # The default is an Integer; a value given on the command line is stored
      # as the raw String.
      @options.missed_cleavages = 2
      @option_parser.on( '-v', '--num-missed-cleavages num', 'Number of missed cleavages allowed' ) do |num|
        @options.missed_cleavages = num
      end

      @options.carbamidomethyl = true
      @option_parser.on( '-c', '--no-carbamidomethyl', 'Run a search without a carbamidomethyl fixed modification' ) do
        @options.carbamidomethyl = false
      end

      @options.methionine_oxidation = false
      @option_parser.on( '-m', '--methionine-oxidation', 'Run a search with oxidised methionines as a variable modification' ) do
        @options.methionine_oxidation = true
      end
    end

    if option_support[:glyco] == true
      @options.glyco = false
      @option_parser.on( '-g', '--glyco', 'Expect N-Glycosylation modifications as variable mod in a search or as a parameter when building statistical models' ) do
        @options.glyco = true
      end
    end

    if option_support[:maldi] == true
      @options.maldi = false
      @option_parser.on( '-l', '--maldi', 'Run a search on MALDI data') do
        @options.maldi = true
      end
    end

  end


  # Derives a job id from +filename+: the (up to ten) characters that precede
  # the first ".d" match, prefixed with self.jobid_prefix. Returns "protk"
  # when the name contains no such match.
  def jobid_from_filename(filename)
    jobnum_match = filename.match(/(.{1,10})\.d/)
    return "protk" if jobnum_match.nil?

    "#{self.jobid_prefix}#{jobnum_match[1]}"
  end

  # Based on the database setting and global database path, find the most current version of the required database
  # Delegates to Constants#current_database_for_name. +db_type+ is accepted
  # but not used by this implementation; +db+ defaults to the -d option value.
  #
  def current_database(db_type,db=@options.database)
    return Constants.new.current_database_for_name(db)
  end

end
@@ -0,0 +1,245 @@
1
+
2
+ require 'protk/constants.rb'
3
+ require 'rbconfig'
4
+
5
# Project settings object; the rest of this rakefile reads install locations
# from it.
env = Constants.new

# Scratch locations under <protk_dir>/tmp: sources are unpacked and built in
# @build_dir, archives are fetched into @download_dir.
@build_dir, @download_dir = %w[build download].map { |leaf| "#{env.protk_dir}/tmp/#{leaf}" }

# Declare a directory task for each so other tasks can list them as
# prerequisites.
[@build_dir, @download_dir].each { |scratch| directory scratch }
12
+
13
# Name of the system package manager command to use: 'brew' when the host OS
# string reported by RbConfig contains "darwin", 'apt-get' otherwise.
def package_manager_name
  on_darwin = RbConfig::CONFIG['host_os'] =~ /darwin/
  on_darwin ? 'brew' : 'apt-get'
end
19
+
20
# Removes everything inside @build_dir (the directory itself is kept).
#
# The two shell commands are chained with && so the rm only runs when the cd
# succeeded; with ';' a missing build directory would have wiped the contents
# of whatever directory rake happened to be running in.
def clean_build_dir
  sh %{cd #{@build_dir} && rm -rf ./*}
end
23
+
24
# Fetches +url+ with wget into @download_dir.
#
# url  - location of the archive to download
# file - expected archive name; kept for the callers' benefit but not used
#        here, wget picks the name from the URL
#
# cd and wget are chained with && so nothing is downloaded into the current
# directory when @download_dir cannot be entered.
def download_buildfile url, file
  sh %{cd #{@download_dir} && wget #{url}}
end
27
+
28
# Declares a rake file task for <@download_dir>/<packagefile>. The task
# depends on the download directory and, when run, fetches +url+ through
# download_buildfile.
def download_task(url, packagefile)
  archive_path = "#{@download_dir}/#{packagefile}"
  file(archive_path => @download_dir) do
    download_buildfile("#{url}", "#{packagefile}")
  end
end
33
+
34
+ #
35
+ # Package manager
36
+ #
37
# Makes sure the system package manager is available.
#
# apt-get must already be installed; a missing apt-get aborts the task.
# A missing brew is bootstrapped with the Homebrew installer instead.
task :package_manager do
  pmname = package_manager_name
  needs_homebrew = false

  # Passing a block to sh stops rake from aborting on a non-zero exit status,
  # so a missing command can be handled here.
  sh "which #{pmname}" do |ok, _res|
    unless ok
      # raise, not throw: throw is for catch/throw control flow and would
      # surface as an UncaughtThrowError instead of this message.
      raise "Missing package manager #{pmname}" unless pmname == 'brew'
      needs_homebrew = true
    end
  end

  if needs_homebrew
    # These commands used to be written as `sh { "..." }`, which hands sh a
    # block and no command at all, so nothing was ever executed. The first
    # one also lacked its closing double quote.
    # NOTE(review): this pipes a script fetched with certificate checks
    # disabled (-k) straight into ruby - confirm the URL is still the wanted one.
    sh %{ruby -e "$(curl -fsSkL raw.github.com/mxcl/homebrew/go)"}
    sh "brew update"
    sh "brew tap homebrew/versions"
  end
end
54
+
55
+ #
56
+ # System packages
57
+ #
58
# Installs the system packages listed in data/<package manager>_packages.yaml
# (next to this rakefile) using the platform package manager.
task :system_packages => :package_manager do
  # Gather package requirements. load_file opens and closes the manifest
  # itself, so no file handle is left open.
  manifest_path = "#{File.dirname(__FILE__)}/data/#{package_manager_name}_packages.yaml"
  pkgs = YAML.load_file(manifest_path)

  # Each manifest entry is treated as a (name, package list) pair; the lists
  # are merged into one duplicate-free list.
  unique_packages = []
  pkgs.each { |_app, packages| unique_packages = packages | unique_packages }

  # Install all packages. The block makes this best effort: a package that
  # fails to install is reported (its status is printed) but does not abort
  # the remaining installs.
  unique_packages.each do |pk|
    sh "#{package_manager_name} install #{pk}" do |_ok, res|
      p res
    end
  end
end
80
+
81
+ #
82
+ # Perl local::lib
83
+ #
84
perl_locallib_version="1.008004"
perl_locallib_packagefile="local-lib-#{perl_locallib_version}.tar.gz"
# Presence of this file is what marks local::lib as installed.
perl_locallib_installed_file = "#{env.protk_dir}/perl5/lib/perl5/local/lib.pm"
perl_locallib_url = "http://search.cpan.org/CPAN/authors/id/A/AP/APEIRON/local-lib-#{perl_locallib_version}.tar.gz"

download_task perl_locallib_url, perl_locallib_packagefile

# Unpacks local::lib in the build directory, bootstraps it into
# <protk_dir>/perl5 and makes sure the login shell start-up files activate it.
file perl_locallib_installed_file => [@build_dir,"#{@download_dir}/#{perl_locallib_packagefile}"] do
  sh %{cp #{@download_dir}/#{perl_locallib_packagefile} #{@build_dir}}
  perl_dir = "#{env.protk_dir}/perl5"

  sh %{cd #{@build_dir}; gunzip local-lib-#{perl_locallib_version}.tar.gz }
  sh %{cd #{@build_dir}; tar -xf local-lib-#{perl_locallib_version}.tar }
  # The (empty) block keeps rake from aborting when the bootstrap fails.
  sh "cd #{@build_dir}/local-lib-#{perl_locallib_version}; perl Makefile.PL --bootstrap=#{perl_dir}; make install" do |ok,res|
    # clean_build_dir if ok
  end

  # Append the activation line to each start-up file unless it already has
  # one. The previous check never expanded "~" (so the file was never found)
  # and tested for the opposite condition, which appended a duplicate line on
  # every run.
  activation_line = "eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
  ["~/.bashrc", "~/.bash_profile"].each do |rc_file|
    rc_path = File.expand_path(rc_file)
    next if File.exist?(rc_path) && File.read(rc_path) =~ /Mlocal::lib/

    sh "echo '#{activation_line}' >>#{rc_file}"
  end
end

task :perl_locallib => [perl_locallib_installed_file]
111
+
112
+
113
+ #
114
+ # Top Level Packages.
115
+ #
116
+
117
+
118
+ #
119
+ # TPP
120
+ #
121
tpp_version="4.6.1"
tpp_packagefile="TPP-#{tpp_version}.tgz"
# The xinteract executable doubles as the "TPP is installed" marker.
tpp_installed_file = "#{env.xinteract}"
# Built from tpp_packagefile so the URL follows tpp_version.
tpp_url = "https://dl.dropbox.com/u/226794/#{tpp_packagefile}"

download_task tpp_url, tpp_packagefile

# Build
#
# The prerequisite is the local::lib marker file rather than the plain
# :perl_locallib task: a file task that depends on a non-file task is always
# considered out of date by rake, which forced a full rebuild on every run.
file tpp_installed_file => [perl_locallib_installed_file,@build_dir,"#{@download_dir}/#{tpp_packagefile}"] do
  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::CGI --force}

  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}

  tpp_source_root = "#{@build_dir}/TPP-#{tpp_version}/trans_proteomic_pipeline"

  # Tell the TPP build where it will be installed.
  File.open("#{tpp_source_root}/src/Makefile.config.incl","wb") do |f|
    f.write "TPP_ROOT=#{env.tpp_root}/\nTPP_WEB=/tpp/\nXSLT_PROC=/usr/bin/xsltproc\nCGI_USERS_DIR=${TPP_ROOT}cgi-bin/"
  end

  # Homebrew specific modifications to makefiles
  #
  if ( package_manager_name=='brew')
    # Point the GD / libpng settings at the Homebrew prefix.
    makefile_incl_path = "#{tpp_source_root}/src/Makefile.incl"
    makefile_incl_text = File.read(makefile_incl_path)
    subs_text = makefile_incl_text.gsub(/GD_LIB= \/opt\/local\/lib\/libgd.a \/opt\/local\/lib\/libpng.a/,"GD_LIB= /usr/local/lib/libgd.a /usr/local/opt/libpng12/lib/libpng.a") #We're using homebrew not fink or macports
    subs_text = subs_text.gsub(/GD_INCL= -I \/opt\/local\/include\//,"GD_INCL= -I /usr/local/include/ -I /usr/local/opt/libpng12/include")
    File.open(makefile_incl_path,"w+") { |f| f.write subs_text }

    # Replace "cp -rfu" with "cp -rf" in the CGI makefile.
    makefile_path = "#{tpp_source_root}/CGI/Makefile"
    makefile_text = File.read(makefile_path)
    File.open(makefile_path,"w+") { |f| f.write makefile_text.gsub("cp -rfu","cp -rf") }
  end

  sh %{cd #{tpp_source_root}/src ; make; make install}
end

task :tpp => tpp_installed_file
165
+
166
+
167
+
168
+ #
169
+ # omssa
170
+ #
171
# Platform tag used in the omssa archive name: 'macos' when the host OS
# string reported by RbConfig contains "darwin", 'linux' otherwise.
def omssa_platform
  on_darwin = RbConfig::CONFIG['host_os'] =~ /darwin/
  on_darwin ? 'macos' : 'linux'
end
177
+
178
omssa_packagefile = "omssa-#{omssa_platform}.tar.gz"
# The omssacl executable doubles as the "omssa is installed" marker.
omssa_installed_file = "#{env.omssacl}"
omssa_url = "ftp://ftp.ncbi.nih.gov/pub/lewisg/omssa/CURRENT/#{omssa_packagefile}"

download_task omssa_url, omssa_packagefile

# Install: copy the archive to the build directory, unpack it there and copy
# the unpacked tree into the omssa root.
file omssa_installed_file => [@build_dir, "#{@download_dir}/#{omssa_packagefile}"] do
  omssa_install_steps = [
    "cp #{@download_dir}/#{omssa_packagefile} #{@build_dir}",
    "cd #{@build_dir}; gunzip omssa-#{omssa_platform}.tar.gz",
    "cd #{@build_dir}; tar -xvf omssa-#{omssa_platform}.tar",
    "mkdir -p #{env.omssa_root}",
    "cd #{@build_dir}; cp -r omssa-*.#{omssa_platform}/* #{env.omssa_root}/"
  ]
  omssa_install_steps.each { |step| sh step }
end

task :omssa => omssa_installed_file
194
+
195
+
196
+
197
+ #
198
+ # blast
199
+ #
200
# Platform tag used in the blast archive name: 'universal-macosx' when the
# host OS string reported by RbConfig contains "darwin", 'x64-linux' otherwise.
def blast_platform
  on_darwin = RbConfig::CONFIG['host_os'] =~ /darwin/
  on_darwin ? 'universal-macosx' : 'x64-linux'
end
206
+
207
blast_version = "2.2.27+"
blast_packagefile = "ncbi-blast-#{blast_version}-#{blast_platform}.tar.gz"
# The directory on the server is named after the version without its trailing '+'.
blast_url = "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/#{blast_version.chomp('+')}/#{blast_packagefile}"
# The makeblastdb executable doubles as the "blast is installed" marker.
blast_installed_file = "#{env.makeblastdb}"

download_task blast_url, blast_packagefile

# Install: copy the archive to the build directory, unpack it there and copy
# the unpacked tree into the blast root.
file blast_installed_file => [@build_dir, "#{@download_dir}/#{blast_packagefile}"] do
  blast_install_steps = [
    "cp #{@download_dir}/#{blast_packagefile} #{@build_dir}",
    "cd #{@build_dir}; gunzip #{blast_packagefile}",
    "cd #{@build_dir}; tar -xvf #{blast_packagefile.chomp('.gz')}",
    "mkdir -p #{env.blast_root}",
    "cd #{@build_dir}; cp -r ncbi-blast-#{blast_version}/* #{env.blast_root}/"
  ]
  blast_install_steps.each { |step| sh step }
end

task :blast => blast_installed_file
224
+
225
+
226
+ #
227
+ # MSGFPlus
228
+ #
229
msgfplus_version = "20120823"
msgfplus_packagefile = "MSGFPlus.#{msgfplus_version}.zip"
msgfplus_url = "http://proteomics.ucsd.edu/Downloads/#{msgfplus_packagefile}"
# The MSGFPlus jar doubles as the "MSGFPlus is installed" marker.
msgfplus_installed_file = "#{env.msgfplusjar}"

download_task msgfplus_url, msgfplus_packagefile

# Install: copy the archive to the build directory, unzip it there and copy
# MSGFPlus.jar into the MSGFPlus root.
file msgfplus_installed_file => [@build_dir, "#{@download_dir}/#{msgfplus_packagefile}"] do
  msgfplus_install_steps = [
    "cp #{@download_dir}/#{msgfplus_packagefile} #{@build_dir}",
    "cd #{@build_dir}; unzip #{msgfplus_packagefile}",
    "mkdir -p #{env.msgfplus_root}",
    "cd #{@build_dir}; cp MSGFPlus.jar #{env.msgfplus_root}/"
  ]
  msgfplus_install_steps.each { |step| sh step }
end

task :msgfplus => msgfplus_installed_file

# Umbrella task.
# NOTE(review): :msgfplus is not part of :all - confirm that is intentional.
task :all => [:tpp, :omssa, :blast]
@@ -0,0 +1,19 @@
1
+ require 'optparse'
2
+ require 'pathname'
3
+ require 'protk/tool'
4
+
5
+ require 'rake'
6
+ require 'pp'
7
+
8
+ include Rake::DSL
9
+
10
+ Rake.application.init 'protk_setup'
11
+ Rake.application.rake_require 'protk/setup_rakefile'
12
+
13
# Command line tool wrapper around the setup rakefile loaded at the top of
# this file.
class SetupTool < Tool

  # Runs the rake task named +toolname+ through the shared Rake application.
  def install(toolname)
    Rake.application.invoke_task(toolname)
  end

end
@@ -0,0 +1,78 @@
1
+ # Add a method to the Spreadsheet::Worksheet class to insert a column
2
class Spreadsheet::Worksheet < Object
  # Inserts the values of +col+ as a new column at position +index+.
  #
  # col   - Array with one value per existing row (any length when the sheet
  #         has no rows yet)
  # index - Integer column position handed to each row's insert
  #
  # Raises RuntimeError when the arguments have the wrong type or when the
  # column length does not match the number of rows.
  def insert_column(col,index)
    # Validate the types first so that col.length below is only called on an
    # Array. Integer is checked instead of Fixnum because Fixnum no longer
    # exists in current Ruby versions; Integer covers it on old ones too.
    unless col.is_a?(Array) && index.is_a?(Integer)
      raise "Wrong arguments. Requires a column array and an integer index"
    end

    row_total = self.rows.length

    # The length of the column must equal the number of rows, unless the sheet is still empty
    if col.length != row_total && row_total != 0
      raise "The length of column #{col.length} does not equal the number of rows #{row_total}"
    end

    if row_total == 0
      # Special case: there are no rows yet, so every value becomes a new one-cell row
      col.each_index { |i| self.insert_row(i,[col[i]]) }
    else
      # Insert the column row by row. Probably inefficient but it works
      rowi = 0
      self.each do |row|
        row.insert(index,col[rowi])
        rowi += 1
      end
    end
  end
end
27
+
28
class Spreadsheet::Workbook < Object

  # Creates a new workbook (returned) containing a copy of the first
  # worksheet of this one, limited to the first +numrows+ rows.
  #
  # numrows - number of rows to copy; 0, nil/false or a value larger than the
  #           sheet means "all rows"
  #
  # Only the first worksheet is copied. When the workbook does not hold
  # exactly one worksheet a warning is printed; no error is raised.
  # nil cells in the copied rows are replaced by empty strings.
  #
  def copyBook(numrows=0)
    numrows ||= 0

    # Create a new workbook from scratch for writing
    outputBook = Spreadsheet::Workbook.new
    outputSheet = outputBook.create_worksheet

    # There should only be one worksheet in the input workbook
    if self.worksheets.length != 1
      puts "More than one worksheet in this excel file. This script only operates on single worksheets"
    end

    # Get the worksheet
    inputSheet = self.worksheet 0
    available_rows = inputSheet.row_count

    # Clamp the request to what the sheet holds. The comparison used to be
    # against row_count+1, which let a request for exactly one row too many
    # through, so the message below announced a row that was never copied.
    numrows = available_rows if numrows == 0 || numrows > available_rows

    # Transcribe everything from the old worksheet to the new one
    puts "Creating new spreadsheet with #{numrows} rows"
    (0...numrows).each do |r|
      outputSheet.insert_row(r,inputSheet.row(r))

      # After inserting the row make sure it doesn't contain any nil values
      newRow = outputSheet.row(r)
      newRow.each_index { |ci| newRow[ci] = "" if newRow[ci].nil? }
    end

    outputBook
  end

end