protk 1.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,38 @@
1
+ require 'rubygems'
2
+ require 'bio'
3
+ require 'protk/constants'
4
+
5
+ # Provides fast indexed access to a swissprot database in a flat .dat file
6
+ #
7
# Wraps a Bio::FlatFileIndex over a UniProt annotation database and returns
# entries as Bio::SPTR objects.
#
class SwissprotDatabase

  # Opens the index for the requested database.
  #
  # env      - object supplying configuration and logging. This class reads
  #            protein_database_root, uniprot_sprot_annotation_database and
  #            uniprot_trembl_annotation_database from it and calls log on it.
  #            When nil, a new Constants instance is used instead.
  # database - "swissprot" selects the Swiss-Prot index; any other value
  #            selects the TrEMBL index.
  #
  def initialize(env=nil, database="swissprot")
    @genv = env.nil? ? Constants.new : env

    index_name = if database == "swissprot"
      @genv.uniprot_sprot_annotation_database
    else
      @genv.uniprot_trembl_annotation_database
    end

    # Both databases are resolved the same way. File.join guarantees exactly
    # one separator between the root and the index name (the TrEMBL path was
    # previously built without any separator at all).
    index_path = File.join(@genv.protein_database_root.to_s, index_name.to_s)

    @db_object = Bio::FlatFileIndex.new(index_path)
    @db_object.always_check_consistency = false
  end

  # Looks up a single entry by its identifier.
  #
  # name - identifier passed straight through to the index's get_by_id.
  #
  # Returns a Bio::SPTR built from the raw entry text, or nil when the index
  # yields an empty result (a warning is logged through the environment in
  # that case).
  #
  def get_entry_for_name(name)
    result = @db_object.get_by_id(name)

    # The index signals "not found" with an empty string; nil is treated the
    # same way so that an empty record is never handed to the parser.
    if result.nil? || result == ""
      unless @genv.nil?
        @genv.log("Failed to find UniProt entry for protein named #{name} in database",:warn)
      end
      return nil
    end

    Bio::SPTR.new(result)
  end

end
data/lib/protk/tool.rb ADDED
@@ -0,0 +1,182 @@
1
+ #
2
+ # This file is part of protk
3
+ # Created by Ira Cooke 16/12/2010
4
+ #
5
+ # Provides common functionality used by all tools.
6
+ #
7
+
8
+ require 'ostruct'
9
+ require 'optparse'
10
+ require 'pathname'
11
+ require 'protk/command_runner'
12
+
13
# Base class holding behaviour shared by the command-line tools: a set of
# parsed options, an OptionParser pre-populated with common switches, helpers
# for deriving input/output base paths, and a dispatcher that runs a command
# either locally or through a batch queue.
#
class Tool

  # Options set from the command-line (an OpenStruct)
  #
  attr_reader :options

  # The option parser used to parse command-line options.
  #
  attr_reader :option_parser

  # Prefix for background jobs
  # x = X!Tandem, o=OMSSA, p="Phenyx", m="Mascot"
  # Public reader and writer, so subclasses and callers can both use it.
  #
  attr_accessor :jobid_prefix

  # Provides direct access to options through methods of the same name.
  # Only zero-argument, block-less calls are forwarded to the options object;
  # anything else falls through to the normal NoMethodError handling.
  #
  def method_missing(meth, *args, &block)
    if args.empty? && block.nil?
      @options.send(meth)
    else
      super
    end
  end

  # Keeps respond_to? in step with method_missing: an option that has been
  # set on the options object is reported as an available method.
  #
  def respond_to_missing?(meth, include_private=false)
    (!@options.nil? && @options.respond_to?(meth, include_private)) || super
  end

  # Creates an empty options object to hold commandline options
  # Also creates an option_parser with default options common to all tools
  #
  # option_support - Hash of feature flags. Each of :prefix_suffix,
  #                  :explicit_output, :over_write, :background and :help
  #                  enables the matching switches only when its value is
  #                  exactly true.
  #
  def initialize(option_support={:help=>true})
    @jobid_prefix = "x"

    @options = OpenStruct.new
    @options.library = []
    @options.inplace = false
    @options.encoding = "utf8"
    @options.transfer_type = :auto
    @options.verbose = false

    @option_parser = OptionParser.new do |opts|

      if option_support[:prefix_suffix] == true
        @options.output_prefix = ""
        opts.on( '-b', '--output-prefix pref', 'A string to prepend to the name of output files' ) do |prefix|
          @options.output_prefix = prefix
        end

        @options.output_suffix = ""
        opts.on( '-e', '--output-suffix suff', 'A string to append to the name of output files' ) do |suffix|
          @options.output_suffix = suffix
        end
      end

      if option_support[:explicit_output] == true
        @options.explicit_output = nil
        opts.on( '-o', '--output out', 'An explicitly named output file.' ) do |out|
          @options.explicit_output = out
        end
      end

      if option_support[:over_write] == true
        @options.over_write = false
        opts.on( '-r', '--replace-output', 'Dont skip analyses for which the output file already exists' ) do
          @options.over_write = true
        end
      end

      if option_support[:background] == true
        @options.background = false
        opts.on( '-z', '--background', 'Run jobs in the background using pbs' ) do
          @options.background = true
        end
      end

      if option_support[:help] == true
        opts.on( '-h', '--help', 'Display this screen' ) do
          puts opts
          exit
        end
      end

    end
  end

  # Create and return a full base path (without extension) representing the output file for this analysis
  # Optionally provide the extension to be removed (if not provided it will be inferred)
  #
  # The result is the resolved directory of output_file, followed by the
  # output prefix option, the extension-less file name and the output suffix
  # option.
  #
  # Raises ArgumentError if the directory of output_file does not exist.
  # (ArgumentError is used so that code rescuing the error produced by the
  # former string throw keeps working.)
  #
  def output_base_path(output_file, ext=nil)
    output_path = Pathname.new(output_file)
    out_dir = output_path.dirname
    raise ArgumentError, "Error: Output directory #{out_dir} does not exist" unless out_dir.exist?

    stem = strip_extension(output_path, ext)
    "#{out_dir.realpath}/#{@options.output_prefix}#{stem}#{@options.output_suffix}"
  end

  # Create a full base path (without extension) representing the input file for this analysis
  # Optionally provide the extension to be removed (if not provided it will be inferred)
  #
  # Raises ArgumentError if the directory of input_file does not exist.
  #
  def input_base_path(input_file, ext=nil)
    input_path = Pathname.new(input_file)
    in_dir = input_path.dirname
    raise ArgumentError, "Error: Input directory #{in_dir} does not exist" unless in_dir.exist?

    "#{in_dir.realpath}/#{strip_extension(input_path, ext)}"
  end

  # Run the search tool using the given command string and global environment
  #
  # cmd            - command string handed to the CommandRunner.
  # genv           - environment object; passed to CommandRunner.new and asked
  #                  for has_pbs when the background option is set.
  # job_params, jobscript_path, autodelete
  #                - forwarded unchanged to CommandRunner#run_batch; unused
  #                  for local runs.
  #
  # Returns whatever the CommandRunner call returns.
  # Raises ArgumentError when the background option is set but genv reports
  # no batch support.
  #
  def run(cmd, genv, job_params=nil, jobscript_path=nil, autodelete=true)
    if @options.background
      # Check for batch support before constructing the runner
      unless genv.has_pbs
        raise ArgumentError, "Error: Background option was selected but this host does not support background jobs"
      end

      # Send this job off to be run in a batch queuer
      CommandRunner.new(genv).run_batch(cmd, job_params, jobscript_path, autodelete)
    else
      CommandRunner.new(genv).run_local(cmd)
    end
  end

  # Derives a job identifier from a file name.
  #
  # Takes up to ten characters immediately preceding the first matching ".d"
  # in filename and prepends jobid_prefix to them. Returns "protk" when the
  # file name contains no such match.
  # NOTE(review): the ".d" is presumably a vendor raw-data suffix - confirm.
  #
  def jobid_from_filename(filename)
    jobnum_match = filename.match(/(.{1,10})\.d/)
    return "protk" if jobnum_match.nil?

    "#{jobid_prefix}#{jobnum_match[1]}"
  end

  private

  # Returns the file name of path with ext removed from its end. When ext is
  # nil the extension reported by Pathname#extname is used. The extension is
  # matched literally and only at the very end of the name.
  #
  def strip_extension(path, ext)
    ext = path.extname if ext.nil?
    path.basename.to_s.sub(/#{Regexp.escape(ext.to_s)}\z/, "")
  end

end
@@ -0,0 +1,11 @@
1
+ require 'libxml'
2
+ include LibXML
3
+
4
# Holds the locations of the two template files that sit in the "data"
# directory next to this source file.
#
class XTandemDefaults

  # Absolute path to data/tandem_params.xml
  attr_reader :path

  # Absolute path to data/taxonomy_template.xml
  attr_reader :taxonomy_path

  # Resolves both paths relative to this file. They are expanded to absolute
  # paths so they remain valid if the process changes its working directory
  # after this object is created.
  #
  def initialize
    data_dir = File.expand_path("data", File.dirname(__FILE__))
    @path = File.join(data_dir, "tandem_params.xml")
    @taxonomy_path = File.join(data_dir, "taxonomy_template.xml")
  end

end
data/lib/protk.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'protk/tool.rb'
2
+ require 'protk/swissprot_database.rb'
3
+ require 'protk/spreadsheet_extensions.rb'
4
+ require 'protk/search_tool.rb'
5
+ require 'protk/protxml.rb'
6
+ require 'protk/protein_annotator.rb'
7
+ require 'protk/prophet_tool.rb'
8
+ require 'protk/plasmodb.rb'
9
+ require 'protk/omssa_util.rb'
10
+ require 'protk/mascot_util.rb'
11
+ require 'protk/manage_db_tool.rb'
12
+ require 'protk/galaxy_util.rb'
13
+ require 'protk/galaxy_stager.rb'
14
+ require 'protk/eupathdb_gene_information_table.rb'
15
+ require 'protk/constants.rb'
16
+ require 'protk/command_runner.rb'
17
+ require 'protk/biotools_excel_converter.rb'
18
+ require 'protk/bio_sptr_extensions.rb'
metadata ADDED
@@ -0,0 +1,256 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: protk
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0.pre
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Ira Cooke
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ftools
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.0.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: open4
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 1.3.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 1.3.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: bio
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 1.4.3
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 1.4.3
62
+ - !ruby/object:Gem::Dependency
63
+ name: rest-client
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.6.7
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.7
78
+ - !ruby/object:Gem::Dependency
79
+ name: net-ftp-list
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 3.2.5
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 3.2.5
94
+ - !ruby/object:Gem::Dependency
95
+ name: spreadsheet
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: 0.7.4
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: 0.7.4
110
+ - !ruby/object:Gem::Dependency
111
+ name: libxml-ruby
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: 2.3.3
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: 2.3.3
126
+ - !ruby/object:Gem::Dependency
127
+ name: rspec
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: '2.5'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: '2.5'
142
+ description: A bunch of tools for proteomics
143
+ email: iracooke@gmail.com
144
+ executables:
145
+ - protk_setup.rb
146
+ - manage_db.rb
147
+ - tandem_search.rb
148
+ - mascot_search.rb
149
+ - omssa_search.rb
150
+ - msgfplus_search.rb
151
+ - big_search.rb
152
+ - mascot_to_pepxml.rb
153
+ - file_convert.rb
154
+ - make_decoy.rb
155
+ - correct_omssa_retention_times.rb
156
+ - repair_run_summary.rb
157
+ - peptide_prophet.rb
158
+ - interprophet.rb
159
+ - protein_prophet.rb
160
+ - pepxml_to_table.rb
161
+ - xls_to_table.rb
162
+ - annotate_ids.rb
163
+ - unimod_to_loc.rb
164
+ - generate_omssa_loc.rb
165
+ extensions:
166
+ - ext/protk/extconf.rb
167
+ extra_rdoc_files: []
168
+ files:
169
+ - lib/protk/big_search_tool.rb
170
+ - lib/protk/bio_sptr_extensions.rb
171
+ - lib/protk/biotools_excel_converter.rb
172
+ - lib/protk/command_runner.rb
173
+ - lib/protk/constants.rb
174
+ - lib/protk/eupathdb_gene_information_table.rb
175
+ - lib/protk/galaxy_stager.rb
176
+ - lib/protk/galaxy_util.rb
177
+ - lib/protk/manage_db_tool.rb
178
+ - lib/protk/mascot_util.rb
179
+ - lib/protk/omssa_util.rb
180
+ - lib/protk/plasmodb.rb
181
+ - lib/protk/prophet_tool.rb
182
+ - lib/protk/protein_annotator.rb
183
+ - lib/protk/protxml.rb
184
+ - lib/protk/randomize.rb
185
+ - lib/protk/search_tool.rb
186
+ - lib/protk/setup_tool.rb
187
+ - lib/protk/spreadsheet_extensions.rb
188
+ - lib/protk/swissprot_database.rb
189
+ - lib/protk/tool.rb
190
+ - lib/protk/xtandem_defaults.rb
191
+ - lib/protk.rb
192
+ - lib/protk/big_search_rakefile.rake
193
+ - lib/protk/manage_db_rakefile.rake
194
+ - lib/protk/setup_rakefile.rake
195
+ - bin/annotate_ids.rb
196
+ - bin/big_search.rb
197
+ - bin/correct_omssa_retention_times.rb
198
+ - bin/feature_finder.rb
199
+ - bin/file_convert.rb
200
+ - bin/generate_omssa_loc.rb
201
+ - bin/interprophet.rb
202
+ - bin/make_decoy.rb
203
+ - bin/manage_db.rb
204
+ - bin/mascot_search.rb
205
+ - bin/mascot_to_pepxml.rb
206
+ - bin/msgfplus_search.rb
207
+ - bin/omssa_search.rb
208
+ - bin/peptide_prophet.rb
209
+ - bin/pepxml_to_table.rb
210
+ - bin/protein_prophet.rb
211
+ - bin/protk_setup.rb
212
+ - bin/repair_run_summary.rb
213
+ - bin/tandem_search.rb
214
+ - bin/template_search.rb
215
+ - bin/unimod_to_loc.rb
216
+ - bin/xls_to_table.rb
217
+ - README.md
218
+ - lib/protk/data/apt-get_packages.yaml
219
+ - lib/protk/data/brew_packages.yaml
220
+ - lib/protk/data/default_config.yml
221
+ - lib/protk/data/FeatureFinderCentroided.ini
222
+ - lib/protk/data/predefined_db.crap.yaml
223
+ - lib/protk/data/predefined_db.sphuman.yaml
224
+ - lib/protk/data/predefined_db.swissprot_annotation.yaml
225
+ - lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
226
+ - lib/protk/data/tandem_params.xml
227
+ - lib/protk/data/taxonomy_template.xml
228
+ - lib/protk/data/unimod.xml
229
+ - ext/protk/protk.c
230
+ - ext/protk/extconf.rb
231
+ homepage: http://rubygems.org/gems/protk
232
+ licenses: []
233
+ post_install_message: Now run protk_setup.rb to install third party tools and manage_db.rb
234
+ to install databases.
235
+ rdoc_options: []
236
+ require_paths:
237
+ - lib
238
+ required_ruby_version: !ruby/object:Gem::Requirement
239
+ none: false
240
+ requirements:
241
+ - - ! '>='
242
+ - !ruby/object:Gem::Version
243
+ version: '0'
244
+ required_rubygems_version: !ruby/object:Gem::Requirement
245
+ none: false
246
+ requirements:
247
+ - - ! '>'
248
+ - !ruby/object:Gem::Version
249
+ version: 1.3.1
250
+ requirements: []
251
+ rubyforge_project:
252
+ rubygems_version: 1.8.24
253
+ signing_key:
254
+ specification_version: 3
255
+ summary: Proteomics Toolkit
256
+ test_files: []