protk 1.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,38 @@
1
+ require 'rubygems'
2
+ require 'bio'
3
+ require 'protk/constants'
4
+
5
+ # Provides fast indexed access to a swissprot database in a flat .dat file
6
+ #
7
# Provides indexed access to a UniProt annotation database stored as a
# flat .dat file, looked up through a Bio::FlatFileIndex.
#
class SwissprotDatabase

  # Opens the flat-file index for the requested database.
  #
  # env      - object supplying protein_database_root, the two
  #            uniprot_*_annotation_database names and log. A new Constants
  #            is created when nil.
  # database - "swissprot" opens the uniprot_sprot index; any other value
  #            opens the uniprot_trembl index.
  #
  def initialize(env=nil,database="swissprot")
    @genv = env.nil? ? Constants.new : env

    index_name = if database=="swissprot"
      @genv.uniprot_sprot_annotation_database
    else
      @genv.uniprot_trembl_annotation_database
    end

    # File.join puts exactly one separator between the root and the index
    # name, so both databases resolve consistently whether or not either
    # part already carries a slash.
    index_path = File.join(@genv.protein_database_root.to_s, index_name.to_s)

    @db_object = Bio::FlatFileIndex.new(index_path)
    @db_object.always_check_consistency = false
  end


  # Looks up +name+ in the index.
  #
  # Returns a Bio::SPTR built from the stored entry, or nil (after logging a
  # warning through the environment) when the index yields no entry text.
  #
  def get_entry_for_name(name)
    result = @db_object.get_by_id(name)

    if result.nil? || result==""
      @genv.log("Failed to find UniProt entry for protein named #{name} in database",:warn)
      return nil
    end

    Bio::SPTR.new(result)
  end

end
data/lib/protk/tool.rb ADDED
@@ -0,0 +1,182 @@
1
+ #
2
+ # This file is part of protk
3
+ # Created by Ira Cooke 16/12/2010
4
+ #
5
+ # Provides common functionality used by all tools.
6
+ #
7
+
8
+ require 'ostruct'
9
+ require 'optparse'
10
+ require 'pathname'
11
+ require 'protk/command_runner'
12
+
13
# Base class providing functionality shared by the command-line tools:
# a set of default options, an OptionParser pre-loaded with common switches,
# helpers for deriving input/output base paths, and a wrapper for running
# commands locally or through a batch queue.
#
class Tool

  # Options set from the command-line
  #
  attr_reader :options

  # The option parser used to parse command-line options.
  #
  attr_reader :option_parser

  # Prefix for background jobs
  # x = X!Tandem, o=OMSSA, p="Phenyx", m="Mascot"
  #
  attr_accessor :jobid_prefix

  # Provides direct access to options through methods of the same name.
  # Only calls with no arguments and no block are forwarded to the options
  # object; anything else falls through to the default behaviour.
  #
  # NOTE(review): forwarding uses send, which also reaches private methods
  # of the options object.
  #
  def method_missing(meth, *args, &block)
    if args.empty? && block.nil?
      @options.send meth
    else
      super
    end
  end

  # Keeps respond_to? in step with method_missing: reports true for names
  # the options object itself responds to.
  #
  def respond_to_missing?(meth, include_private=false)
    @options.respond_to?(meth) || super
  end

  # Creates an options object holding the default option values and an
  # option_parser with the switches common to all tools.
  #
  # option_support - hash selecting which groups of switches to register.
  #                  A group is enabled only when its value is exactly true:
  #                  :prefix_suffix   => -b/--output-prefix, -e/--output-suffix
  #                  :explicit_output => -o/--output
  #                  :over_write      => -r/--replace-output
  #                  :background      => -z/--background
  #                  :help            => -h/--help (prints usage and exits)
  #
  def initialize(option_support={:help=>true})
    @jobid_prefix = "x"
    @options = OpenStruct.new
    options.library = []
    options.inplace = false
    options.encoding = "utf8"
    options.transfer_type = :auto
    options.verbose = false

    @option_parser=OptionParser.new do |opts|

      if option_support[:prefix_suffix]==true
        @options.output_prefix = ""
        opts.on( '-b', '--output-prefix pref', 'A string to prepend to the name of output files' ) do |prefix|
          @options.output_prefix = prefix
        end

        @options.output_suffix = ""
        opts.on( '-e', '--output-suffix suff', 'A string to append to the name of output files' ) do |suffix|
          @options.output_suffix = suffix
        end
      end

      if option_support[:explicit_output]==true
        @options.explicit_output = nil
        opts.on( '-o', '--output out', 'An explicitly named output file.' ) do |out|
          @options.explicit_output = out
        end
      end

      if option_support[:over_write]==true
        @options.over_write=false
        opts.on( '-r', '--replace-output', 'Dont skip analyses for which the output file already exists' ) do
          @options.over_write = true
        end
      end

      if option_support[:background]==true
        @options.background = false
        opts.on( '-z', '--background', 'Run jobs in the background using pbs' ) do
          @options.background = true
        end
      end

      if option_support[:help]==true
        opts.on( '-h', '--help', 'Display this screen' ) do
          puts opts
          exit
        end
      end

    end

  end

  # Create and return a full base path (without extension) representing the output file for this analysis.
  # The result is the resolved directory of +output_file+ followed by the
  # output prefix option, the file name minus its extension, and the output
  # suffix option.
  #
  # output_file - path of the output file; its directory must exist.
  # ext         - extension to remove, taken literally (if not provided it
  #               is inferred from the file name).
  #
  # Raises ArgumentError when the directory does not exist.
  #
  def output_base_path(output_file,ext=nil)
    output_path=Pathname.new(output_file)
    raise ArgumentError, "Error: Output directory #{output_path.dirname} does not exist" unless output_path.dirname.exist?

    dir=output_path.dirname.realpath.to_s
    base_name=basename_without_ext(output_path,ext)

    "#{dir}/#{@options.output_prefix}#{base_name}#{@options.output_suffix}"
  end

  # Create a full base path (without extension) representing the input file for this analysis.
  #
  # input_file - path of the input file; its directory must exist.
  # ext        - extension to remove, taken literally (if not provided it
  #              is inferred from the file name).
  #
  # Raises ArgumentError when the directory does not exist.
  #
  def input_base_path(input_file,ext=nil)
    input_path=Pathname.new(input_file)
    raise ArgumentError, "Error: Input directory #{input_path.dirname} does not exist" unless input_path.dirname.exist?

    dir=input_path.dirname.realpath.to_s
    "#{dir}/#{basename_without_ext(input_path,ext)}"
  end

  # Run the given command string using the given global environment.
  # When the background option is set the command is handed to
  # CommandRunner#run_batch together with job_params, jobscript_path and
  # autodelete; otherwise it is passed to CommandRunner#run_local.
  #
  # Raises ArgumentError when the background option is set but genv.has_pbs
  # is false. ArgumentError is used so that callers rescuing the previous
  # "uncaught throw" failure (an ArgumentError) continue to work.
  #
  def run(cmd,genv,job_params=nil,jobscript_path=nil,autodelete=true)
    if @options.background
      raise ArgumentError, "Error: Background option was selected but this host does not support background jobs" unless genv.has_pbs

      # Send this job off to be run in a batch queuer
      CommandRunner.new(genv).run_batch(cmd,job_params,jobscript_path,autodelete)
    else
      CommandRunner.new(genv).run_local(cmd)
    end
  end

  # Derives a job id from a file name: the jobid_prefix followed by up to ten
  # characters immediately preceding the first matching ".d" in +filename+.
  # Returns "protk" when the name contains no such match.
  #
  def jobid_from_filename(filename)
    jobnum_match=filename.match(/(.{1,10})\.d/)
    return "protk" if jobnum_match.nil?

    "#{self.jobid_prefix}#{jobnum_match[1]}"
  end

  private

  # Returns the file name of +path+ (a Pathname) with a trailing +ext+
  # removed. The extension is escaped so it is matched literally, and is
  # anchored to the very end of the name.
  #
  def basename_without_ext(path,ext)
    ext=path.extname if ext.nil?
    path.basename.to_s.sub(/#{Regexp.escape(ext)}\z/,"")
  end

end
@@ -0,0 +1,11 @@
1
+ require 'libxml'
2
+ include LibXML
3
+
4
# Holds the locations of the default tandem parameter file and the taxonomy
# template, both resolved relative to the data directory beside this source
# file.
class XTandemDefaults
  # path          - location of tandem_params.xml
  # taxonomy_path - location of taxonomy_template.xml
  attr_reader :path, :taxonomy_path

  def initialize
    here=File.dirname(__FILE__)
    @path="#{here}/data/tandem_params.xml"
    @taxonomy_path="#{here}/data/taxonomy_template.xml"
  end
end
data/lib/protk.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'protk/tool.rb'
2
+ require 'protk/swissprot_database.rb'
3
+ require 'protk/spreadsheet_extensions.rb'
4
+ require 'protk/search_tool.rb'
5
+ require 'protk/protxml.rb'
6
+ require 'protk/protein_annotator.rb'
7
+ require 'protk/prophet_tool.rb'
8
+ require 'protk/plasmodb.rb'
9
+ require 'protk/omssa_util.rb'
10
+ require 'protk/mascot_util.rb'
11
+ require 'protk/manage_db_tool.rb'
12
+ require 'protk/galaxy_util.rb'
13
+ require 'protk/galaxy_stager.rb'
14
+ require 'protk/eupathdb_gene_information_table.rb'
15
+ require 'protk/constants.rb'
16
+ require 'protk/command_runner.rb'
17
+ require 'protk/biotools_excel_converter.rb'
18
+ require 'protk/bio_sptr_extensions.rb'
metadata ADDED
@@ -0,0 +1,256 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: protk
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0.pre
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Ira Cooke
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ftools
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.0.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: open4
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 1.3.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 1.3.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: bio
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 1.4.3
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 1.4.3
62
+ - !ruby/object:Gem::Dependency
63
+ name: rest-client
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.6.7
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.7
78
+ - !ruby/object:Gem::Dependency
79
+ name: net-ftp-list
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 3.2.5
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 3.2.5
94
+ - !ruby/object:Gem::Dependency
95
+ name: spreadsheet
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: 0.7.4
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: 0.7.4
110
+ - !ruby/object:Gem::Dependency
111
+ name: libxml-ruby
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: 2.3.3
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: 2.3.3
126
+ - !ruby/object:Gem::Dependency
127
+ name: rspec
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: '2.5'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: '2.5'
142
+ description: A bunch of tools for proteomics
143
+ email: iracooke@gmail.com
144
+ executables:
145
+ - protk_setup.rb
146
+ - manage_db.rb
147
+ - tandem_search.rb
148
+ - mascot_search.rb
149
+ - omssa_search.rb
150
+ - msgfplus_search.rb
151
+ - big_search.rb
152
+ - mascot_to_pepxml.rb
153
+ - file_convert.rb
154
+ - make_decoy.rb
155
+ - correct_omssa_retention_times.rb
156
+ - repair_run_summary.rb
157
+ - peptide_prophet.rb
158
+ - interprophet.rb
159
+ - protein_prophet.rb
160
+ - pepxml_to_table.rb
161
+ - xls_to_table.rb
162
+ - annotate_ids.rb
163
+ - unimod_to_loc.rb
164
+ - generate_omssa_loc.rb
165
+ extensions:
166
+ - ext/protk/extconf.rb
167
+ extra_rdoc_files: []
168
+ files:
169
+ - lib/protk/big_search_tool.rb
170
+ - lib/protk/bio_sptr_extensions.rb
171
+ - lib/protk/biotools_excel_converter.rb
172
+ - lib/protk/command_runner.rb
173
+ - lib/protk/constants.rb
174
+ - lib/protk/eupathdb_gene_information_table.rb
175
+ - lib/protk/galaxy_stager.rb
176
+ - lib/protk/galaxy_util.rb
177
+ - lib/protk/manage_db_tool.rb
178
+ - lib/protk/mascot_util.rb
179
+ - lib/protk/omssa_util.rb
180
+ - lib/protk/plasmodb.rb
181
+ - lib/protk/prophet_tool.rb
182
+ - lib/protk/protein_annotator.rb
183
+ - lib/protk/protxml.rb
184
+ - lib/protk/randomize.rb
185
+ - lib/protk/search_tool.rb
186
+ - lib/protk/setup_tool.rb
187
+ - lib/protk/spreadsheet_extensions.rb
188
+ - lib/protk/swissprot_database.rb
189
+ - lib/protk/tool.rb
190
+ - lib/protk/xtandem_defaults.rb
191
+ - lib/protk.rb
192
+ - lib/protk/big_search_rakefile.rake
193
+ - lib/protk/manage_db_rakefile.rake
194
+ - lib/protk/setup_rakefile.rake
195
+ - bin/annotate_ids.rb
196
+ - bin/big_search.rb
197
+ - bin/correct_omssa_retention_times.rb
198
+ - bin/feature_finder.rb
199
+ - bin/file_convert.rb
200
+ - bin/generate_omssa_loc.rb
201
+ - bin/interprophet.rb
202
+ - bin/make_decoy.rb
203
+ - bin/manage_db.rb
204
+ - bin/mascot_search.rb
205
+ - bin/mascot_to_pepxml.rb
206
+ - bin/msgfplus_search.rb
207
+ - bin/omssa_search.rb
208
+ - bin/peptide_prophet.rb
209
+ - bin/pepxml_to_table.rb
210
+ - bin/protein_prophet.rb
211
+ - bin/protk_setup.rb
212
+ - bin/repair_run_summary.rb
213
+ - bin/tandem_search.rb
214
+ - bin/template_search.rb
215
+ - bin/unimod_to_loc.rb
216
+ - bin/xls_to_table.rb
217
+ - README.md
218
+ - lib/protk/data/apt-get_packages.yaml
219
+ - lib/protk/data/brew_packages.yaml
220
+ - lib/protk/data/default_config.yml
221
+ - lib/protk/data/FeatureFinderCentroided.ini
222
+ - lib/protk/data/predefined_db.crap.yaml
223
+ - lib/protk/data/predefined_db.sphuman.yaml
224
+ - lib/protk/data/predefined_db.swissprot_annotation.yaml
225
+ - lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
226
+ - lib/protk/data/tandem_params.xml
227
+ - lib/protk/data/taxonomy_template.xml
228
+ - lib/protk/data/unimod.xml
229
+ - ext/protk/protk.c
230
+ - ext/protk/extconf.rb
231
+ homepage: http://rubygems.org/gems/protk
232
+ licenses: []
233
+ post_install_message: Now run protk_setup.rb to install third party tools and manage_db.rb
234
+ to install databases.
235
+ rdoc_options: []
236
+ require_paths:
237
+ - lib
238
+ required_ruby_version: !ruby/object:Gem::Requirement
239
+ none: false
240
+ requirements:
241
+ - - ! '>='
242
+ - !ruby/object:Gem::Version
243
+ version: '0'
244
+ required_rubygems_version: !ruby/object:Gem::Requirement
245
+ none: false
246
+ requirements:
247
+ - - ! '>'
248
+ - !ruby/object:Gem::Version
249
+ version: 1.3.1
250
+ requirements: []
251
+ rubyforge_project:
252
+ rubygems_version: 1.8.24
253
+ signing_key:
254
+ specification_version: 3
255
+ summary: Proteomics Toolkit
256
+ test_files: []