protk 1.3.0 → 1.3.1.pre2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+
2
+ module ProteinToGenomeMapper
3
+
4
+ # gene_seq should already have been reverse_complemented if on reverse strand
5
+
6
+
7
+
8
+ end
@@ -10,6 +10,8 @@ class ProtXMLToGFFTool < Tool
10
10
 
11
11
  add_value_option(:database,nil,['-d filename','--database filename','Database used for ms/ms searches (Fasta Format)'])
12
12
  add_value_option(:genome,nil,['-g filename','--genome filename', 'Nucleotide sequences for scaffolds (Fasta Format)'])
13
+ add_value_option(:coords_file,nil,['-c filename','--coords-file filename.gff3', 'If genomic coordinates are not encoded in protein db entries look them up from a supplied gff file'])
14
+ # add_value_option(:contig_regex,nil,['--contig-regex expression','Regular expression with a single capture group to get contig ids from protein ids'])
13
15
  add_value_option(:protein_find,nil,['-f term','--find term', 'Restrict output to proteins whose name matches the specified string'])
14
16
  add_value_option(:nterm_minlen,7,['-n len','--nterm-min-len len', 'Only include inferred N-terminal sequences if longer than len'])
15
17
  add_boolean_option(:skip_fasta_indexing,false,['--skip-index','Don\'t index database (Index should already exist)'])
@@ -317,7 +319,7 @@ class ProtXMLToGFFTool < Tool
317
319
 
318
320
  check_seq = protein_info.strand=='-' ? concat_seq.reverse_complement.translate : concat_seq.translate
319
321
  if ( check_seq != peptide_seq)
320
- require 'debugger';debugger
322
+ # require 'debugger';debugger
321
323
  puts "Fragment seqs not equal to peptide seqs"
322
324
  end
323
325
 
@@ -13,15 +13,6 @@ require 'optparse'
13
13
  require 'pathname'
14
14
  require 'protk/tool'
15
15
 
16
- class FastaDatabase
17
- attr :name
18
- attr :path
19
- def initialize(name,path)
20
- @name=name
21
- @path=path
22
- end
23
- end
24
-
25
16
  class SearchTool < Tool
26
17
 
27
18
  # Initializes commandline options common to all search tools.
@@ -30,9 +21,9 @@ class SearchTool < Tool
30
21
  def initialize(option_support=[])
31
22
  super(option_support)
32
23
 
33
- if (option_support.include? :database)
34
- add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
35
- end
24
+ # if (option_support.include? :database)
25
+ # add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
26
+ # end
36
27
 
37
28
  if ( option_support.include? :enzyme )
38
29
  add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
@@ -115,18 +106,6 @@ class SearchTool < Tool
115
106
  end
116
107
 
117
108
 
118
- def database_info
119
- case
120
- when Pathname.new(@options.database).exist? # It's an explicitly named db
121
- db_path=Pathname.new(@options.database).expand_path.to_s
122
- db_name=Pathname.new(@options.database).basename.to_s
123
- else
124
- db_path=Constants.new.current_database_for_name @options.database
125
- db_name=@options.database
126
- end
127
- FastaDatabase.new(db_name,db_path)
128
- end
129
-
130
109
  end
131
110
 
132
111
 
@@ -7,27 +7,18 @@ require 'pathname'
7
7
  #
8
8
  class SwissprotDatabase
9
9
 
10
- def initialize(env=nil,database="swissprot")
11
- if ( env!=nil)
12
- @genv=env
13
- else
14
- @genv=Constants.new
15
- end
10
+ def initialize(datfile_path,skip_indexing=false)
16
11
 
12
+ dbpath=Pathname.new(datfile_path)
13
+ dbclass=Bio::SPTR
17
14
 
18
- dbpath=Pathname.new(database)
19
-
20
- if ( dbpath.exist? )
21
- # require 'debugger';debugger
22
- dbclass=Bio::SPTR
15
+ unless skip_indexing
23
16
  parser = Bio::FlatFileIndex::Indexer::Parser.new(dbclass, nil, nil)
24
- Bio::FlatFileIndex::Indexer::makeindexFlat(dbpath.realpath.dirname.to_s, parser, {}, dbpath.realpath.to_s)
25
- @db_object=Bio::FlatFileIndex.new("#{dbpath.realpath.dirname.to_s}")
26
- elsif ( database=="swissprot")
27
- @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_sprot_annotation_database}")
28
- else
29
- @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_trembl_annotation_database}")
17
+ Bio::FlatFileIndex::Indexer::makeindexFlat(dbpath.realpath.dirname.to_s, parser, {}, \
18
+ dbpath.realpath.to_s)
30
19
  end
20
+
21
+ @db_object=Bio::FlatFileIndex.new("#{dbpath.realpath.dirname.to_s}")
31
22
 
32
23
  @db_object.always_check_consistency=false
33
24
  end
@@ -36,9 +27,6 @@ class SwissprotDatabase
36
27
  def get_entry_for_name(name)
37
28
  result=@db_object.get_by_id(name)
38
29
  if result==""
39
- if ( @genv!=nil)
40
- @genv.log("Failed to find UniProt entry for protein named #{name} in database",:warn)
41
- end
42
30
  return nil
43
31
  else
44
32
  Bio::SPTR.new(result)
data/lib/protk/tool.rb CHANGED
@@ -10,6 +10,17 @@ require 'optparse'
10
10
  require 'pathname'
11
11
  require 'protk/command_runner'
12
12
 
13
+
14
+ class FastaDatabase
15
+ attr :name
16
+ attr :path
17
+ def initialize(name,path)
18
+ @name=name
19
+ @path=path
20
+ end
21
+ end
22
+
23
+
13
24
  class Tool
14
25
 
15
26
  # Options set from the command-line
@@ -108,7 +119,13 @@ class Tool
108
119
  add_value_option(:threads,1,['-n','--threads num','Number of processing threads to use. Set to 0 to autodetect an appropriate value'])
109
120
  end
110
121
 
122
+ if ( option_support.include? :database)
123
+ add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
124
+ end
111
125
 
126
+ if (option_support.include? :debug)
127
+ add_boolean_option(:debug,false,['--debug','Run in debug mode'])
128
+ end
112
129
 
113
130
  end
114
131
 
@@ -147,6 +164,10 @@ class Tool
147
164
  # Checking for required options
148
165
  begin
149
166
  self.option_parser.parse!
167
+
168
+ if has_override
169
+ return true
170
+ end
150
171
  missing = mandatory.select{ |param| self.send(param).nil? }
151
172
  if not missing.empty?
152
173
  puts "Missing options: #{missing.join(', ')}"
@@ -175,5 +196,19 @@ class Tool
175
196
  cmd_runner.run_local(cmd)
176
197
  end
177
198
 
178
-
199
+
200
+ def database_info
201
+ case
202
+ when Pathname.new(@options.database).exist? # It's an explicitly named db
203
+ db_path=Pathname.new(@options.database).expand_path.to_s
204
+ db_name=Pathname.new(@options.database).basename.to_s
205
+ else
206
+ db_path=Constants.new.current_database_for_name @options.database
207
+ db_name=@options.database
208
+ end
209
+ FastaDatabase.new(db_name,db_path)
210
+ end
211
+
212
+
213
+
179
214
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1.pre2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ira Cooke
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-24 00:00:00.000000000 Z
11
+ date: 2014-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: open4
@@ -152,6 +152,26 @@ dependencies:
152
152
  - - ~>
153
153
  - !ruby/object:Gem::Version
154
154
  version: '0'
155
+ - !ruby/object:Gem::Dependency
156
+ name: debugger
157
+ requirement: !ruby/object:Gem::Requirement
158
+ requirements:
159
+ - - ~>
160
+ - !ruby/object:Gem::Version
161
+ version: '1.6'
162
+ - - '>='
163
+ - !ruby/object:Gem::Version
164
+ version: 1.6.0
165
+ type: :development
166
+ prerelease: false
167
+ version_requirements: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - ~>
170
+ - !ruby/object:Gem::Version
171
+ version: '1.6'
172
+ - - '>='
173
+ - !ruby/object:Gem::Version
174
+ version: 1.6.0
155
175
  - !ruby/object:Gem::Dependency
156
176
  name: sqlite3
157
177
  requirement: !ruby/object:Gem::Requirement
@@ -190,11 +210,44 @@ executables:
190
210
  - augustus_to_proteindb.rb
191
211
  - protxml_to_gff.rb
192
212
  - protxml_to_table.rb
213
+ - swissprot_to_table.rb
214
+ - protxml_to_psql.rb
193
215
  extensions:
194
216
  - ext/decoymaker/extconf.rb
195
217
  extra_rdoc_files: []
196
218
  files:
197
- - README.md
219
+ - lib/protk/bio_gff3_extensions.rb
220
+ - lib/protk/bio_sptr_extensions.rb
221
+ - lib/protk/command_runner.rb
222
+ - lib/protk/constants.rb
223
+ - lib/protk/convert_util.rb
224
+ - lib/protk/data/make_uniprot_table.rb
225
+ - lib/protk/fastadb.rb
226
+ - lib/protk/galaxy_stager.rb
227
+ - lib/protk/galaxy_util.rb
228
+ - lib/protk/gffdb.rb
229
+ - lib/protk/manage_db_tool.rb
230
+ - lib/protk/mascot_util.rb
231
+ - lib/protk/mzml_parser.rb
232
+ - lib/protk/omssa_util.rb
233
+ - lib/protk/openms_defaults.rb
234
+ - lib/protk/peptide.rb
235
+ - lib/protk/pepxml.rb
236
+ - lib/protk/plasmodb.rb
237
+ - lib/protk/prophet_tool.rb
238
+ - lib/protk/protein.rb
239
+ - lib/protk/protein_to_genome_mapper.rb
240
+ - lib/protk/protxml_to_gff_tool.rb
241
+ - lib/protk/randomize.rb
242
+ - lib/protk/search_tool.rb
243
+ - lib/protk/setup_tool.rb
244
+ - lib/protk/swissprot_database.rb
245
+ - lib/protk/tandem_search_tool.rb
246
+ - lib/protk/tool.rb
247
+ - lib/protk/uniprot_mapper.rb
248
+ - lib/protk.rb
249
+ - lib/protk/manage_db_rakefile.rake
250
+ - lib/protk/setup_rakefile.rake
198
251
  - bin/add_retention_times.rb
199
252
  - bin/augustus_to_proteindb.rb
200
253
  - bin/interprophet.rb
@@ -213,25 +266,19 @@ files:
213
266
  - bin/protxml_to_table.rb
214
267
  - bin/repair_run_summary.rb
215
268
  - bin/sixframe.rb
269
+ - bin/swissprot_to_table.rb
216
270
  - bin/tandem_search.rb
217
271
  - bin/tandem_to_pepxml.rb
218
272
  - bin/unimod_to_loc.rb
219
273
  - bin/uniprot_mapper.rb
220
- - ext/decoymaker/decoymaker.c
221
- - ext/decoymaker/extconf.rb
222
- - lib/protk.rb
223
- - lib/protk/bio_sptr_extensions.rb
224
- - lib/protk/command_runner.rb
225
- - lib/protk/constants.rb
226
- - lib/protk/convert_util.rb
227
- - lib/protk/data/ExecutePipeline.trf
228
- - lib/protk/data/FeatureFinderCentroided.ini
229
- - lib/protk/data/FeatureFinderIsotopeWavelet.ini
274
+ - README.md
230
275
  - lib/protk/data/apt-get_packages.yaml
231
276
  - lib/protk/data/brew_packages.yaml
232
277
  - lib/protk/data/default_config.yml
278
+ - lib/protk/data/ExecutePipeline.trf
279
+ - lib/protk/data/FeatureFinderCentroided.ini
280
+ - lib/protk/data/FeatureFinderIsotopeWavelet.ini
233
281
  - lib/protk/data/galaxyenv.sh
234
- - lib/protk/data/make_uniprot_table.rb
235
282
  - lib/protk/data/predefined_db.crap.yaml
236
283
  - lib/protk/data/predefined_db.sphuman.yaml
237
284
  - lib/protk/data/predefined_db.swissprot_annotation.yaml
@@ -246,30 +293,11 @@ files:
246
293
  - lib/protk/data/uniprot_accessions_table.txt
247
294
  - lib/protk/data/uniprot_input_accessions.loc
248
295
  - lib/protk/data/yum_packages.yaml
249
- - lib/protk/fastadb.rb
250
- - lib/protk/galaxy_stager.rb
251
- - lib/protk/galaxy_util.rb
252
- - lib/protk/manage_db_rakefile.rake
253
- - lib/protk/manage_db_tool.rb
254
- - lib/protk/mascot_util.rb
255
- - lib/protk/mzml_parser.rb
256
- - lib/protk/omssa_util.rb
257
- - lib/protk/openms_defaults.rb
258
- - lib/protk/pepxml.rb
259
- - lib/protk/plasmodb.rb
260
- - lib/protk/prophet_tool.rb
261
- - lib/protk/protxml.rb
262
- - lib/protk/protxml_to_gff_tool.rb
263
- - lib/protk/randomize.rb
264
- - lib/protk/search_tool.rb
265
- - lib/protk/setup_rakefile.rake
266
- - lib/protk/setup_tool.rb
267
- - lib/protk/swissprot_database.rb
268
- - lib/protk/tandem_search_tool.rb
269
- - lib/protk/tool.rb
270
- - lib/protk/uniprot_mapper.rb
296
+ - ext/decoymaker/decoymaker.c
297
+ - ext/decoymaker/extconf.rb
271
298
  homepage: http://rubygems.org/gems/protk
272
- licenses: []
299
+ licenses:
300
+ - LGPL-2.1
273
301
  metadata: {}
274
302
  post_install_message: Now run protk_setup.rb to install third party tools
275
303
  rdoc_options: []
@@ -282,14 +310,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
282
310
  version: '0'
283
311
  required_rubygems_version: !ruby/object:Gem::Requirement
284
312
  requirements:
285
- - - '>='
313
+ - - '>'
286
314
  - !ruby/object:Gem::Version
287
- version: '0'
315
+ version: 1.3.1
288
316
  requirements: []
289
317
  rubyforge_project:
290
- rubygems_version: 2.2.1
318
+ rubygems_version: 2.0.14
291
319
  signing_key:
292
320
  specification_version: 4
293
321
  summary: Proteomics Toolkit
294
322
  test_files: []
295
- has_rdoc:
data/lib/protk/protxml.rb DELETED
@@ -1,141 +0,0 @@
1
- require 'rubygems'
2
- require 'rexml/document'
3
- require 'rexml/xpath'
4
-
5
- class ProtXML
6
-
7
- attr_accessor :groups
8
-
9
-
10
- def indistinguishable_proteins_from_protein(protein_element)
11
- iprots=[]
12
- REXML::XPath.each(protein_element,"./indistinguishable_protein") do |ipel|
13
- ipel_attributes={}
14
- ipel.attributes.each_attribute { |att| ipel_attributes[att.expanded_name.to_sym]=att.value }
15
- iprots.push(ipel_attributes[:protein_name])
16
- end
17
- iprots
18
- end
19
-
20
- def peptides_from_protein(protein_element)
21
- peptides=[]
22
- REXML::XPath.each(protein_element,"./peptide") do |pel|
23
- peptide={}
24
-
25
- pel.attributes.each_attribute { |att| peptide[att.expanded_name.to_sym]=att.value }
26
- modifications=pel.get_elements("./modification_info")
27
- mods=modifications.collect {|mp| mp.attribute("modified_peptide").value }
28
- peptide[:modifications] = mods
29
- peptides.push(peptide)
30
- end
31
- peptides
32
- end
33
-
34
- def proteins_from_group(group_element)
35
- proteins=[]
36
- REXML::XPath.each(group_element,"./protein") do |pel|
37
- protein={}
38
- pel.attributes.each_attribute { |att| protein[att.expanded_name.to_sym]=att.value }
39
- protein[:peptides]=peptides_from_protein(pel)
40
- protein[:indistinguishable_prots]=indistinguishable_proteins_from_protein(pel)
41
- proteins.push(protein)
42
- end
43
- proteins
44
- end
45
-
46
- def init_groups
47
- @groups=[]
48
- REXML::XPath.each(@doc.root,"//protein_group") do |gel|
49
- group={}
50
- group[:group_probability]=gel.attributes["probability"].to_f
51
- group[:proteins]=proteins_from_group(gel)
52
- groups.push group
53
- end
54
- @groups
55
- end
56
-
57
-
58
- def initialize(file_name)
59
- @doc=REXML::Document.new(File.new(file_name))
60
- @groups=self.init_groups
61
- end
62
-
63
- def find_pep_xml()
64
- header = REXML::XPath.first(@doc, "//protein_summary_header")
65
- source_file = header.attributes["source_files"]
66
- end
67
-
68
- def peptide_sequences_from_protein(prot)
69
- peptides=prot[:peptides]
70
- sequences=[]
71
- peptides.each do |pep|
72
- if ( pep[:modifications].length > 0 )
73
- pep[:modifications].each {|pmod|
74
- sequences.push(pmod) }
75
- else
76
- sequences.push(pep[:peptide_sequence])
77
- end
78
- end
79
- sequences
80
- end
81
-
82
- def protein_to_row(prot)
83
- protein_row=[]
84
- protein_row.push(prot[:protein_name])
85
- protein_row.push(prot[:probability])
86
-
87
- indistinct=prot[:indistinguishable_prots]
88
- indist_string="#{prot[:protein_name]};"
89
- indistinct.each { |pr| indist_string<<"#{pr};"}
90
- indist_string.chop!
91
- protein_row.push(indist_string)
92
-
93
- protein_row.push(prot[:peptides].length)
94
-
95
- peptide_string=""
96
- peptide_sequences_from_protein(prot).each {|pep| peptide_string<<"#{pep};" }
97
- peptide_string.chop!
98
-
99
- protein_row.push(peptide_string)
100
- protein_row
101
- end
102
-
103
- # Convert the entire prot.xml document to row format
104
- # Returns an array of arrays. Each of the sub-arrays is a row.
105
- # Each row should contain a simple summary of the protein.
106
- # A separate row should be provided for every protein (including indistinguishable ones)
107
- # The first row will be the header
108
- #
109
- # Proteins with probabilities below a threshold are excluded
110
- #
111
- def as_rows(threshold_probability)
112
-
113
- rows=[]
114
- rows.push(["Accession","Probability","Indistinguishable Proteins","Num Peptides","Peptides"])
115
-
116
- proteins=[]
117
- @groups.each do |grp|
118
- grp[:proteins].each {|prot|
119
- if ( prot[:probability].to_f >= threshold_probability)
120
- proteins.push(prot)
121
- end
122
- }
123
- end
124
-
125
- proteins.each do |prot|
126
- protein_row=protein_to_row(prot)
127
- rows.push(protein_row)
128
-
129
- indistinguishables=prot[:indistinguishable_prots]
130
- indistinguishables.each do |indist|
131
- indist_row=protein_row.clone
132
- indist_row[0]=indist
133
- rows.push(indist_row)
134
- end
135
-
136
- end
137
-
138
- rows
139
- end
140
-
141
- end