protk 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +69 -36
- data/bin/gffmerge.rb +16 -5
- data/bin/mascot_search.rb +174 -97
- data/bin/omssa_search.rb +31 -8
- data/bin/protk_setup.rb +0 -3
- data/bin/sixframe.rb +0 -6
- data/bin/tandem_search.rb +97 -6
- data/bin/template_search.rb +144 -0
- data/bin/uniprot_annotation.rb +130 -0
- data/lib/convert_util.rb +27 -0
- data/lib/pepxml.rb +22 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +41 -2
- data/lib/protk/constants.rb +13 -0
- data/lib/protk/data/apt-get_packages.yaml +4 -0
- data/lib/protk/data/default_config.yml +1 -0
- data/lib/protk/data/make_uniprot_table.rb +29 -0
- data/lib/protk/data/predefined_db.trembl_annotation.yaml +20 -0
- data/lib/protk/data/uniprot_accessions.loc +96 -0
- data/lib/protk/data/uniprot_accessions_table.txt +97 -0
- data/lib/protk/data/uniprot_input_accessions.loc +95 -0
- data/lib/protk/manage_db_rakefile.rake +25 -11
- data/lib/protk/omssa_util.rb +1 -1
- data/lib/protk/setup_rakefile.rake +39 -2
- metadata +13 -1
@@ -192,6 +192,14 @@ def archive_fasta_file(filename)
|
|
192
192
|
end
|
193
193
|
end
|
194
194
|
|
195
|
+
def cleanup_file(filename)
|
196
|
+
if (File.exist? filename )
|
197
|
+
archive_filename="#{filename}.tmp"
|
198
|
+
p "Cleaning up #{filename}"
|
199
|
+
FileUtils.mv(filename,archive_filename,:force=>true)
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
195
203
|
#####################
|
196
204
|
# Source Files #
|
197
205
|
#####################
|
@@ -308,7 +316,9 @@ file raw_db_filename => [source_files,dbspec_file].flatten do
|
|
308
316
|
if ( format == "fasta" && source_filters.length > 0 ) # We can perform concat and filter for fasta only
|
309
317
|
|
310
318
|
archive_fasta_file(raw_db_filename) if dbspec[:archive_old]
|
311
|
-
|
319
|
+
|
320
|
+
cleanup_file(raw_db_filename)
|
321
|
+
|
312
322
|
output_fh=File.open(raw_db_filename, "w")
|
313
323
|
|
314
324
|
id_regexes=dbspec[:id_regexes]
|
@@ -360,6 +370,9 @@ file raw_db_filename => [source_files,dbspec_file].flatten do
|
|
360
370
|
else # Other formats just copy a file across ... must be a single source
|
361
371
|
|
362
372
|
throw "Only a single source file is permitted for formats other than fasta" unless source_files.length == 1
|
373
|
+
|
374
|
+
cleanup_file(raw_db_filename)
|
375
|
+
|
363
376
|
|
364
377
|
sh "cp #{source_files[0]} #{raw_db_filename}" do |ok,res|
|
365
378
|
if ! ok
|
@@ -379,6 +392,7 @@ file decoy_db_filename => raw_db_filename do
|
|
379
392
|
|
380
393
|
archive_fasta_file(decoy_db_filename) if dbspec[:archive_old]
|
381
394
|
|
395
|
+
cleanup_file(decoy_db_filename)
|
382
396
|
|
383
397
|
decoys_filename = "#{dbdir}/decoys_only.fasta"
|
384
398
|
decoy_prefix=dbspec[:decoy_prefix]
|
@@ -450,7 +464,7 @@ if dbspec[:make_blast_index]
|
|
450
464
|
blast_index_files=["#{db_filename}.phr"]
|
451
465
|
blast_index_files.each do |indfile|
|
452
466
|
file indfile => db_filename do
|
453
|
-
cmd="cd #{dbdir}; #{$genv.makeblastdb} -in #{db_filename} -parse_seqids -dbtype prot"
|
467
|
+
cmd="cd #{dbdir}; #{$genv.makeblastdb} -in #{db_filename} -parse_seqids -dbtype prot -max_file_sz 20000000000"
|
454
468
|
p "Creating blast index"
|
455
469
|
sh %{ #{cmd} }
|
456
470
|
end
|
@@ -476,18 +490,18 @@ if dbspec[:make_msgf_index]
|
|
476
490
|
end
|
477
491
|
|
478
492
|
if format=="dat" && dbspec[:is_annotation_db]
|
479
|
-
|
493
|
+
dat_index_file= "#{dbdir}/id_AC.index"
|
494
|
+
|
495
|
+
cleanup_file dat_index_file #Regenerate indexes every time
|
480
496
|
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
Bio::FlatFileIndex::Indexer::makeindexFlat(dbdir, parser, {}, db_filename)
|
487
|
-
end
|
497
|
+
file dat_index_file => db_filename do
|
498
|
+
puts "Indexing annotation database"
|
499
|
+
dbclass=Bio::SPTR
|
500
|
+
parser = Bio::FlatFileIndex::Indexer::Parser.new(dbclass, nil, nil)
|
501
|
+
Bio::FlatFileIndex::Indexer::makeindexFlat(dbdir, parser, {}, db_filename)
|
488
502
|
end
|
489
503
|
|
490
|
-
task dbname =>
|
504
|
+
task dbname => dat_index_file
|
491
505
|
|
492
506
|
end
|
493
507
|
|
data/lib/protk/omssa_util.rb
CHANGED
@@ -15,7 +15,7 @@ class OMSSAUtil
|
|
15
15
|
pepxml_doc=parser.parse
|
16
16
|
rt_table=MascotUtil.index_mgf_times(mgf_file)
|
17
17
|
|
18
|
-
|
18
|
+
# p "Retention time table #{rt_table}"
|
19
19
|
|
20
20
|
# queries=pepxml_doc.find('//x:spectrum_query','x:http://regis-web.systemsbiology.net/pepXML')
|
21
21
|
queries=pepxml_doc.find('//spectrum_query')
|
@@ -141,7 +141,7 @@ tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.1.tgz"
|
|
141
141
|
tpp_download_file = download_task tpp_url, tpp_packagefile
|
142
142
|
|
143
143
|
# Build
|
144
|
-
file tpp_installed_file => [
|
144
|
+
file tpp_installed_file => [@build_dir,tpp_download_file] do
|
145
145
|
sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
|
146
146
|
sh %{eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir});cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
|
147
147
|
sh %{eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir});cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
|
@@ -303,7 +303,7 @@ def platform_cmake_args
|
|
303
303
|
''
|
304
304
|
end
|
305
305
|
|
306
|
-
openms_version="1.
|
306
|
+
openms_version="1.10.0"
|
307
307
|
openms_packagefile="OpenMS-#{openms_version}.tar.gz"
|
308
308
|
openms_url="https://dl.dropbox.com/u/226794/#{openms_packagefile}"
|
309
309
|
openms_installed_file="#{env.featurefinderisotopewavelet}"
|
@@ -322,6 +322,43 @@ end
|
|
322
322
|
|
323
323
|
task :openms => openms_installed_file
|
324
324
|
|
325
|
+
#
|
326
|
+
# X!Tandem
|
327
|
+
#
|
328
|
+
|
329
|
+
def tandem_platform
|
330
|
+
if RbConfig::CONFIG['host_os'] =~ /darwin/
|
331
|
+
return 'osx-intel'
|
332
|
+
end
|
333
|
+
'linux'
|
334
|
+
end
|
335
|
+
|
336
|
+
tandem_version="13-02-01-1"
|
337
|
+
tandem_packagefile="tandem-#{tandem_platform}-#{tandem_version}.zip"
|
338
|
+
tandem_url="ftp://ftp.thegpm.org/projects/tandem/source/#{tandem_packagefile}"
|
339
|
+
tandem_installed_file="#{env.gpmxtandem}"
|
340
|
+
|
341
|
+
download_task tandem_url, tandem_packagefile
|
342
|
+
|
343
|
+
file tandem_installed_file => [@build_dir,"#{@download_dir}/#{tandem_packagefile}"] do
|
344
|
+
sh %{cp #{@download_dir}/#{tandem_packagefile} #{@build_dir}}
|
345
|
+
sh %{cd #{@build_dir}; unzip #{tandem_packagefile}}
|
346
|
+
sh %{mkdir -p #{env.tandem_root}}
|
347
|
+
tandem_dirname = "#{tandem_packagefile.chomp('.zip')}"
|
348
|
+
|
349
|
+
if ( tandem_platform=="linux") #Must compile
|
350
|
+
tandem_src_dir = "#{@build_dir}/#{tandem_dirname}/#{tandem_dirname}/src/"
|
351
|
+
sh %{cd #{tandem_src_dir}; make}
|
352
|
+
sh %{cd #{@build_dir}; cp -r ./#{tandem_dirname}/#{tandem_dirname}/bin #{env.tandem_root}/}
|
353
|
+
else
|
354
|
+
sh %{cd #{@build_dir}; cp -r ./#{tandem_packagefile.chomp('.zip')}/* #{env.tandem_root}/}
|
355
|
+
sh %{chmod u+x #{env.gpmtandem}}
|
356
|
+
end
|
357
|
+
|
358
|
+
end
|
359
|
+
|
360
|
+
task :tandem => tandem_installed_file
|
361
|
+
|
325
362
|
#
|
326
363
|
# Galaxy Environment
|
327
364
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -170,15 +170,20 @@ executables:
|
|
170
170
|
- toppas_pipeline.rb
|
171
171
|
- gffmerge.rb
|
172
172
|
- sixframe.rb
|
173
|
+
- uniprot_annotation.rb
|
173
174
|
extensions:
|
174
175
|
- ext/protk/extconf.rb
|
175
176
|
extra_rdoc_files: []
|
176
177
|
files:
|
178
|
+
- lib/convert_util.rb
|
179
|
+
- lib/pepxml.rb
|
180
|
+
- lib/protk/big_search_tool.rb
|
177
181
|
- lib/protk/bio_sptr_extensions.rb
|
178
182
|
- lib/protk/biotools_excel_converter.rb
|
179
183
|
- lib/protk/command_runner.rb
|
180
184
|
- lib/protk/constants.rb
|
181
185
|
- lib/protk/convert_util.rb
|
186
|
+
- lib/protk/data/make_uniprot_table.rb
|
182
187
|
- lib/protk/eupathdb_gene_information_table.rb
|
183
188
|
- lib/protk/fastadb.rb
|
184
189
|
- lib/protk/galaxy_stager.rb
|
@@ -201,6 +206,7 @@ files:
|
|
201
206
|
- lib/protk/uniprot_mapper.rb
|
202
207
|
- lib/protk/xtandem_defaults.rb
|
203
208
|
- lib/protk.rb
|
209
|
+
- lib/protk/big_search_rakefile.rake
|
204
210
|
- lib/protk/manage_db_rakefile.rake
|
205
211
|
- lib/protk/setup_rakefile.rake
|
206
212
|
- bin/annotate_ids.rb
|
@@ -226,8 +232,10 @@ files:
|
|
226
232
|
- bin/repair_run_summary.rb
|
227
233
|
- bin/sixframe.rb
|
228
234
|
- bin/tandem_search.rb
|
235
|
+
- bin/template_search.rb
|
229
236
|
- bin/toppas_pipeline.rb
|
230
237
|
- bin/unimod_to_loc.rb
|
238
|
+
- bin/uniprot_annotation.rb
|
231
239
|
- bin/uniprot_mapper.rb
|
232
240
|
- bin/xls_to_table.rb
|
233
241
|
- bin/xpress.rb
|
@@ -243,9 +251,13 @@ files:
|
|
243
251
|
- lib/protk/data/predefined_db.sphuman.yaml
|
244
252
|
- lib/protk/data/predefined_db.swissprot_annotation.yaml
|
245
253
|
- lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
|
254
|
+
- lib/protk/data/predefined_db.trembl_annotation.yaml
|
246
255
|
- lib/protk/data/tandem_params.xml
|
247
256
|
- lib/protk/data/taxonomy_template.xml
|
248
257
|
- lib/protk/data/unimod.xml
|
258
|
+
- lib/protk/data/uniprot_accessions.loc
|
259
|
+
- lib/protk/data/uniprot_accessions_table.txt
|
260
|
+
- lib/protk/data/uniprot_input_accessions.loc
|
249
261
|
- lib/protk/data/yum_packages.yaml
|
250
262
|
- ext/protk/protk.c
|
251
263
|
- ext/protk/extconf.rb
|