protk 1.2.5 → 1.2.6.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/mascot2xml.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 12/4/2010
|
5
|
+
#
|
6
|
+
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
+
#
|
8
|
+
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
require 'mascot/dat'
|
13
|
+
require 'libxml'
|
14
|
+
|
15
|
+
include LibXML
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
# Environment with global constants
#
genv = Constants.new

# Command-line handling: this tool accepts the database, explicit-output and
# over-write options; the remaining arguments are the .dat files to convert.
tool = SearchTool.new([:database, :explicit_output, :over_write])
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
tool.option_parser.parse!

# An explicit output name can only refer to one result file, so reject
# multiple inputs. Use raise (not throw): throw is for non-local control flow
# and reports a string argument as an "uncaught throw" error.
raise ArgumentError, "Only one file at a time is allowed when using explicit output" if tool.explicit_output && ARGV.length > 1
|
29
|
+
|
30
|
+
# Stamp the root msms_pipeline_analysis element of the pepXML document with
# the search date taken from the Mascot dat header and the output file name.
#
# pepxml_doc - document responding to find(xpath, namespace)
# mascot_dat - object whose header.date converts (via to_i) to epoch seconds
# outname    - value written to the summary_xml attribute
#
# Reads the globals $pepxml_ns_prefix and $pepxml_ns to build the XPath query.
# Raises RuntimeError when the document has no msms_pipeline_analysis element.
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
  pipeline_analysis_node = pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
  raise "pepXML template has no msms_pipeline_analysis element" if pipeline_analysis_node.nil?

  # Write the date in the same yyyy-mm-ddThh:mm:ss form used by the pepXML
  # template; Time#to_s would emit "yyyy-mm-dd hh:mm:ss +zzzz" instead.
  search_time = Time.at(mascot_dat.header.date.to_i)
  pipeline_analysis_node.attributes['date'] = search_time.strftime("%Y-%m-%dT%H:%M:%S")
  pipeline_analysis_node.attributes['summary_xml'] = outname
end
|
35
|
+
|
36
|
+
# Copy the enzyme name from the Mascot dat file onto the sample_enzyme
# element of the pepXML document.
#
# pepxml_doc - document responding to find(xpath, namespace)
# mascot_dat - object whose enzyme responds to title
#
# Reads the globals $pepxml_ns_prefix and $pepxml_ns to build the XPath queries.
# Returns the first specificity child of sample_enzyme (nil if there is none);
# its cut sites are not updated yet, see the TODO below.
# Raises RuntimeError when the document has no sample_enzyme element.
def update_enzyme(pepxml_doc,mascot_dat)
  dat_enzyme = mascot_dat.enzyme
  enzyme_node = pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
  raise "pepXML template has no sample_enzyme element" if enzyme_node.nil?

  enzyme_node.attributes['name'] = dat_enzyme.title

  # p dat_enzyme.cleavages
  # TODO: What does the spec say about multiple cut sites
  # specificity.attributes['cut']=
  specificity = enzyme_node.find("./#{$pepxml_ns_prefix}specificity", $pepxml_ns)[0]
  specificity
end
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
# Convert each Mascot dat file named on the command line. For now the PSMs
# are only grouped by query number and printed; no pepXML file is written.
ARGV.each do |file_name|
  dat_path = file_name.chomp

  dat = Mascot::DAT.open(dat_path)
  pepxml_doc = XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml").parse

  # Output name: the explicit one if given, otherwise derived from the input.
  outname =
    if tool.explicit_output.nil?
      "#{Pathname.new(dat_path).basename}.pepXML"
    else
      tool.explicit_output
    end

  # XPath queries need a namespace prefix only when the template declares a
  # default namespace; the update_* helpers read these globals.
  if pepxml_doc.root.namespaces.default
    $pepxml_ns_prefix = "xmlns:"
    $pepxml_ns = "xmlns:http://regis-web.systemsbiology.net/pepXML"
  else
    $pepxml_ns_prefix = ""
    $pepxml_ns = nil
  end

  update_msms_pipeline_analysis(pepxml_doc, dat, outname)
  update_enzyme(pepxml_doc, dat)

  # Group peptide-spectrum matches by query number (as a string key),
  # skipping matches that carry no query.
  spectrum_queries = {}
  dat.peptides.each do |psm|
    query_number = psm.query
    (spectrum_queries[query_number.to_s] ||= []) << psm if query_number
  end

  spectrum_queries.each_pair { |qid, psms| p "#{qid} #{psms[0].pep}" }
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
/*
 * SimpleAlign.align(pep, n, gene, m)
 *
 * Currently a stub: the arguments are unpacked into C values but no
 * alignment is computed yet, and nil is returned.
 *
 * pep_in, gene_in must be Ruby Strings (TypeError otherwise);
 * n_in, m_in are converted with NUM2INT.
 */
static VALUE simplealign_align (VALUE self, VALUE pep_in, VALUE n_in, VALUE gene_in,VALUE m_in)
{
	unsigned int n;
	unsigned int m;
	char *peptide;
	char *gene;

	/* RSTRING_PTR is only valid on String objects, so verify the types first. */
	Check_Type(pep_in, T_STRING);
	Check_Type(gene_in, T_STRING);

	n = NUM2INT(n_in);
	m = NUM2INT(m_in);

	peptide = RSTRING_PTR(pep_in);
	gene = RSTRING_PTR(gene_in);

	/* Not used until the alignment itself is implemented. */
	(void)n;
	(void)m;
	(void)peptide;
	(void)gene;

	/* A method implementation must always hand a VALUE back to the interpreter. */
	return Qnil;
}
|
11
|
+
void Init_simplealign(void)
|
12
|
+
{
|
13
|
+
VALUE klass = rb_define_class("SimpleAlign",rb_cObject);
|
14
|
+
|
15
|
+
rb_define_singleton_method(klass,
|
16
|
+
"align",simplealign_align , 9);
|
17
|
+
}
|
data/lib/protk/constants.rb
CHANGED
@@ -236,10 +236,13 @@ class Constants
|
|
236
236
|
@protk_dir=ENV['PROTK_INSTALL_DIR']
|
237
237
|
end
|
238
238
|
|
239
|
-
|
239
|
+
# Protk Defaults
|
240
|
+
#
|
240
241
|
default_config_yml = YAML.load_file "#{File.dirname(__FILE__)}/data/default_config.yml"
|
241
242
|
throw "Unable to read the config file at #{File.dirname(__FILE__)}/data/default_config.yml" unless default_config_yml!=nil
|
242
243
|
|
244
|
+
# User-defined defaults override protk defaults
|
245
|
+
#
|
243
246
|
user_config_yml = nil
|
244
247
|
user_config_yml = YAML.load_file "#{@protk_dir}/config.yml" if File.exist? "#{@protk_dir}/config.yml"
|
245
248
|
if ( user_config_yml !=nil )
|
@@ -248,11 +251,22 @@ class Constants
|
|
248
251
|
@env=default_config_yml
|
249
252
|
end
|
250
253
|
|
251
|
-
|
254
|
+
# Application installation directories. From environment variables
|
255
|
+
#
|
256
|
+
protk_roots = [["tpp","xinteract"],["omssa","omssacl"],["blast","blastdbcmd"],["pwiz","msconvert"],["msgfplus",""],["openms","ExecutePipeline"]]
|
252
257
|
|
253
|
-
protk_roots.each do |r|
|
258
|
+
protk_roots.each do |r,binaryname|
|
254
259
|
env_value = ENV["PROTK_#{r.upcase}_ROOT"]
|
255
260
|
if ( env_value!=nil)
|
261
|
+
# "FROMPATH" means detect the root value
|
262
|
+
if env_value=="FROMPATH"
|
263
|
+
bin_path=Pathname.new(%x[which #{binaryname}].chomp)
|
264
|
+
if bin_path.exist?
|
265
|
+
env_value=bin_path.realpath.dirname.to_s
|
266
|
+
else
|
267
|
+
env_value=""
|
268
|
+
end
|
269
|
+
end
|
256
270
|
p "Using #{r} root #{env_value}"
|
257
271
|
@env["#{r}_root"]=env_value
|
258
272
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
|
3
|
+
<msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
|
4
|
+
<msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
|
5
|
+
<sample_enzyme name="trypsin">
|
6
|
+
<specificity cut="KR" no_cut="P" sense="C"/>
|
7
|
+
</sample_enzyme>
|
8
|
+
<search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
|
9
|
+
<search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
|
10
|
+
<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
|
11
|
+
<aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
|
12
|
+
<parameter name="" value=""/>
|
13
|
+
</search_summary>
|
14
|
+
<spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
|
15
|
+
<search_result>
|
16
|
+
<search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
|
17
|
+
<modification_info>
|
18
|
+
<mod_aminoacid_mass position="" mass=""/>
|
19
|
+
</modification_info>
|
20
|
+
<search_score name="ionscore" value=""/>
|
21
|
+
<search_score name="identityscore" value=""/>
|
22
|
+
<search_score name="star" value="0"/>
|
23
|
+
<search_score name="homologyscore" value=""/>
|
24
|
+
<search_score name="expect" value=""/>
|
25
|
+
</search_hit>
|
26
|
+
</search_result>
|
27
|
+
</spectrum_query>
|
28
|
+
</msms_run_summary>
|
29
|
+
</msms_pipeline_analysis>
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# This is a predefined setup file for manage_db
|
3
|
+
#
|
4
|
+
# UniProt TrEMBL annotation database (full entries for each protein)
|
5
|
+
#
|
6
|
+
---
|
7
|
+
:description: Swissprot Trembl annotation database (full entries for each protein)
|
8
|
+
:archive_old: false
|
9
|
+
:is_annotation_db: true
|
10
|
+
:decoy_prefix: decoy_
|
11
|
+
:include_filters: []
|
12
|
+
|
13
|
+
:format: dat
|
14
|
+
:id_regexes: []
|
15
|
+
|
16
|
+
:make_blast_index: false
|
17
|
+
:sources:
|
18
|
+
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
|
19
|
+
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
+
:decoys: false
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.6.pre1
|
5
|
+
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ira Cooke
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ftools
|
@@ -256,6 +256,7 @@ files:
|
|
256
256
|
- bin/libra.rb
|
257
257
|
- bin/make_decoy.rb
|
258
258
|
- bin/manage_db.rb
|
259
|
+
- bin/mascot2xml.rb
|
259
260
|
- bin/mascot_search.rb
|
260
261
|
- bin/mascot_to_pepxml.rb
|
261
262
|
- bin/msgfplus_search.rb
|
@@ -283,10 +284,12 @@ files:
|
|
283
284
|
- lib/protk/data/FeatureFinderCentroided.ini
|
284
285
|
- lib/protk/data/FeatureFinderIsotopeWavelet.ini
|
285
286
|
- lib/protk/data/galaxyenv.sh
|
287
|
+
- lib/protk/data/pepxml_mascot_template.xml
|
286
288
|
- lib/protk/data/predefined_db.crap.yaml
|
287
289
|
- lib/protk/data/predefined_db.sphuman.yaml
|
288
290
|
- lib/protk/data/predefined_db.swissprot_annotation.yaml
|
289
291
|
- lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
|
292
|
+
- lib/protk/data/predefined_db.trembl_annotation.yaml
|
290
293
|
- lib/protk/data/tandem_params.xml
|
291
294
|
- lib/protk/data/taxonomy_template.xml
|
292
295
|
- lib/protk/data/unimod.xml
|
@@ -295,6 +298,7 @@ files:
|
|
295
298
|
- lib/protk/data/uniprot_input_accessions.loc
|
296
299
|
- lib/protk/data/yum_packages.yaml
|
297
300
|
- ext/protk/decoymaker/decoymaker.c
|
301
|
+
- ext/protk/simplealign/simplealign.c
|
298
302
|
- ext/protk/decoymaker/extconf.rb
|
299
303
|
- ext/protk/simplealign/extconf.rb
|
300
304
|
homepage: http://rubygems.org/gems/protk
|
@@ -313,9 +317,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
313
317
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
314
318
|
none: false
|
315
319
|
requirements:
|
316
|
-
- - ! '
|
320
|
+
- - ! '>'
|
317
321
|
- !ruby/object:Gem::Version
|
318
|
-
version:
|
322
|
+
version: 1.3.1
|
319
323
|
requirements: []
|
320
324
|
rubyforge_project:
|
321
325
|
rubygems_version: 1.8.24
|
@@ -323,3 +327,4 @@ signing_key:
|
|
323
327
|
specification_version: 3
|
324
328
|
summary: Proteomics Toolkit
|
325
329
|
test_files: []
|
330
|
+
has_rdoc:
|