protk 1.2.5 → 1.2.6.pre1
Sign up to get free protection for your applications and to get access to all the features.
data/bin/mascot2xml.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 12/4/2010
|
5
|
+
#
|
6
|
+
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
+
#
|
8
|
+
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
require 'mascot/dat'
|
13
|
+
require 'libxml'
|
14
|
+
|
15
|
+
include LibXML
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
# Environment with global constants
#
genv = Constants.new

# Command-line handling. The symbols passed to SearchTool.new are presumably
# the standard options this script accepts (TODO confirm against SearchTool).
tool = SearchTool.new([:database, :explicit_output, :over_write])
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
tool.option_parser.parse!

# A single explicit output name cannot be shared by several input files.
# Use raise rather than throw: throw is meant for non-local jumps paired with
# catch, and with no matching catch it reports an "uncaught throw" error
# instead of a plain error carrying this message.
raise "Only one file at a time is allowed when using explicit output" if tool.explicit_output && ARGV.length > 1
|
29
|
+
|
30
|
+
# Stamp the msms_pipeline_analysis element of a pepXML document with the
# search date and the name of the output file.
#
# pepxml_doc - document responding to find(xpath, namespace); the first
#              match is the element that gets updated.
# mascot_dat - object whose header.date converts (via to_i) to seconds
#              since the epoch.
# outname    - value stored in the summary_xml attribute.
#
# The XPath query is built from the globals $pepxml_ns_prefix and $pepxml_ns.
#
# Raises RuntimeError when the document has no msms_pipeline_analysis element.
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
  node = pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
  raise "No msms_pipeline_analysis element found in pepXML document" if node.nil?

  # Write the date as YYYY-MM-DDTHH:MM:SS (local time), the same form the
  # pepXML template uses for this attribute, rather than Time#to_s output.
  search_time = Time.at(mascot_dat.header.date.to_i)
  node.attributes['date'] = search_time.strftime("%Y-%m-%dT%H:%M:%S")
  node.attributes['summary_xml'] = outname
end
|
35
|
+
|
36
|
+
# Copy the enzyme name from a Mascot result into the sample_enzyme element
# of a pepXML document.
#
# pepxml_doc - document responding to find(xpath, namespace); the first
#              match is the element that gets updated.
# mascot_dat - object whose enzyme.title supplies the new name attribute.
#
# The XPath query is built from the globals $pepxml_ns_prefix and $pepxml_ns.
# Cleavage specificity is not transferred yet (see the TODO below).
#
# Raises RuntimeError when the document has no sample_enzyme element.
def update_enzyme(pepxml_doc,mascot_dat)
  enzyme_node = pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
  raise "No sample_enzyme element found in pepXML document" if enzyme_node.nil?

  enzyme_node.attributes['name'] = mascot_dat.enzyme.title

  # TODO: What does the spec say about multiple cut sites
  # The specificity lookup is kept here, commented out, until it is needed:
  # p mascot_dat.enzyme.cleavages
  # specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
  # specificity.attributes['cut']=
end
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
# Process every Mascot dat file named on the command line.
ARGV.each do |file_name|
  name = file_name.chomp

  dat = Mascot::DAT.open(name)
  pepxml_doc = XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml").parse

  # An explicit output name takes precedence; otherwise derive one from the
  # input file's basename. File.basename is core Ruby, so this does not rely
  # on Pathname having been loaded by some other require.
  outname = tool.explicit_output
  outname = "#{File.basename(name)}.pepXML" if outname.nil?

  # The update_* helpers build their XPath queries from these globals.
  # A namespace prefix is only used when the template root declares a
  # default namespace.
  if pepxml_doc.root.namespaces.default
    $pepxml_ns_prefix = "xmlns:"
    $pepxml_ns = "xmlns:http://regis-web.systemsbiology.net/pepXML"
  else
    $pepxml_ns_prefix = ""
    $pepxml_ns = nil
  end

  update_msms_pipeline_analysis(pepxml_doc, dat, outname)
  update_enzyme(pepxml_doc, dat)

  # Group peptide-spectrum matches by query number (as a string key);
  # matches without a query number are skipped.
  spectrum_queries = {}
  dat.peptides.each do |psm|
    # psm.score
    query_number = psm.query
    next unless query_number

    (spectrum_queries[query_number.to_s] ||= []) << psm
  end

  # Block parameters are named so they do not reuse the outer local `name`.
  spectrum_queries.each_pair { |query_id, psms| p "#{query_id} #{psms[0].pep}" }
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
/*
 * Implementation of SimpleAlign.align(pep, n, gene, m).
 *
 * Currently a stub: it unpacks its four arguments but performs no alignment.
 *
 *   pep_in, gene_in - read with RSTRING_PTR, so they are expected to be Strings
 *   n_in, m_in      - converted with NUM2INT (presumably the lengths of the
 *                     peptide and gene strings - TODO confirm)
 *
 * Returns nil.
 */
static VALUE simplealign_align (VALUE self, VALUE pep_in, VALUE n_in, VALUE gene_in,VALUE m_in)
{
  unsigned int n = NUM2INT(n_in);
  unsigned int m = NUM2INT(m_in);

  char *peptide = RSTRING_PTR(pep_in);
  char *gene = RSTRING_PTR(gene_in);

  /* The function is declared to return VALUE, so it must return one:
     falling off the end would hand Ruby an indeterminate value. */
  return Qnil;
}
|
11
|
+
void Init_simplealign(void)
|
12
|
+
{
|
13
|
+
VALUE klass = rb_define_class("SimpleAlign",rb_cObject);
|
14
|
+
|
15
|
+
rb_define_singleton_method(klass,
|
16
|
+
"align",simplealign_align , 9);
|
17
|
+
}
|
data/lib/protk/constants.rb
CHANGED
@@ -236,10 +236,13 @@ class Constants
|
|
236
236
|
@protk_dir=ENV['PROTK_INSTALL_DIR']
|
237
237
|
end
|
238
238
|
|
239
|
-
|
239
|
+
# Protk Defaults
|
240
|
+
#
|
240
241
|
default_config_yml = YAML.load_file "#{File.dirname(__FILE__)}/data/default_config.yml"
|
241
242
|
throw "Unable to read the config file at #{File.dirname(__FILE__)}/data/default_config.yml" unless default_config_yml!=nil
|
242
243
|
|
244
|
+
# User-defined defaults override protk defaults
|
245
|
+
#
|
243
246
|
user_config_yml = nil
|
244
247
|
user_config_yml = YAML.load_file "#{@protk_dir}/config.yml" if File.exist? "#{@protk_dir}/config.yml"
|
245
248
|
if ( user_config_yml !=nil )
|
@@ -248,11 +251,22 @@ class Constants
|
|
248
251
|
@env=default_config_yml
|
249
252
|
end
|
250
253
|
|
251
|
-
|
254
|
+
# Application installation directories. From environment variables
|
255
|
+
#
|
256
|
+
protk_roots = [["tpp","xinteract"],["omssa","omssacl"],["blast","blastdbcmd"],["pwiz","msconvert"],["msgfplus",""],["openms","ExecutePipeline"]]
|
252
257
|
|
253
|
-
protk_roots.each do |r|
|
258
|
+
protk_roots.each do |r,binaryname|
|
254
259
|
env_value = ENV["PROTK_#{r.upcase}_ROOT"]
|
255
260
|
if ( env_value!=nil)
|
261
|
+
# "FROMPATH" means detect the root value
|
262
|
+
if env_value=="FROMPATH"
|
263
|
+
bin_path=Pathname.new(%x[which #{binaryname}].chomp)
|
264
|
+
if bin_path.exist?
|
265
|
+
env_value=bin_path.realpath.dirname.to_s
|
266
|
+
else
|
267
|
+
env_value=""
|
268
|
+
end
|
269
|
+
end
|
256
270
|
p "Using #{r} root #{env_value}"
|
257
271
|
@env["#{r}_root"]=env_value
|
258
272
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
|
3
|
+
<msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
|
4
|
+
<msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
|
5
|
+
<sample_enzyme name="trypsin">
|
6
|
+
<specificity cut="KR" no_cut="P" sense="C"/>
|
7
|
+
</sample_enzyme>
|
8
|
+
<search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
|
9
|
+
<search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
|
10
|
+
<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
|
11
|
+
<aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
|
12
|
+
<parameter name="" value=""/>
|
13
|
+
</search_summary>
|
14
|
+
<spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
|
15
|
+
<search_result>
|
16
|
+
<search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
|
17
|
+
<modification_info>
|
18
|
+
<mod_aminoacid_mass position="" mass=""/>
|
19
|
+
</modification_info>
|
20
|
+
<search_score name="ionscore" value=""/>
|
21
|
+
<search_score name="identityscore" value=""/>
|
22
|
+
<search_score name="star" value="0"/>
|
23
|
+
<search_score name="homologyscore" value=""/>
|
24
|
+
<search_score name="expect" value=""/>
|
25
|
+
</search_hit>
|
26
|
+
</search_result>
|
27
|
+
</spectrum_query>
|
28
|
+
</msms_run_summary>
|
29
|
+
</msms_pipeline_analysis>
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# This is a predefined setup file for manage_db
|
3
|
+
#
|
4
|
+
# UniProt TrEMBL annotation database (full entries for each protein)
|
5
|
+
#
|
6
|
+
---
|
7
|
+
:description: Swissprot Trembl annotation database (full entries for each protein)
|
8
|
+
:archive_old: false
|
9
|
+
:is_annotation_db: true
|
10
|
+
:decoy_prefix: decoy_
|
11
|
+
:include_filters: []
|
12
|
+
|
13
|
+
:format: dat
|
14
|
+
:id_regexes: []
|
15
|
+
|
16
|
+
:make_blast_index: false
|
17
|
+
:sources:
|
18
|
+
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
|
19
|
+
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
+
:decoys: false
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.5
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.6.pre1
|
5
|
+
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ira Cooke
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ftools
|
@@ -256,6 +256,7 @@ files:
|
|
256
256
|
- bin/libra.rb
|
257
257
|
- bin/make_decoy.rb
|
258
258
|
- bin/manage_db.rb
|
259
|
+
- bin/mascot2xml.rb
|
259
260
|
- bin/mascot_search.rb
|
260
261
|
- bin/mascot_to_pepxml.rb
|
261
262
|
- bin/msgfplus_search.rb
|
@@ -283,10 +284,12 @@ files:
|
|
283
284
|
- lib/protk/data/FeatureFinderCentroided.ini
|
284
285
|
- lib/protk/data/FeatureFinderIsotopeWavelet.ini
|
285
286
|
- lib/protk/data/galaxyenv.sh
|
287
|
+
- lib/protk/data/pepxml_mascot_template.xml
|
286
288
|
- lib/protk/data/predefined_db.crap.yaml
|
287
289
|
- lib/protk/data/predefined_db.sphuman.yaml
|
288
290
|
- lib/protk/data/predefined_db.swissprot_annotation.yaml
|
289
291
|
- lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
|
292
|
+
- lib/protk/data/predefined_db.trembl_annotation.yaml
|
290
293
|
- lib/protk/data/tandem_params.xml
|
291
294
|
- lib/protk/data/taxonomy_template.xml
|
292
295
|
- lib/protk/data/unimod.xml
|
@@ -295,6 +298,7 @@ files:
|
|
295
298
|
- lib/protk/data/uniprot_input_accessions.loc
|
296
299
|
- lib/protk/data/yum_packages.yaml
|
297
300
|
- ext/protk/decoymaker/decoymaker.c
|
301
|
+
- ext/protk/simplealign/simplealign.c
|
298
302
|
- ext/protk/decoymaker/extconf.rb
|
299
303
|
- ext/protk/simplealign/extconf.rb
|
300
304
|
homepage: http://rubygems.org/gems/protk
|
@@ -313,9 +317,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
313
317
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
314
318
|
none: false
|
315
319
|
requirements:
|
316
|
-
- - ! '
|
320
|
+
- - ! '>'
|
317
321
|
- !ruby/object:Gem::Version
|
318
|
-
version:
|
322
|
+
version: 1.3.1
|
319
323
|
requirements: []
|
320
324
|
rubyforge_project:
|
321
325
|
rubygems_version: 1.8.24
|
@@ -323,3 +327,4 @@ signing_key:
|
|
323
327
|
specification_version: 3
|
324
328
|
summary: Proteomics Toolkit
|
325
329
|
test_files: []
|
330
|
+
has_rdoc:
|