protk 1.2.6.pre1 → 1.2.6.pre2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +22 -27
- data/bin/blastxml_to_table.rb +50 -3
- data/bin/make_decoy.rb +30 -2
- data/bin/mascot_search.rb +46 -27
- data/bin/msgfplus_search.rb +7 -4
- data/bin/peptide_prophet.rb +9 -0
- data/bin/protxml_to_gff.rb +122 -66
- data/bin/protxml_to_table.rb +26 -3
- data/bin/tandem_search.rb +1 -1
- data/lib/protk/constants.rb +19 -19
- data/lib/protk/data/default_config.yml +0 -7
- data/lib/protk/search_tool.rb +7 -0
- metadata +118 -90
- data/bin/mascot2xml.rb +0 -87
- data/ext/protk/simplealign/simplealign.c +0 -17
- data/lib/protk/data/pepxml_mascot_template.xml +0 -29
- data/lib/protk/data/predefined_db.trembl_annotation.yaml +0 -20
data/bin/mascot2xml.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This file is part of protk
|
4
|
-
# Created by Ira Cooke 12/4/2010
|
5
|
-
#
|
6
|
-
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
require 'protk/constants'
|
11
|
-
require 'protk/search_tool'
|
12
|
-
require 'mascot/dat'
|
13
|
-
require 'libxml'
|
14
|
-
|
15
|
-
include LibXML
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# Environment with global constants
|
21
|
-
#
|
22
|
-
genv=Constants.new
|
23
|
-
|
24
|
-
tool=SearchTool.new([:database,:explicit_output,:over_write])
|
25
|
-
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
|
26
|
-
tool.option_parser.parse!
|
27
|
-
|
28
|
-
throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
|
29
|
-
|
30
|
-
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
|
31
|
-
pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
|
32
|
-
pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
|
33
|
-
pipeline_analysis_node.attributes['summary_xml']=outname
|
34
|
-
end
|
35
|
-
|
36
|
-
def update_enzyme(pepxml_doc,mascot_dat)
|
37
|
-
dat_enzyme=mascot_dat.enzyme
|
38
|
-
enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
|
39
|
-
enzyme_node.attributes['name']=dat_enzyme.title
|
40
|
-
specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
|
41
|
-
# p dat_enzyme.cleavages
|
42
|
-
# TODO: What does the spec say about multiple cut sites
|
43
|
-
# specificity.attributes['cut']=
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
ARGV.each do |file_name|
|
49
|
-
name=file_name.chomp
|
50
|
-
|
51
|
-
dat = Mascot::DAT.open(name)
|
52
|
-
pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
|
53
|
-
pepxml_doc = pepxml_template_parser.parse
|
54
|
-
|
55
|
-
outname = nil
|
56
|
-
if ( tool.explicit_output !=nil)
|
57
|
-
outname = tool.explicit_output
|
58
|
-
else
|
59
|
-
basename = Pathname.new(name).basename
|
60
|
-
outname = "#{basename}.pepXML"
|
61
|
-
end
|
62
|
-
|
63
|
-
$pepxml_ns_prefix="xmlns:"
|
64
|
-
$pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
|
65
|
-
if not pepxml_doc.root.namespaces.default
|
66
|
-
$pepxml_ns_prefix=""
|
67
|
-
$pepxml_ns=nil
|
68
|
-
end
|
69
|
-
|
70
|
-
update_msms_pipeline_analysis(pepxml_doc,dat,outname)
|
71
|
-
update_enzyme(pepxml_doc,dat)
|
72
|
-
|
73
|
-
spectrum_queries={}
|
74
|
-
|
75
|
-
dat.peptides.each do |psm|
|
76
|
-
# psm.score
|
77
|
-
qnum= psm.query
|
78
|
-
if qnum
|
79
|
-
qid=qnum.to_s
|
80
|
-
spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
|
81
|
-
spectrum_queries[qid] << psm
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
|
86
|
-
|
87
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
static VALUE simplealign_align (VALUE self, VALUE pep_in, VALUE n_in, VALUE gene_in,VALUE m_in)
|
4
|
-
{
|
5
|
-
unsigned int n = NUM2INT(n_in);
|
6
|
-
unsigned int m = NUM2INT(m_in);
|
7
|
-
|
8
|
-
char *peptide = RSTRING_PTR(pep_in);
|
9
|
-
char *gene = RSTRING_PTR(gene_in);
|
10
|
-
}
|
11
|
-
void Init_simplealign(void)
|
12
|
-
{
|
13
|
-
VALUE klass = rb_define_class("SimpleAlign",rb_cObject);
|
14
|
-
|
15
|
-
rb_define_singleton_method(klass,
|
16
|
-
"align",simplealign_align , 9);
|
17
|
-
}
|
@@ -1,29 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
|
3
|
-
<msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
|
4
|
-
<msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
|
5
|
-
<sample_enzyme name="trypsin">
|
6
|
-
<specificity cut="KR" no_cut="P" sense="C"/>
|
7
|
-
</sample_enzyme>
|
8
|
-
<search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
|
9
|
-
<search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
|
10
|
-
<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
|
11
|
-
<aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
|
12
|
-
<parameter name="" value=""/>
|
13
|
-
</search_summary>
|
14
|
-
<spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
|
15
|
-
<search_result>
|
16
|
-
<search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
|
17
|
-
<modification_info>
|
18
|
-
<mod_aminoacid_mass position="" mass=""/>
|
19
|
-
</modification_info>
|
20
|
-
<search_score name="ionscore" value=""/>
|
21
|
-
<search_score name="identityscore" value=""/>
|
22
|
-
<search_score name="star" value="0"/>
|
23
|
-
<search_score name="homologyscore" value=""/>
|
24
|
-
<search_score name="expect" value=""/>
|
25
|
-
</search_hit>
|
26
|
-
</search_result>
|
27
|
-
</spectrum_query>
|
28
|
-
</msms_run_summary>
|
29
|
-
</msms_pipeline_analysis>
|
@@ -1,20 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This is a predefined setup file for manage_db
|
3
|
-
#
|
4
|
-
# Swissprot_uniprot annotation database (full entries for each protein)
|
5
|
-
#
|
6
|
-
---
|
7
|
-
:description: Swissprot Trembl annotation database (full entries for each protein)
|
8
|
-
:archive_old: false
|
9
|
-
:is_annotation_db: true
|
10
|
-
:decoy_prefix: decoy_
|
11
|
-
:include_filters: []
|
12
|
-
|
13
|
-
:format: dat
|
14
|
-
:id_regexes: []
|
15
|
-
|
16
|
-
:make_blast_index: false
|
17
|
-
:sources:
|
18
|
-
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
|
19
|
-
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
-
:decoys: false
|