protk 1.2.6.pre1 → 1.2.6.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +22 -27
- data/bin/blastxml_to_table.rb +50 -3
- data/bin/make_decoy.rb +30 -2
- data/bin/mascot_search.rb +46 -27
- data/bin/msgfplus_search.rb +7 -4
- data/bin/peptide_prophet.rb +9 -0
- data/bin/protxml_to_gff.rb +122 -66
- data/bin/protxml_to_table.rb +26 -3
- data/bin/tandem_search.rb +1 -1
- data/lib/protk/constants.rb +19 -19
- data/lib/protk/data/default_config.yml +0 -7
- data/lib/protk/search_tool.rb +7 -0
- metadata +118 -90
- data/bin/mascot2xml.rb +0 -87
- data/ext/protk/simplealign/simplealign.c +0 -17
- data/lib/protk/data/pepxml_mascot_template.xml +0 -29
- data/lib/protk/data/predefined_db.trembl_annotation.yaml +0 -20
data/bin/mascot2xml.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This file is part of protk
|
4
|
-
# Created by Ira Cooke 12/4/2010
|
5
|
-
#
|
6
|
-
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
require 'protk/constants'
|
11
|
-
require 'protk/search_tool'
|
12
|
-
require 'mascot/dat'
|
13
|
-
require 'libxml'
|
14
|
-
|
15
|
-
include LibXML
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# Environment with global constants
|
21
|
-
#
|
22
|
-
genv=Constants.new
|
23
|
-
|
24
|
-
tool=SearchTool.new([:database,:explicit_output,:over_write])
|
25
|
-
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
|
26
|
-
tool.option_parser.parse!
|
27
|
-
|
28
|
-
throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
|
29
|
-
|
30
|
-
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
|
31
|
-
pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
|
32
|
-
pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
|
33
|
-
pipeline_analysis_node.attributes['summary_xml']=outname
|
34
|
-
end
|
35
|
-
|
36
|
-
def update_enzyme(pepxml_doc,mascot_dat)
|
37
|
-
dat_enzyme=mascot_dat.enzyme
|
38
|
-
enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
|
39
|
-
enzyme_node.attributes['name']=dat_enzyme.title
|
40
|
-
specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
|
41
|
-
# p dat_enzyme.cleavages
|
42
|
-
# TODO: What does the spec say about multiple cut sites
|
43
|
-
# specificity.attributes['cut']=
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
ARGV.each do |file_name|
|
49
|
-
name=file_name.chomp
|
50
|
-
|
51
|
-
dat = Mascot::DAT.open(name)
|
52
|
-
pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
|
53
|
-
pepxml_doc = pepxml_template_parser.parse
|
54
|
-
|
55
|
-
outname = nil
|
56
|
-
if ( tool.explicit_output !=nil)
|
57
|
-
outname = tool.explicit_output
|
58
|
-
else
|
59
|
-
basename = Pathname.new(name).basename
|
60
|
-
outname = "#{basename}.pepXML"
|
61
|
-
end
|
62
|
-
|
63
|
-
$pepxml_ns_prefix="xmlns:"
|
64
|
-
$pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
|
65
|
-
if not pepxml_doc.root.namespaces.default
|
66
|
-
$pepxml_ns_prefix=""
|
67
|
-
$pepxml_ns=nil
|
68
|
-
end
|
69
|
-
|
70
|
-
update_msms_pipeline_analysis(pepxml_doc,dat,outname)
|
71
|
-
update_enzyme(pepxml_doc,dat)
|
72
|
-
|
73
|
-
spectrum_queries={}
|
74
|
-
|
75
|
-
dat.peptides.each do |psm|
|
76
|
-
# psm.score
|
77
|
-
qnum= psm.query
|
78
|
-
if qnum
|
79
|
-
qid=qnum.to_s
|
80
|
-
spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
|
81
|
-
spectrum_queries[qid] << psm
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
|
86
|
-
|
87
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
static VALUE simplealign_align (VALUE self, VALUE pep_in, VALUE n_in, VALUE gene_in,VALUE m_in)
|
4
|
-
{
|
5
|
-
unsigned int n = NUM2INT(n_in);
|
6
|
-
unsigned int m = NUM2INT(m_in);
|
7
|
-
|
8
|
-
char *peptide = RSTRING_PTR(pep_in);
|
9
|
-
char *gene = RSTRING_PTR(gene_in);
|
10
|
-
}
|
11
|
-
void Init_simplealign(void)
|
12
|
-
{
|
13
|
-
VALUE klass = rb_define_class("SimpleAlign",rb_cObject);
|
14
|
-
|
15
|
-
rb_define_singleton_method(klass,
|
16
|
-
"align",simplealign_align , 9);
|
17
|
-
}
|
@@ -1,29 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
|
3
|
-
<msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
|
4
|
-
<msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
|
5
|
-
<sample_enzyme name="trypsin">
|
6
|
-
<specificity cut="KR" no_cut="P" sense="C"/>
|
7
|
-
</sample_enzyme>
|
8
|
-
<search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
|
9
|
-
<search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
|
10
|
-
<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
|
11
|
-
<aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
|
12
|
-
<parameter name="" value=""/>
|
13
|
-
</search_summary>
|
14
|
-
<spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
|
15
|
-
<search_result>
|
16
|
-
<search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
|
17
|
-
<modification_info>
|
18
|
-
<mod_aminoacid_mass position="" mass=""/>
|
19
|
-
</modification_info>
|
20
|
-
<search_score name="ionscore" value=""/>
|
21
|
-
<search_score name="identityscore" value=""/>
|
22
|
-
<search_score name="star" value="0"/>
|
23
|
-
<search_score name="homologyscore" value=""/>
|
24
|
-
<search_score name="expect" value=""/>
|
25
|
-
</search_hit>
|
26
|
-
</search_result>
|
27
|
-
</spectrum_query>
|
28
|
-
</msms_run_summary>
|
29
|
-
</msms_pipeline_analysis>
|
@@ -1,20 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This is a predefined setup file for manage_db
|
3
|
-
#
|
4
|
-
# Swissprot_uniprot annotation database (full entries for each protein)
|
5
|
-
#
|
6
|
-
---
|
7
|
-
:description: Swissprot Trembl annotation database (full entries for each protein)
|
8
|
-
:archive_old: false
|
9
|
-
:is_annotation_db: true
|
10
|
-
:decoy_prefix: decoy_
|
11
|
-
:include_filters: []
|
12
|
-
|
13
|
-
:format: dat
|
14
|
-
:id_regexes: []
|
15
|
-
|
16
|
-
:make_blast_index: false
|
17
|
-
:sources:
|
18
|
-
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
|
19
|
-
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
-
:decoys: false
|