protk 1.2.5 → 1.2.6.pre1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 12/4/2010
5
+ #
6
+ # Convert mascot dat files to pepxml without using TPP Mascot2XML
7
+ #
8
+
9
+
10
+ require 'protk/constants'
11
+ require 'protk/search_tool'
12
+ require 'mascot/dat'
13
+ require 'libxml'
14
+
15
+ include LibXML
16
+
17
+
18
+
19
+
20
+ # Environment with global constants
21
+ #
22
+ genv=Constants.new
23
+
24
+ tool=SearchTool.new([:database,:explicit_output,:over_write])
25
+ tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
26
+ tool.option_parser.parse!
27
+
28
+ throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
29
+
30
+ def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
31
+ pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
32
+ pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
33
+ pipeline_analysis_node.attributes['summary_xml']=outname
34
+ end
35
+
36
+ def update_enzyme(pepxml_doc,mascot_dat)
37
+ dat_enzyme=mascot_dat.enzyme
38
+ enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
39
+ enzyme_node.attributes['name']=dat_enzyme.title
40
+ specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
41
+ # p dat_enzyme.cleavages
42
+ # TODO: What does the spec say about multiple cut sites?
43
+ # specificity.attributes['cut']=
44
+ end
45
+
46
+
47
+
48
+ ARGV.each do |file_name|
49
+ name=file_name.chomp
50
+
51
+ dat = Mascot::DAT.open(name)
52
+ pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
53
+ pepxml_doc = pepxml_template_parser.parse
54
+
55
+ outname = nil
56
+ if ( tool.explicit_output !=nil)
57
+ outname = tool.explicit_output
58
+ else
59
+ basename = Pathname.new(name).basename
60
+ outname = "#{basename}.pepXML"
61
+ end
62
+
63
+ $pepxml_ns_prefix="xmlns:"
64
+ $pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
65
+ if not pepxml_doc.root.namespaces.default
66
+ $pepxml_ns_prefix=""
67
+ $pepxml_ns=nil
68
+ end
69
+
70
+ update_msms_pipeline_analysis(pepxml_doc,dat,outname)
71
+ update_enzyme(pepxml_doc,dat)
72
+
73
+ spectrum_queries={}
74
+
75
+ dat.peptides.each do |psm|
76
+ # psm.score
77
+ qnum= psm.query
78
+ if qnum
79
+ qid=qnum.to_s
80
+ spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
81
+ spectrum_queries[qid] << psm
82
+ end
83
+ end
84
+
85
+ spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
86
+
87
+ end
@@ -0,0 +1,17 @@
1
+
2
+
3
+ static VALUE simplealign_align (VALUE self, VALUE pep_in, VALUE n_in, VALUE gene_in,VALUE m_in)
4
+ {
5
+ unsigned int n = NUM2INT(n_in);
6
+ unsigned int m = NUM2INT(m_in);
7
+
8
+ char *peptide = RSTRING_PTR(pep_in);
9
+ char *gene = RSTRING_PTR(gene_in);
10
+ }
11
+ void Init_simplealign(void)
12
+ {
13
+ VALUE klass = rb_define_class("SimpleAlign",rb_cObject);
14
+
15
+ rb_define_singleton_method(klass,
16
+ "align",simplealign_align , 9);
17
+ }
@@ -236,10 +236,13 @@ class Constants
236
236
  @protk_dir=ENV['PROTK_INSTALL_DIR']
237
237
  end
238
238
 
239
-
239
+ # Protk Defaults
240
+ #
240
241
  default_config_yml = YAML.load_file "#{File.dirname(__FILE__)}/data/default_config.yml"
241
242
  throw "Unable to read the config file at #{File.dirname(__FILE__)}/data/default_config.yml" unless default_config_yml!=nil
242
243
 
244
+ # User-defined defaults override protk defaults
245
+ #
243
246
  user_config_yml = nil
244
247
  user_config_yml = YAML.load_file "#{@protk_dir}/config.yml" if File.exist? "#{@protk_dir}/config.yml"
245
248
  if ( user_config_yml !=nil )
@@ -248,11 +251,22 @@ class Constants
248
251
  @env=default_config_yml
249
252
  end
250
253
 
251
- protk_roots = ["tpp","omssa","blast","pwiz","msgfplus","openms"]
254
+ # Application installation directories, taken from environment variables
255
+ #
256
+ protk_roots = [["tpp","xinteract"],["omssa","omssacl"],["blast","blastdbcmd"],["pwiz","msconvert"],["msgfplus",""],["openms","ExecutePipeline"]]
252
257
 
253
- protk_roots.each do |r|
258
+ protk_roots.each do |r,binaryname|
254
259
  env_value = ENV["PROTK_#{r.upcase}_ROOT"]
255
260
  if ( env_value!=nil)
261
+ # "FROMPATH" means detect the root value from the location of the tool's binary on the PATH
262
+ if env_value=="FROMPATH"
263
+ bin_path=Pathname.new(%x[which #{binaryname}].chomp)
264
+ if bin_path.exist?
265
+ env_value=bin_path.realpath.dirname.to_s
266
+ else
267
+ env_value=""
268
+ end
269
+ end
256
270
  p "Using #{r} root #{env_value}"
257
271
  @env["#{r}_root"]=env_value
258
272
  end
@@ -0,0 +1,29 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
3
+ <msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
4
+ <msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
5
+ <sample_enzyme name="trypsin">
6
+ <specificity cut="KR" no_cut="P" sense="C"/>
7
+ </sample_enzyme>
8
+ <search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
9
+ <search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
10
+ <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
11
+ <aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
12
+ <parameter name="" value=""/>
13
+ </search_summary>
14
+ <spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
15
+ <search_result>
16
+ <search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
17
+ <modification_info>
18
+ <mod_aminoacid_mass position="" mass=""/>
19
+ </modification_info>
20
+ <search_score name="ionscore" value=""/>
21
+ <search_score name="identityscore" value=""/>
22
+ <search_score name="star" value="0"/>
23
+ <search_score name="homologyscore" value=""/>
24
+ <search_score name="expect" value=""/>
25
+ </search_hit>
26
+ </search_result>
27
+ </spectrum_query>
28
+ </msms_run_summary>
29
+ </msms_pipeline_analysis>
@@ -0,0 +1,20 @@
1
+ #
2
+ # This is a predefined setup file for manage_db
3
+ #
4
+ # Uniprot Trembl annotation database (full entries for each protein)
5
+ #
6
+ ---
7
+ :description: Swissprot Trembl annotation database (full entries for each protein)
8
+ :archive_old: false
9
+ :is_annotation_db: true
10
+ :decoy_prefix: decoy_
11
+ :include_filters: []
12
+
13
+ :format: dat
14
+ :id_regexes: []
15
+
16
+ :make_blast_index: false
17
+ :sources:
18
+ - - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
19
+ - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
20
+ :decoys: false
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.5
5
- prerelease:
4
+ version: 1.2.6.pre1
5
+ prerelease: 6
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ira Cooke
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-06 00:00:00.000000000 Z
12
+ date: 2013-10-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ftools
@@ -256,6 +256,7 @@ files:
256
256
  - bin/libra.rb
257
257
  - bin/make_decoy.rb
258
258
  - bin/manage_db.rb
259
+ - bin/mascot2xml.rb
259
260
  - bin/mascot_search.rb
260
261
  - bin/mascot_to_pepxml.rb
261
262
  - bin/msgfplus_search.rb
@@ -283,10 +284,12 @@ files:
283
284
  - lib/protk/data/FeatureFinderCentroided.ini
284
285
  - lib/protk/data/FeatureFinderIsotopeWavelet.ini
285
286
  - lib/protk/data/galaxyenv.sh
287
+ - lib/protk/data/pepxml_mascot_template.xml
286
288
  - lib/protk/data/predefined_db.crap.yaml
287
289
  - lib/protk/data/predefined_db.sphuman.yaml
288
290
  - lib/protk/data/predefined_db.swissprot_annotation.yaml
289
291
  - lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
292
+ - lib/protk/data/predefined_db.trembl_annotation.yaml
290
293
  - lib/protk/data/tandem_params.xml
291
294
  - lib/protk/data/taxonomy_template.xml
292
295
  - lib/protk/data/unimod.xml
@@ -295,6 +298,7 @@ files:
295
298
  - lib/protk/data/uniprot_input_accessions.loc
296
299
  - lib/protk/data/yum_packages.yaml
297
300
  - ext/protk/decoymaker/decoymaker.c
301
+ - ext/protk/simplealign/simplealign.c
298
302
  - ext/protk/decoymaker/extconf.rb
299
303
  - ext/protk/simplealign/extconf.rb
300
304
  homepage: http://rubygems.org/gems/protk
@@ -313,9 +317,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
313
317
  required_rubygems_version: !ruby/object:Gem::Requirement
314
318
  none: false
315
319
  requirements:
316
- - - ! '>='
320
+ - - ! '>'
317
321
  - !ruby/object:Gem::Version
318
- version: '0'
322
+ version: 1.3.1
319
323
  requirements: []
320
324
  rubyforge_project:
321
325
  rubygems_version: 1.8.24
@@ -323,3 +327,4 @@ signing_key:
323
327
  specification_version: 3
324
328
  summary: Proteomics Toolkit
325
329
  test_files: []
330
+ has_rdoc: