protk 1.2.5 → 1.2.6.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 12/4/2010
5
+ #
6
+ # Convert mascot dat files to pepxml without using TPP Mascot2XML
7
+ #
8
+
9
+
10
+ require 'protk/constants'
11
+ require 'protk/search_tool'
12
+ require 'mascot/dat'
13
+ require 'libxml'
14
+
15
+ include LibXML
16
+
17
+
18
+
19
+
20
# Environment with global constants
#
genv=Constants.new

# Command line handling. The symbols passed to SearchTool select which of its
# options this script uses (NOTE(review): exact semantics live in
# protk/search_tool -- confirm there). The file loop further down treats
# whatever is left in ARGV after parsing as the list of input .dat files.
tool=SearchTool.new([:database,:explicit_output,:over_write])
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
tool.option_parser.parse!

# A single explicit output name cannot be shared by several input files.
# Use raise rather than throw: throw is catch/throw control flow and, with no
# matching catch, would be reported as a confusing "uncaught throw" error.
raise ArgumentError, "Only one file at a time is allowed when using explicit output" if tool.explicit_output && ARGV.length > 1
29
+
30
# Fill in the date and summary_xml attributes of the msms_pipeline_analysis
# element of a pepXML document.
#
# pepxml_doc - document object answering find(xpath, namespace) with an
#              indexable collection of nodes; each node exposes an
#              attributes object supporting []=.
# mascot_dat - object whose header.date converts (via to_i) to seconds since
#              the epoch.
# outname    - value stored in the summary_xml attribute.
#
# Reads the globals $pepxml_ns_prefix and $pepxml_ns, which the caller must
# have set before calling.
#
# Raises RuntimeError when the document has no msms_pipeline_analysis element.
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
  pipeline_analysis_node = pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
  raise "No msms_pipeline_analysis element found in pepXML document" unless pipeline_analysis_node

  # Write the timestamp as YYYY-MM-DDTHH:MM:SS, the form used by the date
  # attribute in the bundled pepXML template. Time#to_s would instead produce
  # "YYYY-MM-DD HH:MM:SS +ZZZZ".
  search_time = Time.at(mascot_dat.header.date.to_i)
  pipeline_analysis_node.attributes['date'] = search_time.strftime('%Y-%m-%dT%H:%M:%S')
  pipeline_analysis_node.attributes['summary_xml'] = outname
end
35
+
36
# Copy the enzyme title from the Mascot results onto the name attribute of
# the sample_enzyme element of a pepXML document.
#
# pepxml_doc - document object answering find(xpath, namespace) with an
#              indexable collection of nodes; each node exposes an
#              attributes object supporting []=.
# mascot_dat - object whose enzyme responds to title.
#
# Reads the globals $pepxml_ns_prefix and $pepxml_ns, which the caller must
# have set before calling.
#
# Raises RuntimeError when the document has no sample_enzyme element.
def update_enzyme(pepxml_doc,mascot_dat)
  dat_enzyme = mascot_dat.enzyme
  enzyme_node = pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
  raise "No sample_enzyme element found in pepXML document" unless enzyme_node

  enzyme_node.attributes['name'] = dat_enzyme.title

  # TODO: What does the spec say about multiple cut sites?
  # The cut attribute of the child specificity element is not updated yet;
  # look that element up here once the mapping from dat_enzyme.cleavages is
  # decided.
end
45
+
46
+
47
+
48
# Handle every file named on the command line: open it as a Mascot dat file,
# load a fresh copy of the pepXML template, fill in the header attributes and
# collect the peptide entries per query.
# Nothing in this loop writes the pepXML document out; the collected entries
# are only printed with p.
ARGV.each do |file_name|
  dat_path = file_name.chomp

  dat = Mascot::DAT.open(dat_path)
  pepxml_doc = XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml").parse

  # An explicit output name wins; otherwise the name is the input file's
  # basename with ".pepXML" appended.
  outname =
    if tool.explicit_output.nil?
      "#{Pathname.new(dat_path).basename}.pepXML"
    else
      tool.explicit_output
    end

  # XPath namespace settings shared (through globals) with the update_*
  # helpers. They are cleared when the template root declares no default
  # namespace.
  $pepxml_ns_prefix, $pepxml_ns = "xmlns:", "xmlns:http://regis-web.systemsbiology.net/pepXML"
  unless pepxml_doc.root.namespaces.default
    $pepxml_ns_prefix = ""
    $pepxml_ns = nil
  end

  update_msms_pipeline_analysis(pepxml_doc, dat, outname)
  update_enzyme(pepxml_doc, dat)

  # Bucket the peptide entries by their query value (as a string); entries
  # without a query are skipped.
  spectrum_queries = {}
  dat.peptides.each do |psm|
    query_number = psm.query
    next unless query_number

    (spectrum_queries[query_number.to_s] ||= []) << psm
  end

  # Print each query id together with the pep of its first entry.
  spectrum_queries.each_pair { |query_id, psms| p "#{query_id} #{psms[0].pep}" }
end
@@ -0,0 +1,17 @@
1
+
2
+
3
+ static VALUE simplealign_align (VALUE self, VALUE pep_in, VALUE n_in, VALUE gene_in,VALUE m_in)
4
+ {
5
+ unsigned int n = NUM2INT(n_in);
6
+ unsigned int m = NUM2INT(m_in);
7
+
8
+ char *peptide = RSTRING_PTR(pep_in);
9
+ char *gene = RSTRING_PTR(gene_in);
10
+ }
11
+ void Init_simplealign(void)
12
+ {
13
+ VALUE klass = rb_define_class("SimpleAlign",rb_cObject);
14
+
15
+ rb_define_singleton_method(klass,
16
+ "align",simplealign_align , 9);
17
+ }
@@ -236,10 +236,13 @@ class Constants
236
236
  @protk_dir=ENV['PROTK_INSTALL_DIR']
237
237
  end
238
238
 
239
-
239
+ # Protk Defaults
240
+ #
240
241
  default_config_yml = YAML.load_file "#{File.dirname(__FILE__)}/data/default_config.yml"
241
242
  throw "Unable to read the config file at #{File.dirname(__FILE__)}/data/default_config.yml" unless default_config_yml!=nil
242
243
 
244
+ # User-defined defaults override protk defaults
245
+ #
243
246
  user_config_yml = nil
244
247
  user_config_yml = YAML.load_file "#{@protk_dir}/config.yml" if File.exist? "#{@protk_dir}/config.yml"
245
248
  if ( user_config_yml !=nil )
@@ -248,11 +251,22 @@ class Constants
248
251
  @env=default_config_yml
249
252
  end
250
253
 
251
- protk_roots = ["tpp","omssa","blast","pwiz","msgfplus","openms"]
254
+ # Application installation directories. From environment variables
255
+ #
256
+ protk_roots = [["tpp","xinteract"],["omssa","omssacl"],["blast","blastdbcmd"],["pwiz","msconvert"],["msgfplus",""],["openms","ExecutePipeline"]]
252
257
 
253
- protk_roots.each do |r|
258
+ protk_roots.each do |r,binaryname|
254
259
  env_value = ENV["PROTK_#{r.upcase}_ROOT"]
255
260
  if ( env_value!=nil)
261
+ # "FROMPATH" means derive the root from the directory containing the tool's binary, located via `which`
262
+ if env_value=="FROMPATH"
263
+ bin_path=Pathname.new(%x[which #{binaryname}].chomp)
264
+ if bin_path.exist?
265
+ env_value=bin_path.realpath.dirname.to_s
266
+ else
267
+ env_value=""
268
+ end
269
+ end
256
270
  p "Using #{r} root #{env_value}"
257
271
  @env["#{r}_root"]=env_value
258
272
  end
@@ -0,0 +1,29 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
3
+ <msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
4
+ <msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
5
+ <sample_enzyme name="trypsin">
6
+ <specificity cut="KR" no_cut="P" sense="C"/>
7
+ </sample_enzyme>
8
+ <search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
9
+ <search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
10
+ <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
11
+ <aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
12
+ <parameter name="" value=""/>
13
+ </search_summary>
14
+ <spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
15
+ <search_result>
16
+ <search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
17
+ <modification_info>
18
+ <mod_aminoacid_mass position="" mass=""/>
19
+ </modification_info>
20
+ <search_score name="ionscore" value=""/>
21
+ <search_score name="identityscore" value=""/>
22
+ <search_score name="star" value="0"/>
23
+ <search_score name="homologyscore" value=""/>
24
+ <search_score name="expect" value=""/>
25
+ </search_hit>
26
+ </search_result>
27
+ </spectrum_query>
28
+ </msms_run_summary>
29
+ </msms_pipeline_analysis>
@@ -0,0 +1,20 @@
1
+ #
2
+ # This is a predefined setup file for manage_db
3
+ #
4
+ # UniProt TrEMBL annotation database (full entries for each protein)
5
+ #
6
+ ---
7
+ :description: Swissprot Trembl annotation database (full entries for each protein)
8
+ :archive_old: false
9
+ :is_annotation_db: true
10
+ :decoy_prefix: decoy_
11
+ :include_filters: []
12
+
13
+ :format: dat
14
+ :id_regexes: []
15
+
16
+ :make_blast_index: false
17
+ :sources:
18
+ - - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
19
+ - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
20
+ :decoys: false
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.5
5
- prerelease:
4
+ version: 1.2.6.pre1
5
+ prerelease: 6
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ira Cooke
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-06 00:00:00.000000000 Z
12
+ date: 2013-10-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ftools
@@ -256,6 +256,7 @@ files:
256
256
  - bin/libra.rb
257
257
  - bin/make_decoy.rb
258
258
  - bin/manage_db.rb
259
+ - bin/mascot2xml.rb
259
260
  - bin/mascot_search.rb
260
261
  - bin/mascot_to_pepxml.rb
261
262
  - bin/msgfplus_search.rb
@@ -283,10 +284,12 @@ files:
283
284
  - lib/protk/data/FeatureFinderCentroided.ini
284
285
  - lib/protk/data/FeatureFinderIsotopeWavelet.ini
285
286
  - lib/protk/data/galaxyenv.sh
287
+ - lib/protk/data/pepxml_mascot_template.xml
286
288
  - lib/protk/data/predefined_db.crap.yaml
287
289
  - lib/protk/data/predefined_db.sphuman.yaml
288
290
  - lib/protk/data/predefined_db.swissprot_annotation.yaml
289
291
  - lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
292
+ - lib/protk/data/predefined_db.trembl_annotation.yaml
290
293
  - lib/protk/data/tandem_params.xml
291
294
  - lib/protk/data/taxonomy_template.xml
292
295
  - lib/protk/data/unimod.xml
@@ -295,6 +298,7 @@ files:
295
298
  - lib/protk/data/uniprot_input_accessions.loc
296
299
  - lib/protk/data/yum_packages.yaml
297
300
  - ext/protk/decoymaker/decoymaker.c
301
+ - ext/protk/simplealign/simplealign.c
298
302
  - ext/protk/decoymaker/extconf.rb
299
303
  - ext/protk/simplealign/extconf.rb
300
304
  homepage: http://rubygems.org/gems/protk
@@ -313,9 +317,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
313
317
  required_rubygems_version: !ruby/object:Gem::Requirement
314
318
  none: false
315
319
  requirements:
316
- - - ! '>='
320
+ - - ! '>'
317
321
  - !ruby/object:Gem::Version
318
- version: '0'
322
+ version: 1.3.1
319
323
  requirements: []
320
324
  rubyforge_project:
321
325
  rubygems_version: 1.8.24
@@ -323,3 +327,4 @@ signing_key:
323
327
  specification_version: 3
324
328
  summary: Proteomics Toolkit
325
329
  test_files: []
330
+ has_rdoc: