rbbt-phgx 2.1.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZDRkZjNlZjhkMzIzZGVmNmEyMDAyZmY4NzM5NzQ5YmUwYzU3MzQ4Yg==
5
- data.tar.gz: !binary |-
6
- YmM0ZWI0YTU0Y2Q5MmE0YmM4ZmUwN2JiODJjNzRiNDUyMGVhZWU1YQ==
2
+ SHA1:
3
+ metadata.gz: 36529e9255e02520dc576429de183175c52b2e30
4
+ data.tar.gz: 95a9925f8c9c79c2ea65e1effe28964d3ece20d8
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- NGQ1MGQ1MzU3OGViMzBhNGZjZDgxZTg2NDU0MTczN2I4MTk0NDVlNzRjZGUx
10
- OWU2MTdlMGJhNTFjNzlhOTYyNjA1ZDFkNjM4ZTczZTAyNmFkNzMxZWFjZjBk
11
- YmEwM2U1MmViNzU2OGE4ZGI3Njk3NTYyMmZjMzVjZDA5MjJkODk=
12
- data.tar.gz: !binary |-
13
- YWU2NGJiMmE4MmVkNjYxZmJkYmVjODc0OGJhZGYwNTliN2QwMmVhZDNlMDJh
14
- MjJkMmUxYmViNmY3MGNhYTVlMTZiNmUyMDQxYTRiOGUzYTVhNTI2MWIzMzk2
15
- NmE4OTg0ZDRjOGYxYjQ1YjE5NTI4Y2M1ZWI5NmVlNDczYjU2MzE=
6
+ metadata.gz: 860587fb353eb97f5bb7e6515e030e9590d258533c1e0698896f416b7bab70db5214581c7d1d2cb0d842d91b4e3100d4c47a0faa57c0d42356b39f386d5f1745
7
+ data.tar.gz: 5c1f9ca7ce18f225aa5a8c013067f3e665a26c97284922e7138b22b29ef7d913b92f1a871c9b38a7c73eef4bbf0a814260d60e8ebae63b3d9d984b016c731780
metadata CHANGED
@@ -1,30 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-phgx
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-20 00:00:00.000000000 Z
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ! '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: 4.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ! '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
- description: Pharmaco-genomics related data sources
27
+ description: Pharmaco-genomics related data sources. Deprecated. Relevant files moved
28
+ to rbbt-sources for the time been
28
29
  email: miguel.vazquez@fdi.ucm.es
29
30
  executables: []
30
31
  extensions: []
@@ -43,24 +44,13 @@ files:
43
44
  - lib/rbbt/sources/biogrid.rb
44
45
  - lib/rbbt/sources/cancer.rb
45
46
  - lib/rbbt/sources/dbsnp.rb
46
- - lib/rbbt/sources/kegg.rb
47
- - lib/rbbt/sources/matador.rb
48
- - lib/rbbt/sources/pharmagkb.rb
49
- - lib/rbbt/sources/pina.rb
50
- - lib/rbbt/sources/stitch.rb
51
- - lib/rbbt/sources/string.rb
52
47
  - share/Cancer/anais_annotations
53
48
  - share/Cancer/anais_interactions
54
49
  - share/Cancer/cancer_genes.tsv
55
50
  - share/install/Biogrid/Rakefile
56
51
  - share/install/DBSNP/Rakefile
57
- - share/install/KEGG/Rakefile
58
52
  - share/install/Matador/Rakefile
59
53
  - share/install/NCI/Rakefile
60
- - share/install/PharmaGKB/Rakefile
61
- - share/install/Pina/Rakefile
62
- - share/install/STITCH/Rakefile
63
- - share/install/STRING/Rakefile
64
54
  - share/install/lib/rake_helper.rb
65
55
  - test/rbbt/mutation/test_chasm.rb
66
56
  - test/rbbt/mutation/test_fireDB.rb
@@ -86,32 +76,32 @@ require_paths:
86
76
  - lib
87
77
  required_ruby_version: !ruby/object:Gem::Requirement
88
78
  requirements:
89
- - - ! '>='
79
+ - - ">="
90
80
  - !ruby/object:Gem::Version
91
81
  version: '0'
92
82
  required_rubygems_version: !ruby/object:Gem::Requirement
93
83
  requirements:
94
- - - ! '>='
84
+ - - ">="
95
85
  - !ruby/object:Gem::Version
96
86
  version: '0'
97
87
  requirements: []
98
88
  rubyforge_project:
99
- rubygems_version: 2.2.0
89
+ rubygems_version: 2.2.1
100
90
  signing_key:
101
91
  specification_version: 4
102
- summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
92
+ summary: DEPRECATED -- Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
103
93
  test_files:
104
- - test/rbbt/sources/test_matador.rb
105
- - test/rbbt/sources/test_cancer.rb
106
- - test/rbbt/sources/test_stitch.rb
107
- - test/rbbt/sources/test_pharmagkb.rb
108
- - test/rbbt/sources/test_kegg.rb
109
- - test/rbbt/mutation/test_oncodriveFM.rb
110
- - test/rbbt/mutation/test_polyphen.rb
111
94
  - test/rbbt/mutation/test_chasm.rb
112
- - test/rbbt/mutation/test_sift.rb
113
95
  - test/rbbt/mutation/test_transFIC.rb
114
96
  - test/rbbt/mutation/test_snps_and_go.rb
97
+ - test/rbbt/mutation/test_sift.rb
98
+ - test/rbbt/mutation/test_oncodriveFM.rb
99
+ - test/rbbt/mutation/test_polyphen.rb
115
100
  - test/rbbt/mutation/test_mutation_assessor.rb
116
101
  - test/rbbt/mutation/test_fireDB.rb
102
+ - test/rbbt/sources/test_pharmagkb.rb
103
+ - test/rbbt/sources/test_kegg.rb
104
+ - test/rbbt/sources/test_matador.rb
105
+ - test/rbbt/sources/test_cancer.rb
106
+ - test/rbbt/sources/test_stitch.rb
117
107
  - test/test_helper.rb
@@ -1,127 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/resource'
3
-
4
- module KEGG
5
- extend Resource
6
- self.pkgdir = "phgx"
7
- self.subdir = "share/kegg"
8
-
9
-
10
- KEGG.claim KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
11
-
12
- def self.names
13
- @@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
14
- end
15
-
16
- def self.descriptions
17
- @@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
18
- end
19
-
20
-
21
- def self.index2genes
22
- @@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
23
- end
24
-
25
- def self.index2ens
26
- @@index2ens ||= KEGG.identifiers.index(:persist => true)
27
- end
28
-
29
- def self.index2kegg
30
- @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
31
- end
32
-
33
- def self.id2name(id)
34
- names[id]
35
- end
36
-
37
- def self.name2id(name)
38
- names.select{|id,n| n.downcase.index(name.downcase) == 0}.collect{|id,n| id} rescue []
39
- end
40
-
41
-
42
- def self.description(id)
43
- descriptions[id]
44
- end
45
- end
46
-
47
- if defined? Entity
48
-
49
- module KeggPathway
50
- extend Entity
51
- self.format = "KEGG Pathway ID"
52
-
53
- self.annotation :organism
54
-
55
- def self.filter(query, field = nil, options = nil, entity = nil)
56
- return true if query == entity
57
-
58
- return true if KeggPathway.setup(entity.dup, options.merge(:format => field)).name.index query
59
-
60
- false
61
- end
62
-
63
- property :name => :single2array do
64
- return nil if self.nil?
65
- name = KEGG.id2name(self)
66
- name.sub(/ - Homo.*/,'') unless name.nil?
67
- end
68
-
69
- property :description => :single2array do
70
- KEGG.description(self)
71
- end
72
-
73
- property :genes => :array2single do |*args|
74
- organism = args.first || self.organism
75
- KEGG.index2genes.values_at(*self).
76
- each{|gene| gene.organism = organism if gene.respond_to? :organism }
77
- end
78
- end
79
-
80
- if defined? Gene and Entity === Gene
81
- module Gene
82
- self.format = "KEGG Gene ID"
83
-
84
- def to_kegg
85
- return self if format == "KEGG Gene ID"
86
- if Array === self
87
- Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
88
- else
89
- Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
90
- end
91
- end
92
-
93
- def from_kegg
94
- return self unless format == "KEGG Gene ID"
95
- if Array === self
96
- Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
97
- else
98
- Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
99
- end
100
- end
101
-
102
- def self.gene_kegg_pathway_index
103
- @@gene_kegg_pathway_index ||=
104
- KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
105
- end
106
-
107
- property :to => :array2single do |new_format|
108
- case
109
- when format == new_format
110
- self
111
- when format == "KEGG Gene ID"
112
- ensembl = from_kegg.clean_annotations
113
- Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
114
- when new_format == "KEGG Gene ID"
115
- to_kegg
116
- else
117
- Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
118
- end
119
- end
120
-
121
- property :kegg_pathways => :array2single do
122
- @kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
123
- each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
124
- end
125
- end
126
- end
127
- end
@@ -1,9 +0,0 @@
1
- require 'phgx'
2
-
3
- module Matador
4
- extend Resource
5
- self.pkgdir = "phgx"
6
- self.subdir = "share/matador"
7
-
8
- Matador.claim Matador.root, :rake, Rbbt.share.install.Matador.Rakefile.find(:lib)
9
- end
@@ -1,9 +0,0 @@
1
- require 'phgx'
2
-
3
- module PharmaGKB
4
- extend Resource
5
- self.pkgdir = "phgx"
6
- self.subdir = "share/pharmagkb"
7
-
8
- PharmaGKB.claim PharmaGKB.root, :rake, Rbbt.share.install.PharmaGKB.Rakefile.find(:lib)
9
- end
@@ -1,35 +0,0 @@
1
- require 'phgx'
2
-
3
- module Pina
4
- extend Resource
5
- self.pkgdir = "phgx"
6
- self.subdir = "share/pina"
7
-
8
- Pina.claim Pina.root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
9
- end
10
-
11
- if defined? Entity and defined? Gene and Entity === Gene
12
- require 'rbbt/entity/gene'
13
- require 'rbbt/entity/interactor'
14
- require 'rbbt/sources/PSI_MI'
15
-
16
- module Gene
17
- property :pina_interactors => :array2single do
18
- ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
19
- pina = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)
20
-
21
- int = self.ensembl.collect do |ens|
22
- uniprot = ens2uniprot[ens]
23
- list = pina.values_at(*uniprot).compact.collect do |v|
24
- Misc.zip_fields(v).collect do |o, method, articles|
25
- Interactor.setup(o, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;")))
26
- end
27
- end.flatten.uniq
28
- Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
29
- end
30
-
31
- Gene.setup(int, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
32
- end
33
- end
34
- end
35
-
@@ -1,9 +0,0 @@
1
- require 'phgx'
2
-
3
- module STITCH
4
- extend Resource
5
- self.pkgdir = "phgx"
6
- self.subdir = "share/stitch"
7
-
8
- STITCH.claim STITCH.root, :rake, Rbbt.share.install.STITCH.Rakefile.find(:lib)
9
- end
@@ -1,27 +0,0 @@
1
- require 'phgx'
2
-
3
- module STRING
4
- extend Resource
5
- self.pkgdir = "phgx"
6
- self.subdir = "share/string"
7
-
8
- STRING.claim STRING.root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
9
- end
10
-
11
- if defined? Entity and defined? Gene and Entity === Gene
12
- module Gene
13
- property :string_interactors => :array2single do |*args|
14
- threshold = args.first || 800
15
- string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)
16
- all = self.ensembl.collect do |gene|
17
- interactors = gene.proteins.collect{|protein| Misc.zip_fields((string[protein] || [[],[]])).select{|i, score| score.to_i > threshold}.collect{|ints,s| ints}}.compact.flatten.uniq
18
- Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
19
- end
20
-
21
- all.compact.first.annotate all if Annotated === all.compact.first
22
-
23
- all
24
- end
25
- end
26
- end
27
-
@@ -1,114 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
- define_source_tasks "h.sapiens" => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
4
- "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
5
- "drugs" => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
6
- "pathways" => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
7
-
8
-
9
- file :identifiers => 'source/h.sapiens' do |t|
10
- pairs = {}
11
- entry = nil
12
- Open.read(t.prerequisites.first).each do |line|
13
- if line =~ /^ENTRY\s+(\d+)/
14
- entry = $1
15
- next
16
- end
17
-
18
- if line =~ /Ensembl: (ENSG\d+)/
19
- pairs[entry] = $1
20
- end
21
- end
22
-
23
- Open.write(t.name, ['#Ensembl Gene ID','KEGG Gene ID'] * "\t" + "\n" + pairs.collect{|entry, ens| [ens, "hsa:" + entry] * "\t"} * "\n")
24
- end
25
-
26
- file :gene_drug => 'source/drugs' do |t|
27
- pairs = {}
28
- drug = nil
29
- Open.read(t.prerequisites.first).
30
- scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|TARGET/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
31
- if line =~ /^ENTRY\s+(\w+)/
32
- drug = $1
33
- next
34
- end
35
-
36
- if line =~ /TARGET.*?\[HSA:(.*?)\]/
37
- genes = $1.split(/\s/)
38
- genes.each do |gene|
39
- pairs[gene] ||= []
40
- pairs[gene] << drug
41
- end
42
- end
43
- end
44
-
45
- Open.write(t.name, ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t" + "\n" + pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t"} * "\n")
46
- end
47
-
48
- file :drugs => 'source/drugs' do |t|
49
- info = {}
50
- drug = nil
51
- Open.read(t.prerequisites.first).
52
- scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|NAME|DBLINKS/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
53
- if line =~ /^ENTRY\s+(\w+)/
54
- drug = $1
55
- next
56
- end
57
-
58
- if line =~ /^NAME(.*)/
59
- names = $1.split(/;/)
60
- names.each do |name|
61
- info[drug] ||= [[],[]]
62
- info[drug][0] << name.chomp.strip
63
- end
64
- end
65
-
66
- if line =~ /^DBLINKS(.*)/
67
- $1.match(/PubChem: (\d*)/)
68
- pubchem = $1
69
- next unless pubchem
70
- info[drug] ||= [[],[]]
71
- info[drug][1] << pubchem.chomp.strip
72
- end
73
- end
74
-
75
- Open.write(t.name, ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t" + "\n" + info.collect{|drug, info| [drug, info.collect{|v| v * "|"} ].flatten * "\t"} * "\n")
76
- end
77
-
78
- file :pathways => 'source/pathways' do |t|
79
- descs = {}
80
- names = {}
81
- klass = {}
82
- pathway = nil
83
- Open.read(t.prerequisites.first).split(/\n/).each do |line|
84
- if line =~ /ENTRY\s+(\w+)/
85
- pathway = $1.strip
86
- end
87
-
88
- if line =~ /NAME (.*)/
89
- names[pathway] = $1.strip
90
- end
91
-
92
- if line =~ /DESCRIPTION (.*)/
93
- descs[pathway] = $1.strip
94
- end
95
-
96
- if line =~ /CLASS (.*)/
97
- klass[pathway] = $1.strip
98
- end
99
- end
100
-
101
- Open.write(t.name, "#: :type=:list\n" + ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway], klass[pathway]] * "\t"} * "\n")
102
- end
103
-
104
- process_tsv :gene_pathway, 'hsa_gene_map.tab',
105
- :sep2 => ' ' do
106
- headers ['KEGG Gene ID', 'KEGG Pathway ID']
107
- data do |gene, pathway|
108
- "hsa:#{ gene }\t#{pathway.flatten.collect{|name| "hsa" + name} * "|"}"
109
- end
110
- end
111
-
112
- add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
113
-
114
-
@@ -1,211 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
- define_source_tasks "genes" => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
4
- "drugs" => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
5
- "diseases" => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
6
- "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
7
- "variants" => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
8
- "pathways" => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
9
-
10
-
11
- process_tsv :diseases, 'diseases',
12
- :header_hash => "",
13
- :fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
14
- headers ['PhGKB Disease ID']
15
- end
16
-
17
- process_tsv :identifiers, 'genes',
18
- :header_hash => "",
19
- :fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
20
- headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
21
- end
22
-
23
- process_tsv :drugs, 'drugs',
24
- :header_hash => "",
25
- :fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
26
- :fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
27
- headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
28
- end
29
-
30
- process_tsv :relationships, 'relationships',
31
- :header_hash => "",
32
- :merge => true,
33
- :fix => proc{|l|
34
- l.gsub!(/Gene:|Drug:|Disease:/,'')
35
- parts = l.split("\t")
36
- rels = parts.pop
37
- parts = [parts.values_at(0, 2) * ":"]
38
- pmids = []
39
- pathways = []
40
- rsids = []
41
- rels.split(',').each do |r|
42
- case
43
- when r =~ /PMID:(.*)/
44
- pmids << $1
45
- when r =~ /Pathway:(.*)/
46
- pathways << $1
47
- when r =~ /RSID:(.*)/
48
- rsids << $1
49
- end
50
- end
51
-
52
- parts << pmids * "|"
53
- parts << pathways * "|"
54
- parts << rsids * "|"
55
-
56
- parts * "\t"
57
- },
58
- :keep_empty => true do
59
-
60
- headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
61
- end
62
-
63
-
64
- process_tsv :gene_drug, 'relationships',
65
- :select => proc{|l| l =~ /^Gene:/ && l =~ /Drug:/},
66
- :header_hash => "",
67
- :merge => true,
68
- :fix => proc{|l|
69
- l.gsub!(/Gene:|Drug:|Disease:/,'')
70
- parts = l.split("\t")
71
- rels = parts.pop
72
- parts = parts.values_at 0, 2
73
-
74
- parts * "\t"
75
- },
76
- :keep_empty => true do
77
-
78
- headers ['PhGKB Gene ID', 'PhGKB Drug ID']
79
- end
80
-
81
- process_tsv :gene_disease, 'relationships',
82
- :select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
83
- :key_field => 1,
84
- :fields => 3,
85
- :merge => true,
86
- :header_hash => "",
87
- :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
88
- :keep_empty => true do
89
-
90
- headers ['PhGKB Gene ID', 'PhGKB Disease ID']
91
- end
92
-
93
- process_tsv :variants, 'variants',
94
- :key_field => 1,
95
- :fields => [3,7,8,9,10,4,6,5],
96
- :header_hash => "",
97
- :merge => true,
98
- :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
99
- :keep_empty => true do
100
-
101
- headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
102
- end
103
-
104
- file :pathways => 'source/pathways' do |t|
105
- File.open(t.name, 'w') do |f|
106
- f.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"
107
- Open.read(t.prerequisites.first).split(/\n/).each do |line|
108
- case
109
- when line =~ /(PA\d+): (.*) - \((.*)\)/
110
- f.puts [$1,$2,$3] * "\t"
111
- when line =~ /(PA\d+): (.*)/
112
- f.puts [$1,$2,""] * "\t"
113
- end
114
- end
115
- end
116
- end
117
-
118
- file :gene_pathway => 'source/pathways' do |t|
119
- pathways = {}
120
- last_pathway = nil
121
-
122
- Open.read(t.prerequisites.first).split(/\n/).each do |line|
123
- if line =~ /(P.*):(.*)/
124
- last_pathway = $1
125
- pathways[last_pathway] = {:name => $2}
126
- else
127
- type, code, name = line.split(/\t/)
128
- next unless type =='Gene'
129
- pathways[last_pathway][:genes] ||= []
130
- pathways[last_pathway][:genes] << name
131
- end
132
- end
133
- end
134
-
135
- file :gene_pathway => 'source/pathways' do |t|
136
- pathways = {}
137
- last_pathway = nil
138
-
139
- Open.read(t.prerequisites.first).split(/\n/).each do |line|
140
- if line =~ /(P.*):(.*)/
141
- last_pathway = $1
142
- pathways[last_pathway] = {:name => $2}
143
- else
144
- type, code, name = line.split(/\t/)
145
- next unless type =='Gene'
146
- pathways[last_pathway][:genes] ||= []
147
- pathways[last_pathway][:genes] << name
148
- end
149
- end
150
-
151
- File.open(t.name, 'w') do |f|
152
- f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
153
- pathways.each do |pathway, info|
154
- next if info[:genes].nil?
155
- f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
156
- end
157
- end
158
- end
159
-
160
- file :pathway_drugs => 'source/pathways' do |t|
161
- pathways = {}
162
- last_pathway = nil
163
-
164
- Open.read(t.prerequisites.first).split(/\n/).each do |line|
165
- if line =~ /(P.*):(.*)/
166
- last_pathway = $1
167
- pathways[last_pathway] = {:name => $2}
168
- else
169
- type, code, name = line.split(/\t/)
170
- next unless type =='Drug'
171
- pathways[last_pathway][:drugs] ||= []
172
- pathways[last_pathway][:drugs] << code
173
- end
174
- end
175
-
176
- File.open(t.name, 'w') do |f|
177
- f.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
178
- pathways.each do |pathway, info|
179
- next if info[:drugs].nil?
180
- f.puts "#{ pathway }\t#{info[:drugs] * "|"}"
181
- end
182
- end
183
- end
184
-
185
-
186
- file :disease_pathway => 'source/pathways' do |t|
187
- pathways = {}
188
- last_pathway = nil
189
-
190
- Open.read(t.prerequisites.first).split(/\n/).each do |line|
191
- if line =~ /(P.*):(.*)/
192
- last_pathway = $1
193
- pathways[last_pathway] = {:name => $2}
194
- else
195
- type, code, name = line.split(/\t/)
196
- next unless type =='Disease'
197
- pathways[last_pathway][:diseases] ||= []
198
- pathways[last_pathway][:diseases] << name
199
- end
200
- end
201
-
202
- File.open(t.name, 'w') do |f|
203
- f.puts "#" + %w(ID Name Diseases) * "\t"
204
- pathways.each do |pathway, info|
205
- next if info[:diseases].nil?
206
- f.puts "#{ pathway }\t#{info[:name]}\t#{info[:diseases] * "|"}"
207
- end
208
- end
209
- end
210
-
211
- add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
@@ -1,16 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
- define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
4
-
5
- process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
6
- :key => 0,
7
- :fix => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')},
8
- :fields => [1,6,8],
9
- :header_hash => "#",
10
- :merge => true,
11
- :keep_empty => true do
12
-
13
- headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
14
- end
15
-
16
-
@@ -1,30 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
- define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
4
- "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
5
-
6
- process_tsv :protein_chemical, 'protein_chemicals',
7
- :key => 1,
8
- :grep => "9606\.",
9
- :fix => lambda{|l| l.sub(/9606\./,'')},
10
- :keep_empty => true do
11
-
12
- headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
13
- end
14
-
15
- $grep_re = []
16
- process_tsv :chemicals, 'chemicals',
17
- :grep => $grep_re,
18
- :key => 0 do
19
-
20
- Rake::Task['protein_chemical'].invoke
21
-
22
- Log.debug "Getting chemicals"
23
- chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
24
- Log.debug "Getting chemicals [done]"
25
-
26
- $grep_re.replace chemicals
27
-
28
- headers ['STITCH Chemical ID', 'Name', 'Source']
29
- end
30
-
@@ -1,8 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
- define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"
4
-
5
- process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do
6
- headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
7
- end
8
-