rbbt-sources 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,3 +13,15 @@ dec2007:
13
13
  - agilent_cgh_44b
14
14
  - illumina_humanwg_6_v2
15
15
  - illumina_humanwg_6_v3
16
+ aug2007:
17
+ - protein_id
18
+ - affy_hc_g110
19
+ - affy_hg_u133a_2
20
+ - affy_huex_1_0_st_v2
21
+ - affy_hugene_1_0_st_v1
22
+ - agilent_wholegenome
23
+ - agilent_cgh_44b
24
+ - illumina_humanwg_6_v2
25
+ - hgnc
26
+ - hgnc_id
27
+ - illumina_humanwg_6_v3
@@ -32,6 +32,9 @@ module BioMart
32
32
  EOT
33
33
 
34
34
  def self.set_archive(date)
35
+ if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
36
+ raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
37
+ end
35
38
  @archive = date
36
39
  @archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
37
40
  Log.debug "Using Archive URL #{ @archive_url }"
@@ -1,4 +1,7 @@
1
1
  require 'rbbt-util'
2
2
  module JoChem
3
- Rbbt.claim Rbbt.share.databases.JoChem, :rake, Rbbt.share.install.JoChem.Rakefile.find
3
+ extend Resource
4
+ self.subdir = "share/databases/JoChem"
5
+
6
+ JoChem.claim JoChem.root, :rake, Rbbt.share.install.JoChem.Rakefile.find
4
7
  end
@@ -25,6 +25,30 @@ $biomart_protein_identifiers = [
25
25
  [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
26
26
  ]
27
27
 
28
+ $biomart_probe_identifiers = [
29
+ [ 'AFFY HC G110', 'affy_hc_g110' ],
30
+ [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
31
+ [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
32
+ [ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
33
+ [ 'AFFY HG U133A', 'affy_hg_u133a' ],
34
+ [ 'AFFY HG U133B', 'affy_hg_u133b' ],
35
+ [ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
36
+ [ 'AFFY HG U95B', 'affy_hg_u95b' ],
37
+ [ 'AFFY HG U95C', 'affy_hg_u95c' ],
38
+ [ 'AFFY HG U95D', 'affy_hg_u95d' ],
39
+ [ 'AFFY HG U95E', 'affy_hg_u95e' ],
40
+ [ 'AFFY HG U95A', 'affy_hg_u95a' ],
41
+ [ 'AFFY HUGENEFL', 'affy_hugenefl' ],
42
+ [ 'AFFY HuEx', 'affy_huex_1_0_st_v2', "HuEx" ],
43
+ [ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
44
+ [ 'AFFY U133 X3P', 'affy_u133_x3p' ],
45
+ [ 'Agilent WholeGenome',"agilent_wholegenome" ],
46
+ [ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
47
+ [ 'Codelink ID', 'codelink' ],
48
+ [ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
49
+ [ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
50
+ ]
51
+
28
52
  $biomart_identifiers = [
29
53
  [ 'Entrez Gene ID', "entrezgene"],
30
54
  [ 'Ensembl Protein ID', "ensembl_peptide_id" ],
@@ -38,7 +62,7 @@ $biomart_identifiers = [
38
62
  [ 'HGNC ID', "hgnc_id", 'HGNC'],
39
63
  [ 'EMBL (Genbank) ID' , "embl"] ,
40
64
 
41
- # Affymetrix
65
+ # Probes
42
66
  [ 'AFFY HC G110', 'affy_hc_g110' ],
43
67
  [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
44
68
  [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
@@ -68,6 +68,7 @@ file 'identifiers' do |t|
68
68
  identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
69
69
 
70
70
  $biomart_identifiers.each do |name, key, prefix|
71
+ next unless identifiers.all_fields.include? name
71
72
  if prefix
72
73
  identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
73
74
  end
@@ -128,6 +129,16 @@ file 'protein_identifiers' do |t|
128
129
  File.open(t.name, 'w') do |f| f.puts identifiers end
129
130
  end
130
131
 
132
+ file 'probe_transcripts' do |t|
133
+ identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace => $namespace)
134
+ $biomart_probe_identifiers.each do |name, key, prefix|
135
+ if prefix
136
+ identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
137
+ end
138
+ end
139
+
140
+ File.open(t.name, 'w') do |f| f.puts identifiers end
141
+ end
131
142
 
132
143
  file 'gene_transcripts' do |t|
133
144
  transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
@@ -343,6 +354,22 @@ file 'gene_go' do |t|
343
354
  File.open(t.name, 'w') do |f| f.puts goterms end
344
355
  end
345
356
 
357
+ file 'gene_go_bp' => 'gene_go' do |t|
358
+ gene_go = TSV.open(t.prerequisites.first)
359
+
360
+ gene_go.monitor = true
361
+ gene_go.process "GO ID" do |key, go_id, values|
362
+ clean = values.zip_fields.select do |id, type|
363
+ type == "biological_process"
364
+ end
365
+ clean.collect{|id, type| id}
366
+ end
367
+
368
+
369
+ File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
370
+ end
371
+
372
+
346
373
 
347
374
  file 'gene_pfam' do |t|
348
375
  goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
5
- prerelease: false
4
+ hash: 21
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 0
10
- version: 1.0.0
9
+ - 1
10
+ version: 1.0.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-07 00:00:00 +02:00
18
+ date: 2011-10-03 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -160,7 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
160
160
  requirements: []
161
161
 
162
162
  rubyforge_project:
163
- rubygems_version: 1.3.7
163
+ rubygems_version: 1.6.2
164
164
  signing_key:
165
165
  specification_version: 3
166
166
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)