rbbt-sources 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,3 +13,15 @@ dec2007:
13
13
  - agilent_cgh_44b
14
14
  - illumina_humanwg_6_v2
15
15
  - illumina_humanwg_6_v3
16
+ aug2007:
17
+ - protein_id
18
+ - affy_hc_g110
19
+ - affy_hg_u133a_2
20
+ - affy_huex_1_0_st_v2
21
+ - affy_hugene_1_0_st_v1
22
+ - agilent_wholegenome
23
+ - agilent_cgh_44b
24
+ - illumina_humanwg_6_v2
25
+ - hgnc
26
+ - hgnc_id
27
+ - illumina_humanwg_6_v3
@@ -32,6 +32,9 @@ module BioMart
32
32
  EOT
33
33
 
34
34
  def self.set_archive(date)
35
+ if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
36
+ raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
37
+ end
35
38
  @archive = date
36
39
  @archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
37
40
  Log.debug "Using Archive URL #{ @archive_url }"
@@ -1,4 +1,7 @@
1
1
  require 'rbbt-util'
2
2
  module JoChem
3
- Rbbt.claim Rbbt.share.databases.JoChem, :rake, Rbbt.share.install.JoChem.Rakefile.find
3
+ extend Resource
4
+ self.subdir = "share/databases/JoChem"
5
+
6
+ JoChem.claim JoChem.root, :rake, Rbbt.share.install.JoChem.Rakefile.find
4
7
  end
@@ -25,6 +25,30 @@ $biomart_protein_identifiers = [
25
25
  [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
26
26
  ]
27
27
 
28
+ $biomart_probe_identifiers = [
29
+ [ 'AFFY HC G110', 'affy_hc_g110' ],
30
+ [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
31
+ [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
32
+ [ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
33
+ [ 'AFFY HG U133A', 'affy_hg_u133a' ],
34
+ [ 'AFFY HG U133B', 'affy_hg_u133b' ],
35
+ [ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
36
+ [ 'AFFY HG U95B', 'affy_hg_u95b' ],
37
+ [ 'AFFY HG U95C', 'affy_hg_u95c' ],
38
+ [ 'AFFY HG U95D', 'affy_hg_u95d' ],
39
+ [ 'AFFY HG U95E', 'affy_hg_u95e' ],
40
+ [ 'AFFY HG U95A', 'affy_hg_u95a' ],
41
+ [ 'AFFY HUGENEFL', 'affy_hugenefl' ],
42
+ [ 'AFFY HuEx', 'affy_huex_1_0_st_v2', "HuEx" ],
43
+ [ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
44
+ [ 'AFFY U133 X3P', 'affy_u133_x3p' ],
45
+ [ 'Agilent WholeGenome',"agilent_wholegenome" ],
46
+ [ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
47
+ [ 'Codelink ID', 'codelink' ],
48
+ [ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
49
+ [ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
50
+ ]
51
+
28
52
  $biomart_identifiers = [
29
53
  [ 'Entrez Gene ID', "entrezgene"],
30
54
  [ 'Ensembl Protein ID', "ensembl_peptide_id" ],
@@ -38,7 +62,7 @@ $biomart_identifiers = [
38
62
  [ 'HGNC ID', "hgnc_id", 'HGNC'],
39
63
  [ 'EMBL (Genbank) ID' , "embl"] ,
40
64
 
41
- # Affymetrix
65
+ # Probes
42
66
  [ 'AFFY HC G110', 'affy_hc_g110' ],
43
67
  [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
44
68
  [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
@@ -68,6 +68,7 @@ file 'identifiers' do |t|
68
68
  identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
69
69
 
70
70
  $biomart_identifiers.each do |name, key, prefix|
71
+ next unless identifiers.all_fields.include? name
71
72
  if prefix
72
73
  identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
73
74
  end
@@ -128,6 +129,16 @@ file 'protein_identifiers' do |t|
128
129
  File.open(t.name, 'w') do |f| f.puts identifiers end
129
130
  end
130
131
 
132
+ file 'probe_transcripts' do |t|
133
+ identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace => $namespace)
134
+ $biomart_probe_identifiers.each do |name, key, prefix|
135
+ if prefix
136
+ identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
137
+ end
138
+ end
139
+
140
+ File.open(t.name, 'w') do |f| f.puts identifiers end
141
+ end
131
142
 
132
143
  file 'gene_transcripts' do |t|
133
144
  transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
@@ -343,6 +354,22 @@ file 'gene_go' do |t|
343
354
  File.open(t.name, 'w') do |f| f.puts goterms end
344
355
  end
345
356
 
357
+ file 'gene_go_bp' => 'gene_go' do |t|
358
+ gene_go = TSV.open(t.prerequisites.first)
359
+
360
+ gene_go.monitor = true
361
+ gene_go.process "GO ID" do |key, go_id, values|
362
+ clean = values.zip_fields.select do |id, type|
363
+ type == "biological_process"
364
+ end
365
+ clean.collect{|id, type| id}
366
+ end
367
+
368
+
369
+ File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
370
+ end
371
+
372
+
346
373
 
347
374
  file 'gene_pfam' do |t|
348
375
  goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
5
- prerelease: false
4
+ hash: 21
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 0
10
- version: 1.0.0
9
+ - 1
10
+ version: 1.0.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-07 00:00:00 +02:00
18
+ date: 2011-10-03 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -160,7 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
160
160
  requirements: []
161
161
 
162
162
  rubyforge_project:
163
- rubygems_version: 1.3.7
163
+ rubygems_version: 1.6.2
164
164
  signing_key:
165
165
  specification_version: 3
166
166
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)