rbbt-sources 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -13,3 +13,15 @@ dec2007:
|
|
|
13
13
|
- agilent_cgh_44b
|
|
14
14
|
- illumina_humanwg_6_v2
|
|
15
15
|
- illumina_humanwg_6_v3
|
|
16
|
+
aug2007:
|
|
17
|
+
- protein_id
|
|
18
|
+
- affy_hc_g110
|
|
19
|
+
- affy_hg_u133a_2
|
|
20
|
+
- affy_huex_1_0_st_v2
|
|
21
|
+
- affy_hugene_1_0_st_v1
|
|
22
|
+
- agilent_wholegenome
|
|
23
|
+
- agilent_cgh_44b
|
|
24
|
+
- illumina_humanwg_6_v2
|
|
25
|
+
- hgnc
|
|
26
|
+
- hgnc_id
|
|
27
|
+
- illumina_humanwg_6_v3
|
data/lib/rbbt/sources/biomart.rb
CHANGED
|
@@ -32,6 +32,9 @@ module BioMart
|
|
|
32
32
|
EOT
|
|
33
33
|
|
|
34
34
|
def self.set_archive(date)
|
|
35
|
+
if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
|
|
36
|
+
raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
|
|
37
|
+
end
|
|
35
38
|
@archive = date
|
|
36
39
|
@archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
|
|
37
40
|
Log.debug "Using Archive URL #{ @archive_url }"
|
data/lib/rbbt/sources/jochem.rb
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
require 'rbbt-util'
|
|
2
2
|
module JoChem
|
|
3
|
-
|
|
3
|
+
extend Resource
|
|
4
|
+
self.subdir = "share/databases/JoChem"
|
|
5
|
+
|
|
6
|
+
JoChem.claim JoChem.root, :rake, Rbbt.share.install.JoChem.Rakefile.find
|
|
4
7
|
end
|
|
@@ -25,6 +25,30 @@ $biomart_protein_identifiers = [
|
|
|
25
25
|
[ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
|
|
26
26
|
]
|
|
27
27
|
|
|
28
|
+
$biomart_probe_identifiers = [
|
|
29
|
+
[ 'AFFY HC G110', 'affy_hc_g110' ],
|
|
30
|
+
[ 'AFFY HG FOCUS', 'affy_hg_focus' ],
|
|
31
|
+
[ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
|
|
32
|
+
[ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
|
|
33
|
+
[ 'AFFY HG U133A', 'affy_hg_u133a' ],
|
|
34
|
+
[ 'AFFY HG U133B', 'affy_hg_u133b' ],
|
|
35
|
+
[ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
|
|
36
|
+
[ 'AFFY HG U95B', 'affy_hg_u95b' ],
|
|
37
|
+
[ 'AFFY HG U95C', 'affy_hg_u95c' ],
|
|
38
|
+
[ 'AFFY HG U95D', 'affy_hg_u95d' ],
|
|
39
|
+
[ 'AFFY HG U95E', 'affy_hg_u95e' ],
|
|
40
|
+
[ 'AFFY HG U95A', 'affy_hg_u95a' ],
|
|
41
|
+
[ 'AFFY HUGENEFL', 'affy_hugenefl' ],
|
|
42
|
+
[ 'AFFY HuEx', 'affy_huex_1_0_st_v2', "HuEx" ],
|
|
43
|
+
[ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
|
|
44
|
+
[ 'AFFY U133 X3P', 'affy_u133_x3p' ],
|
|
45
|
+
[ 'Agilent WholeGenome',"agilent_wholegenome" ],
|
|
46
|
+
[ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
|
|
47
|
+
[ 'Codelink ID', 'codelink' ],
|
|
48
|
+
[ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
|
|
49
|
+
[ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
|
|
50
|
+
]
|
|
51
|
+
|
|
28
52
|
$biomart_identifiers = [
|
|
29
53
|
[ 'Entrez Gene ID', "entrezgene"],
|
|
30
54
|
[ 'Ensembl Protein ID', "ensembl_peptide_id" ],
|
|
@@ -38,7 +62,7 @@ $biomart_identifiers = [
|
|
|
38
62
|
[ 'HGNC ID', "hgnc_id", 'HGNC'],
|
|
39
63
|
[ 'EMBL (Genbank) ID' , "embl"] ,
|
|
40
64
|
|
|
41
|
-
#
|
|
65
|
+
# Probes
|
|
42
66
|
[ 'AFFY HC G110', 'affy_hc_g110' ],
|
|
43
67
|
[ 'AFFY HG FOCUS', 'affy_hg_focus' ],
|
|
44
68
|
[ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
|
|
@@ -68,6 +68,7 @@ file 'identifiers' do |t|
|
|
|
68
68
|
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
|
|
69
69
|
|
|
70
70
|
$biomart_identifiers.each do |name, key, prefix|
|
|
71
|
+
next unless identifiers.all_fields.include? name
|
|
71
72
|
if prefix
|
|
72
73
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
|
73
74
|
end
|
|
@@ -128,6 +129,16 @@ file 'protein_identifiers' do |t|
|
|
|
128
129
|
File.open(t.name, 'w') do |f| f.puts identifiers end
|
|
129
130
|
end
|
|
130
131
|
|
|
132
|
+
file 'probe_transcripts' do |t|
|
|
133
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace => $namespace)
|
|
134
|
+
$biomart_probe_identifiers.each do |name, key, prefix|
|
|
135
|
+
if prefix
|
|
136
|
+
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
File.open(t.name, 'w') do |f| f.puts identifiers end
|
|
141
|
+
end
|
|
131
142
|
|
|
132
143
|
file 'gene_transcripts' do |t|
|
|
133
144
|
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
|
|
@@ -343,6 +354,22 @@ file 'gene_go' do |t|
|
|
|
343
354
|
File.open(t.name, 'w') do |f| f.puts goterms end
|
|
344
355
|
end
|
|
345
356
|
|
|
357
|
+
file 'gene_go_bp' => 'gene_go' do |t|
|
|
358
|
+
gene_go = TSV.open(t.prerequisites.first)
|
|
359
|
+
|
|
360
|
+
gene_go.monitor = true
|
|
361
|
+
gene_go.process "GO ID" do |key, go_id, values|
|
|
362
|
+
clean = values.zip_fields.select do |id, type|
|
|
363
|
+
type == "biological_process"
|
|
364
|
+
end
|
|
365
|
+
clean.collect{|id, type| id}
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
|
|
346
373
|
|
|
347
374
|
file 'gene_pfam' do |t|
|
|
348
375
|
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbbt-sources
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
5
|
-
prerelease:
|
|
4
|
+
hash: 21
|
|
5
|
+
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 1
|
|
8
8
|
- 0
|
|
9
|
-
-
|
|
10
|
-
version: 1.0.
|
|
9
|
+
- 1
|
|
10
|
+
version: 1.0.1
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Miguel Vazquez
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2011-
|
|
18
|
+
date: 2011-10-03 00:00:00 +02:00
|
|
19
19
|
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
@@ -160,7 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
160
160
|
requirements: []
|
|
161
161
|
|
|
162
162
|
rubyforge_project:
|
|
163
|
-
rubygems_version: 1.
|
|
163
|
+
rubygems_version: 1.6.2
|
|
164
164
|
signing_key:
|
|
165
165
|
specification_version: 3
|
|
166
166
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|