rbbt-sources 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
@@ -13,3 +13,15 @@ dec2007:
|
|
13
13
|
- agilent_cgh_44b
|
14
14
|
- illumina_humanwg_6_v2
|
15
15
|
- illumina_humanwg_6_v3
|
16
|
+
aug2007:
|
17
|
+
- protein_id
|
18
|
+
- affy_hc_g110
|
19
|
+
- affy_hg_u133a_2
|
20
|
+
- affy_huex_1_0_st_v2
|
21
|
+
- affy_hugene_1_0_st_v1
|
22
|
+
- agilent_wholegenome
|
23
|
+
- agilent_cgh_44b
|
24
|
+
- illumina_humanwg_6_v2
|
25
|
+
- hgnc
|
26
|
+
- hgnc_id
|
27
|
+
- illumina_humanwg_6_v3
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -32,6 +32,9 @@ module BioMart
|
|
32
32
|
EOT
|
33
33
|
|
34
34
|
def self.set_archive(date)
|
35
|
+
if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
|
36
|
+
raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
|
37
|
+
end
|
35
38
|
@archive = date
|
36
39
|
@archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
|
37
40
|
Log.debug "Using Archive URL #{ @archive_url }"
|
data/lib/rbbt/sources/jochem.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
module JoChem
|
3
|
-
|
3
|
+
extend Resource
|
4
|
+
self.subdir = "share/databases/JoChem"
|
5
|
+
|
6
|
+
JoChem.claim JoChem.root, :rake, Rbbt.share.install.JoChem.Rakefile.find
|
4
7
|
end
|
@@ -25,6 +25,30 @@ $biomart_protein_identifiers = [
|
|
25
25
|
[ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
|
26
26
|
]
|
27
27
|
|
28
|
+
$biomart_probe_identifiers = [
|
29
|
+
[ 'AFFY HC G110', 'affy_hc_g110' ],
|
30
|
+
[ 'AFFY HG FOCUS', 'affy_hg_focus' ],
|
31
|
+
[ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
|
32
|
+
[ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
|
33
|
+
[ 'AFFY HG U133A', 'affy_hg_u133a' ],
|
34
|
+
[ 'AFFY HG U133B', 'affy_hg_u133b' ],
|
35
|
+
[ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
|
36
|
+
[ 'AFFY HG U95B', 'affy_hg_u95b' ],
|
37
|
+
[ 'AFFY HG U95C', 'affy_hg_u95c' ],
|
38
|
+
[ 'AFFY HG U95D', 'affy_hg_u95d' ],
|
39
|
+
[ 'AFFY HG U95E', 'affy_hg_u95e' ],
|
40
|
+
[ 'AFFY HG U95A', 'affy_hg_u95a' ],
|
41
|
+
[ 'AFFY HUGENEFL', 'affy_hugenefl' ],
|
42
|
+
[ 'AFFY HuEx', 'affy_huex_1_0_st_v2', "HuEx" ],
|
43
|
+
[ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
|
44
|
+
[ 'AFFY U133 X3P', 'affy_u133_x3p' ],
|
45
|
+
[ 'Agilent WholeGenome',"agilent_wholegenome" ],
|
46
|
+
[ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
|
47
|
+
[ 'Codelink ID', 'codelink' ],
|
48
|
+
[ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
|
49
|
+
[ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
|
50
|
+
]
|
51
|
+
|
28
52
|
$biomart_identifiers = [
|
29
53
|
[ 'Entrez Gene ID', "entrezgene"],
|
30
54
|
[ 'Ensembl Protein ID', "ensembl_peptide_id" ],
|
@@ -38,7 +62,7 @@ $biomart_identifiers = [
|
|
38
62
|
[ 'HGNC ID', "hgnc_id", 'HGNC'],
|
39
63
|
[ 'EMBL (Genbank) ID' , "embl"] ,
|
40
64
|
|
41
|
-
#
|
65
|
+
# Probes
|
42
66
|
[ 'AFFY HC G110', 'affy_hc_g110' ],
|
43
67
|
[ 'AFFY HG FOCUS', 'affy_hg_focus' ],
|
44
68
|
[ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
|
@@ -68,6 +68,7 @@ file 'identifiers' do |t|
|
|
68
68
|
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
|
69
69
|
|
70
70
|
$biomart_identifiers.each do |name, key, prefix|
|
71
|
+
next unless identifiers.all_fields.include? name
|
71
72
|
if prefix
|
72
73
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
73
74
|
end
|
@@ -128,6 +129,16 @@ file 'protein_identifiers' do |t|
|
|
128
129
|
File.open(t.name, 'w') do |f| f.puts identifiers end
|
129
130
|
end
|
130
131
|
|
132
|
+
file 'probe_transcripts' do |t|
|
133
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace => $namespace)
|
134
|
+
$biomart_probe_identifiers.each do |name, key, prefix|
|
135
|
+
if prefix
|
136
|
+
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
File.open(t.name, 'w') do |f| f.puts identifiers end
|
141
|
+
end
|
131
142
|
|
132
143
|
file 'gene_transcripts' do |t|
|
133
144
|
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
|
@@ -343,6 +354,22 @@ file 'gene_go' do |t|
|
|
343
354
|
File.open(t.name, 'w') do |f| f.puts goterms end
|
344
355
|
end
|
345
356
|
|
357
|
+
file 'gene_go_bp' => 'gene_go' do |t|
|
358
|
+
gene_go = TSV.open(t.prerequisites.first)
|
359
|
+
|
360
|
+
gene_go.monitor = true
|
361
|
+
gene_go.process "GO ID" do |key, go_id, values|
|
362
|
+
clean = values.zip_fields.select do |id, type|
|
363
|
+
type == "biological_process"
|
364
|
+
end
|
365
|
+
clean.collect{|id, type| id}
|
366
|
+
end
|
367
|
+
|
368
|
+
|
369
|
+
File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
|
370
|
+
end
|
371
|
+
|
372
|
+
|
346
373
|
|
347
374
|
file 'gene_pfam' do |t|
|
348
375
|
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 21
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 0
|
10
|
-
version: 1.0.0
|
9
|
+
- 1
|
10
|
+
version: 1.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-10-03 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -160,7 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
160
160
|
requirements: []
|
161
161
|
|
162
162
|
rubyforge_project:
|
163
|
-
rubygems_version: 1.
|
163
|
+
rubygems_version: 1.6.2
|
164
164
|
signing_key:
|
165
165
|
specification_version: 3
|
166
166
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|