rbbt-sources 3.0.13 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +0 -2
- data/etc/biomart/missing_in_archive +2 -5
- data/lib/rbbt/sources/biomart.rb +13 -4
- data/lib/rbbt/sources/organism.rb +2 -2
- data/lib/rbbt/sources/pfam.rb +2 -0
- data/share/install/Organism/Hsa/Rakefile +4 -4
- data/share/install/Organism/Mmu/Rakefile +2 -2
- data/share/install/Organism/organism_helpers.rb +0 -1
- data/test/rbbt/sources/test_biomart.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44c4642f4e5c392008336caa916d504c46c17216
|
4
|
+
data.tar.gz: e678ca8dd3a41639b1c5ad62adec658b8cf870fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f1a38cc05206dec04fb3db08c4685f0bd92e39305acfe92ede5d017ea33bc9eb98f3b542df4c8db3b78fb551b02c7d77c4f7ef7297e38ee868c38204ca8f4a85
|
7
|
+
data.tar.gz: 540498257d57872b85a3bc391ad697fe72cefab2228511104f074e2322e8e0815f90dbeb6d734bc1694f31cf1884d25d8303c48bc097221dadca96970689eb5b
|
@@ -1,19 +1,17 @@
|
|
1
|
-
|
1
|
+
all:
|
2
2
|
- refseq_mrna
|
3
|
+
- external_gene_name~external_gene_id
|
3
4
|
may2010:
|
4
|
-
- refseq_mrna
|
5
5
|
- agilent_wholegenome
|
6
6
|
- agilent_cgh_44b
|
7
7
|
- illumina_humanwg_6_v2
|
8
8
|
- illumina_humanwg_6_v3
|
9
9
|
may2009:
|
10
|
-
- refseq_mrna
|
11
10
|
- agilent_wholegenome
|
12
11
|
- agilent_cgh_44b
|
13
12
|
- illumina_humanwg_6_v2
|
14
13
|
- illumina_humanwg_6_v3
|
15
14
|
dec2007:
|
16
|
-
- refseq_mrna
|
17
15
|
- protein_id
|
18
16
|
- affy_hc_g110
|
19
17
|
- affy_hg_u133a_2
|
@@ -24,7 +22,6 @@ dec2007:
|
|
24
22
|
- illumina_humanwg_6_v2
|
25
23
|
- illumina_humanwg_6_v3
|
26
24
|
aug2007:
|
27
|
-
- refseq_mrna
|
28
25
|
- protein_id
|
29
26
|
- affy_hc_g110
|
30
27
|
- affy_hg_u133a_2
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -13,7 +13,7 @@ module BioMart
|
|
13
13
|
|
14
14
|
class BioMart::QueryError < StandardError; end
|
15
15
|
|
16
|
-
BIOMART_URL = 'http://
|
16
|
+
BIOMART_URL = 'http://www.ensembl.org/biomart/martservice?query='
|
17
17
|
|
18
18
|
MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.exists? ? Rbbt.etc.biomart.missing_in_archive.yaml : {}
|
19
19
|
|
@@ -36,7 +36,7 @@ module BioMart
|
|
36
36
|
raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
|
37
37
|
end
|
38
38
|
Thread.current['archive'] = date
|
39
|
-
Thread.current['archive_url'] = BIOMART_URL.sub(/
|
39
|
+
Thread.current['archive_url'] = BIOMART_URL.sub(/www/, date + '.archive')
|
40
40
|
Log.debug "Using Archive URL #{ Thread.current['archive_url'] }"
|
41
41
|
end
|
42
42
|
|
@@ -191,8 +191,17 @@ module BioMart
|
|
191
191
|
def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
192
192
|
attrs ||= []
|
193
193
|
|
194
|
-
if Thread.current['
|
195
|
-
|
194
|
+
if Thread.current['archive']
|
195
|
+
missing = MISSING_IN_ARCHIVE[Thread.current['archive']] || []
|
196
|
+
missing += MISSING_IN_ARCHIVE['all'] || []
|
197
|
+
attrs = attrs.uniq.reject{|attr| missing.include? attr[1]}
|
198
|
+
changes = {}
|
199
|
+
missing.select{|m| m.include? "~" }.each do |str|
|
200
|
+
orig,_sep, new = str.partition "~"
|
201
|
+
changes[orig] = new
|
202
|
+
end
|
203
|
+
attrs = attrs.collect{|n,k| [n, changes[k] || k] }
|
204
|
+
attrs
|
196
205
|
end
|
197
206
|
|
198
207
|
|
@@ -11,8 +11,8 @@ module Organism
|
|
11
11
|
Rbbt.etc.allowed_biomart_archives.list.collect{|build| [organism, build] * "/" }
|
12
12
|
else
|
13
13
|
Rbbt.etc.organisms.list.collect{|organism|
|
14
|
-
organism =~ /\// ? organism : Rbbt.etc.allowed_biomart_archives.list.collect{|build| [organism, build] * "/" }
|
15
|
-
}.flatten.compact.uniq
|
14
|
+
organism =~ /\// ? organism : Rbbt.etc.allowed_biomart_archives.list.collect{|build| [organism, build] * "/" } + [organism]
|
15
|
+
}.flatten.compact.uniq
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
data/lib/rbbt/sources/pfam.rb
CHANGED
@@ -12,7 +12,7 @@ $biomart_db_germline_variation = 'hsapiens_snp'
|
|
12
12
|
$biomart_db_somatic_variation = 'hsapiens_snp_som'
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
|
-
[ 'Associated Gene Name' , "
|
15
|
+
[ 'Associated Gene Name' , "external_gene_name"],
|
16
16
|
[ 'HGNC symbol', "hgnc_symbol" ],
|
17
17
|
[ 'HGNC automatic gene name', "hgnc_automatic_gene_name" ],
|
18
18
|
[ 'HGNC curated gene name ', "hgnc_curated_gene_name" ],
|
@@ -53,7 +53,7 @@ $biomart_probe_identifiers = [
|
|
53
53
|
$biomart_identifiers = [
|
54
54
|
[ 'Entrez Gene ID', "entrezgene"],
|
55
55
|
[ 'Ensembl Protein ID', "ensembl_peptide_id" ],
|
56
|
-
[ 'Associated Gene Name', "
|
56
|
+
[ 'Associated Gene Name', "external_gene_name" ],
|
57
57
|
[ 'CCDS ID', "ccds" ],
|
58
58
|
[ 'Protein ID', "protein_id" ],
|
59
59
|
[ 'RefSeq Protein ID', "refseq_peptide" ],
|
@@ -61,8 +61,8 @@ $biomart_identifiers = [
|
|
61
61
|
[ 'UniProt/SwissProt ID', "uniprot_swissprot" ],
|
62
62
|
[ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
|
63
63
|
[ 'HGNC ID', "hgnc_id", 'HGNC'],
|
64
|
-
[ 'EMBL (Genbank) ID' , "embl"] ,
|
65
|
-
[ 'RefSeq mRNA' , "refseq_mrna"] ,
|
64
|
+
#[ 'EMBL (Genbank) ID' , "embl"] ,
|
65
|
+
#[ 'RefSeq mRNA' , "refseq_mrna"] ,
|
66
66
|
|
67
67
|
# Probes
|
68
68
|
[ 'AFFY HC G110', 'affy_hc_g110' ],
|
@@ -12,7 +12,7 @@ $biomart_db_germline_variation = 'mmusculus_snp'
|
|
12
12
|
$biomart_db_somatic_variation = 'mmusculus_snp_som'
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
|
-
[ 'Associated Gene Name' , "
|
15
|
+
[ 'Associated Gene Name' , "external_gene_name"],
|
16
16
|
[ 'HGNC symbol', "hgnc_symbol" ],
|
17
17
|
[ 'HGNC automatic gene name', "hgnc_automatic_gene_name" ],
|
18
18
|
[ 'HGNC curated gene name ', "hgnc_curated_gene_name" ],
|
@@ -32,7 +32,7 @@ $biomart_probe_identifiers = [
|
|
32
32
|
$biomart_identifiers = [
|
33
33
|
[ 'Entrez Gene ID', "entrezgene"],
|
34
34
|
[ 'Ensembl Protein ID', "ensembl_peptide_id" ],
|
35
|
-
[ 'Associated Gene Name', "
|
35
|
+
[ 'Associated Gene Name', "external_gene_name" ],
|
36
36
|
[ 'CCDS ID', "ccds" ],
|
37
37
|
[ 'Protein ID', "protein_id" ],
|
38
38
|
[ 'RefSeq Protein ID', "refseq_peptide" ],
|
@@ -16,7 +16,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
16
16
|
BioMart.unset_archive
|
17
17
|
end
|
18
18
|
|
19
|
-
def
|
19
|
+
def _test_get
|
20
20
|
assert_raise BioMart::QueryError do
|
21
21
|
BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
|
22
22
|
end
|
@@ -30,7 +30,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
30
30
|
assert(tsv['852236'][1].include? 'YBL044W')
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
33
|
+
def _test_query
|
34
34
|
data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
35
35
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
36
36
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|