rbbt-sources 2.1.5 → 2.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/lib/rbbt/sources/entrez.rb +28 -68
- data/lib/rbbt/sources/organism.rb +6 -0
- data/lib/rbbt/sources/uniprot.rb +75 -15
- data/test/rbbt/sources/test_entrez.rb +2 -11
- data/test/rbbt/sources/test_organism.rb +12 -12
- metadata +21 -24
- data/lib/rbbt/sources/COSMIC.rb +0 -153
- data/lib/rbbt/sources/dbSNP.rb +0 -194
- data/lib/rbbt/sources/genomes1000.rb +0 -109
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
YWJlYTE4Y2M2YWM0ZjIxYTAxZTE4ZjExZmExNjQwYTJjNTg3NGVmZg==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: daf367338fb6e78d2cb7b76440e67712d27f34ab
|
4
|
+
data.tar.gz: 5b7a7308779ec4441fa2eb997d6f9b7f0dd37e3a
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
ZTM0MjA5ZWFmZjlkMzAzMmRjYTBhOGVhMjM4Y2JhMmM2OThjMjQ1MDRkY2Vi
|
11
|
-
YTFiOWYyNmEwMGZmMzg5MDFiNjQwMWNlNDVhODEwM2VjNTg0MTc=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZTY3ZGFjM2E3ZmY0OThmZjZiNzI2OTAwNWNmZWZlYmI5ODRkMTEyY2IzODNm
|
14
|
-
YmZkNjY3NTI2MjQzNjMzMTc4YjgzYjVkM2IwZjc0OTA0NWM0YzM1ZDUzMjU5
|
15
|
-
ZTliYTNjZWY4YWMwMjUxMDFkMTRiMGRmNWRkNWQyNjBjYjgwYzE=
|
6
|
+
metadata.gz: bb568b0d788284e82d0ac0d9cdbd14db7c0e59b4977ddce57e2701f25ca18bbef93d43424179a188f73daaacc87963d039a17aaf0916872945f2d384e6441552
|
7
|
+
data.tar.gz: b24f422176f10f518f692a7878c2389df0276df27a074feb5918bae1993f860fae4558d330f95de66a7857da712b5c811e487c0b117f553106215d1065f856af
|
data/lib/rbbt/sources/entrez.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
-
require 'rbbt'
|
1
|
+
require 'rbbt-util'
|
2
2
|
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/resource'
|
4
|
+
require 'rbbt/util/filecache'
|
4
5
|
require 'rbbt/bow/bow'
|
5
6
|
require 'set'
|
6
7
|
|
@@ -70,85 +71,44 @@ module Entrez
|
|
70
71
|
|
71
72
|
private
|
72
73
|
|
73
|
-
def self.get_online(geneids)
|
74
74
|
|
75
|
-
|
75
|
+
def self.get_gene(geneids)
|
76
|
+
_array = Array === geneids
|
76
77
|
|
77
|
-
|
78
|
-
|
79
|
-
begin
|
80
|
-
Misc.try3times do
|
81
|
-
url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{geneids_list * ","}"
|
78
|
+
geneids = [geneids] unless Array === geneids
|
79
|
+
geneids = geneids.compact.collect{|id| id}
|
82
80
|
|
83
|
-
|
81
|
+
result_files = FileCache.cache_online_elements(geneids, 'gene-{ID}.xml') do |ids|
|
82
|
+
result = {}
|
83
|
+
values = []
|
84
|
+
Misc.divide(ids, (ids.length / 100) + 1).each do |list|
|
85
|
+
begin
|
86
|
+
Misc.try3times do
|
87
|
+
url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{list * ","}"
|
84
88
|
|
85
|
-
|
89
|
+
xml = Open.read(url, :wget_options => {:quiet => true}, :nocache => true)
|
90
|
+
|
91
|
+
values += xml.scan(/(<Entrezgene>.*?<\/Entrezgene>)/sm).flatten
|
92
|
+
end
|
93
|
+
rescue
|
94
|
+
Log.error $!.message
|
86
95
|
end
|
87
|
-
rescue
|
88
|
-
puts $!.message
|
89
|
-
genes += geneids_list.collect{|g| nil}
|
90
96
|
end
|
91
|
-
end
|
92
97
|
|
93
|
-
|
94
|
-
list = Hash[*genes_complete.zip([nil]).flatten]
|
95
|
-
genes.each{|gene|
|
98
|
+
values.each do |xml|
|
96
99
|
geneid = gene.match(/<Gene-track_geneid>(\d+)/)[1]
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
return list
|
101
|
-
else
|
102
|
-
return genes.first
|
100
|
+
|
101
|
+
result[geneid] = xml
|
102
|
+
end
|
103
103
|
end
|
104
|
-
end
|
105
104
|
|
106
|
-
|
105
|
+
genes = {}
|
106
|
+
geneids.each{|id| genes[id] = Gene.new(Open.read(result_files[id])) }
|
107
107
|
|
108
|
-
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
def self.get_gene(geneid)
|
113
|
-
return nil if geneid.nil?
|
114
|
-
|
115
|
-
if Array === geneid
|
116
|
-
missing = []
|
117
|
-
list = {}
|
118
|
-
|
119
|
-
geneid.each{|p|
|
120
|
-
next if p.nil?
|
121
|
-
if FileCache.found(gene_filename p)
|
122
|
-
list[p] = Gene.new(Open.read(FileCache.path(gene_filename p)))
|
123
|
-
else
|
124
|
-
missing << p
|
125
|
-
end
|
126
|
-
}
|
127
|
-
|
128
|
-
|
129
|
-
return list unless missing.any?
|
130
|
-
genes = get_online(missing)
|
131
|
-
|
132
|
-
genes.each{|p, xml|
|
133
|
-
filename = gene_filename p
|
134
|
-
FileCache.add(filename,xml) unless FileCache.found(filename)
|
135
|
-
list[p] = Gene.new(xml)
|
136
|
-
}
|
137
|
-
|
138
|
-
return list
|
108
|
+
if _array
|
109
|
+
genes
|
139
110
|
else
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
if FileCache.found(filename)
|
144
|
-
return Gene.new(Open.read(FileCache.path(filename)))
|
145
|
-
else
|
146
|
-
xml = get_online(geneid)
|
147
|
-
|
148
|
-
FileCache.add(filename, xml) unless FileCache.found(filename)
|
149
|
-
|
150
|
-
return Gene.new(xml)
|
151
|
-
end
|
111
|
+
genes.values.first
|
152
112
|
end
|
153
113
|
end
|
154
114
|
|
@@ -144,6 +144,12 @@ module Organism
|
|
144
144
|
}.first
|
145
145
|
end
|
146
146
|
|
147
|
+
def self.organism_code(name)
|
148
|
+
organisms.select{|organism|
|
149
|
+
organism == name or Organism.scientific_name(organism) =~ /#{ name }/i
|
150
|
+
}.first
|
151
|
+
end
|
152
|
+
|
147
153
|
def self.known_ids(name)
|
148
154
|
TSV::Parser.new(Organism.identifiers(name).open).all_fields
|
149
155
|
end
|
data/lib/rbbt/sources/uniprot.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require 'rbbt'
|
1
|
+
require 'rbbt-util'
|
2
2
|
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/filecache'
|
3
4
|
require 'rbbt/resource'
|
4
5
|
require 'rbbt/sources/cath'
|
5
6
|
require 'rbbt/sources/uniprot'
|
@@ -32,12 +33,78 @@ module UniProt
|
|
32
33
|
tsv.to_s
|
33
34
|
end
|
34
35
|
|
35
|
-
|
36
36
|
UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt"
|
37
37
|
UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta"
|
38
|
+
|
39
|
+
def self.get_uniprot_entry(uniprotids)
|
40
|
+
_array = Array === uniprotids
|
41
|
+
|
42
|
+
uniprotids = [uniprotids] unless Array === uniprotids
|
43
|
+
uniprotids = uniprotids.compact.collect{|id| id}
|
44
|
+
|
45
|
+
result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-{ID}.xml') do |ids|
|
46
|
+
result = {}
|
47
|
+
ids.each do |id|
|
48
|
+
begin
|
49
|
+
Misc.try3times do
|
50
|
+
|
51
|
+
content = Open.read(UNIPROT_TEXT.sub("[PROTEIN]", id), :wget_options => {:quiet => true}, :nocache => true)
|
52
|
+
|
53
|
+
result[id] = content
|
54
|
+
end
|
55
|
+
rescue
|
56
|
+
Log.error $!.message
|
57
|
+
end
|
58
|
+
end
|
59
|
+
result
|
60
|
+
end
|
61
|
+
|
62
|
+
uniprots = {}
|
63
|
+
uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) }
|
64
|
+
|
65
|
+
if _array
|
66
|
+
uniprots
|
67
|
+
else
|
68
|
+
uniprots.values.first
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def self.get_uniprot_sequence(uniprotids)
|
73
|
+
_array = Array === uniprotids
|
74
|
+
|
75
|
+
uniprotids = [uniprotids] unless Array === uniprotids
|
76
|
+
uniprotids = uniprotids.compact.collect{|id| id}
|
77
|
+
|
78
|
+
result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-sequence-{ID}') do |ids|
|
79
|
+
result = {}
|
80
|
+
ids.each do |id|
|
81
|
+
begin
|
82
|
+
Misc.try3times do
|
83
|
+
|
84
|
+
url = UNIPROT_FASTA.sub "[PROTEIN]", id
|
85
|
+
text = Open.read(url, :nocache => true)
|
86
|
+
|
87
|
+
result[id] = text.split(/\n/).select{|line| line !~ /^>/} * ""
|
88
|
+
end
|
89
|
+
rescue
|
90
|
+
Log.error $!.message
|
91
|
+
end
|
92
|
+
end
|
93
|
+
result
|
94
|
+
end
|
95
|
+
|
96
|
+
uniprots = {}
|
97
|
+
uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) }
|
98
|
+
|
99
|
+
if _array
|
100
|
+
uniprots
|
101
|
+
else
|
102
|
+
uniprots.values.first
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
38
106
|
def self.pdbs(protein)
|
39
|
-
|
40
|
-
text = Open.read(url)
|
107
|
+
text = get_uniprot_entry(protein)
|
41
108
|
|
42
109
|
pdb = {}
|
43
110
|
|
@@ -59,15 +126,11 @@ module UniProt
|
|
59
126
|
end
|
60
127
|
|
61
128
|
def self.sequence(protein)
|
62
|
-
|
63
|
-
text = Open.read(url)
|
64
|
-
|
65
|
-
text.split(/\n/).select{|line| line !~ /^>/} * ""
|
129
|
+
get_uniprot_sequence(protein)
|
66
130
|
end
|
67
131
|
|
68
132
|
def self.features(protein)
|
69
|
-
|
70
|
-
text = Open.read(url)
|
133
|
+
text = get_uniprot_entry(protein)
|
71
134
|
|
72
135
|
text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
|
73
136
|
|
@@ -78,7 +141,6 @@ module UniProt
|
|
78
141
|
|
79
142
|
type = nil
|
80
143
|
parts.each do |part|
|
81
|
-
parts
|
82
144
|
if part[0..1] == "FT"
|
83
145
|
type = part.gsub(/FT\s+/,'')
|
84
146
|
next
|
@@ -111,8 +173,7 @@ module UniProt
|
|
111
173
|
|
112
174
|
|
113
175
|
def self.variants(protein)
|
114
|
-
|
115
|
-
text = Open.read(url)
|
176
|
+
text = get_uniprot_entry(protein)
|
116
177
|
|
117
178
|
text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
|
118
179
|
|
@@ -157,8 +218,7 @@ module UniProt
|
|
157
218
|
end
|
158
219
|
|
159
220
|
def self.cath(protein)
|
160
|
-
|
161
|
-
text = Open.read(url)
|
221
|
+
text = get_uniprot_entry(protein)
|
162
222
|
|
163
223
|
cath = {}
|
164
224
|
text.split(/\n/).each{|l|
|
@@ -21,21 +21,12 @@ class TestEntrez < Test::Unit::TestCase
|
|
21
21
|
assert(data['850320'].include? '1574125')
|
22
22
|
end
|
23
23
|
|
24
|
-
def test_getonline
|
25
|
-
geneids = 9129
|
26
|
-
|
27
|
-
assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids))
|
28
|
-
|
29
|
-
geneids = [9129,9]
|
30
|
-
assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids)[9129])
|
31
|
-
end
|
32
|
-
|
33
24
|
def test_getgene
|
34
25
|
geneids = 9129
|
35
|
-
assert_equal([["
|
26
|
+
assert_equal([["pre-mRNA processing factor 3"]], Entrez.get_gene(geneids).description)
|
36
27
|
|
37
28
|
geneids = [9129, 728049]
|
38
|
-
assert_equal([["
|
29
|
+
assert_equal([["pre-mRNA processing factor 3"]], Entrez.get_gene(geneids)[9129].description)
|
39
30
|
end
|
40
31
|
|
41
32
|
def test_similarity
|
@@ -5,37 +5,37 @@ require 'rbbt/sources/ensembl_ftp'
|
|
5
5
|
|
6
6
|
class TestOrganism < Test::Unit::TestCase
|
7
7
|
|
8
|
-
def
|
8
|
+
def _test_known_ids
|
9
9
|
assert Organism.known_ids("Hsa").include?("Associated Gene Name")
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
12
|
+
def _test_location
|
13
13
|
assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
|
14
14
|
end
|
15
15
|
|
16
|
-
def
|
16
|
+
def _test_identifiers
|
17
17
|
assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
|
18
18
|
assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
19
19
|
assert Organism.identifiers("Sce").tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def _test_lexicon
|
23
23
|
assert TSV.open(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def _test_guess_id
|
27
27
|
ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
|
28
28
|
gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
|
29
29
|
assert_equal "Associated Gene Name", Organism.guess_id("Sce", gene_name).first
|
30
30
|
assert_equal "Ensembl Gene ID", Organism.guess_id("Sce", ensembl).first
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
33
|
+
def _test_organisms
|
34
34
|
assert Organism.organisms.include? "Hsa"
|
35
35
|
assert_equal "Hsa", Organism.organism("Homo sapiens")
|
36
36
|
end
|
37
37
|
|
38
|
-
def
|
38
|
+
def _test_attach_translations
|
39
39
|
tsv = TSV.setup({"1020" => []}, :type => :list)
|
40
40
|
tsv.key_field = "Entrez Gene ID"
|
41
41
|
tsv.fields = []
|
@@ -47,7 +47,7 @@ class TestOrganism < Test::Unit::TestCase
|
|
47
47
|
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
48
48
|
end
|
49
49
|
|
50
|
-
def
|
50
|
+
def _test_entrez_taxids
|
51
51
|
assert_equal "Hsa", Organism.entrez_taxid_organism('9606')
|
52
52
|
end
|
53
53
|
|
@@ -61,22 +61,22 @@ class TestOrganism < Test::Unit::TestCase
|
|
61
61
|
assert_equal mutation_19, Organism.liftOver([mutation_18], target_build, source_build).first
|
62
62
|
end
|
63
63
|
|
64
|
-
def
|
64
|
+
def _test_orhtolog
|
65
65
|
require 'rbbt/entity/gene'
|
66
66
|
assert_equal ["ENSG00000133703"], Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011").ensembl.ortholog("Hsa/jun2011")
|
67
67
|
end
|
68
68
|
|
69
|
-
#def
|
69
|
+
#def _test_genes_at_chromosome
|
70
70
|
# pos = [12, 117799500]
|
71
71
|
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
72
72
|
#end
|
73
73
|
|
74
|
-
#def
|
74
|
+
#def _test_genes_at_chromosome_array
|
75
75
|
# pos = [12, [117799500, 106903900]]
|
76
76
|
# assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
77
77
|
#end
|
78
78
|
|
79
|
-
#def
|
79
|
+
#def _test_genes_at_genomic_positions
|
80
80
|
# pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
|
81
81
|
# assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
|
82
82
|
#end
|
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.5
|
4
|
+
version: 2.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 4.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rbbt-text
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: mechanize
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: libxml-ruby
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: bio
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: Data sources like PubMed, Entrez Gene, or Gene Ontology
|
@@ -88,7 +88,6 @@ extra_rdoc_files: []
|
|
88
88
|
files:
|
89
89
|
- etc/allowed_biomart_archives
|
90
90
|
- etc/biomart/missing_in_archive
|
91
|
-
- lib/rbbt/sources/COSMIC.rb
|
92
91
|
- lib/rbbt/sources/COSTART.rb
|
93
92
|
- lib/rbbt/sources/CTCAE.rb
|
94
93
|
- lib/rbbt/sources/HPRD.rb
|
@@ -100,11 +99,9 @@ files:
|
|
100
99
|
- lib/rbbt/sources/bibtex.rb
|
101
100
|
- lib/rbbt/sources/biomart.rb
|
102
101
|
- lib/rbbt/sources/cath.rb
|
103
|
-
- lib/rbbt/sources/dbSNP.rb
|
104
102
|
- lib/rbbt/sources/ensembl.rb
|
105
103
|
- lib/rbbt/sources/ensembl_ftp.rb
|
106
104
|
- lib/rbbt/sources/entrez.rb
|
107
|
-
- lib/rbbt/sources/genomes1000.rb
|
108
105
|
- lib/rbbt/sources/go.rb
|
109
106
|
- lib/rbbt/sources/gscholar.rb
|
110
107
|
- lib/rbbt/sources/jochem.rb
|
@@ -143,25 +140,25 @@ require_paths:
|
|
143
140
|
- lib
|
144
141
|
required_ruby_version: !ruby/object:Gem::Requirement
|
145
142
|
requirements:
|
146
|
-
- -
|
143
|
+
- - ">="
|
147
144
|
- !ruby/object:Gem::Version
|
148
145
|
version: '0'
|
149
146
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
147
|
requirements:
|
151
|
-
- -
|
148
|
+
- - ">="
|
152
149
|
- !ruby/object:Gem::Version
|
153
150
|
version: '0'
|
154
151
|
requirements: []
|
155
152
|
rubyforge_project:
|
156
|
-
rubygems_version: 2.2.
|
153
|
+
rubygems_version: 2.2.1
|
157
154
|
signing_key:
|
158
155
|
specification_version: 4
|
159
156
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
160
157
|
test_files:
|
161
|
-
- test/rbbt/sources/
|
162
|
-
- test/rbbt/sources/test_entrez.rb
|
158
|
+
- test/rbbt/sources/test_pubmed.rb
|
163
159
|
- test/rbbt/sources/test_biomart.rb
|
164
160
|
- test/rbbt/sources/test_gscholar.rb
|
161
|
+
- test/rbbt/sources/test_entrez.rb
|
162
|
+
- test/rbbt/sources/test_go.rb
|
165
163
|
- test/rbbt/sources/test_organism.rb
|
166
|
-
- test/rbbt/sources/test_pubmed.rb
|
167
164
|
- test/test_helper.rb
|
data/lib/rbbt/sources/COSMIC.rb
DELETED
@@ -1,153 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/resource'
|
3
|
-
|
4
|
-
module COSMIC
|
5
|
-
extend Resource
|
6
|
-
self.subdir = "share/databases/COSMIC"
|
7
|
-
|
8
|
-
COSMIC.claim COSMIC.mutations, :proc do
|
9
|
-
url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCompleteExport_v67_241013.tsv.gz"
|
10
|
-
|
11
|
-
stream = CMD.cmd('awk \'BEGIN{FS="\t"} { if ($12 != "" && $12 != "Mutation ID") { sub($12, "COSM" $12 ":" $4)}; print}\'', :in => Open.open(url), :pipe => true)
|
12
|
-
tsv = TSV.open(stream, :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
|
13
|
-
tsv.fields = tsv.fields.collect{|f| f == "Gene name" ? "Associated Gene Name" : f}
|
14
|
-
tsv.add_field "Genomic Mutation" do |mid, values|
|
15
|
-
position = values["Mutation GRCh37 genome position"]
|
16
|
-
cds = values["Mutation CDS"]
|
17
|
-
|
18
|
-
if position.nil? or position.empty?
|
19
|
-
nil
|
20
|
-
else
|
21
|
-
position = position.split("-").first
|
22
|
-
|
23
|
-
chr, pos = position.split(":")
|
24
|
-
chr = "X" if chr == "23"
|
25
|
-
chr = "Y" if chr == "24"
|
26
|
-
chr = "M" if chr == "25"
|
27
|
-
position = [chr, pos ] * ":"
|
28
|
-
|
29
|
-
if cds.nil?
|
30
|
-
position
|
31
|
-
else
|
32
|
-
change = case
|
33
|
-
when cds =~ />/
|
34
|
-
cds.split(">").last
|
35
|
-
when cds =~ /del/
|
36
|
-
deletion = cds.split("del").last
|
37
|
-
case
|
38
|
-
when deletion =~ /^\d+$/
|
39
|
-
"-" * deletion.to_i
|
40
|
-
when deletion =~ /^[ACTG]+$/i
|
41
|
-
"-" * deletion.length
|
42
|
-
else
|
43
|
-
Log.debug "Unknown deletion: #{ deletion }"
|
44
|
-
deletion
|
45
|
-
end
|
46
|
-
when cds =~ /ins/
|
47
|
-
insertion = cds.split("ins").last
|
48
|
-
case
|
49
|
-
when insertion =~ /^\d+$/
|
50
|
-
"+" + "N" * insertion.to_i
|
51
|
-
when insertion =~ /^[NACTG]+$/i
|
52
|
-
"+" + insertion
|
53
|
-
else
|
54
|
-
Log.debug "Unknown insertion: #{insertion }"
|
55
|
-
insertion
|
56
|
-
end
|
57
|
-
else
|
58
|
-
Log.debug "Unknown change: #{cds}"
|
59
|
-
"?(" << cds << ")"
|
60
|
-
end
|
61
|
-
position + ":" + change
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
tsv.to_s.gsub(/(\d)-(\d)/,'\1:\2')
|
67
|
-
end
|
68
|
-
|
69
|
-
COSMIC.claim COSMIC.mutations_hg18, :proc do |filename|
|
70
|
-
require 'rbbt/sources/organism'
|
71
|
-
file = COSMIC.mutations.open
|
72
|
-
begin
|
73
|
-
|
74
|
-
while (line = file.gets) !~ /Genomic Mutation/; end
|
75
|
-
fields = line[1..-2].split("\t")
|
76
|
-
mutation_pos = fields.index "Genomic Mutation"
|
77
|
-
|
78
|
-
mutations = CMD.cmd("grep -v '^#'|cut -f #{mutation_pos + 1}|sort -u", :in => COSMIC.mutations.open).read.split("\n").select{|m| m.include? ":" }
|
79
|
-
|
80
|
-
translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
|
81
|
-
|
82
|
-
File.open(filename, 'w') do |f|
|
83
|
-
f.puts "#: :type=:list#:namespace=Hsa/may2009"
|
84
|
-
f.puts "#" + fields * "\t"
|
85
|
-
while line = file.gets do
|
86
|
-
next if line[0] == "#"[0]
|
87
|
-
line.strip!
|
88
|
-
parts = line.split("\t")
|
89
|
-
parts[mutation_pos] = translations[parts[mutation_pos]]
|
90
|
-
f.puts parts * "\t"
|
91
|
-
end
|
92
|
-
end
|
93
|
-
rescue Exception
|
94
|
-
FileUtils.rm filename if File.exists? filename
|
95
|
-
raise $!
|
96
|
-
ensure
|
97
|
-
file.close
|
98
|
-
end
|
99
|
-
|
100
|
-
nil
|
101
|
-
end
|
102
|
-
|
103
|
-
|
104
|
-
def self.rsid_index(organism, chromosome = nil)
|
105
|
-
build = Organism.hg_build(organism)
|
106
|
-
|
107
|
-
tag = [build, chromosome] * ":"
|
108
|
-
fwt = nil
|
109
|
-
Persist.persist("StaticPosIndex for COSMIC [#{ tag }]", :fwt, :persist => true) do
|
110
|
-
value_size = 0
|
111
|
-
file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
|
112
|
-
chr_positions = []
|
113
|
-
begin
|
114
|
-
Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
|
115
|
-
next if line[0] == "#"[0]
|
116
|
-
rsid, mutation = line.split("\t").values_at 0, 25
|
117
|
-
next if mutation.nil? or mutation.empty?
|
118
|
-
chr, pos = mutation.split(":")
|
119
|
-
next if chr != chromosome or pos.nil? or pos.empty?
|
120
|
-
chr_positions << [rsid, pos.to_i]
|
121
|
-
value_size = rsid.length if rsid.length > value_size
|
122
|
-
end
|
123
|
-
rescue
|
124
|
-
end
|
125
|
-
fwt = FixWidthTable.new :memory, value_size
|
126
|
-
fwt.add_point(chr_positions)
|
127
|
-
fwt
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def self.mutation_index(organism)
|
132
|
-
build = Organism.hg_build(organism)
|
133
|
-
file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
|
134
|
-
@mutation_index ||= {}
|
135
|
-
@mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
|
136
|
-
end
|
137
|
-
|
138
|
-
|
139
|
-
end
|
140
|
-
|
141
|
-
if defined? Entity
|
142
|
-
if defined? Gene and Entity === Gene
|
143
|
-
module Gene
|
144
|
-
property :COSMIC_rsids => :single2array do
|
145
|
-
COSMIC.rsid_index(organism, chromosome)[self.chr_range]
|
146
|
-
end
|
147
|
-
|
148
|
-
property :COSMIC_mutations => :single2array do
|
149
|
-
GenomicMutation.setup(COSMIC.mutation_index(organism).values_at(*self.COSMIC_rsids).uniq, "COSMIC mutations over #{self.name || self}", organism, false)
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|
data/lib/rbbt/sources/dbSNP.rb
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/open'
|
3
|
-
require 'rbbt/resource'
|
4
|
-
require 'net/ftp'
|
5
|
-
|
6
|
-
module DbSNP
|
7
|
-
extend Resource
|
8
|
-
self.subdir = "share/databases/dbSNP"
|
9
|
-
|
10
|
-
URL = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606/VCF/common_all.vcf.gz"
|
11
|
-
|
12
|
-
DbSNP.claim DbSNP.mutations_ncbi, :proc do
|
13
|
-
tsv = TSV.setup({}, :key_field => "RS ID", :fields => ["Genomic Mutation"], :type => :flat)
|
14
|
-
file = Open.open(URL, :nocache => true)
|
15
|
-
while line = file.gets do
|
16
|
-
next if line[0] == "#"[0]
|
17
|
-
chr, position, id, ref, alt = line.split "\t"
|
18
|
-
|
19
|
-
mutations = alt.split(",").collect do |a|
|
20
|
-
if alt[0] == ref[0]
|
21
|
-
alt[0] = '+'[0]
|
22
|
-
end
|
23
|
-
[chr, position, alt] * ":"
|
24
|
-
end
|
25
|
-
|
26
|
-
tsv.namespace = "Hsa/may2012"
|
27
|
-
tsv[id] = mutations
|
28
|
-
end
|
29
|
-
|
30
|
-
tsv.to_s
|
31
|
-
end
|
32
|
-
|
33
|
-
DbSNP.claim DbSNP.rsids, :proc do |filename|
|
34
|
-
ftp = Net::FTP.new('ftp.broadinstitute.org')
|
35
|
-
ftp.passive = true
|
36
|
-
ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
|
37
|
-
ftp.chdir('/bundle/2.3/hg19')
|
38
|
-
|
39
|
-
tmpfile = TmpFile.tmp_file + '.gz'
|
40
|
-
ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
|
41
|
-
|
42
|
-
file = Open.open(tmpfile, :nocache => true)
|
43
|
-
begin
|
44
|
-
File.open(filename, 'w') do |f|
|
45
|
-
f.puts "#: :type=:list#:namespace=Hsa/may2012"
|
46
|
-
f.puts "#" + ["RS ID", "GMAF", "G5", "G5A", "dbSNP Build ID"] * "\t"
|
47
|
-
while line = file.gets do
|
48
|
-
next if line[0] == "#"[0]
|
49
|
-
|
50
|
-
chr, position, id, ref, muts, qual, filter, info = line.split "\t"
|
51
|
-
|
52
|
-
g5 = g5a = dbsnp_build_id = gmaf = nil
|
53
|
-
|
54
|
-
gmaf = $1 if info =~ /GMAF=([0-9.]+)/
|
55
|
-
g5 = true if info =~ /\bG5\b/
|
56
|
-
g5a = true if info =~ /\bG5A\b/
|
57
|
-
dbsnp_build_id = $1 if info =~ /dbSNPBuildID=(\d+)/
|
58
|
-
|
59
|
-
f.puts [id, gmaf, g5, g5a, dbsnp_build_id] * "\t"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
rescue Exception
|
63
|
-
FileUtils.rm filename if File.exists? filename
|
64
|
-
raise $!
|
65
|
-
ensure
|
66
|
-
file.close
|
67
|
-
FileUtils.rm tmpfile
|
68
|
-
end
|
69
|
-
|
70
|
-
nil
|
71
|
-
end
|
72
|
-
|
73
|
-
DbSNP.claim DbSNP.mutations, :proc do |filename|
|
74
|
-
ftp = Net::FTP.new('ftp.broadinstitute.org')
|
75
|
-
ftp.passive = true
|
76
|
-
ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
|
77
|
-
ftp.chdir('/bundle/2.3/hg19')
|
78
|
-
|
79
|
-
tmpfile = TmpFile.tmp_file + '.gz'
|
80
|
-
ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
|
81
|
-
|
82
|
-
file = Open.open(tmpfile, :nocache => true)
|
83
|
-
begin
|
84
|
-
File.open(filename, 'w') do |f|
|
85
|
-
f.puts "#: :type=:flat#:namespace=Hsa/may2012"
|
86
|
-
f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
|
87
|
-
while line = file.gets do
|
88
|
-
next if line[0] == "#"[0]
|
89
|
-
|
90
|
-
chr, position, id, ref, muts, qual, filter, info = line.split "\t"
|
91
|
-
|
92
|
-
chr.sub!('chr', '')
|
93
|
-
|
94
|
-
position, muts = Misc.correct_vcf_mutation(position.to_i, ref, muts)
|
95
|
-
|
96
|
-
mutations = muts.collect{|mut| [chr, position, mut] * ":" }
|
97
|
-
|
98
|
-
f.puts ([id] + mutations) * "\t"
|
99
|
-
end
|
100
|
-
end
|
101
|
-
rescue Exception
|
102
|
-
FileUtils.rm filename if File.exists? filename
|
103
|
-
raise $!
|
104
|
-
ensure
|
105
|
-
file.close
|
106
|
-
FileUtils.rm tmpfile
|
107
|
-
end
|
108
|
-
|
109
|
-
nil
|
110
|
-
end
|
111
|
-
|
112
|
-
DbSNP.claim DbSNP.mutations_hg18, :proc do |filename|
|
113
|
-
require 'rbbt/sources/organism'
|
114
|
-
|
115
|
-
mutations = CMD.cmd("grep -v '^#'|cut -f 2|sort -u", :in => DbSNP.mutations.open).read.split("\n").collect{|l| l.split("|")}.flatten
|
116
|
-
|
117
|
-
translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
|
118
|
-
begin
|
119
|
-
file = Open.open(DbSNP.mutations.find, :nocache => true)
|
120
|
-
File.open(filename, 'w') do |f|
|
121
|
-
f.puts "#: :type=:flat#:namespace=Hsa/may2009"
|
122
|
-
f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
|
123
|
-
while line = file.gets do
|
124
|
-
next if line[0] == "#"[0]
|
125
|
-
parts = line.split("\t")
|
126
|
-
parts[1..-1] = parts[1..-1].collect{|p| translations[p]} * "|"
|
127
|
-
f.puts parts * "\t"
|
128
|
-
end
|
129
|
-
end
|
130
|
-
rescue Exception
|
131
|
-
FileUtils.rm filename if File.exists? filename
|
132
|
-
raise $!
|
133
|
-
ensure
|
134
|
-
file.close
|
135
|
-
end
|
136
|
-
|
137
|
-
nil
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.rsid_index(organism, chromosome = nil)
|
141
|
-
build = Organism.hg_build(organism)
|
142
|
-
|
143
|
-
tag = [build, chromosome] * ":"
|
144
|
-
Persist.persist("StaticPosIndex for dbSNP [#{ tag }]", :fwt, :persist => true) do
|
145
|
-
value_size = 0
|
146
|
-
file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
|
147
|
-
chr_positions = []
|
148
|
-
Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
|
149
|
-
next if line[0] == "#"[0]
|
150
|
-
rsid, mutation = line.split("\t")
|
151
|
-
next if mutation.nil? or mutation.empty?
|
152
|
-
chr, pos = mutation.split(":")
|
153
|
-
next if chr != chromosome or pos.nil? or pos.empty?
|
154
|
-
chr_positions << [rsid, pos.to_i]
|
155
|
-
value_size = rsid.length if rsid.length > value_size
|
156
|
-
end
|
157
|
-
fwt = FixWidthTable.new :memory, value_size
|
158
|
-
fwt.add_point(chr_positions)
|
159
|
-
fwt
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def self.mutation_index(organism)
|
164
|
-
build = Organism.hg_build(organism)
|
165
|
-
file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
|
166
|
-
@mutation_index ||= {}
|
167
|
-
@mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
|
168
|
-
end
|
169
|
-
|
170
|
-
end
|
171
|
-
|
172
|
-
if defined? Entity
|
173
|
-
if defined? Gene and Entity === Gene
|
174
|
-
module Gene
|
175
|
-
property :dbSNP_rsids => :single2array do
|
176
|
-
DbSNP.rsid_index(organism, chromosome)[self.chr_range]
|
177
|
-
end
|
178
|
-
|
179
|
-
property :dbSNP_mutations => :single2array do
|
180
|
-
GenomicMutation.setup(DbSNP.mutation_index(organism).values_at(*self.dbSNP_rsids).compact.flatten.uniq, "dbSNP mutations over #{self.name || self}", organism, true)
|
181
|
-
end
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
if defined? GenomicMutation and Entity === GenomicMutation
|
186
|
-
module GenomicMutation
|
187
|
-
property :dbSNP => :array2single do
|
188
|
-
dbSNP.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["RS ID"], :type => :single).values_at *self
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/open'
|
3
|
-
require 'rbbt/resource'
|
4
|
-
require 'rbbt/entity/gene'
|
5
|
-
|
6
|
-
module Genomes1000
|
7
|
-
extend Resource
|
8
|
-
self.subdir = "share/databases/genomes_1000"
|
9
|
-
|
10
|
-
RELEASE_URL = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20110521/ALL.wgs.phase1_release_v3.20101123.snps_indels_sv.sites.vcf.gz"
|
11
|
-
|
12
|
-
Genomes1000.claim Genomes1000.mutations, :proc do |filename|
|
13
|
-
|
14
|
-
begin
|
15
|
-
Open.write(filename) do |file|
|
16
|
-
file.puts "#: :type=:single#:namespace=Hsa"
|
17
|
-
file.puts "#Variant ID\tGenomic Mutation"
|
18
|
-
|
19
|
-
Open.read(RELEASE_URL) do |line|
|
20
|
-
next if line[0] == "#"[0]
|
21
|
-
|
22
|
-
chromosome, position, id, references, alternative, quality, filter, info = line.split("\t")
|
23
|
-
|
24
|
-
file.puts [id, [chromosome, position, alternative] * ":"] * "\t"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
rescue
|
28
|
-
FileUtils.rm filename if File.exists? filename
|
29
|
-
raise $!
|
30
|
-
end
|
31
|
-
nil
|
32
|
-
end
|
33
|
-
|
34
|
-
|
35
|
-
Genomes1000.claim Genomes1000.mutations_hg18, :proc do
|
36
|
-
require 'rbbt/sources/organism'
|
37
|
-
|
38
|
-
hg19_tsv = Genomes1000.mutations.tsv :unnamed => true
|
39
|
-
|
40
|
-
mutations = hg19_tsv.values
|
41
|
-
|
42
|
-
translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
|
43
|
-
|
44
|
-
tsv = hg19_tsv.process "Genomic Mutation" do |mutation|
|
45
|
-
translations[mutation]
|
46
|
-
end
|
47
|
-
|
48
|
-
tsv.namespace = "Hsa/may2009"
|
49
|
-
|
50
|
-
tsv.to_s
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.rsid_index(organism, chromosome = nil)
|
54
|
-
build = Organism.hg_build(organism)
|
55
|
-
|
56
|
-
tag = [build, chromosome] * ":"
|
57
|
-
Persist.persist("StaticPosIndex for Genomes1000 [#{ tag }]", :fwt, :persist => true) do
|
58
|
-
value_size = 0
|
59
|
-
file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
|
60
|
-
chr_positions = []
|
61
|
-
Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
|
62
|
-
next if line[0] == "#"[0]
|
63
|
-
rsid, mutation = line.split("\t")
|
64
|
-
next if mutation.nil? or mutation.empty?
|
65
|
-
chr, pos = mutation.split(":")
|
66
|
-
next if chr != chromosome or pos.nil? or pos.empty?
|
67
|
-
chr_positions << [rsid, pos.to_i]
|
68
|
-
value_size = rsid.length if rsid.length > value_size
|
69
|
-
end
|
70
|
-
fwt = FixWidthTable.new :memory, value_size
|
71
|
-
fwt.add_point(chr_positions)
|
72
|
-
fwt
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.mutation_index(organism)
|
77
|
-
build = Organism.hg_build(organism)
|
78
|
-
file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
|
79
|
-
@mutation_index ||= {}
|
80
|
-
@mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
|
81
|
-
end
|
82
|
-
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
|
87
|
-
if defined? Entity
|
88
|
-
if defined? Gene and Entity === Gene
|
89
|
-
module Gene
|
90
|
-
property :genomes_1000_rsids => :single2array do
|
91
|
-
Genomes1000.rsid_index(organism, chromosome)[self.chr_range]
|
92
|
-
end
|
93
|
-
|
94
|
-
property :genomes_1000_mutations => :single2array do
|
95
|
-
GenomicMutation.setup(Genomes1000.mutation_index(organism).values_at(*self.genomes_1000_rsids).uniq, "1000 Genomes mutations over #{self.name || self}", organism, true)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
if defined? GenomicMutation and Entity === GenomicMutation
|
101
|
-
module GenomicMutation
|
102
|
-
property :genomes_1000 => :array2single do
|
103
|
-
Genomes1000.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["Variant ID"], :type => :single).values_at *self
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
|