rbbt-sources 2.1.5 → 2.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/lib/rbbt/sources/entrez.rb +28 -68
- data/lib/rbbt/sources/organism.rb +6 -0
- data/lib/rbbt/sources/uniprot.rb +75 -15
- data/test/rbbt/sources/test_entrez.rb +2 -11
- data/test/rbbt/sources/test_organism.rb +12 -12
- metadata +21 -24
- data/lib/rbbt/sources/COSMIC.rb +0 -153
- data/lib/rbbt/sources/dbSNP.rb +0 -194
- data/lib/rbbt/sources/genomes1000.rb +0 -109
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
YWJlYTE4Y2M2YWM0ZjIxYTAxZTE4ZjExZmExNjQwYTJjNTg3NGVmZg==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: daf367338fb6e78d2cb7b76440e67712d27f34ab
|
4
|
+
data.tar.gz: 5b7a7308779ec4441fa2eb997d6f9b7f0dd37e3a
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
ZTM0MjA5ZWFmZjlkMzAzMmRjYTBhOGVhMjM4Y2JhMmM2OThjMjQ1MDRkY2Vi
|
11
|
-
YTFiOWYyNmEwMGZmMzg5MDFiNjQwMWNlNDVhODEwM2VjNTg0MTc=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZTY3ZGFjM2E3ZmY0OThmZjZiNzI2OTAwNWNmZWZlYmI5ODRkMTEyY2IzODNm
|
14
|
-
YmZkNjY3NTI2MjQzNjMzMTc4YjgzYjVkM2IwZjc0OTA0NWM0YzM1ZDUzMjU5
|
15
|
-
ZTliYTNjZWY4YWMwMjUxMDFkMTRiMGRmNWRkNWQyNjBjYjgwYzE=
|
6
|
+
metadata.gz: bb568b0d788284e82d0ac0d9cdbd14db7c0e59b4977ddce57e2701f25ca18bbef93d43424179a188f73daaacc87963d039a17aaf0916872945f2d384e6441552
|
7
|
+
data.tar.gz: b24f422176f10f518f692a7878c2389df0276df27a074feb5918bae1993f860fae4558d330f95de66a7857da712b5c811e487c0b117f553106215d1065f856af
|
data/lib/rbbt/sources/entrez.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
-
require 'rbbt'
|
1
|
+
require 'rbbt-util'
|
2
2
|
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/resource'
|
4
|
+
require 'rbbt/util/filecache'
|
4
5
|
require 'rbbt/bow/bow'
|
5
6
|
require 'set'
|
6
7
|
|
@@ -70,85 +71,44 @@ module Entrez
|
|
70
71
|
|
71
72
|
private
|
72
73
|
|
73
|
-
def self.get_online(geneids)
|
74
74
|
|
75
|
-
|
75
|
+
def self.get_gene(geneids)
|
76
|
+
_array = Array === geneids
|
76
77
|
|
77
|
-
|
78
|
-
|
79
|
-
begin
|
80
|
-
Misc.try3times do
|
81
|
-
url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{geneids_list * ","}"
|
78
|
+
geneids = [geneids] unless Array === geneids
|
79
|
+
geneids = geneids.compact.collect{|id| id}
|
82
80
|
|
83
|
-
|
81
|
+
result_files = FileCache.cache_online_elements(geneids, 'gene-{ID}.xml') do |ids|
|
82
|
+
result = {}
|
83
|
+
values = []
|
84
|
+
Misc.divide(ids, (ids.length / 100) + 1).each do |list|
|
85
|
+
begin
|
86
|
+
Misc.try3times do
|
87
|
+
url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{list * ","}"
|
84
88
|
|
85
|
-
|
89
|
+
xml = Open.read(url, :wget_options => {:quiet => true}, :nocache => true)
|
90
|
+
|
91
|
+
values += xml.scan(/(<Entrezgene>.*?<\/Entrezgene>)/sm).flatten
|
92
|
+
end
|
93
|
+
rescue
|
94
|
+
Log.error $!.message
|
86
95
|
end
|
87
|
-
rescue
|
88
|
-
puts $!.message
|
89
|
-
genes += geneids_list.collect{|g| nil}
|
90
96
|
end
|
91
|
-
end
|
92
97
|
|
93
|
-
|
94
|
-
list = Hash[*genes_complete.zip([nil]).flatten]
|
95
|
-
genes.each{|gene|
|
98
|
+
values.each do |xml|
|
96
99
|
geneid = gene.match(/<Gene-track_geneid>(\d+)/)[1]
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
return list
|
101
|
-
else
|
102
|
-
return genes.first
|
100
|
+
|
101
|
+
result[geneid] = xml
|
102
|
+
end
|
103
103
|
end
|
104
|
-
end
|
105
104
|
|
106
|
-
|
105
|
+
genes = {}
|
106
|
+
geneids.each{|id| genes[id] = Gene.new(Open.read(result_files[id])) }
|
107
107
|
|
108
|
-
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
def self.get_gene(geneid)
|
113
|
-
return nil if geneid.nil?
|
114
|
-
|
115
|
-
if Array === geneid
|
116
|
-
missing = []
|
117
|
-
list = {}
|
118
|
-
|
119
|
-
geneid.each{|p|
|
120
|
-
next if p.nil?
|
121
|
-
if FileCache.found(gene_filename p)
|
122
|
-
list[p] = Gene.new(Open.read(FileCache.path(gene_filename p)))
|
123
|
-
else
|
124
|
-
missing << p
|
125
|
-
end
|
126
|
-
}
|
127
|
-
|
128
|
-
|
129
|
-
return list unless missing.any?
|
130
|
-
genes = get_online(missing)
|
131
|
-
|
132
|
-
genes.each{|p, xml|
|
133
|
-
filename = gene_filename p
|
134
|
-
FileCache.add(filename,xml) unless FileCache.found(filename)
|
135
|
-
list[p] = Gene.new(xml)
|
136
|
-
}
|
137
|
-
|
138
|
-
return list
|
108
|
+
if _array
|
109
|
+
genes
|
139
110
|
else
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
if FileCache.found(filename)
|
144
|
-
return Gene.new(Open.read(FileCache.path(filename)))
|
145
|
-
else
|
146
|
-
xml = get_online(geneid)
|
147
|
-
|
148
|
-
FileCache.add(filename, xml) unless FileCache.found(filename)
|
149
|
-
|
150
|
-
return Gene.new(xml)
|
151
|
-
end
|
111
|
+
genes.values.first
|
152
112
|
end
|
153
113
|
end
|
154
114
|
|
@@ -144,6 +144,12 @@ module Organism
|
|
144
144
|
}.first
|
145
145
|
end
|
146
146
|
|
147
|
+
def self.organism_code(name)
|
148
|
+
organisms.select{|organism|
|
149
|
+
organism == name or Organism.scientific_name(organism) =~ /#{ name }/i
|
150
|
+
}.first
|
151
|
+
end
|
152
|
+
|
147
153
|
def self.known_ids(name)
|
148
154
|
TSV::Parser.new(Organism.identifiers(name).open).all_fields
|
149
155
|
end
|
data/lib/rbbt/sources/uniprot.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require 'rbbt'
|
1
|
+
require 'rbbt-util'
|
2
2
|
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/filecache'
|
3
4
|
require 'rbbt/resource'
|
4
5
|
require 'rbbt/sources/cath'
|
5
6
|
require 'rbbt/sources/uniprot'
|
@@ -32,12 +33,78 @@ module UniProt
|
|
32
33
|
tsv.to_s
|
33
34
|
end
|
34
35
|
|
35
|
-
|
36
36
|
UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt"
|
37
37
|
UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta"
|
38
|
+
|
39
|
+
def self.get_uniprot_entry(uniprotids)
|
40
|
+
_array = Array === uniprotids
|
41
|
+
|
42
|
+
uniprotids = [uniprotids] unless Array === uniprotids
|
43
|
+
uniprotids = uniprotids.compact.collect{|id| id}
|
44
|
+
|
45
|
+
result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-{ID}.xml') do |ids|
|
46
|
+
result = {}
|
47
|
+
ids.each do |id|
|
48
|
+
begin
|
49
|
+
Misc.try3times do
|
50
|
+
|
51
|
+
content = Open.read(UNIPROT_TEXT.sub("[PROTEIN]", id), :wget_options => {:quiet => true}, :nocache => true)
|
52
|
+
|
53
|
+
result[id] = content
|
54
|
+
end
|
55
|
+
rescue
|
56
|
+
Log.error $!.message
|
57
|
+
end
|
58
|
+
end
|
59
|
+
result
|
60
|
+
end
|
61
|
+
|
62
|
+
uniprots = {}
|
63
|
+
uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) }
|
64
|
+
|
65
|
+
if _array
|
66
|
+
uniprots
|
67
|
+
else
|
68
|
+
uniprots.values.first
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def self.get_uniprot_sequence(uniprotids)
|
73
|
+
_array = Array === uniprotids
|
74
|
+
|
75
|
+
uniprotids = [uniprotids] unless Array === uniprotids
|
76
|
+
uniprotids = uniprotids.compact.collect{|id| id}
|
77
|
+
|
78
|
+
result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-sequence-{ID}') do |ids|
|
79
|
+
result = {}
|
80
|
+
ids.each do |id|
|
81
|
+
begin
|
82
|
+
Misc.try3times do
|
83
|
+
|
84
|
+
url = UNIPROT_FASTA.sub "[PROTEIN]", id
|
85
|
+
text = Open.read(url, :nocache => true)
|
86
|
+
|
87
|
+
result[id] = text.split(/\n/).select{|line| line !~ /^>/} * ""
|
88
|
+
end
|
89
|
+
rescue
|
90
|
+
Log.error $!.message
|
91
|
+
end
|
92
|
+
end
|
93
|
+
result
|
94
|
+
end
|
95
|
+
|
96
|
+
uniprots = {}
|
97
|
+
uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) }
|
98
|
+
|
99
|
+
if _array
|
100
|
+
uniprots
|
101
|
+
else
|
102
|
+
uniprots.values.first
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
38
106
|
def self.pdbs(protein)
|
39
|
-
|
40
|
-
text = Open.read(url)
|
107
|
+
text = get_uniprot_entry(protein)
|
41
108
|
|
42
109
|
pdb = {}
|
43
110
|
|
@@ -59,15 +126,11 @@ module UniProt
|
|
59
126
|
end
|
60
127
|
|
61
128
|
def self.sequence(protein)
|
62
|
-
|
63
|
-
text = Open.read(url)
|
64
|
-
|
65
|
-
text.split(/\n/).select{|line| line !~ /^>/} * ""
|
129
|
+
get_uniprot_sequence(protein)
|
66
130
|
end
|
67
131
|
|
68
132
|
def self.features(protein)
|
69
|
-
|
70
|
-
text = Open.read(url)
|
133
|
+
text = get_uniprot_entry(protein)
|
71
134
|
|
72
135
|
text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
|
73
136
|
|
@@ -78,7 +141,6 @@ module UniProt
|
|
78
141
|
|
79
142
|
type = nil
|
80
143
|
parts.each do |part|
|
81
|
-
parts
|
82
144
|
if part[0..1] == "FT"
|
83
145
|
type = part.gsub(/FT\s+/,'')
|
84
146
|
next
|
@@ -111,8 +173,7 @@ module UniProt
|
|
111
173
|
|
112
174
|
|
113
175
|
def self.variants(protein)
|
114
|
-
|
115
|
-
text = Open.read(url)
|
176
|
+
text = get_uniprot_entry(protein)
|
116
177
|
|
117
178
|
text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
|
118
179
|
|
@@ -157,8 +218,7 @@ module UniProt
|
|
157
218
|
end
|
158
219
|
|
159
220
|
def self.cath(protein)
|
160
|
-
|
161
|
-
text = Open.read(url)
|
221
|
+
text = get_uniprot_entry(protein)
|
162
222
|
|
163
223
|
cath = {}
|
164
224
|
text.split(/\n/).each{|l|
|
@@ -21,21 +21,12 @@ class TestEntrez < Test::Unit::TestCase
|
|
21
21
|
assert(data['850320'].include? '1574125')
|
22
22
|
end
|
23
23
|
|
24
|
-
def test_getonline
|
25
|
-
geneids = 9129
|
26
|
-
|
27
|
-
assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids))
|
28
|
-
|
29
|
-
geneids = [9129,9]
|
30
|
-
assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids)[9129])
|
31
|
-
end
|
32
|
-
|
33
24
|
def test_getgene
|
34
25
|
geneids = 9129
|
35
|
-
assert_equal([["
|
26
|
+
assert_equal([["pre-mRNA processing factor 3"]], Entrez.get_gene(geneids).description)
|
36
27
|
|
37
28
|
geneids = [9129, 728049]
|
38
|
-
assert_equal([["
|
29
|
+
assert_equal([["pre-mRNA processing factor 3"]], Entrez.get_gene(geneids)[9129].description)
|
39
30
|
end
|
40
31
|
|
41
32
|
def test_similarity
|
@@ -5,37 +5,37 @@ require 'rbbt/sources/ensembl_ftp'
|
|
5
5
|
|
6
6
|
class TestOrganism < Test::Unit::TestCase
|
7
7
|
|
8
|
-
def
|
8
|
+
def _test_known_ids
|
9
9
|
assert Organism.known_ids("Hsa").include?("Associated Gene Name")
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
12
|
+
def _test_location
|
13
13
|
assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
|
14
14
|
end
|
15
15
|
|
16
|
-
def
|
16
|
+
def _test_identifiers
|
17
17
|
assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
|
18
18
|
assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
19
19
|
assert Organism.identifiers("Sce").tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def _test_lexicon
|
23
23
|
assert TSV.open(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def _test_guess_id
|
27
27
|
ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
|
28
28
|
gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
|
29
29
|
assert_equal "Associated Gene Name", Organism.guess_id("Sce", gene_name).first
|
30
30
|
assert_equal "Ensembl Gene ID", Organism.guess_id("Sce", ensembl).first
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
33
|
+
def _test_organisms
|
34
34
|
assert Organism.organisms.include? "Hsa"
|
35
35
|
assert_equal "Hsa", Organism.organism("Homo sapiens")
|
36
36
|
end
|
37
37
|
|
38
|
-
def
|
38
|
+
def _test_attach_translations
|
39
39
|
tsv = TSV.setup({"1020" => []}, :type => :list)
|
40
40
|
tsv.key_field = "Entrez Gene ID"
|
41
41
|
tsv.fields = []
|
@@ -47,7 +47,7 @@ class TestOrganism < Test::Unit::TestCase
|
|
47
47
|
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
48
48
|
end
|
49
49
|
|
50
|
-
def
|
50
|
+
def _test_entrez_taxids
|
51
51
|
assert_equal "Hsa", Organism.entrez_taxid_organism('9606')
|
52
52
|
end
|
53
53
|
|
@@ -61,22 +61,22 @@ class TestOrganism < Test::Unit::TestCase
|
|
61
61
|
assert_equal mutation_19, Organism.liftOver([mutation_18], target_build, source_build).first
|
62
62
|
end
|
63
63
|
|
64
|
-
def
|
64
|
+
def _test_orhtolog
|
65
65
|
require 'rbbt/entity/gene'
|
66
66
|
assert_equal ["ENSG00000133703"], Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011").ensembl.ortholog("Hsa/jun2011")
|
67
67
|
end
|
68
68
|
|
69
|
-
#def
|
69
|
+
#def _test_genes_at_chromosome
|
70
70
|
# pos = [12, 117799500]
|
71
71
|
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
72
72
|
#end
|
73
73
|
|
74
|
-
#def
|
74
|
+
#def _test_genes_at_chromosome_array
|
75
75
|
# pos = [12, [117799500, 106903900]]
|
76
76
|
# assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
77
77
|
#end
|
78
78
|
|
79
|
-
#def
|
79
|
+
#def _test_genes_at_genomic_positions
|
80
80
|
# pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
|
81
81
|
# assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
|
82
82
|
#end
|
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 4.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rbbt-text
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: mechanize
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: libxml-ruby
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: bio
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: Data sources like PubMed, Entrez Gene, or Gene Ontology
|
@@ -88,7 +88,6 @@ extra_rdoc_files: []
|
|
88
88
|
files:
|
89
89
|
- etc/allowed_biomart_archives
|
90
90
|
- etc/biomart/missing_in_archive
|
91
|
-
- lib/rbbt/sources/COSMIC.rb
|
92
91
|
- lib/rbbt/sources/COSTART.rb
|
93
92
|
- lib/rbbt/sources/CTCAE.rb
|
94
93
|
- lib/rbbt/sources/HPRD.rb
|
@@ -100,11 +99,9 @@ files:
|
|
100
99
|
- lib/rbbt/sources/bibtex.rb
|
101
100
|
- lib/rbbt/sources/biomart.rb
|
102
101
|
- lib/rbbt/sources/cath.rb
|
103
|
-
- lib/rbbt/sources/dbSNP.rb
|
104
102
|
- lib/rbbt/sources/ensembl.rb
|
105
103
|
- lib/rbbt/sources/ensembl_ftp.rb
|
106
104
|
- lib/rbbt/sources/entrez.rb
|
107
|
-
- lib/rbbt/sources/genomes1000.rb
|
108
105
|
- lib/rbbt/sources/go.rb
|
109
106
|
- lib/rbbt/sources/gscholar.rb
|
110
107
|
- lib/rbbt/sources/jochem.rb
|
@@ -143,25 +140,25 @@ require_paths:
|
|
143
140
|
- lib
|
144
141
|
required_ruby_version: !ruby/object:Gem::Requirement
|
145
142
|
requirements:
|
146
|
-
- -
|
143
|
+
- - ">="
|
147
144
|
- !ruby/object:Gem::Version
|
148
145
|
version: '0'
|
149
146
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
147
|
requirements:
|
151
|
-
- -
|
148
|
+
- - ">="
|
152
149
|
- !ruby/object:Gem::Version
|
153
150
|
version: '0'
|
154
151
|
requirements: []
|
155
152
|
rubyforge_project:
|
156
|
-
rubygems_version: 2.2.
|
153
|
+
rubygems_version: 2.2.1
|
157
154
|
signing_key:
|
158
155
|
specification_version: 4
|
159
156
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
160
157
|
test_files:
|
161
|
-
- test/rbbt/sources/
|
162
|
-
- test/rbbt/sources/test_entrez.rb
|
158
|
+
- test/rbbt/sources/test_pubmed.rb
|
163
159
|
- test/rbbt/sources/test_biomart.rb
|
164
160
|
- test/rbbt/sources/test_gscholar.rb
|
161
|
+
- test/rbbt/sources/test_entrez.rb
|
162
|
+
- test/rbbt/sources/test_go.rb
|
165
163
|
- test/rbbt/sources/test_organism.rb
|
166
|
-
- test/rbbt/sources/test_pubmed.rb
|
167
164
|
- test/test_helper.rb
|
data/lib/rbbt/sources/COSMIC.rb
DELETED
@@ -1,153 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/resource'
|
3
|
-
|
4
|
-
module COSMIC
|
5
|
-
extend Resource
|
6
|
-
self.subdir = "share/databases/COSMIC"
|
7
|
-
|
8
|
-
COSMIC.claim COSMIC.mutations, :proc do
|
9
|
-
url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCompleteExport_v67_241013.tsv.gz"
|
10
|
-
|
11
|
-
stream = CMD.cmd('awk \'BEGIN{FS="\t"} { if ($12 != "" && $12 != "Mutation ID") { sub($12, "COSM" $12 ":" $4)}; print}\'', :in => Open.open(url), :pipe => true)
|
12
|
-
tsv = TSV.open(stream, :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
|
13
|
-
tsv.fields = tsv.fields.collect{|f| f == "Gene name" ? "Associated Gene Name" : f}
|
14
|
-
tsv.add_field "Genomic Mutation" do |mid, values|
|
15
|
-
position = values["Mutation GRCh37 genome position"]
|
16
|
-
cds = values["Mutation CDS"]
|
17
|
-
|
18
|
-
if position.nil? or position.empty?
|
19
|
-
nil
|
20
|
-
else
|
21
|
-
position = position.split("-").first
|
22
|
-
|
23
|
-
chr, pos = position.split(":")
|
24
|
-
chr = "X" if chr == "23"
|
25
|
-
chr = "Y" if chr == "24"
|
26
|
-
chr = "M" if chr == "25"
|
27
|
-
position = [chr, pos ] * ":"
|
28
|
-
|
29
|
-
if cds.nil?
|
30
|
-
position
|
31
|
-
else
|
32
|
-
change = case
|
33
|
-
when cds =~ />/
|
34
|
-
cds.split(">").last
|
35
|
-
when cds =~ /del/
|
36
|
-
deletion = cds.split("del").last
|
37
|
-
case
|
38
|
-
when deletion =~ /^\d+$/
|
39
|
-
"-" * deletion.to_i
|
40
|
-
when deletion =~ /^[ACTG]+$/i
|
41
|
-
"-" * deletion.length
|
42
|
-
else
|
43
|
-
Log.debug "Unknown deletion: #{ deletion }"
|
44
|
-
deletion
|
45
|
-
end
|
46
|
-
when cds =~ /ins/
|
47
|
-
insertion = cds.split("ins").last
|
48
|
-
case
|
49
|
-
when insertion =~ /^\d+$/
|
50
|
-
"+" + "N" * insertion.to_i
|
51
|
-
when insertion =~ /^[NACTG]+$/i
|
52
|
-
"+" + insertion
|
53
|
-
else
|
54
|
-
Log.debug "Unknown insertion: #{insertion }"
|
55
|
-
insertion
|
56
|
-
end
|
57
|
-
else
|
58
|
-
Log.debug "Unknown change: #{cds}"
|
59
|
-
"?(" << cds << ")"
|
60
|
-
end
|
61
|
-
position + ":" + change
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
tsv.to_s.gsub(/(\d)-(\d)/,'\1:\2')
|
67
|
-
end
|
68
|
-
|
69
|
-
COSMIC.claim COSMIC.mutations_hg18, :proc do |filename|
|
70
|
-
require 'rbbt/sources/organism'
|
71
|
-
file = COSMIC.mutations.open
|
72
|
-
begin
|
73
|
-
|
74
|
-
while (line = file.gets) !~ /Genomic Mutation/; end
|
75
|
-
fields = line[1..-2].split("\t")
|
76
|
-
mutation_pos = fields.index "Genomic Mutation"
|
77
|
-
|
78
|
-
mutations = CMD.cmd("grep -v '^#'|cut -f #{mutation_pos + 1}|sort -u", :in => COSMIC.mutations.open).read.split("\n").select{|m| m.include? ":" }
|
79
|
-
|
80
|
-
translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
|
81
|
-
|
82
|
-
File.open(filename, 'w') do |f|
|
83
|
-
f.puts "#: :type=:list#:namespace=Hsa/may2009"
|
84
|
-
f.puts "#" + fields * "\t"
|
85
|
-
while line = file.gets do
|
86
|
-
next if line[0] == "#"[0]
|
87
|
-
line.strip!
|
88
|
-
parts = line.split("\t")
|
89
|
-
parts[mutation_pos] = translations[parts[mutation_pos]]
|
90
|
-
f.puts parts * "\t"
|
91
|
-
end
|
92
|
-
end
|
93
|
-
rescue Exception
|
94
|
-
FileUtils.rm filename if File.exists? filename
|
95
|
-
raise $!
|
96
|
-
ensure
|
97
|
-
file.close
|
98
|
-
end
|
99
|
-
|
100
|
-
nil
|
101
|
-
end
|
102
|
-
|
103
|
-
|
104
|
-
def self.rsid_index(organism, chromosome = nil)
|
105
|
-
build = Organism.hg_build(organism)
|
106
|
-
|
107
|
-
tag = [build, chromosome] * ":"
|
108
|
-
fwt = nil
|
109
|
-
Persist.persist("StaticPosIndex for COSMIC [#{ tag }]", :fwt, :persist => true) do
|
110
|
-
value_size = 0
|
111
|
-
file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
|
112
|
-
chr_positions = []
|
113
|
-
begin
|
114
|
-
Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
|
115
|
-
next if line[0] == "#"[0]
|
116
|
-
rsid, mutation = line.split("\t").values_at 0, 25
|
117
|
-
next if mutation.nil? or mutation.empty?
|
118
|
-
chr, pos = mutation.split(":")
|
119
|
-
next if chr != chromosome or pos.nil? or pos.empty?
|
120
|
-
chr_positions << [rsid, pos.to_i]
|
121
|
-
value_size = rsid.length if rsid.length > value_size
|
122
|
-
end
|
123
|
-
rescue
|
124
|
-
end
|
125
|
-
fwt = FixWidthTable.new :memory, value_size
|
126
|
-
fwt.add_point(chr_positions)
|
127
|
-
fwt
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def self.mutation_index(organism)
|
132
|
-
build = Organism.hg_build(organism)
|
133
|
-
file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
|
134
|
-
@mutation_index ||= {}
|
135
|
-
@mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
|
136
|
-
end
|
137
|
-
|
138
|
-
|
139
|
-
end
|
140
|
-
|
141
|
-
if defined? Entity
|
142
|
-
if defined? Gene and Entity === Gene
|
143
|
-
module Gene
|
144
|
-
property :COSMIC_rsids => :single2array do
|
145
|
-
COSMIC.rsid_index(organism, chromosome)[self.chr_range]
|
146
|
-
end
|
147
|
-
|
148
|
-
property :COSMIC_mutations => :single2array do
|
149
|
-
GenomicMutation.setup(COSMIC.mutation_index(organism).values_at(*self.COSMIC_rsids).uniq, "COSMIC mutations over #{self.name || self}", organism, false)
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|
data/lib/rbbt/sources/dbSNP.rb
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/open'
|
3
|
-
require 'rbbt/resource'
|
4
|
-
require 'net/ftp'
|
5
|
-
|
6
|
-
module DbSNP
|
7
|
-
extend Resource
|
8
|
-
self.subdir = "share/databases/dbSNP"
|
9
|
-
|
10
|
-
URL = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606/VCF/common_all.vcf.gz"
|
11
|
-
|
12
|
-
DbSNP.claim DbSNP.mutations_ncbi, :proc do
|
13
|
-
tsv = TSV.setup({}, :key_field => "RS ID", :fields => ["Genomic Mutation"], :type => :flat)
|
14
|
-
file = Open.open(URL, :nocache => true)
|
15
|
-
while line = file.gets do
|
16
|
-
next if line[0] == "#"[0]
|
17
|
-
chr, position, id, ref, alt = line.split "\t"
|
18
|
-
|
19
|
-
mutations = alt.split(",").collect do |a|
|
20
|
-
if alt[0] == ref[0]
|
21
|
-
alt[0] = '+'[0]
|
22
|
-
end
|
23
|
-
[chr, position, alt] * ":"
|
24
|
-
end
|
25
|
-
|
26
|
-
tsv.namespace = "Hsa/may2012"
|
27
|
-
tsv[id] = mutations
|
28
|
-
end
|
29
|
-
|
30
|
-
tsv.to_s
|
31
|
-
end
|
32
|
-
|
33
|
-
DbSNP.claim DbSNP.rsids, :proc do |filename|
|
34
|
-
ftp = Net::FTP.new('ftp.broadinstitute.org')
|
35
|
-
ftp.passive = true
|
36
|
-
ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
|
37
|
-
ftp.chdir('/bundle/2.3/hg19')
|
38
|
-
|
39
|
-
tmpfile = TmpFile.tmp_file + '.gz'
|
40
|
-
ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
|
41
|
-
|
42
|
-
file = Open.open(tmpfile, :nocache => true)
|
43
|
-
begin
|
44
|
-
File.open(filename, 'w') do |f|
|
45
|
-
f.puts "#: :type=:list#:namespace=Hsa/may2012"
|
46
|
-
f.puts "#" + ["RS ID", "GMAF", "G5", "G5A", "dbSNP Build ID"] * "\t"
|
47
|
-
while line = file.gets do
|
48
|
-
next if line[0] == "#"[0]
|
49
|
-
|
50
|
-
chr, position, id, ref, muts, qual, filter, info = line.split "\t"
|
51
|
-
|
52
|
-
g5 = g5a = dbsnp_build_id = gmaf = nil
|
53
|
-
|
54
|
-
gmaf = $1 if info =~ /GMAF=([0-9.]+)/
|
55
|
-
g5 = true if info =~ /\bG5\b/
|
56
|
-
g5a = true if info =~ /\bG5A\b/
|
57
|
-
dbsnp_build_id = $1 if info =~ /dbSNPBuildID=(\d+)/
|
58
|
-
|
59
|
-
f.puts [id, gmaf, g5, g5a, dbsnp_build_id] * "\t"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
rescue Exception
|
63
|
-
FileUtils.rm filename if File.exists? filename
|
64
|
-
raise $!
|
65
|
-
ensure
|
66
|
-
file.close
|
67
|
-
FileUtils.rm tmpfile
|
68
|
-
end
|
69
|
-
|
70
|
-
nil
|
71
|
-
end
|
72
|
-
|
73
|
-
DbSNP.claim DbSNP.mutations, :proc do |filename|
|
74
|
-
ftp = Net::FTP.new('ftp.broadinstitute.org')
|
75
|
-
ftp.passive = true
|
76
|
-
ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
|
77
|
-
ftp.chdir('/bundle/2.3/hg19')
|
78
|
-
|
79
|
-
tmpfile = TmpFile.tmp_file + '.gz'
|
80
|
-
ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
|
81
|
-
|
82
|
-
file = Open.open(tmpfile, :nocache => true)
|
83
|
-
begin
|
84
|
-
File.open(filename, 'w') do |f|
|
85
|
-
f.puts "#: :type=:flat#:namespace=Hsa/may2012"
|
86
|
-
f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
|
87
|
-
while line = file.gets do
|
88
|
-
next if line[0] == "#"[0]
|
89
|
-
|
90
|
-
chr, position, id, ref, muts, qual, filter, info = line.split "\t"
|
91
|
-
|
92
|
-
chr.sub!('chr', '')
|
93
|
-
|
94
|
-
position, muts = Misc.correct_vcf_mutation(position.to_i, ref, muts)
|
95
|
-
|
96
|
-
mutations = muts.collect{|mut| [chr, position, mut] * ":" }
|
97
|
-
|
98
|
-
f.puts ([id] + mutations) * "\t"
|
99
|
-
end
|
100
|
-
end
|
101
|
-
rescue Exception
|
102
|
-
FileUtils.rm filename if File.exists? filename
|
103
|
-
raise $!
|
104
|
-
ensure
|
105
|
-
file.close
|
106
|
-
FileUtils.rm tmpfile
|
107
|
-
end
|
108
|
-
|
109
|
-
nil
|
110
|
-
end
|
111
|
-
|
112
|
-
DbSNP.claim DbSNP.mutations_hg18, :proc do |filename|
|
113
|
-
require 'rbbt/sources/organism'
|
114
|
-
|
115
|
-
mutations = CMD.cmd("grep -v '^#'|cut -f 2|sort -u", :in => DbSNP.mutations.open).read.split("\n").collect{|l| l.split("|")}.flatten
|
116
|
-
|
117
|
-
translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
|
118
|
-
begin
|
119
|
-
file = Open.open(DbSNP.mutations.find, :nocache => true)
|
120
|
-
File.open(filename, 'w') do |f|
|
121
|
-
f.puts "#: :type=:flat#:namespace=Hsa/may2009"
|
122
|
-
f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
|
123
|
-
while line = file.gets do
|
124
|
-
next if line[0] == "#"[0]
|
125
|
-
parts = line.split("\t")
|
126
|
-
parts[1..-1] = parts[1..-1].collect{|p| translations[p]} * "|"
|
127
|
-
f.puts parts * "\t"
|
128
|
-
end
|
129
|
-
end
|
130
|
-
rescue Exception
|
131
|
-
FileUtils.rm filename if File.exists? filename
|
132
|
-
raise $!
|
133
|
-
ensure
|
134
|
-
file.close
|
135
|
-
end
|
136
|
-
|
137
|
-
nil
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.rsid_index(organism, chromosome = nil)
|
141
|
-
build = Organism.hg_build(organism)
|
142
|
-
|
143
|
-
tag = [build, chromosome] * ":"
|
144
|
-
Persist.persist("StaticPosIndex for dbSNP [#{ tag }]", :fwt, :persist => true) do
|
145
|
-
value_size = 0
|
146
|
-
file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
|
147
|
-
chr_positions = []
|
148
|
-
Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
|
149
|
-
next if line[0] == "#"[0]
|
150
|
-
rsid, mutation = line.split("\t")
|
151
|
-
next if mutation.nil? or mutation.empty?
|
152
|
-
chr, pos = mutation.split(":")
|
153
|
-
next if chr != chromosome or pos.nil? or pos.empty?
|
154
|
-
chr_positions << [rsid, pos.to_i]
|
155
|
-
value_size = rsid.length if rsid.length > value_size
|
156
|
-
end
|
157
|
-
fwt = FixWidthTable.new :memory, value_size
|
158
|
-
fwt.add_point(chr_positions)
|
159
|
-
fwt
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def self.mutation_index(organism)
|
164
|
-
build = Organism.hg_build(organism)
|
165
|
-
file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
|
166
|
-
@mutation_index ||= {}
|
167
|
-
@mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
|
168
|
-
end
|
169
|
-
|
170
|
-
end
|
171
|
-
|
172
|
-
if defined? Entity
|
173
|
-
if defined? Gene and Entity === Gene
|
174
|
-
module Gene
|
175
|
-
property :dbSNP_rsids => :single2array do
|
176
|
-
DbSNP.rsid_index(organism, chromosome)[self.chr_range]
|
177
|
-
end
|
178
|
-
|
179
|
-
property :dbSNP_mutations => :single2array do
|
180
|
-
GenomicMutation.setup(DbSNP.mutation_index(organism).values_at(*self.dbSNP_rsids).compact.flatten.uniq, "dbSNP mutations over #{self.name || self}", organism, true)
|
181
|
-
end
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
if defined? GenomicMutation and Entity === GenomicMutation
|
186
|
-
module GenomicMutation
|
187
|
-
property :dbSNP => :array2single do
|
188
|
-
dbSNP.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["RS ID"], :type => :single).values_at *self
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/open'
|
3
|
-
require 'rbbt/resource'
|
4
|
-
require 'rbbt/entity/gene'
|
5
|
-
|
6
|
-
module Genomes1000
|
7
|
-
extend Resource
|
8
|
-
self.subdir = "share/databases/genomes_1000"
|
9
|
-
|
10
|
-
RELEASE_URL = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20110521/ALL.wgs.phase1_release_v3.20101123.snps_indels_sv.sites.vcf.gz"
|
11
|
-
|
12
|
-
Genomes1000.claim Genomes1000.mutations, :proc do |filename|
|
13
|
-
|
14
|
-
begin
|
15
|
-
Open.write(filename) do |file|
|
16
|
-
file.puts "#: :type=:single#:namespace=Hsa"
|
17
|
-
file.puts "#Variant ID\tGenomic Mutation"
|
18
|
-
|
19
|
-
Open.read(RELEASE_URL) do |line|
|
20
|
-
next if line[0] == "#"[0]
|
21
|
-
|
22
|
-
chromosome, position, id, references, alternative, quality, filter, info = line.split("\t")
|
23
|
-
|
24
|
-
file.puts [id, [chromosome, position, alternative] * ":"] * "\t"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
rescue
|
28
|
-
FileUtils.rm filename if File.exists? filename
|
29
|
-
raise $!
|
30
|
-
end
|
31
|
-
nil
|
32
|
-
end
|
33
|
-
|
34
|
-
|
35
|
-
Genomes1000.claim Genomes1000.mutations_hg18, :proc do
|
36
|
-
require 'rbbt/sources/organism'
|
37
|
-
|
38
|
-
hg19_tsv = Genomes1000.mutations.tsv :unnamed => true
|
39
|
-
|
40
|
-
mutations = hg19_tsv.values
|
41
|
-
|
42
|
-
translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
|
43
|
-
|
44
|
-
tsv = hg19_tsv.process "Genomic Mutation" do |mutation|
|
45
|
-
translations[mutation]
|
46
|
-
end
|
47
|
-
|
48
|
-
tsv.namespace = "Hsa/may2009"
|
49
|
-
|
50
|
-
tsv.to_s
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.rsid_index(organism, chromosome = nil)
|
54
|
-
build = Organism.hg_build(organism)
|
55
|
-
|
56
|
-
tag = [build, chromosome] * ":"
|
57
|
-
Persist.persist("StaticPosIndex for Genomes1000 [#{ tag }]", :fwt, :persist => true) do
|
58
|
-
value_size = 0
|
59
|
-
file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
|
60
|
-
chr_positions = []
|
61
|
-
Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
|
62
|
-
next if line[0] == "#"[0]
|
63
|
-
rsid, mutation = line.split("\t")
|
64
|
-
next if mutation.nil? or mutation.empty?
|
65
|
-
chr, pos = mutation.split(":")
|
66
|
-
next if chr != chromosome or pos.nil? or pos.empty?
|
67
|
-
chr_positions << [rsid, pos.to_i]
|
68
|
-
value_size = rsid.length if rsid.length > value_size
|
69
|
-
end
|
70
|
-
fwt = FixWidthTable.new :memory, value_size
|
71
|
-
fwt.add_point(chr_positions)
|
72
|
-
fwt
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.mutation_index(organism)
|
77
|
-
build = Organism.hg_build(organism)
|
78
|
-
file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
|
79
|
-
@mutation_index ||= {}
|
80
|
-
@mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
|
81
|
-
end
|
82
|
-
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
|
87
|
-
if defined? Entity
|
88
|
-
if defined? Gene and Entity === Gene
|
89
|
-
module Gene
|
90
|
-
property :genomes_1000_rsids => :single2array do
|
91
|
-
Genomes1000.rsid_index(organism, chromosome)[self.chr_range]
|
92
|
-
end
|
93
|
-
|
94
|
-
property :genomes_1000_mutations => :single2array do
|
95
|
-
GenomicMutation.setup(Genomes1000.mutation_index(organism).values_at(*self.genomes_1000_rsids).uniq, "1000 Genomes mutations over #{self.name || self}", organism, true)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
if defined? GenomicMutation and Entity === GenomicMutation
|
101
|
-
module GenomicMutation
|
102
|
-
property :genomes_1000 => :array2single do
|
103
|
-
Genomes1000.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["Variant ID"], :type => :single).values_at *self
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
|