exodb 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/exodb.rb +23 -0
- data/lib/exodb/addon/string.rb +139 -0
- data/lib/exodb/constant.rb +64 -0
- data/lib/exodb/datamodel.rb +4 -1
- data/lib/exodb/datamodel/genelocfield.rb +177 -0
- data/lib/exodb/datamodel/generef.rb +193 -0
- data/lib/exodb/datamodel/isoform.rb +237 -0
- data/lib/exodb/datamodel/reference.rb +23 -327
- data/lib/exodb/datamodel/region.rb +7 -5
- data/lib/exodb/datamodel/source.rb +1 -10
- data/lib/exodb/datamodel/variant.rb +14 -81
- data/lib/exodb/datamodel/varlocfield.rb +106 -0
- data/lib/exodb/datamodel/xrefsfield.rb +4 -0
- data/lib/exodb/extra.rb +17 -0
- data/lib/exodb/extra/upload.rb +43 -0
- data/lib/exodb/{utils → extra}/upload_generef.rb +35 -21
- data/lib/exodb/rositza/load.rb +56 -42
- data/lib/exodb/utils.rb +1 -2
- data/lib/exodb/utils/ensemblrest.rb +31 -3
- data/lib/exodb/utils/miriamrest.rb +23 -0
- data/lib/exodb/version.rb +1 -1
- metadata +10 -3
- data/lib/exodb/datamodel/locationfield.rb +0 -116
data/lib/exodb/rositza/load.rb
CHANGED
@@ -17,28 +17,68 @@ module Exodb
|
|
17
17
|
|
18
18
|
module_function
|
19
19
|
|
20
|
-
def
|
20
|
+
def load_snv_from_csv(csvfile)
|
21
21
|
|
22
22
|
CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
|
23
23
|
|
24
|
-
var =
|
25
|
-
var.
|
24
|
+
var = Exodb::Variant.new()
|
25
|
+
var.location=("chr#{record["chromosome"]}:#{record["start position"]}")
|
26
26
|
var.reference = record["ref nucleotide"].split('/')[0]
|
27
|
-
var.
|
28
|
-
var.
|
29
|
-
var.
|
27
|
+
var.alternatet = record["var nucleotide"].split('/').uniq
|
28
|
+
var.somstatus = record["Somatic Status"].downcase
|
29
|
+
var.passfilter = true
|
30
|
+
var.pileupt = record["Reads"]
|
30
31
|
var.predicted_damage = record["PolyPhen"] =~ /probably_damaging/ || record["SIFT"] =~ /deleterious/i || record["PROVEAN"] =~ /deleterious/i ? true : false
|
31
|
-
var.
|
32
|
+
var.temp = {} if var.temp.blank?
|
33
|
+
var.temp['aachange'] = "#{record["symbol"]}:#{record["AA Change"]}"
|
32
34
|
var.add_to_sample(record["cell"])
|
33
35
|
|
34
36
|
p var.save!
|
35
37
|
end
|
36
38
|
end
|
37
39
|
|
40
|
+
def load_splice_from_csv(csvfile)
|
41
|
+
|
42
|
+
CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
|
43
|
+
|
44
|
+
var = Exodb::Variant.new()
|
45
|
+
var.location=("chr#{record["chr"]}:#{record["Position"]}")
|
46
|
+
var.reference = record["Reference Genotype"]
|
47
|
+
var.alternatet = ['N']
|
48
|
+
var.somstatus = 'somatic'
|
49
|
+
var.passfilter = true
|
50
|
+
#var.pileupt = record["Reads"]
|
51
|
+
var.predicted_damage = true
|
52
|
+
var.temp = {} if var.temp.blank?
|
53
|
+
var.temp['aachange'] = "#{record["Gene Symbol"]}:SpV"
|
54
|
+
var.add_to_sample(record["Sample"])
|
55
|
+
p var.save!
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def load_indel_from_csv(csvfile)
|
60
|
+
|
61
|
+
CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
|
62
|
+
|
63
|
+
var = Exodb::Variant.new()
|
64
|
+
var.location=("#{record["chr"]}:#{record["pos"]}")
|
65
|
+
#var.reference = record["Reference Genotype"]
|
66
|
+
var.alternatet = ['-2N']
|
67
|
+
var.somstatus = record["Somatic Status"].downcase
|
68
|
+
var.passfilter = true
|
69
|
+
#var.pileupt = record["Reads"]
|
70
|
+
var.predicted_damage = true
|
71
|
+
var.temp = {} if var.temp.blank?
|
72
|
+
var.temp['aachange'] = "#{record["symbol"]}:InDel"
|
73
|
+
var.add_to_sample(record["cell lines"])
|
74
|
+
p var.save!
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
38
78
|
def load_sample_from_csv(csvfile)
|
39
79
|
CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
|
40
80
|
|
41
|
-
sample =
|
81
|
+
sample = Cell.new({oid: record["SampleFinal"],
|
42
82
|
type: record["Type"].downcase,
|
43
83
|
typeid: "urn:miriam:bioportal.meddra:#{record["Type"] =~ /^spitz /i ? '10041632' : record["Type"] =~ /^spitzoid /i ? '10072450' : '10028679'}",
|
44
84
|
patient: record["SampleFinal"].split('T')[0],
|
@@ -53,29 +93,7 @@ module Exodb
|
|
53
93
|
end
|
54
94
|
end
|
55
95
|
|
56
|
-
def
|
57
|
-
|
58
|
-
File.open(annfile).each do |line|
|
59
|
-
record = line.chomp.split("\t")
|
60
|
-
p record
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
64
|
-
|
65
|
-
def load_indel_from_merge(mergefile)
|
66
|
-
|
67
|
-
File.open(mergefile).each do |line|
|
68
|
-
record = line.chomp.split("\t")
|
69
|
-
p record[0..21]
|
70
|
-
#others = record[22..-1]
|
71
|
-
#until others.empty?
|
72
|
-
# p others.shift(11)
|
73
|
-
#end
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
def load_variant_from_merge(mergefile)
|
96
|
+
def load_variant_from_merge(mergefile, normal = false, assembly = Exodb::DEFAULTASSEMBLY)
|
79
97
|
|
80
98
|
File.open(mergefile).each do |line|
|
81
99
|
record = line.chomp.split("\t")
|
@@ -85,20 +103,16 @@ module Exodb
|
|
85
103
|
list = record[16..-1]
|
86
104
|
|
87
105
|
until list.empty?
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
if snvq.exists?
|
92
|
-
snv = snvq.first
|
93
|
-
snv.temp.push(header[0..6].join("\t"))
|
94
|
-
p snv.save!
|
106
|
+
if normal
|
107
|
+
snv = Exodb::Variant.where(oid: "#{header[2]}:#{header[3]}:#{assembly}:#{sampledata[0]}").first
|
108
|
+
snv.pileupn = sampledata[6] if snv != nil
|
95
109
|
else
|
110
|
+
sampledata = list.shift(7)
|
96
111
|
snv = Exodb::Variant.new()
|
97
|
-
snv.
|
112
|
+
snv.location=("#{header[2]}:#{header[3]}:#{assembly}")
|
98
113
|
snv.reference = header[10]
|
99
114
|
snv.pileupt = sampledata[6]
|
100
|
-
snv.temp =
|
101
|
-
snv.temp.push(header[0..6].join("\t"))
|
115
|
+
snv.temp = {} if snv.temp.blank?
|
102
116
|
snv.add_to_sample(sampledata[0])
|
103
117
|
p snv.save!
|
104
118
|
end
|
@@ -111,5 +125,5 @@ module Exodb
|
|
111
125
|
end
|
112
126
|
|
113
127
|
#Exodb::Utils.load_sample_from_csv('Samples_all.txt')
|
114
|
-
#Exodb::Utils.
|
128
|
+
#Exodb::Utils.load_snv_from_csv('NovelSNVs_13.txt')
|
115
129
|
#Exodb::Utils.load_variant_from_merge('gilead_ann/tumor/exome.UTR')
|
data/lib/exodb/utils.rb
CHANGED
@@ -9,10 +9,9 @@
|
|
9
9
|
|
10
10
|
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
11
|
|
12
|
-
require 'exodb/utils/upload_generef.rb'
|
13
12
|
require 'exodb/utils/upload_var.rb'
|
14
13
|
require 'exodb/utils/ensemblrest.rb'
|
15
|
-
|
14
|
+
require 'exodb/utils/miriamrest.rb'
|
16
15
|
|
17
16
|
# load scripts specific to the Rositza project
|
18
17
|
require 'exodb/rositza.rb'
|
@@ -19,9 +19,9 @@ module Exodb
|
|
19
19
|
|
20
20
|
module_function
|
21
21
|
|
22
|
-
def get(
|
22
|
+
def get(get_path, options)
|
23
23
|
request = Net::HTTP::Get.new(get_path, {'Content-Type' => 'application/json'}.merge(options))
|
24
|
-
response = http.request(request)
|
24
|
+
response = @@http.request(request)
|
25
25
|
if response.code != "200"
|
26
26
|
riase InvalidResponse, "Invalid response: #{response.code}"
|
27
27
|
else
|
@@ -29,10 +29,38 @@ module Exodb
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
|
32
|
+
|
33
|
+
# http://rest.ensembl.org/documentation/info/vep_region_get
|
34
|
+
def vep_region_get(region, allele, species, options = {})
|
33
35
|
return Exodb::Ensembl::REST.get("vep/#{species}/region/#{region}/allele", options)
|
34
36
|
end
|
35
37
|
|
38
|
+
# http://rest.ensembl.org/documentation/info/vep_hgvs_get
|
39
|
+
def vep_hgvs_get(hgvs, species, options = {})
|
40
|
+
return Exodb::Ensembl::REST.get("vep/#{species}/hgvs/#{region}/allele", options)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Convert coordinates from one assembly to another
|
44
|
+
# from http://rest.ensembl.org/documentation/info/assembly_map
|
45
|
+
#
|
46
|
+
# @param [String] location string in chromosome:start..stop format
|
47
|
+
# @param [String] target assembly or asm_two
|
48
|
+
# @param [String] original assembly, default: Exodb::DEFAULTASSEMBLY
|
49
|
+
# @param [String] species of genome
|
50
|
+
# @param [Hash] option
|
51
|
+
def assembly_map(region, asm_two, asm_one = Exodb::DEFAULTASSEMBLY, species = 'human', options = {})
|
52
|
+
return Exodb::Ensembl::REST.get("map/#{species.gsub(/\s+/, '_')}/#{Exodb::ASSEMBLY[asm_one.downcase]}/#{region}/#{Exodb::ASSEMBLY[asm_two.downcase]}", options)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Returns the genomic sequence of the specified region of the given species.
|
56
|
+
# from http://rest.ensembl.org/documentation/info/sequence_region
|
57
|
+
#
|
58
|
+
# @param [String] location string in chromosome:start..stop format
|
59
|
+
# @param [String] species of genome
|
60
|
+
# @return [Fasta] the sequence
|
61
|
+
def sequence_region(region, species, options = {})
|
62
|
+
return Exodb::Ensembl::REST.get("sequence/region/#{species}/#{region}", {'Content-Type' => 'text/x-fasta', 'coord_system_version' => Exodb::DEFAULTASSEMBLY}.merge(options))
|
63
|
+
end
|
36
64
|
end
|
37
65
|
|
38
66
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
module Miriam
|
15
|
+
|
16
|
+
module REST
|
17
|
+
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
data/lib/exodb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: exodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Natapol Pornputtapong
|
@@ -79,21 +79,28 @@ files:
|
|
79
79
|
- lib/exodb.rb
|
80
80
|
- lib/exodb/addon.rb
|
81
81
|
- lib/exodb/addon/string.rb
|
82
|
+
- lib/exodb/constant.rb
|
82
83
|
- lib/exodb/datamodel.rb
|
83
|
-
- lib/exodb/datamodel/
|
84
|
+
- lib/exodb/datamodel/genelocfield.rb
|
85
|
+
- lib/exodb/datamodel/generef.rb
|
86
|
+
- lib/exodb/datamodel/isoform.rb
|
84
87
|
- lib/exodb/datamodel/reference.rb
|
85
88
|
- lib/exodb/datamodel/region.rb
|
86
89
|
- lib/exodb/datamodel/source.rb
|
87
90
|
- lib/exodb/datamodel/variant.rb
|
91
|
+
- lib/exodb/datamodel/varlocfield.rb
|
88
92
|
- lib/exodb/datamodel/xrefsfield.rb
|
89
93
|
- lib/exodb/dbconnection.rb
|
90
94
|
- lib/exodb/exception.rb
|
95
|
+
- lib/exodb/extra.rb
|
96
|
+
- lib/exodb/extra/upload.rb
|
97
|
+
- lib/exodb/extra/upload_generef.rb
|
91
98
|
- lib/exodb/rositza.rb
|
92
99
|
- lib/exodb/rositza/load.rb
|
93
100
|
- lib/exodb/usermanage.rb
|
94
101
|
- lib/exodb/utils.rb
|
95
102
|
- lib/exodb/utils/ensemblrest.rb
|
96
|
-
- lib/exodb/utils/
|
103
|
+
- lib/exodb/utils/miriamrest.rb
|
97
104
|
- lib/exodb/utils/upload_var.rb
|
98
105
|
- lib/exodb/vcf.rb
|
99
106
|
- lib/exodb/version.rb
|
@@ -1,116 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Exodb
|
3
|
-
# Copyright (C) 2014
|
4
|
-
#
|
5
|
-
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
-
#
|
7
|
-
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
-
#
|
9
|
-
|
10
|
-
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
-
|
12
|
-
module Exodb
|
13
|
-
|
14
|
-
module GenomeLocationField
|
15
|
-
|
16
|
-
extend ActiveSupport::Concern
|
17
|
-
|
18
|
-
included do
|
19
|
-
field :location, type: Hash #{chromosome: '', start: x, stop: x}
|
20
|
-
field :strand, type: String
|
21
|
-
|
22
|
-
index({location: 1}, background: true)
|
23
|
-
end
|
24
|
-
|
25
|
-
module ClassMethods
|
26
|
-
|
27
|
-
def cover?(loc_str)
|
28
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
29
|
-
if dat[4]
|
30
|
-
querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[4].to_i}
|
31
|
-
else
|
32
|
-
querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}
|
33
|
-
end
|
34
|
-
return self.where(querystr)
|
35
|
-
end
|
36
|
-
|
37
|
-
def intersect?(loc_str)
|
38
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
39
|
-
querystr = {:'$or' => [{:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}, {:'location.chromosome' => dat[0], :'location.start'.lte => dat[4].to_i, :'location.stop'.gte => dat[4].to_i}]}
|
40
|
-
return self.where(querystr)
|
41
|
-
end
|
42
|
-
|
43
|
-
def in?(loc_str)
|
44
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
45
|
-
querystr = {:'location.chromosome' => dat[0], :'location.start'.gte => dat[2].to_i, :'location.stop'.lte => dat[4].to_i}
|
46
|
-
return self.where(querystr)
|
47
|
-
end
|
48
|
-
|
49
|
-
#def converse
|
50
|
-
# self.where({}).each do |e|
|
51
|
-
# if e[:location][:coordinates]
|
52
|
-
# oldlocation = e[:location]
|
53
|
-
# if oldlocation[:coordinates][0].is_a?(Array)
|
54
|
-
# e[:location] = {chromosome: oldlocation[:coordinates][0][0], start: oldlocation[:coordinates][0][1], stop: oldlocation[:coordinates][1][1]}
|
55
|
-
# else
|
56
|
-
# e[:location] = {chromosome: oldlocation[:coordinates][0], start: oldlocation[:coordinates][1], stop: oldlocation[:coordinates][1]}
|
57
|
-
# end
|
58
|
-
# p e.save!
|
59
|
-
# end
|
60
|
-
# end
|
61
|
-
#end
|
62
|
-
end
|
63
|
-
|
64
|
-
# get the start position of gene rely on the genome
|
65
|
-
#
|
66
|
-
# @return [Integer] start position of gene
|
67
|
-
def start
|
68
|
-
self[:location]['start']
|
69
|
-
end
|
70
|
-
|
71
|
-
# get the end position of gene rely on the genome
|
72
|
-
#
|
73
|
-
# @return [Integer] end position of gene
|
74
|
-
def stop
|
75
|
-
self[:location]['stop']
|
76
|
-
end
|
77
|
-
|
78
|
-
# get the start position of gene rely on the genome
|
79
|
-
#
|
80
|
-
# @return [Integer] start position of gene
|
81
|
-
def begin
|
82
|
-
self[:strand] == '+' ? self[:location]['start'] : self[:location]['stop']
|
83
|
-
end
|
84
|
-
|
85
|
-
# get the end position of gene relative to its strand
|
86
|
-
#
|
87
|
-
# @return [Integer] end position of gene
|
88
|
-
def end
|
89
|
-
self[:strand] == '+' ? self[:location]['stop'] : self[:location]['start']
|
90
|
-
end
|
91
|
-
|
92
|
-
# get the chromosome
|
93
|
-
#
|
94
|
-
# @return [String] chromosome
|
95
|
-
def chromosome
|
96
|
-
self[:location]['chromosome']
|
97
|
-
end
|
98
|
-
|
99
|
-
# Assign gene location in format of chromosome_number:start..stop
|
100
|
-
#
|
101
|
-
# @param [String] gene location in format of chromosome_number:start..stop
|
102
|
-
def parse_location(loc_str)
|
103
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
104
|
-
if dat[4]
|
105
|
-
self[:location] = {'chromosome' => dat[0], 'start' => dat[2].to_i, 'stop' => dat[4].to_i}
|
106
|
-
else
|
107
|
-
self[:location] = {'chromosome' => dat[0], 'start' => dat[2].to_i, 'stop' => dat[2].to_i}
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def location_str
|
112
|
-
return "#{self.chromosome}:#{[self.start, self.stop].uniq.join('..')}"
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|