exodb 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f59876f64775ac85fda8bbb7db9a06e253ab39b8
4
- data.tar.gz: 978e13f8048b2cabea0cfe4fc96a9da6e42b97fe
3
+ metadata.gz: 5d6b52be6bba76e973d002e6dbd4ad4ed82defce
4
+ data.tar.gz: 35f0cbe4f0d05380ca24710125e5dee20ecfb141
5
5
  SHA512:
6
- metadata.gz: ceca94da8429c39dee7e1baa9d96f5a88f8860cbf1c185ee2439845d0c93d3a63605bc4d2e6bc7c33c3e1d5c0c424e1d37a1f7557ce8a96baa303ca692a78f55
7
- data.tar.gz: 1d20bb30b30b1aea8a612abd255c02f985a2b6fdcc6dd38431f387c2fc3335e56fd5261b38adefb58655d064cb6fef074e11d725af871768269b4bef2b44e9ee
6
+ metadata.gz: 05822ee2e593e898c62294ab0795cf8769eace304356f9beed84f1b80fd956527a9f5bc74ec4c532ac5b5811ed56e07711d2593dbcd9b9ebb4a4a1d28049b74f
7
+ data.tar.gz: b2ac6f0484ad7a33315ff3c583f2b1fd25a1bfd70a21f22a087b3c9bbf0e03cb43882a8605e19fc66f81a2f1981b27532744b858756a93b080dfa664ac868ef1
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ Installation
2
+ ============
3
+
4
+ gem install exodb
5
+
6
+ Usage in irb or pry
7
+ ===================
8
+ require 'exodb'
9
+ Exodb.connect("username@servername/databasename")
10
+
11
+ Example query
12
+ =============
13
+ 1. To query a reference gene by cross-reference
14
+
15
+ Exodb::Generef.where(xrefs: /BRAF/).first
data/exodb.gemspec CHANGED
@@ -25,6 +25,8 @@ Gem::Specification.new do |s|
25
25
 
26
26
  s.add_dependency "mongoid", "~> 3.1"
27
27
  s.add_dependency "bio", "~> 1.4"
28
+ # s.add_dependency "bio-samtools", "~> 2.2"
29
+ # s.add_dependency "bio-vcf", "~> 0.8"
28
30
  s.add_dependency "highline", "~> 1.6"
29
31
  s.add_dependency "pry", "~> 0.10"
30
32
 
@@ -25,7 +25,7 @@ module Exodb
25
25
  module ClassMethods
26
26
 
27
27
  def cover?(loc_str)
28
- dat = loc_str.split(/(:|\.\.)/)
28
+ dat = loc_str.split(/(:|\.\.|-)/)
29
29
  if dat[4]
30
30
  querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[4].to_i}
31
31
  else
@@ -35,13 +35,13 @@ module Exodb
35
35
  end
36
36
 
37
37
  def intersect?(loc_str)
38
- dat = loc_str.split(/(:|\.\.)/)
38
+ dat = loc_str.split(/(:|\.\.|-)/)
39
39
  querystr = {:'$or' => [{:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}, {:'location.chromosome' => dat[0], :'location.start'.lte => dat[4].to_i, :'location.stop'.gte => dat[4].to_i}]}
40
40
  return self.where(querystr)
41
41
  end
42
42
 
43
43
  def in?(loc_str)
44
- dat = loc_str.split(/(:|\.\.)/)
44
+ dat = loc_str.split(/(:|\.\.|-)/)
45
45
  querystr = {:'location.chromosome' => dat[0], :'location.start'.gte => dat[2].to_i, :'location.stop'.lte => dat[4].to_i}
46
46
  return self.where(querystr)
47
47
  end
@@ -100,13 +100,12 @@ module Exodb
100
100
  #
101
101
  # @param [String] loc_str gene location in the format chromosome:start..stop or chromosome:start-stop
102
102
  def parse_location(loc_str)
103
- dat = loc_str.split(/(:|\.\.)/)
103
+ dat = loc_str.split(/(:|\.\.|-)/)
104
104
  if dat[4]
105
- self[:location] = {chromosome: dat[0], start: dat[2].to_i, stop: dat[4].to_i}
105
+ self[:location] = {'chromosome' => dat[0], 'start' => dat[2].to_i, 'stop' => dat[4].to_i}
106
106
  else
107
- self[:location] = {chromosome: dat[0], start: dat[2].to_i, stop: dat[2].to_i}
107
+ self[:location] = {'chromosome' => dat[0], 'start' => dat[2].to_i, 'stop' => dat[2].to_i}
108
108
  end
109
-
110
109
  end
111
110
 
112
111
  def location_str
@@ -40,6 +40,7 @@ module Exodb
40
40
  field :preferred, type: Boolean
41
41
  field :paired, type: Boolean
42
42
  field :purity, type: Float
43
+ field :labels, type: Hash
43
44
 
44
45
  default_scope ->{where(preferred: true)}
45
46
 
@@ -26,24 +26,30 @@ module Exodb
26
26
 
27
27
  field :oid, type: String # chromosome:position..alternative:samplename
28
28
  field :reference, type: String #reference genotype
29
- field :alternate, type: Array #alternate genotype
30
- field :quality, type: String
29
+ field :alternaten, type: Array #alternate genotype from normal cell
30
+ field :alternatet, type: Array #alternate genotype from tumor cell
31
31
  field :filter, type: String
32
- field :somaticStatus, type: String #unknown, inherited, somatic
33
- field :somaticScore, type: Float #Somatic score
34
- field :inheritantScore, type: Float #Inheritant score
32
+ field :passfilter, type: Boolean #, default: false
33
+ field :somstatus, type: String #unknown, inherited, somatic
34
+ field :somscore, type: Float #Somatic score 0-1
35
+ field :inhscore, type: Float #Inheritant score 0-1
35
36
  #field :fdr, type: Float #False discovery rate score
36
- field :ctrlread, type: String
37
- field :inhreads, type: String #reads from normal cell
38
- field :reads, type: String
37
+ field :pileupt, type: String #pileup string from tumor cell
38
+ field :pileupn, type: String #pileup string from normal cell
39
+ field :qualt, type: String #pileup quality from tumor cell
40
+ field :qualn, type: String #pileup quality from normal cell
39
41
  field :predicted_damage, type: Boolean #Temporary field
40
- field :aachange, type: String #Temporaly field
42
+ field :temp, type: Array #Temporary field
41
43
 
42
44
  belongs_to :cell
43
45
  belongs_to :aacid
44
46
 
45
47
  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"
46
48
 
49
+ def self.from_pileup(pileupline)
50
+
51
+ end
52
+
47
53
  # add this variant to original cell sample
48
54
  #
49
55
  # @param [String] oid
@@ -53,9 +59,9 @@ module Exodb
53
59
 
54
60
  if sample.exists?
55
61
  self.cell = sample.first()
56
- #output.puts "#EXODB:INFO This sample is added to #{dataset.first().name}." if _pry_
62
+ #output.puts "#EXODB:INFO This sample is added to #{dataset.first().name}." if $0 == 'pry'
57
63
  else
58
- #output.puts "#EXODB:ERRO Cannot find dataset by #{str}." if _pry_
64
+ #output.puts "#EXODB:ERRO Cannot find dataset by #{str}." if $0 == 'pry'
59
65
  end
60
66
 
61
67
  self.oid = "#{self.location_str}:#{sample.first().oid}"
@@ -63,6 +69,71 @@ module Exodb
63
69
  end
64
70
 
65
71
  alias_method :add_to_cell, :add_to_sample
72
+
73
+ ## overwrite default assignment of pileuplinet
74
+ ## automatically update :reference, :alternatet, and location
75
+ ##
76
+ ## @param [String] pile-up line from mpileup
77
+ #def pileuplinet=(pileupline)
78
+ # begin
79
+ # if pileupline.is_a?(Bio::DB::Pileup)
80
+ # pile = pileupline
81
+ # else
82
+ # pile = Bio::DB::Pileup.new(pileupline.chomp)
83
+ # end
84
+ #
85
+ # self[:reference] = pile.ref_base
86
+ # self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
87
+ # self[:alternatet] = pile.genotype_list
88
+ # self[:pileuplinet] = pileupline
89
+ # rescue
90
+ # self[:pileuplinet] = pileupline
91
+ # end
92
+ #end
93
+ #
94
+ ## overwrite default assignment of pileuplinen
95
+ ## automatically update :alternaten
96
+ ##
97
+ ## @param [String] pile-up line from mpileup
98
+ #def pileuplinen=(pileupline)
99
+ # begin
100
+ # if pileupline.is_a?(Bio::DB::Pileup)
101
+ # pile = pileupline
102
+ # else
103
+ # pile = Bio::DB::Pileup.new(pileupline.chomp)
104
+ # end
105
+ #
106
+ # if self[:reference] == pile.ref_base
107
+ # self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
108
+ # self[:alternaten] = pile.genotype_list
109
+ # self[:pileuplinen] = pileupline
110
+ # end
111
+ # rescue
112
+ # self[:pileuplinen] = pileupline
113
+ # end
114
+ #end
115
+ #
116
+ ## overwrite default read of pileuplinet
117
+ ##
118
+ ## @return [Bio::DB::Pileup] pile-up object of tumor sample
119
+ #def pileuplinet()
120
+ # return Bio::DB::Pileup.new(self[:pileuplinet])
121
+ #end
122
+ #
123
+ ## overwrite default read of pileuplinen
124
+ ##
125
+ ## @return [Bio::DB::Pileup] pile-up object of normal sample
126
+ #def pileuplinen()
127
+ # return Bio::DB::Pileup.new(self[:pileuplinen])
128
+ #end
129
+
130
+ # apply filter to the variant
131
+ # the result is kept in passfilter
132
+ #
133
+ # @param [String] filter value stored in the :filter field of this variant
134
+ def apply_filter(filter)
135
+ self[:filter] = filter
136
+ end
66
137
  end
67
138
 
68
139
  class SNV < Variant
@@ -0,0 +1,115 @@
1
+
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ # Script used in Rositza project (Naevi, Spitzoid naevi, Spitzoid melanoma)
13
+
14
module Exodb

  module Utils

    module_function

    # Load SNV records from a tab-separated file (with a header row) and save
    # each row as a new SNV attached to the sample named in its "cell" column.
    #
    # NOTE(review): this loader assigns +alternate+, +somaticStatus+, +reads+
    # and +aachange+. Fields with exactly these names appear to have been
    # removed from the Variant model in this release; confirm the setters
    # still exist before relying on this method.
    #
    # @param [String] csvfile path to the tab-separated variant table
    def load_variant_from_csv(csvfile)
      CSV.foreach(csvfile, col_sep: "\t", headers: true) do |record|
        var = SNV.new
        var.parse_location("#{record["chromosome"]}:#{record["start position"]}")
        var.reference = record["ref nucleotide"].split('/')[0]
        var.alternate = record["var nucleotide"].split('/').uniq
        var.somaticStatus = record["Somatic Status"]
        var.reads = record["Reads"]
        # Damaging if any of the three predictors says so.
        var.predicted_damage = !!(record["PolyPhen"] =~ /probably_damaging/ ||
                                  record["SIFT"] =~ /deleterious/i ||
                                  record["PROVEAN"] =~ /deleterious/i)
        var.aachange = record["AA Change"]
        var.add_to_sample(record["cell"])

        p var.save!
      end
    end

    # Load tumor samples from a tab-separated file (with a header row), attach
    # each one to dataset 'internal.ds:000001' and save it.
    #
    # @param [String] csvfile path to the tab-separated sample table
    def load_sample_from_csv(csvfile)
      CSV.foreach(csvfile, col_sep: "\t", headers: true) do |record|
        type = record["Type"]
        # Identifier suffix chosen from the leading word of the Type column.
        code = case type
               when /^spitz /i then '10041632'
               when /^spitzoid /i then '10072450'
               else '10028679'
               end

        # labels belongs inside the attribute hash; it was previously passed
        # as a separate second argument and so never reached the attributes.
        sample = Tumor.new(oid: record["SampleFinal"],
                           type: type.downcase,
                           typeid: "urn:miriam:bioportal.meddra:#{code}",
                           patient: record["SampleFinal"].split('T')[0],
                           preferred: record["Preferred"] == 'Y',
                           paired: !!(record["merge41final"] =~ /\Apaired\z/i),
                           labels: {})

        sample.add_to_dataset('internal.ds:000001')

        p sample.save!
      end
    end

    # Print every line of a tab-separated annotation file as an array of
    # columns. Nothing is stored.
    #
    # @param [String] annfile path to the annotation file
    def load_variant_from_ann(annfile)
      # File.foreach closes the file when iteration finishes.
      File.foreach(annfile) do |line|
        p line.chomp.split("\t")
      end
    end

    # Print the first 22 columns of every line of a tab-separated merge file.
    # Nothing is stored.
    #
    # @param [String] mergefile path to the merge file
    def load_indel_from_merge(mergefile)
      File.foreach(mergefile) do |line|
        record = line.chomp.split("\t")
        p record[0..21]
        #others = record[22..-1]
        #until others.empty?
        #  p others.shift(11)
        #end
      end
    end

    # Load variants from a tab-separated merge file. The first 16 columns of
    # a line are shared header columns; the remaining columns are consumed in
    # groups of seven, one group per sample. For each group the variant is
    # looked up by oid; an existing variant only gets the annotation appended
    # to +temp+, otherwise a new variant is created and attached to the sample.
    #
    # @param [String] mergefile path to the merge file
    def load_variant_from_merge(mergefile)
      File.foreach(mergefile) do |line|
        record = line.chomp.split("\t")
        header = record[0..15]
        annotation = header[0..6].join("\t")

        # record[16..-1] is nil for lines with fewer than 16 columns.
        (record[16..-1] || []).each_slice(7) do |sampledata|
          snvq = Exodb::Variant.where(oid: "#{header[2]}:#{header[3]}:#{sampledata[0]}")
          if snvq.exists?
            snv = snvq.first
            snv.temp ||= [] # temp may never have been set on a stored variant
            snv.temp.push(annotation)
          else
            snv = Exodb::Variant.new
            snv.parse_location("#{header[2]}:#{header[3]}..#{header[3]}")
            snv.reference = header[10]
            snv.pileupt = sampledata[6]
            snv.temp ||= []
            snv.temp.push(annotation)
            snv.add_to_sample(sampledata[0])
          end
          p snv.save!
        end
      end
    end

  end
end
112
+
113
+ #Exodb::Utils.load_sample_from_csv('Samples_all.txt')
114
+ #Exodb::Utils.load_variant_from_csv('NovelSNVs_13.txt')
115
+ #Exodb::Utils.load_variant_from_merge('gilead_ann/tumor/exome.UTR')
@@ -0,0 +1,23 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ # Script used in Rositza project (Naevi, Spitzoid naevi, Spitzoid melanoma)
13
+
14
+ require 'exodb/rositza/load.rb'
15
+
16
+ module Exodb
17
+
18
+ module Utils
19
+
20
+ module_function
21
+
22
+ end
23
+ end
@@ -0,0 +1,39 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
module Exodb

  module Ensembl

    # Thin client for the Ensembl REST service at http://rest.ensembl.org.
    # A single Net::HTTP object is created when the module is loaded and
    # used for every request.
    module REST
      @@url = URI.parse('http://rest.ensembl.org')
      @@http = Net::HTTP.new(@@url.host, @@url.port)

      module_function

      # Send a GET request and return the parsed JSON body.
      #
      # @param [String] getpart request path, with or without a leading "/"
      # @param [Hash] options extra request headers; they are merged over the
      #   default 'Content-Type' => 'application/json' header
      # @return [Object] the response body parsed by JSON.parse
      # @raise [InvalidResponse] when the response code is not "200"
      #   (the InvalidResponse constant is expected to be defined elsewhere in
      #   the project -- TODO confirm)
      def get(getpart, options = {})
        # An HTTP request line needs an absolute path.
        path = getpart.start_with?('/') ? getpart : "/#{getpart}"
        request = Net::HTTP::Get.new(path, {'Content-Type' => 'application/json'}.merge(options))
        response = @@http.request(request)
        if response.code != "200"
          raise InvalidResponse, "Invalid response: #{response.code}"
        end

        JSON.parse(response.body)
      end

      # Query the variant effect predictor region endpoint,
      # vep/<species>/region/<region>/<allele>.
      #
      # @param [String] region region string placed in the request path
      # @param [String] allele variant allele placed in the request path
      # @param [String] species species name or alias placed in the request path
      # @param [Hash] options extra request headers, passed through to get
      # @return [Object] the parsed JSON response
      def fetch_variant(region, allele, species, options = {})
        Exodb::Ensembl::REST.get("vep/#{species}/region/#{region}/#{allele}", options)
      end

    end

  end
end
@@ -150,7 +150,7 @@ module Exodb
150
150
 
151
151
  end
152
152
 
153
- p gene.save!
153
+ puts "STATUS: #{gene.save! ? "SUCCESS" : "FAIL"}: Deposit Gene reference #{gene.xrefs[0]}"
154
154
 
155
155
  end
156
156
 
@@ -9,7 +9,6 @@
9
9
 
10
10
  # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
11
 
12
- require 'csv'
13
12
 
14
13
  module Exodb
15
14
 
@@ -17,54 +16,15 @@ module Exodb
17
16
 
18
17
  module_function
19
18
 
20
- def load_variant_from_csv(csvfile)
21
-
22
- CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
23
-
24
- var = SNV.new()
25
- var.parse_location("#{record["chromosome"]}:#{record["start position"]}")
26
- var.reference = record["ref nucleotide"].split('/')[0]
27
- var.alternate = record["var nucleotide"].split('/').uniq
28
- var.somaticStatus = record["Somatic Status"]
29
- var.reads = record["Reads"]
30
- var.predicted_damage = record["PolyPhen"] =~ /probably_damaging/ || record["SIFT"] =~ /deleterious/i || record["PROVEAN"] =~ /deleterious/i ? true : false
31
- var.aachange = record["AA Change"]
32
- var.add_to_sample(record["cell"])
33
-
34
- p var.save!
35
- end
36
- end
37
-
38
- def load_sample_from_csv(csvfile)
39
- CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
40
-
41
- sample = Tumor.new({oid: record["SampleFinal"],
42
- type: record["Type"].downcase,
43
- typeid: "urn:miriam:bioportal.meddra:#{record["Type"] =~ /^spitz /i ? '10041632' : record["Type"] =~ /^spitzoid /i ? '10072450' : '10028679'}",
44
- patient: record["SampleFinal"].split('T')[0],
45
- preferred: record["Preferred"] == 'Y' ? true : false,
46
- paired: record["merge41final"] =~ /\Apaired\z/i ? true : false})
47
-
48
- sample.add_to_dataset('internal.ds:000001')
49
-
50
- p sample.save!
51
-
52
- end
53
- end
54
-
55
- def load_variant_from_ann(annfile)
56
-
57
- File.open(annfile).each do |line|
58
- record = line.chomp.split("\t")
59
- p record
60
- end
61
-
62
- end
19
+ #def pop_var_from_sam(bamfile, templatefasta, samplename, normalsample = false, additioninfo = {}, mpileupoptions = {})
20
+ #
21
+ # bam = Bio::DB::Sam.new(bam: bamfile, fasta: templatefasta)
22
+ # bam.samtools
23
+ #end
63
24
 
64
25
  end
65
26
 
66
27
  end
67
28
 
68
- #Exodb::Utils.load_sample_from_csv('Samples_all.txt')
69
- #Exodb::Utils.load_variant_from_csv('NovelSNVs_13.txt')
29
+
70
30
 
data/lib/exodb/utils.rb CHANGED
@@ -13,6 +13,10 @@ require 'exodb/utils/upload_generef.rb'
13
13
  require 'exodb/utils/upload_var.rb'
14
14
  require 'exodb/utils/ensemblrest.rb'
15
15
 
16
+
17
+ #load scripts specific to the Rositza project
18
+ require 'exodb/rositza.rb'
19
+
16
20
  module Exodb
17
21
 
18
22
  module Utils
data/lib/exodb/version.rb CHANGED
@@ -11,5 +11,5 @@
11
11
 
12
12
 
13
13
  module Exodb
14
- VERSION = "0.1.0"
14
+ VERSION = "0.1.1"
15
15
  end
data/lib/exodb.rb CHANGED
@@ -20,6 +20,7 @@ require 'open-uri'
20
20
  require 'net/http'
21
21
  require 'uri'
22
22
  require 'json'
23
+ require 'csv'
23
24
 
24
25
  #External library
25
26
  #require 'bio-vcf'
@@ -27,6 +28,7 @@ require 'highline/import'
27
28
  require 'mongoid'
28
29
  require 'bio'
29
30
  require 'pry'
31
+ require 'bio-samtools'
30
32
 
31
33
  I18n.enforce_available_locales = false
32
34
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: exodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Natapol Pornputtapong
@@ -72,6 +72,7 @@ executables: []
72
72
  extensions: []
73
73
  extra_rdoc_files: []
74
74
  files:
75
+ - README.md
75
76
  - contributors.txt
76
77
  - exodb.gemspec
77
78
  - genome/process_genome_seq.rb
@@ -87,8 +88,11 @@ files:
87
88
  - lib/exodb/datamodel/xrefsfield.rb
88
89
  - lib/exodb/dbconnection.rb
89
90
  - lib/exodb/exception.rb
91
+ - lib/exodb/rositza.rb
92
+ - lib/exodb/rositza/load.rb
90
93
  - lib/exodb/usermanage.rb
91
94
  - lib/exodb/utils.rb
95
+ - lib/exodb/utils/ensemblrest.rb
92
96
  - lib/exodb/utils/upload_generef.rb
93
97
  - lib/exodb/utils/upload_var.rb
94
98
  - lib/exodb/vcf.rb