vardb 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/vardb.rb +0 -20
- data/lib/vardb/database_populator.rb +58 -19
- data/lib/vardb/snp_db_build.rb +79 -15
- data/lib/vardb/xls_parser.rb +1 -1
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6564e63ebd0a500e3440e7ca7137f2c48f16423
|
4
|
+
data.tar.gz: 01681c3020ba950bbe8d132062a5c2902efa4822
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a6244867da744515526f34ddf2151688fbde1443b70c56547cb6abd06965db6d05f97ccb010d2b0f1006804b7367b1ad35f14ee7c3af3e1c0ed043373e7683d
|
7
|
+
data.tar.gz: 8e0e88c954358326f2f8cfda36d5c509c043170b003d492b49c42161521b40773e2eff217b4c32df05a89735a98d22f643e5e05eac4a1111334582468f5013dd
|
data/lib/vardb.rb
CHANGED
@@ -6,24 +6,4 @@ class Vardb
|
|
6
6
|
include Builder
|
7
7
|
include Populator
|
8
8
|
include ConfigData
|
9
|
-
|
10
|
-
#def self.set_connection(connection_hash)
|
11
|
-
#ConfigData.set_connection(connection_hash)
|
12
|
-
#end
|
13
|
-
|
14
|
-
#def self.set_metadata(file)
|
15
|
-
#ConfigData.set_metadata
|
16
|
-
#end
|
17
|
-
|
18
|
-
#def self.set_matrix(file)
|
19
|
-
#ConfigData.set_matrix
|
20
|
-
#end
|
21
|
-
|
22
|
-
#def self.format
|
23
|
-
#Builder.format_database
|
24
|
-
#end
|
25
|
-
|
26
|
-
#def self.populate
|
27
|
-
#Populator.populate_database
|
28
|
-
#end
|
29
9
|
end
|
data/lib/vardb/database_populator.rb
CHANGED
@@ -4,17 +4,27 @@ require 'pg'
|
|
4
4
|
module Populator
|
5
5
|
include XlsParser
|
6
6
|
|
7
|
-
def populate_matrix
|
7
|
+
def populate_matrix(type)
|
8
8
|
|
9
9
|
host = ConfigData.get_connection
|
10
10
|
|
11
|
-
|
11
|
+
if type == 'pg'
|
12
|
+
#PG Connection
|
13
|
+
conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
#Matrix File PG Command Preparation
|
16
|
+
conn.prepare('load_snps', 'INSERT INTO snps (id, locus, annotation_id) values ($1, $2, $3)')
|
17
|
+
conn.prepare('load_annos', 'INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)')
|
18
|
+
conn.prepare('load_samples_snps', 'INSERT INTO samples_snps (sample_id, snp_id) values ($1, $2)')
|
19
|
+
conn.prepare('load_samples', 'INSERT INTO samples (id, name) values ($1, $2)')
|
20
|
+
|
21
|
+
elsif type == 'sqlite'
|
22
|
+
#SQLite Connection
|
23
|
+
db = SQLite3::Database.new "sqlite_db/#{host[:dbname]}.db"
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
|
18
28
|
|
19
29
|
#Matrix File Load-ins
|
20
30
|
text=File.open(ConfigData.get_matrix).read
|
@@ -32,8 +42,12 @@ module Populator
|
|
32
42
|
puts "populating snps table..."
|
33
43
|
snps = line_data.split("\t")
|
34
44
|
snp_counter = 1
|
35
|
-
snps.each do |locus|
|
36
|
-
|
45
|
+
snps.each do |locus|
|
46
|
+
if type == 'pg'
|
47
|
+
conn.exec_prepared('load_snps', [snp_counter, locus, snp_counter])
|
48
|
+
elsif type == 'sqlite'
|
49
|
+
db.execute("INSERT INTO snps (id, locus, annotation_id) VALUES (?,?,?)", [snp_counter, locus, snp_counter])
|
50
|
+
end
|
37
51
|
snp_counter += 1
|
38
52
|
end
|
39
53
|
elsif (header == '#annotation')
|
@@ -44,9 +58,17 @@ module Populator
|
|
44
58
|
anno_vals.each do |anno|
|
45
59
|
anno.insert(0, anno_counter)
|
46
60
|
if anno[1].match('intergenic')
|
47
|
-
|
61
|
+
if type == 'pg'
|
62
|
+
conn.exec_prepared('load_annos', [ anno[0], 0, 0, 0, anno[1], 0, 0, 0, 0, 0, 0, 0 ])
|
63
|
+
elsif type == 'sqlite'
|
64
|
+
db.execute("INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)", [ anno[0], 0, 0, 0, anno[1], 0, 0, 0, 0, 0, 0, 0 ])
|
65
|
+
end
|
48
66
|
else
|
49
|
-
|
67
|
+
if type == 'pg'
|
68
|
+
conn.exec_prepared('load_annos', [ anno[0], anno[1], anno[2], anno[3], anno[4], anno[5], anno[6], anno[7], anno[8], anno[9], anno[10], anno[11] ])
|
69
|
+
elsif type == 'sqlite'
|
70
|
+
db.execute("INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)", [ anno[0], anno[1], anno[2], anno[3], anno[4], anno[5], anno[6], anno[7], anno[8], anno[9], anno[10], anno[11] ])
|
71
|
+
end
|
50
72
|
end
|
51
73
|
anno_counter += 1
|
52
74
|
end
|
@@ -55,11 +77,19 @@ module Populator
|
|
55
77
|
puts "loading reference..."
|
56
78
|
else
|
57
79
|
puts "loading in sample #{sample_number - 1}..."
|
58
|
-
end
|
59
|
-
|
80
|
+
end
|
81
|
+
if type == 'pg'
|
82
|
+
conn.exec_prepared('load_samples', [sample_number, header])
|
83
|
+
elsif type == 'sqlite'
|
84
|
+
db.execute("INSERT INTO samples (id, name) VALUES (?,?)", [sample_number, header])
|
85
|
+
end
|
60
86
|
line_data.split("\t").each_with_index do |n, i|
|
61
87
|
if (n == '1')
|
62
|
-
|
88
|
+
if type == 'pg'
|
89
|
+
conn.exec_prepared('load_samples_snps', [sample_number, i])
|
90
|
+
elsif type == 'sqlite'
|
91
|
+
db.execute("INSERT INTO samples_snps (sample_id, snp_id) VALUES (?,?)", [sample_number, i])
|
92
|
+
end
|
63
93
|
end
|
64
94
|
end
|
65
95
|
sample_number += 1
|
@@ -68,12 +98,10 @@ module Populator
|
|
68
98
|
end
|
69
99
|
end
|
70
100
|
|
71
|
-
def populate_metadata
|
101
|
+
def populate_metadata(type)
|
72
102
|
|
73
103
|
host = ConfigData.get_connection
|
74
104
|
|
75
|
-
conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
|
76
|
-
|
77
105
|
#Excel Spreadsheet Command Preparaton
|
78
106
|
metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)
|
79
107
|
|
@@ -89,7 +117,14 @@ module Populator
|
|
89
117
|
metadata_values_string << ", $#{i+2}"
|
90
118
|
end
|
91
119
|
|
92
|
-
|
120
|
+
if type == 'pg'
|
121
|
+
#PG Connection
|
122
|
+
conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
|
123
|
+
conn.prepare('load_metadata', "INSERT INTO sample_metadata (#{metadata_fields_string}) values (#{metadata_values_string})")
|
124
|
+
elsif type == 'sqlite'
|
125
|
+
#SQLite Connection
|
126
|
+
db = SQLite3::Database.new "sqlite_db/#{host[:dbname]}.db"
|
127
|
+
end
|
93
128
|
|
94
129
|
#Excel Spreadsheet Load-ins
|
95
130
|
s = Roo::Excel.new(ConfigData.get_metadata)
|
@@ -104,7 +139,11 @@ module Populator
|
|
104
139
|
metadata_fields.length.times do |i|
|
105
140
|
row_contents << "#{s.cell(row, i)}"
|
106
141
|
end
|
107
|
-
|
142
|
+
if type == 'pg'
|
143
|
+
conn.exec_prepared('load_metadata', row_contents)
|
144
|
+
elsif type == 'sqlite'
|
145
|
+
db.execute("INSERT INTO sample_metadata (#{metadata_fields_string}) VALUES (#{metadata_values_string})", row_contents)
|
146
|
+
end
|
108
147
|
row += 1
|
109
148
|
end
|
110
149
|
end
|
data/lib/vardb/snp_db_build.rb
CHANGED
@@ -1,31 +1,81 @@
|
|
1
1
|
require_relative 'xls_parser'
|
2
2
|
require 'pg'
|
3
|
+
require 'sqlite3'
|
3
4
|
|
4
5
|
module Builder
|
5
6
|
include XlsParser
|
6
|
-
def format_matrix
|
7
|
+
def format_matrix(type)
|
8
|
+
|
7
9
|
host = ConfigData.get_connection
|
8
10
|
|
9
|
-
|
11
|
+
if type == 'pg'
|
12
|
+
conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
|
10
13
|
|
11
|
-
|
12
|
-
|
14
|
+
puts "formatting annotations table..."
|
15
|
+
conn.exec("CREATE TABLE annotations (id numeric(11) PRIMARY KEY, cds varchar(128), transcript varchar(128), transcript_id varchar(128), info text, orientation varchar(128), cds_locus varchar(128), codon_pos varchar(128), codon varchar(128), peptide varchar(128), amino_a varchar(128), syn varchar(128))")
|
13
16
|
|
14
|
-
|
15
|
-
|
17
|
+
puts "formatting snps table..."
|
18
|
+
conn.exec("CREATE TABLE snps (id numeric(11) PRIMARY KEY, locus numeric(11), annotation_id numeric(11))")
|
16
19
|
|
17
|
-
|
18
|
-
|
20
|
+
puts "formatting samples table..."
|
21
|
+
conn.exec("CREATE TABLE samples (id numeric(11) PRIMARY KEY, name varchar(128))")
|
19
22
|
|
20
|
-
|
21
|
-
|
23
|
+
puts "formatting samples_snps join table..."
|
24
|
+
conn.exec("CREATE TABLE samples_snps (sample_id numeric(11), snp_id numeric(11))")
|
25
|
+
|
26
|
+
elsif type == 'sqlite'
|
27
|
+
db = SQLite3::Database.new "sqlite_db/#{host[:dbname]}.db"
|
28
|
+
|
29
|
+
puts "formatting annotations table..."
|
30
|
+
db.execute <<-SQL
|
31
|
+
create table annotations (
|
32
|
+
id numeric(11) PRIMARY KEY,
|
33
|
+
cds varchar(128),
|
34
|
+
transcript varchar(128),
|
35
|
+
transcript_id varchar(128),
|
36
|
+
info text,
|
37
|
+
orientation varchar(128),
|
38
|
+
cds_locus varchar(128),
|
39
|
+
codon_pos varchar(128),
|
40
|
+
codon varchar(128),
|
41
|
+
peptide varchar(128),
|
42
|
+
amino_a varchar(128),
|
43
|
+
syn varchar(128)
|
44
|
+
);
|
45
|
+
SQL
|
46
|
+
|
47
|
+
puts "formatting snps table..."
|
48
|
+
db.execute <<-SQL
|
49
|
+
create table snps (
|
50
|
+
id numeric(11) PRIMARY KEY,
|
51
|
+
locus numeric(11),
|
52
|
+
annotation_id numeric(11)
|
53
|
+
);
|
54
|
+
SQL
|
55
|
+
|
56
|
+
puts "formatting samples table..."
|
57
|
+
db.execute <<-SQL
|
58
|
+
create table samples (
|
59
|
+
id numeric(11) PRIMARY KEY,
|
60
|
+
name varchar(128)
|
61
|
+
);
|
62
|
+
SQL
|
63
|
+
|
64
|
+
puts "formatting sample_snps join table..."
|
65
|
+
db.execute <<-SQL
|
66
|
+
create table samples_snps (
|
67
|
+
sample_id numeric(11),
|
68
|
+
snp_id numeric(11)
|
69
|
+
);
|
70
|
+
SQL
|
71
|
+
|
72
|
+
end
|
22
73
|
end
|
23
74
|
|
24
|
-
def format_metadata
|
75
|
+
def format_metadata(type)
|
25
76
|
host = ConfigData.get_connection
|
26
77
|
|
27
|
-
|
28
|
-
|
78
|
+
|
29
79
|
metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)
|
30
80
|
|
31
81
|
metadata_field_names = ""
|
@@ -35,7 +85,21 @@ module Builder
|
|
35
85
|
metadata_field_names << name
|
36
86
|
end
|
37
87
|
|
38
|
-
|
39
|
-
|
88
|
+
if type == 'pg'
|
89
|
+
conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
|
90
|
+
|
91
|
+
puts "formatting sample metadata table..."
|
92
|
+
conn.exec("CREATE TABLE sample_metadata (id numeric (11) PRIMARY KEY#{metadata_field_names})")
|
93
|
+
|
94
|
+
elsif type == 'sqlite'
|
95
|
+
db = SQLite3::Database.new "sqlite_db/#{host[:dbname]}.db"
|
96
|
+
|
97
|
+
puts "formatting sample metadata table..."
|
98
|
+
db.execute <<-SQL
|
99
|
+
create table sample_metadata (
|
100
|
+
id numeric(11) PRIMARY KEY#{metadata_field_names}
|
101
|
+
);
|
102
|
+
SQL
|
103
|
+
end
|
40
104
|
end
|
41
105
|
end
|
data/lib/vardb/xls_parser.rb
CHANGED
@@ -24,7 +24,7 @@ module XlsParser
|
|
24
24
|
if s.cell(1, counter).nil?
|
25
25
|
metadata_fields << ", empty#{counter}"
|
26
26
|
else
|
27
|
-
metadata_fields << ", #{s.cell(1, counter).gsub(/\s+/, "").gsub("-","").gsub("(","").gsub(")","").gsub(".","").gsub("/","")}"
|
27
|
+
metadata_fields << ", #{s.cell(1, counter).to_s.gsub(/\s+/, "").gsub("-","").gsub("(","").gsub(")","").gsub(".","").gsub("/","")}"
|
28
28
|
end
|
29
29
|
counter += 1
|
30
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vardb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter McCaffrey
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.13.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: sqlite3
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.3.7
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.3.7
|
41
55
|
description: This gem builds PostgreSQL databases from .matrix files and metadata
|
42
56
|
spreadsheets
|
43
57
|
email:
|
@@ -47,10 +61,10 @@ extensions: []
|
|
47
61
|
extra_rdoc_files: []
|
48
62
|
files:
|
49
63
|
- lib/vardb.rb
|
50
|
-
- lib/vardb/snp_db_build.rb
|
51
64
|
- lib/vardb/database_populator.rb
|
52
|
-
- lib/vardb/
|
65
|
+
- lib/vardb/snp_db_build.rb
|
53
66
|
- lib/vardb/snpscript_configdata.rb
|
67
|
+
- lib/vardb/xls_parser.rb
|
54
68
|
homepage: ''
|
55
69
|
licenses:
|
56
70
|
- ''
|
@@ -71,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
71
85
|
version: '0'
|
72
86
|
requirements: []
|
73
87
|
rubyforge_project:
|
74
|
-
rubygems_version: 2.
|
88
|
+
rubygems_version: 2.2.2
|
75
89
|
signing_key:
|
76
90
|
specification_version: 4
|
77
91
|
summary: Variant database builder
|