vardb 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/vardb/database_populator.rb +111 -0
- data/lib/vardb/snp_db_build.rb +41 -0
- data/lib/vardb/snpscript_configdata.rb +42 -0
- data/lib/vardb/xls_parser.rb +34 -0
- data/lib/vardb.rb +29 -0
- metadata +78 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 73316c44cd437fe2eef6892b3e00097d28ea23ef
|
4
|
+
data.tar.gz: 28be909d305e54450226c784a0c990394477ef1e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0a4a037984d78107eff470db89cf7d6007d188a85c33c65b046f6147345cd313ec668489b56fe42ada8d9203e7458037628d8769b1c285934f120a1e575f831c
|
7
|
+
data.tar.gz: a1520fb5d119c45368db4340e4d317369e1d3b0f5aa55eb0eddf2d919f013f304eb8d22bf00c3b8dcb36bc1013211801fa5a5d77fedf0c6dea3fa894fe467228
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require_relative 'xls_parser'
|
2
|
+
require 'pg'
|
3
|
+
|
4
|
+
# Loads variant data into PostgreSQL. The SNP matrix file fills the snps,
# annotations, samples and samples_snps tables; the metadata spreadsheet fills
# sample_metadata. Connection settings and file paths are read from ConfigData.
# The public methods are instance methods, intended to be mixed into a class.
module Populator
  include XlsParser

  # Reads the matrix file at ConfigData.get_matrix line by line and inserts
  # its contents. Lines are dispatched on their first whitespace-delimited
  # token:
  #
  #   '#snp_pos'    - the remaining tab-separated values are loci; one snps
  #                   row is inserted per locus, with ids counted from 1.
  #   '#annotation' - the remaining tab-separated values are annotations, each
  #                   a comma-separated list of up to 11 values; one
  #                   annotations row is inserted per entry, ids from 1.
  #   anything else - a sample row: the token is the sample name and the
  #                   remaining tab-separated values are flags, where '1'
  #                   links the sample to the SNP in that column.
  #
  # Blank lines are skipped. The database connection is closed even when an
  # insert raises.
  def populate_matrix
    conn = open_populator_connection
    begin
      prepare_matrix_statements(conn)

      sample_number = 1
      # File.foreach streams the file and closes the handle when done.
      File.foreach(ConfigData.get_matrix) do |line|
        # chomp first: otherwise the last column keeps its newline and a
        # trailing "1\n" flag would never compare equal to '1'.
        header, line_data = line.chomp.split(' ', 2)
        next if header.nil?

        fields = line_data.to_s.split("\t")
        case header
        when '#snp_pos'
          load_snps(conn, fields)
        when '#annotation'
          load_annotations(conn, fields)
        else
          load_sample(conn, sample_number, header, fields)
          sample_number += 1
        end
      end
    ensure
      conn.finish
    end
  end

  # Reads the spreadsheet at ConfigData.get_metadata and inserts one
  # sample_metadata row per data row, starting at spreadsheet row 2 and
  # stopping at the first row whose first cell is empty. Row ids count from 1.
  # The database connection is closed even when an insert raises.
  def populate_metadata
    conn = open_populator_connection
    begin
      # Excel Spreadsheet Command Preparation
      # Each entry already carries its leading ", " separator.
      # NOTE(review): column names come from the spreadsheet header and are
      # interpolated into the SQL text; only the row values are bound as
      # parameters.
      metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)
      metadata_fields_string = "id #{metadata_fields.join}"
      placeholders = (2..metadata_fields.length + 1).map { |position| ", $#{position}" }
      metadata_values_string = "$1 #{placeholders.join}"

      conn.prepare('load_metadata', "INSERT INTO sample_metadata (#{metadata_fields_string}) values (#{metadata_values_string})")

      # Excel Spreadsheet Load-ins
      sheet = Roo::Excel.new(ConfigData.get_metadata)
      sheet.default_sheet = sheet.sheets.first

      puts "populating sample metadata..."

      row = 2
      until sheet.cell(row, 1).nil?
        # Column indexes start at 0 on purpose: they line up one-to-one with
        # the entries of metadata_fields, whose first entry is built from
        # column index 0 as well.
        cells = Array.new(metadata_fields.length) { |column| sheet.cell(row, column).to_s }
        conn.exec_prepared('load_metadata', [(row - 1).to_s] + cells)
        row += 1
      end
    ensure
      conn.finish
    end
  end

  private

  # Opens a PostgreSQL connection from the settings held by ConfigData.
  def open_populator_connection
    host = ConfigData.get_connection
    PG::Connection.new(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
  end

  # Matrix File Command Preparation: registers the four insert statements
  # used while loading the matrix file.
  def prepare_matrix_statements(conn)
    conn.prepare('load_snps', 'INSERT INTO snps (id, locus, annotation_id) values ($1, $2, $3)')
    conn.prepare('load_annos', 'INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)')
    conn.prepare('load_samples_snps', 'INSERT INTO samples_snps (sample_id, snp_id) values ($1, $2)')
    conn.prepare('load_samples', 'INSERT INTO samples (id, name) values ($1, $2)')
  end

  # Inserts one snps row per locus. The id doubles as annotation_id, pairing
  # the n-th SNP with the n-th annotation.
  def load_snps(conn, loci)
    puts "populating snps table..."
    loci.each_with_index do |locus, index|
      snp_id = index + 1
      conn.exec_prepared('load_snps', [snp_id, locus, snp_id])
    end
  end

  # Inserts one annotations row per entry. Entries whose first value mentions
  # 'intergenic' keep only that value (stored in info); every other column is
  # written as 0.
  def load_annotations(conn, entries)
    puts "populating annotations table..."
    entries.each_with_index do |entry, index|
      anno_id = index + 1
      values = entry.split(',', 11)
      if values[0].to_s.include?('intergenic')
        conn.exec_prepared('load_annos', [anno_id, 0, 0, 0, values[0], 0, 0, 0, 0, 0, 0, 0])
      else
        # Always bind exactly 11 values; missing trailing ones become NULL.
        conn.exec_prepared('load_annos', [anno_id] + Array.new(11) { |position| values[position] })
      end
    end
  end

  # Inserts the samples row and one samples_snps row per '1' flag. The first
  # sample line of the file is reported as the reference.
  def load_sample(conn, sample_number, name, flags)
    if sample_number == 1
      puts "loading reference..."
    else
      puts "loading in sample #{sample_number - 1}..."
    end
    conn.exec_prepared('load_samples', [sample_number, name])
    flags.each_with_index do |flag, index|
      next unless flag == '1'

      # snps.id counts from 1, so the 0-based column index is shifted by one
      # to reference the matching snps row.
      conn.exec_prepared('load_samples_snps', [sample_number, index + 1])
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require_relative 'xls_parser'
|
2
|
+
require 'pg'
|
3
|
+
|
4
|
+
# Creates the PostgreSQL tables that the loader fills. Connection settings
# and the metadata spreadsheet path are read from ConfigData. The public
# methods are instance methods, intended to be mixed into a class.
module Builder
  include XlsParser

  # Creates the annotations, snps, samples and samples_snps tables.
  # The database connection is closed even when a statement raises
  # (for example because a table already exists).
  def format_matrix
    conn = open_builder_connection
    begin
      puts "formatting annotations table..."
      conn.exec("CREATE TABLE annotations (id numeric(11) PRIMARY KEY, cds varchar(128), transcript varchar(128), transcript_id varchar(128), info text, orientation varchar(128), cds_locus varchar(128), codon_pos varchar(128), codon varchar(128), peptide varchar(128), amino_a varchar(128), syn varchar(128))")

      puts "formatting snps table..."
      conn.exec("CREATE TABLE snps (id numeric(11) PRIMARY KEY, locus numeric(11), annotation_id numeric(11))")

      puts "formatting samples table..."
      conn.exec("CREATE TABLE samples (id numeric(11) PRIMARY KEY, name varchar(128))")

      puts "formatting samples_snps join table..."
      conn.exec("CREATE TABLE samples_snps (sample_id numeric(11), snp_id numeric(11))")
    ensure
      conn.finish
    end
  end

  # Creates the sample_metadata table with a numeric id plus one varchar(128)
  # column per field reported by XlsParser.load_meta_fields for the
  # spreadsheet at ConfigData.get_metadata.
  # The database connection is closed even when the statement raises.
  def format_metadata
    conn = open_builder_connection
    begin
      metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)

      # Each field already carries its leading ", " separator. Build new
      # strings rather than appending to the parser's own return values.
      # NOTE(review): column names come from the spreadsheet header and are
      # interpolated into the DDL text.
      metadata_field_names = metadata_fields.map { |name| "#{name} varchar(128)" }.join

      puts "formatting sample metadata table..."
      conn.exec("CREATE TABLE sample_metadata (id numeric (11) PRIMARY KEY#{metadata_field_names})")
    ensure
      conn.finish
    end
  end

  private

  # Opens a PostgreSQL connection from the settings held by ConfigData.
  def open_builder_connection
    host = ConfigData.get_connection
    PG::Connection.new(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Holds the settings shared by the table builder and the loader: the
# database connection parameters and the paths of the metadata spreadsheet
# and the matrix file.
#
# The setters are instance methods (available on any object whose class
# includes this module); the getters are module-level methods. The values live
# in class variables of this module, so every including class and every
# instance reads and writes the same single set of settings.
module ConfigData

  @@host = {}
  @@metadata_file = ''
  @@matrix_file = ''

  # host connection

  # Stores the connection parameters.
  #
  # connection_hash - Hash read with the Symbol keys :host, :port, :dbname,
  #                   :user and :password. Each value is converted with to_s,
  #                   so a missing key is stored as an empty string.
  #
  # Prints the stored settings with the password masked, so credentials do
  # not end up in terminal scrollback or captured logs.
  def set_connection(connection_hash)
    @@host = [:host, :port, :dbname, :user, :password].each_with_object({}) do |key, settings|
      settings[key] = connection_hash[key].to_s
    end
    puts "connection details: #{@@host.merge(:password => '[FILTERED]')}"
  end

  # Returns the Hash stored by the last set_connection call ({} before any call).
  def self.get_connection
    @@host
  end

  # metadata file

  # Stores the path of the metadata spreadsheet and echoes it.
  def set_metadata(file)
    @@metadata_file = file
    puts "metadata file set to: #{@@metadata_file}"
  end

  # Returns the value stored by the last set_metadata call ('' before any call).
  def self.get_metadata
    @@metadata_file
  end

  # matrix file

  # Stores the path of the matrix file and echoes it.
  def set_matrix(file)
    @@matrix_file = file
    puts "matrix file set to: #{@@matrix_file}"
  end

  # Returns the value stored by the last set_matrix call ('' before any call).
  def self.get_matrix
    @@matrix_file
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'roo'
|
2
|
+
|
3
|
+
module XlsParser
  ## A few rules about Excel files:
  ## 1. .xls only, this can't accept .xlsx
  ## 2. There should be no empty cells in the header row of a sheet (first row)
  ## 3. For good form, there shouldn't be any subsequent rows that are
  ##    longer than the header row
  ## 4. There should not be any duplicate cell names in the header row

  # Reads the header row (row 1) of the first sheet of +file+ and returns an
  # Array of column-name fragments for building SQL column lists.
  #
  # Every fragment starts with ", ". Whitespace and the characters
  # - ( ) . / are stripped from each header. Header cells that are not
  # Strings (e.g. numeric cells) are converted with to_s first instead of
  # raising.
  #
  # The scan covers column indexes 0 up to the last filled header column.
  # An index whose header cell is empty yields ", empty<index>". The loop
  # below starts looking for headers at column 1, so index 0 is expected to
  # be empty and the first fragment is normally ", empty0"; the loader and
  # table builder read spreadsheet cells from index 0 to stay aligned with
  # this list, so that leading entry is kept.
  def XlsParser.load_meta_fields(file)
    sheet = Roo::Excel.new(file)
    sheet.default_sheet = sheet.sheets.first

    # Index of the first empty header cell, searching from column 1.
    first_empty = 1
    first_empty += 1 until sheet.cell(1, first_empty).nil?

    (0...first_empty).map do |index|
      header = sheet.cell(1, index)
      if header.nil?
        ", empty#{index}"
      else
        ", #{header.to_s.gsub(%r{[\s\-()./]+}, '')}"
      end
    end
  end
end
|
data/lib/vardb.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'vardb/snp_db_build'
|
2
|
+
require 'vardb/database_populator'
|
3
|
+
require 'vardb/snpscript_configdata'
|
4
|
+
|
5
|
+
# Entry-point class of the gem. It defines no behaviour of its own; an
# instance gains the table-creation methods from Builder, the loading methods
# from Populator and the configuration setters from ConfigData.
class Vardb
  # Mixed in one at a time, in this order, so the ancestor chain is the same
  # as three consecutive `include` statements.
  [Builder, Populator, ConfigData].each { |mixin| include mixin }

  # NOTE: the class-level wrappers below are disabled. As written they call
  # ConfigData.set_metadata / ConfigData.set_matrix without an argument, and
  # Builder.format_database / Populator.populate_database, which the mixed-in
  # modules do not define.

  #def self.set_connection(connection_hash)
  #ConfigData.set_connection(connection_hash)
  #end

  #def self.set_metadata(file)
  #ConfigData.set_metadata
  #end

  #def self.set_matrix(file)
  #ConfigData.set_matrix
  #end

  #def self.format
  #Builder.format_database
  #end

  #def self.populate
  #Populator.populate_database
  #end
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vardb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter McCaffrey
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-12-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pg
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.17.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.17.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: roo
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.13.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.13.0
|
41
|
+
description: This gem builds PostgreSQL databases from .matrix files and metadata
|
42
|
+
spreadsheets
|
43
|
+
email:
|
44
|
+
- peter@accetia.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/vardb.rb
|
50
|
+
- lib/vardb/snp_db_build.rb
|
51
|
+
- lib/vardb/database_populator.rb
|
52
|
+
- lib/vardb/xls_parser.rb
|
53
|
+
- lib/vardb/snpscript_configdata.rb
|
54
|
+
homepage: ''
|
55
|
+
licenses:
|
56
|
+
- ''
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 2.0.3
|
75
|
+
signing_key:
|
76
|
+
specification_version: 4
|
77
|
+
summary: Variant database builder
|
78
|
+
test_files: []
|