vardb 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/vardb/database_populator.rb +111 -0
- data/lib/vardb/snp_db_build.rb +41 -0
- data/lib/vardb/snpscript_configdata.rb +42 -0
- data/lib/vardb/xls_parser.rb +34 -0
- data/lib/vardb.rb +29 -0
- metadata +78 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 73316c44cd437fe2eef6892b3e00097d28ea23ef
|
4
|
+
data.tar.gz: 28be909d305e54450226c784a0c990394477ef1e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0a4a037984d78107eff470db89cf7d6007d188a85c33c65b046f6147345cd313ec668489b56fe42ada8d9203e7458037628d8769b1c285934f120a1e575f831c
|
7
|
+
data.tar.gz: a1520fb5d119c45368db4340e4d317369e1d3b0f5aa55eb0eddf2d919f013f304eb8d22bf00c3b8dcb36bc1013211801fa5a5d77fedf0c6dea3fa894fe467228
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require_relative 'xls_parser'
|
2
|
+
require 'pg'
|
3
|
+
|
4
|
+
module Populator
  include XlsParser

  # Loads the matrix file named by ConfigData.get_matrix into the snps,
  # annotations, samples and samples_snps tables.
  #
  # Each line is split into a leading token and the remainder (on the first
  # run of whitespace); the remainder is then split on tabs:
  #   "#snp_pos"    - one locus per column; row ids are assigned 1, 2, 3, ...
  #   "#annotation" - one comma-separated annotation per column; ids 1, 2, 3, ...
  #   anything else - the token is a sample name and every column holding "1"
  #                   links that sample to the SNP in the same column.
  # Samples are numbered in file order starting at 1; the progress messages
  # treat sample 1 as the reference.
  #
  # Raises whatever the pg driver raises on connection or insert failure; the
  # connection is closed in either case.
  def populate_matrix
    with_populator_connection do |conn|
      # Matrix file command preparation
      conn.prepare('load_snps', 'INSERT INTO snps (id, locus, annotation_id) values ($1, $2, $3)')
      conn.prepare('load_annos', 'INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)')
      conn.prepare('load_samples_snps', 'INSERT INTO samples_snps (sample_id, snp_id) values ($1, $2)')
      conn.prepare('load_samples', 'INSERT INTO samples (id, name) values ($1, $2)')

      # Matrix file load-ins
      sample_number = 1

      # File.foreach streams the file and closes the handle when done.
      File.foreach(ConfigData.get_matrix) do |line|
        # chomp first: otherwise the final column keeps its "\n", and a
        # trailing flag of "1\n" would never compare equal to '1'.
        header, line_data = line.chomp.split(' ', 2)
        next if header.nil? # blank line, nothing to load

        columns = line_data.to_s.split("\t")

        if header == '#snp_pos'
          puts "populating snps table..."
          insert_snps(conn, columns)
        elsif header == '#annotation'
          puts "populating annotations table..."
          insert_annotations(conn, columns)
        else
          if sample_number == 1
            puts "loading reference..."
          else
            puts "loading in sample #{sample_number - 1}..."
          end
          insert_sample(conn, sample_number, header, columns)
          sample_number += 1
        end
      end
    end
  end

  # Loads the metadata spreadsheet named by ConfigData.get_metadata into the
  # sample_metadata table, one table row per spreadsheet row starting at
  # row 2, stopping at the first row whose first cell is empty.
  #
  # The column list comes from XlsParser.load_meta_fields; every entry it
  # returns already starts with ", ", so the entries are concatenated as-is.
  def populate_metadata
    with_populator_connection do |conn|
      # Excel spreadsheet command preparation
      metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)

      metadata_fields_string = "id " + metadata_fields.join
      # $1 is the id; $2.. are one placeholder per metadata field.
      metadata_values_string = "$1 " + (2..metadata_fields.length + 1).map { |n| ", $#{n}" }.join

      # NOTE(review): column names are interpolated into SQL straight from the
      # spreadsheet header; only trusted spreadsheets should be loaded.
      conn.prepare('load_metadata', "INSERT INTO sample_metadata (#{metadata_fields_string}) values (#{metadata_values_string})")

      # Excel spreadsheet load-ins
      s = Roo::Excel.new(ConfigData.get_metadata)
      s.default_sheet = s.sheets.first

      puts "populating sample metadata..."

      row = 2
      until s.cell(row, 1).nil?
        row_contents = ["#{row-1}"]
        # Column indexes run 0...metadata_fields.length, the same indexes
        # XlsParser.load_meta_fields walks when it builds the field list.
        metadata_fields.length.times do |i|
          row_contents << "#{s.cell(row, i)}"
        end
        conn.exec_prepared('load_metadata', row_contents)
        row += 1
      end
    end
  end

  private

  # Opens a connection from ConfigData's settings, yields it, and always
  # closes it afterwards, even when the block raises.
  def with_populator_connection
    host = ConfigData.get_connection
    conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
    begin
      yield conn
    ensure
      conn.finish
    end
  end

  # Inserts one snps row per locus; id and annotation_id are both the
  # 1-based column position.
  def insert_snps(conn, loci)
    loci.each_with_index do |locus, index|
      snp_id = index + 1
      conn.exec_prepared('load_snps', [snp_id, locus, snp_id])
    end
  end

  # Inserts one annotations row per column. A column whose first field
  # mentions "intergenic" stores that text in info and 0 everywhere else;
  # otherwise up to 11 comma-separated fields map onto cds..syn in order.
  def insert_annotations(conn, annotation_columns)
    annotation_columns.each_with_index do |column, index|
      anno_id = index + 1
      fields = column.split(',', 11)
      # to_s guards an empty column, where split returns no fields at all.
      if fields.first.to_s.include?('intergenic')
        conn.exec_prepared('load_annos', [anno_id, 0, 0, 0, fields.first, 0, 0, 0, 0, 0, 0, 0])
      else
        # Pad to exactly 11 values so short annotations bind NULLs.
        conn.exec_prepared('load_annos', [anno_id] + Array.new(11) { |i| fields[i] })
      end
    end
  end

  # Inserts the samples row, then one samples_snps row per column flagged "1".
  def insert_sample(conn, sample_number, name, flags)
    conn.exec_prepared('load_samples', [sample_number, name])
    flags.each_with_index do |flag, index|
      next unless flag == '1'
      # snps ids are 1-based (see insert_snps), so column index 0 is SNP 1.
      conn.exec_prepared('load_samples_snps', [sample_number, index + 1])
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require_relative 'xls_parser'
|
2
|
+
require 'pg'
|
3
|
+
|
4
|
+
module Builder
  include XlsParser

  # Creates the four tables the matrix loader writes to: annotations, snps,
  # samples and the samples_snps join table. Each CREATE TABLE is issued as
  # written, so the call raises if a table already exists. The connection is
  # closed whether or not a statement fails.
  def format_matrix
    with_builder_connection do |conn|
      puts "formatting annotations table..."
      conn.exec("CREATE TABLE annotations (id numeric(11) PRIMARY KEY, cds varchar(128), transcript varchar(128), transcript_id varchar(128), info text, orientation varchar(128), cds_locus varchar(128), codon_pos varchar(128), codon varchar(128), peptide varchar(128), amino_a varchar(128), syn varchar(128))")

      puts "formatting snps table..."
      conn.exec("CREATE TABLE snps (id numeric(11) PRIMARY KEY, locus numeric(11), annotation_id numeric(11))")

      puts "formatting samples table..."
      conn.exec("CREATE TABLE samples (id numeric(11) PRIMARY KEY, name varchar(128))")

      puts "formatting samples_snps join table..."
      conn.exec("CREATE TABLE samples_snps (sample_id numeric(11), snp_id numeric(11))")
    end
  end

  # Creates the sample_metadata table with an id column plus one
  # varchar(128) column per field returned by XlsParser.load_meta_fields
  # for the spreadsheet named by ConfigData.get_metadata.
  def format_metadata
    with_builder_connection do |conn|
      metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)

      # Every field already starts with ", ", so the definitions can be
      # appended directly after the primary key column.
      metadata_field_names = metadata_fields.map { |name| "#{name} varchar(128)" }.join

      puts "formatting sample metadata table..."
      # NOTE(review): column names come straight from the spreadsheet header
      # and are interpolated into DDL; only trusted spreadsheets should be used.
      conn.exec("CREATE TABLE sample_metadata (id numeric (11) PRIMARY KEY#{metadata_field_names})")
    end
  end

  private

  # Opens a connection from ConfigData's settings, yields it, and always
  # closes it afterwards, even when the block raises.
  def with_builder_connection
    host = ConfigData.get_connection
    conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
    begin
      yield conn
    ensure
      conn.finish
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Process-wide settings shared by the table builder and the loader: the
# database connection parameters and the two input file paths.
#
# Setters are instance methods (available on any object that mixes this
# module in); getters are module-level methods (ConfigData.get_*). State is
# held in class variables of this module, so every includer shares the same
# values.
module ConfigData

  @@host = {}
  @@metadata_file = ''
  @@matrix_file = ''

  # Keys copied from the hash given to set_connection.
  CONNECTION_KEYS = [:host, :port, :dbname, :user, :password]

  # host connection

  # Stores the connection parameters. Each value is converted to a fresh
  # String (a missing key becomes ""), then the settings are echoed to
  # stdout with the password masked so credentials never reach logs.
  def set_connection(connection_hash)
    settings = {}
    CONNECTION_KEYS.each do |key|
      settings[key] = "#{connection_hash[key]}"
    end
    @@host = settings
    puts "connection details: #{@@host.merge(:password => '[FILTERED]')}"
  end

  # Returns the stored connection hash (including the real password).
  def self.get_connection
    @@host
  end

  # metadata file

  # Stores the path of the metadata spreadsheet and echoes it to stdout.
  def set_metadata(file)
    @@metadata_file = file
    puts "metadata file set to: #{@@metadata_file}"
  end

  # Returns the stored metadata spreadsheet path ('' until set).
  def self.get_metadata
    @@metadata_file
  end

  # matrix file

  # Stores the path of the matrix file and echoes it to stdout.
  def set_matrix(file)
    @@matrix_file = file
    puts "matrix file set to: #{@@matrix_file}"
  end

  # Returns the stored matrix file path ('' until set).
  def self.get_matrix
    @@matrix_file
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'roo'
|
2
|
+
|
3
|
+
module XlsParser
  ## Expectations for the spreadsheet handed to this parser:
  ##  1. It is a legacy .xls workbook; .xlsx is not accepted.
  ##  2. The header row (row 1) has no empty cells between names.
  ##  3. Ideally no later row is longer than the header row.
  ##  4. Header names are unique.

  ## Reads the header row of the first sheet and returns an Array of SQL
  ## column-name fragments, each prefixed with ", ".
  ##
  ## Columns are probed from index 0 up to and including the last header cell
  ## found by scanning from column 1 to the first nil. A nil header
  ## cell at index N yields ", emptyN"; any other cell has whitespace and the
  ## characters - ( ) . / stripped from its text.
  def self.load_meta_fields(file)
    sheet = Roo::Excel.new(file)
    sheet.default_sheet = sheet.sheets.first

    # Index of the first empty header cell, scanning from column 1.
    first_blank = 1
    first_blank += 1 until sheet.cell(1, first_blank).nil?

    (0...first_blank).map do |column|
      heading = sheet.cell(1, column)
      if heading.nil?
        ", empty#{column}"
      else
        ", #{heading.gsub(/[\s\-().\/]/, "")}"
      end
    end
  end
end
|
data/lib/vardb.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'vardb/snp_db_build'
|
2
|
+
require 'vardb/database_populator'
|
3
|
+
require 'vardb/snpscript_configdata'
|
4
|
+
|
5
|
+
# Entry-point class of the gem. It defines no behaviour of its own: instances
# get the table-creation methods from Builder, the loading methods from
# Populator and the settings setters from ConfigData.
class Vardb
  # Mixed in one at a time, in this order, so the ancestor chain matches
  # three consecutive `include` statements.
  [Builder, Populator, ConfigData].each { |mixin| include mixin }

  # Disabled class-level convenience wrappers, kept for reference.
  # NOTE(review): Builder.format_database and Populator.populate_database are
  # not defined in those modules as shipped, and the set_metadata/set_matrix
  # wrappers drop their `file` argument; fix both before enabling.
  #
  #def self.set_connection(connection_hash)
  #  ConfigData.set_connection(connection_hash)
  #end

  #def self.set_metadata(file)
  #  ConfigData.set_metadata
  #end

  #def self.set_matrix(file)
  #  ConfigData.set_matrix
  #end

  #def self.format
  #  Builder.format_database
  #end

  #def self.populate
  #  Populator.populate_database
  #end
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vardb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter McCaffrey
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-12-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pg
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.17.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.17.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: roo
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.13.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.13.0
|
41
|
+
description: This gem builds PostgreSQL databases from .matrix files and metadata
|
42
|
+
spreadsheets
|
43
|
+
email:
|
44
|
+
- peter@accetia.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/vardb.rb
|
50
|
+
- lib/vardb/snp_db_build.rb
|
51
|
+
- lib/vardb/database_populator.rb
|
52
|
+
- lib/vardb/xls_parser.rb
|
53
|
+
- lib/vardb/snpscript_configdata.rb
|
54
|
+
homepage: ''
|
55
|
+
licenses:
|
56
|
+
- ''
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 2.0.3
|
75
|
+
signing_key:
|
76
|
+
specification_version: 4
|
77
|
+
summary: Variant database builder
|
78
|
+
test_files: []
|