vardb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 73316c44cd437fe2eef6892b3e00097d28ea23ef
4
+ data.tar.gz: 28be909d305e54450226c784a0c990394477ef1e
5
+ SHA512:
6
+ metadata.gz: 0a4a037984d78107eff470db89cf7d6007d188a85c33c65b046f6147345cd313ec668489b56fe42ada8d9203e7458037628d8769b1c285934f120a1e575f831c
7
+ data.tar.gz: a1520fb5d119c45368db4340e4d317369e1d3b0f5aa55eb0eddf2d919f013f304eb8d22bf00c3b8dcb36bc1013211801fa5a5d77fedf0c6dea3fa894fe467228
@@ -0,0 +1,111 @@
1
+ require_relative 'xls_parser'
2
+ require 'pg'
3
+
4
# Mixin that loads a SNP matrix file and an Excel metadata spreadsheet into
# PostgreSQL. Connection details and file locations are read from ConfigData;
# the target tables (snps, annotations, samples, samples_snps, sample_metadata)
# must already exist.
module Populator
  include XlsParser

  # Reads the matrix file named by ConfigData.get_matrix line by line and
  # inserts its contents.
  #
  # Each line is "<header><whitespace><tab-separated cells>":
  # * header '#snp_pos'    -> one row per cell in `snps`; ids are assigned
  #                           1, 2, 3... and annotation_id equals the SNP id.
  # * header '#annotation' -> one row per cell in `annotations`; each cell is a
  #                           comma-separated list of up to 11 fields.
  # * any other header     -> a sample named after the header; every cell equal
  #                           to '1' adds a row to `samples_snps`.
  #
  # The first sample line is reported as the reference.
  # Blank lines are skipped. The connection is always closed, even on error.
  def populate_matrix
    conn = populator_connection
    begin
      # Matrix file command preparation
      conn.prepare('load_snps', 'INSERT INTO snps (id, locus, annotation_id) values ($1, $2, $3)')
      conn.prepare('load_annos', 'INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)')
      conn.prepare('load_samples_snps', 'INSERT INTO samples_snps (sample_id, snp_id) values ($1, $2)')
      conn.prepare('load_samples', 'INSERT INTO samples (id, name) values ($1, $2)')

      sample_number = 1

      # File.foreach streams the file and closes the handle when done.
      File.foreach(ConfigData.get_matrix) do |line|
        # chomp first: otherwise the last cell of every line carries the
        # newline and a trailing "1\n" would never compare equal to '1'.
        header, line_data = line.chomp.split(' ', 2)
        next if header.nil? # blank line

        cells = line_data.to_s.split("\t")

        case header
        when '#snp_pos'
          puts "populating snps table..."
          insert_snps(conn, cells)
        when '#annotation'
          puts "populating annotations table..."
          insert_annotations(conn, cells)
        else
          if sample_number == 1
            puts "loading reference..."
          else
            puts "loading in sample #{sample_number - 1}..."
          end
          insert_sample(conn, sample_number, header, cells)
          sample_number += 1
        end
      end
    ensure
      conn.close
    end
  end

  # Reads the spreadsheet named by ConfigData.get_metadata (first sheet) and
  # inserts one `sample_metadata` row per data row.
  #
  # Row 1 is the header; data starts at row 2 and ends at the first row whose
  # first column is empty. The row id is the data-row number (row 2 -> id 1).
  # Every cell is sent to the database as a string.
  def populate_metadata
    # Each entry returned by load_meta_fields already starts with ", ", so the
    # entries can be appended directly after "id".
    metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)
    column_list = "id#{metadata_fields.join}"
    placeholders = (1..(metadata_fields.length + 1)).map { |n| "$#{n}" }.join(', ')

    conn = populator_connection
    begin
      # Excel spreadsheet command preparation
      conn.prepare('load_metadata', "INSERT INTO sample_metadata (#{column_list}) values (#{placeholders})")

      # Excel spreadsheet load-ins
      s = Roo::Excel.new(ConfigData.get_metadata)
      s.default_sheet = s.sheets.first

      puts "populating sample metadata..."

      row = 2
      until s.cell(row, 1).nil?
        row_contents = [(row - 1).to_s]
        # Column indices start at 0 on purpose: load_meta_fields walks the
        # header row from index 0 as well, so values stay aligned with fields.
        metadata_fields.length.times do |column|
          row_contents << s.cell(row, column).to_s
        end
        conn.exec_prepared('load_metadata', row_contents)
        row += 1
      end
    ensure
      conn.close
    end
  end

  private

  # Opens a PostgreSQL connection from the settings held by ConfigData.
  def populator_connection
    host = ConfigData.get_connection
    PG::Connection.new(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
  end

  # Inserts one snps row per locus; ids (and annotation ids) count from 1.
  def insert_snps(conn, loci)
    loci.each.with_index(1) do |locus, snp_id|
      conn.exec_prepared('load_snps', [snp_id, locus, snp_id])
    end
  end

  # Inserts one annotations row per cell; ids count from 1.
  # A cell whose first field contains 'intergenic' is stored with that field in
  # `info` and 0 in every other column; otherwise the (up to) 11 fields map to
  # the 11 columns after `id`, missing fields being sent as NULL.
  def insert_annotations(conn, cells)
    cells.each.with_index(1) do |cell, anno_id|
      fields = cell.split(',', 11)
      values =
        if fields.first.to_s.include?('intergenic')
          [anno_id, 0, 0, 0, fields.first, 0, 0, 0, 0, 0, 0, 0]
        else
          [anno_id] + Array.new(11) { |k| fields[k] }
        end
      conn.exec_prepared('load_annos', values)
    end
  end

  # Inserts the sample and one samples_snps row for every cell equal to '1'.
  # SNP ids are 1-based (see insert_snps), so the column position is offset by
  # one to reference the matching snps row.
  # NOTE(review): assumes sample columns line up with the '#snp_pos' columns.
  def insert_sample(conn, sample_id, name, cells)
    conn.exec_prepared('load_samples', [sample_id, name])
    cells.each_with_index do |flag, index|
      conn.exec_prepared('load_samples_snps', [sample_id, index + 1]) if flag == '1'
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require_relative 'xls_parser'
2
+ require 'pg'
3
+
4
# Mixin that creates the PostgreSQL tables later filled by the populate
# methods. Connection details and the metadata spreadsheet location are read
# from ConfigData.
module Builder
  include XlsParser

  # Creates the annotations, snps, samples and samples_snps tables.
  # The connection is always closed, even when a statement fails.
  def format_matrix
    conn = builder_connection
    begin
      puts "formatting annotations table..."
      conn.exec("CREATE TABLE annotations (id numeric(11) PRIMARY KEY, cds varchar(128), transcript varchar(128), transcript_id varchar(128), info text, orientation varchar(128), cds_locus varchar(128), codon_pos varchar(128), codon varchar(128), peptide varchar(128), amino_a varchar(128), syn varchar(128))")

      puts "formatting snps table..."
      conn.exec("CREATE TABLE snps (id numeric(11) PRIMARY KEY, locus numeric(11), annotation_id numeric(11))")

      puts "formatting samples table..."
      conn.exec("CREATE TABLE samples (id numeric(11) PRIMARY KEY, name varchar(128))")

      puts "formatting samples_snps join table..."
      conn.exec("CREATE TABLE samples_snps (sample_id numeric(11), snp_id numeric(11))")
    ensure
      conn.close
    end
  end

  # Creates the sample_metadata table with an `id` primary key plus one
  # varchar(128) column per field reported by XlsParser.load_meta_fields for
  # the spreadsheet named by ConfigData.get_metadata.
  def format_metadata
    metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)

    # Each field already starts with ", ", so the definitions can be appended
    # directly after the primary key. Built with map so the field strings are
    # not modified in place.
    column_definitions = metadata_fields.map { |name| "#{name} varchar(128)" }.join

    conn = builder_connection
    begin
      puts "formatting sample metadata table..."
      conn.exec("CREATE TABLE sample_metadata (id numeric (11) PRIMARY KEY#{column_definitions})")
    ensure
      conn.close
    end
  end

  private

  # Opens a PostgreSQL connection from the settings held by ConfigData.
  def builder_connection
    host = ConfigData.get_connection
    PG::Connection.new(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
  end
end
@@ -0,0 +1,42 @@
1
# Holds the settings shared by the build and populate steps: the database
# connection details and the paths of the metadata and matrix files.
#
# The setters are instance methods (available on any object that includes or
# extends this module); the getters are module methods (ConfigData.get_*).
# Values live in module-level class variables, so they are shared by every
# object using the module.
module ConfigData

  @@host = {}
  @@metadata_file = ''
  @@matrix_file = ''

  # Stores the connection details.
  #
  # connection_hash - Hash with the keys :host, :port, :dbname, :user and
  #                   :password. Every value is converted to a String; a
  #                   missing key is stored as an empty string.
  #
  # Prints the stored details with the password masked, so credentials do not
  # end up in terminal scrollback or captured logs.
  def set_connection(connection_hash)
    @@host = {
      :host => connection_hash[:host].to_s,
      :port => connection_hash[:port].to_s,
      :dbname => connection_hash[:dbname].to_s,
      :user => connection_hash[:user].to_s,
      :password => connection_hash[:password].to_s,
    }
    puts "connection details: #{@@host.merge(:password => '[FILTERED]')}"
  end

  # Returns the Hash stored by set_connection ({} until it has been called).
  def self.get_connection
    @@host
  end

  # Stores the path of the metadata spreadsheet and echoes it.
  def set_metadata(file)
    @@metadata_file = file
    puts "metadata file set to: #{@@metadata_file}"
  end

  # Returns the metadata spreadsheet path ('' until set_metadata is called).
  def self.get_metadata
    @@metadata_file
  end

  # Stores the path of the matrix file and echoes it.
  def set_matrix(file)
    @@matrix_file = file
    puts "matrix file set to: #{@@matrix_file}"
  end

  # Returns the matrix file path ('' until set_matrix is called).
  def self.get_matrix
    @@matrix_file
  end
end
@@ -0,0 +1,34 @@
1
+ require 'roo'
2
+
3
# Reads the header row of an Excel metadata spreadsheet.
module XlsParser
  ## A few rules about Excel files:
  ## 1. .xls only, this can't accept .xlsx
  ## 2. There should be no empty cells in the header row of a sheet (first row)
  ## 3. For good form, there shouldn't be any subsequent rows that are
  ##    longer than the header row
  ## 4. There should not be any duplicate cell names in the header row

  # Returns the field names found in row 1 of the first sheet of `file`.
  #
  # file - path of the .xls spreadsheet.
  #
  # Returns an Array of Strings, each already prefixed with ", " so callers
  # can append them to a SQL column list. Header text has whitespace and the
  # characters - ( ) . / removed; an empty cell becomes "empty<index>".
  #
  # The header row is walked from column index 0 up to the last non-empty
  # header, so the result has one more entry than there are header cells.
  # NOTE(review): column index 0 is presumably never populated (the scan for
  # the header width starts at column 1), which would make the first entry
  # always ", empty0". Callers in this gem read data cells with the same
  # 0-based indices, so the offset is kept to stay aligned with them.
  def self.load_meta_fields(file)
    s = Roo::Excel.new(file)
    s.default_sheet = s.sheets.first

    # First column (counting from 1) whose header cell is empty.
    columns = 1
    columns += 1 until s.cell(1, columns).nil?

    (0...columns).map do |column|
      header = s.cell(1, column)
      if header.nil?
        ", empty#{column}"
      else
        # to_s: a numeric header cell is not a String and has no gsub.
        ", #{header.to_s.gsub(/[\s\-().\/]+/, '')}"
      end
    end
  end
end
data/lib/vardb.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'vardb/snp_db_build'
2
+ require 'vardb/database_populator'
3
+ require 'vardb/snpscript_configdata'
4
+
5
# Entry point of the gem. A Vardb instance combines the configuration setters
# (ConfigData), the table-creation steps (Builder) and the data-loading steps
# (Populator); all of them are instance methods.
#
# Tables must be created before they are populated:
#
#   db = Vardb.new
#   db.set_connection(:host => 'localhost', :port => 5432, :dbname => 'snps',
#                     :user => 'me', :password => 'secret')
#   db.set_matrix('snps.matrix')
#   db.set_metadata('samples.xls')
#   db.format_matrix
#   db.populate_matrix
#   db.format_metadata
#   db.populate_metadata
class Vardb
  include Builder
  include Populator
  include ConfigData
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vardb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Peter McCaffrey
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.17.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.17.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: roo
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.13.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 1.13.0
41
+ description: This gem builds PostgreSQL databases from .matrix files and metadata
42
+ spreadsheets
43
+ email:
44
+ - peter@accetia.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - lib/vardb.rb
50
+ - lib/vardb/snp_db_build.rb
51
+ - lib/vardb/database_populator.rb
52
+ - lib/vardb/xls_parser.rb
53
+ - lib/vardb/snpscript_configdata.rb
54
+ homepage: ''
55
+ licenses:
56
+ - ''
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project:
74
+ rubygems_version: 2.0.3
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Variant database builder
78
+ test_files: []