vardb 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 73316c44cd437fe2eef6892b3e00097d28ea23ef
4
+ data.tar.gz: 28be909d305e54450226c784a0c990394477ef1e
5
+ SHA512:
6
+ metadata.gz: 0a4a037984d78107eff470db89cf7d6007d188a85c33c65b046f6147345cd313ec668489b56fe42ada8d9203e7458037628d8769b1c285934f120a1e575f831c
7
+ data.tar.gz: a1520fb5d119c45368db4340e4d317369e1d3b0f5aa55eb0eddf2d919f013f304eb8d22bf00c3b8dcb36bc1013211801fa5a5d77fedf0c6dea3fa894fe467228
@@ -0,0 +1,111 @@
1
+ require_relative 'xls_parser'
2
+ require 'pg'
3
+
4
# Loads a SNP matrix file and a metadata spreadsheet into PostgreSQL.
#
# Connection settings and input paths are read from ConfigData
# (ConfigData.get_connection, ConfigData.get_matrix, ConfigData.get_metadata).
# The target tables (snps, annotations, samples, samples_snps,
# sample_metadata) must already exist.
module Populator
  include XlsParser

  # Reads the matrix file line by line and inserts its contents.
  #
  # Each line is split on its first run of whitespace into a header and a
  # tab-separated list of columns:
  #
  # * '#snp_pos'    - one locus per column, inserted into snps with ids 1..N
  #                   (annotation_id is set to the same number).
  # * '#annotation' - one comma-separated annotation per column, inserted
  #                   into annotations with ids 1..N.
  # * anything else - a sample line: the header is stored as the sample name
  #                   and every column equal to '1' links the sample to the
  #                   SNP in that column via samples_snps.
  #
  # Blank lines are skipped. Returns nil.
  def populate_matrix
    with_populator_connection do |conn|
      # Matrix file command preparation
      conn.prepare('load_snps', 'INSERT INTO snps (id, locus, annotation_id) values ($1, $2, $3)')
      conn.prepare('load_annos', 'INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)')
      conn.prepare('load_samples_snps', 'INSERT INTO samples_snps (sample_id, snp_id) values ($1, $2)')
      conn.prepare('load_samples', 'INSERT INTO samples (id, name) values ($1, $2)')

      sample_number = 1

      # File.foreach streams the file and closes the handle when done.
      File.foreach(ConfigData.get_matrix) do |line|
        # chomp first: otherwise the last column keeps its "\n" and a final
        # "1\n" would never compare equal to '1'.
        header, line_data = line.chomp.split(' ', 2)
        next if header.nil? # blank line

        columns = line_data.to_s.split("\t")

        case header
        when '#snp_pos'
          puts "populating snps table..."
          insert_snps(conn, columns)
        when '#annotation'
          puts "populating annotations table..."
          insert_annotations(conn, columns)
        else
          if sample_number == 1
            puts "loading reference..."
          else
            puts "loading in sample #{sample_number - 1}..."
          end
          insert_sample(conn, sample_number, header, columns)
          sample_number += 1
        end
      end
    end
    nil
  end

  # Inserts one sample_metadata row per spreadsheet row, starting at row 2
  # and stopping at the first row whose first cell is empty.
  #
  # The column list comes from XlsParser.load_meta_fields; the row id is the
  # spreadsheet row number minus one. Returns nil.
  def populate_metadata
    # Excel spreadsheet command preparation
    metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)

    # Every field already carries its own leading ", " separator.
    column_list = "id #{metadata_fields.join}"
    placeholders = (1..metadata_fields.length + 1).map { |n| "$#{n}" }.join(', ')

    with_populator_connection do |conn|
      conn.prepare('load_metadata', "INSERT INTO sample_metadata (#{column_list}) values (#{placeholders})")

      # Excel spreadsheet load-ins
      sheet = Roo::Excel.new(ConfigData.get_metadata)
      sheet.default_sheet = sheet.sheets.first

      puts "populating sample metadata..."

      row = 2
      until sheet.cell(row, 1).nil?
        values = [(row - 1).to_s]
        # Column indexes start at 0 on purpose: XlsParser.load_meta_fields
        # builds its field list with the same 0-based index, so value i here
        # lines up with field i there.
        metadata_fields.length.times { |col| values << sheet.cell(row, col).to_s }
        conn.exec_prepared('load_metadata', values)
        row += 1
      end
    end
    nil
  end

  private

  # Opens a connection from ConfigData, yields it, and always closes it.
  def with_populator_connection
    host = ConfigData.get_connection
    conn = PG.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
    begin
      yield conn
    ensure
      conn.close
    end
  end

  # Inserts each locus with a 1-based id, reused as its annotation_id.
  def insert_snps(conn, loci)
    loci.each_with_index do |locus, index|
      snp_id = index + 1
      conn.exec_prepared('load_snps', [snp_id, locus, snp_id])
    end
  end

  # Inserts each annotation column with a 1-based id.
  def insert_annotations(conn, columns)
    columns.each_with_index do |column, index|
      anno_id = index + 1
      fields = column.split(',', 11)
      params =
        if fields[0].to_s.include?('intergenic')
          # Intergenic entries only carry a description; the text goes into
          # the info column and every other column is filled with 0.
          [anno_id, 0, 0, 0, fields[0], 0, 0, 0, 0, 0, 0, 0]
        else
          # Pad to exactly 11 values; missing trailing fields become NULL.
          [anno_id] + Array.new(11) { |i| fields[i] }
        end
      conn.exec_prepared('load_annos', params)
    end
  end

  # Inserts the sample and one samples_snps row per column flagged '1'.
  def insert_sample(conn, sample_id, name, flags)
    conn.exec_prepared('load_samples', [sample_id, name])
    flags.each_with_index do |flag, index|
      next unless flag == '1'

      # SNP ids are 1-based (see insert_snps), so column index 0 is SNP 1.
      conn.exec_prepared('load_samples_snps', [sample_id, index + 1])
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require_relative 'xls_parser'
2
+ require 'pg'
3
+
4
# Creates the PostgreSQL tables that the matrix and metadata loaders fill.
#
# Connection settings are read from ConfigData.get_connection; the metadata
# spreadsheet path is read from ConfigData.get_metadata.
module Builder
  include XlsParser

  # Creates the annotations, snps, samples and samples_snps tables.
  #
  # Returns the result of the last CREATE TABLE statement. Any error raised
  # by the database (for example when a table already exists) propagates to
  # the caller after the connection has been closed.
  def format_matrix
    with_builder_connection do |conn|
      puts "formatting annotations table..."
      conn.exec("CREATE TABLE annotations (id numeric(11) PRIMARY KEY, cds varchar(128), transcript varchar(128), transcript_id varchar(128), info text, orientation varchar(128), cds_locus varchar(128), codon_pos varchar(128), codon varchar(128), peptide varchar(128), amino_a varchar(128), syn varchar(128))")

      puts "formatting snps table..."
      conn.exec("CREATE TABLE snps (id numeric(11) PRIMARY KEY, locus numeric(11), annotation_id numeric(11))")

      puts "formatting samples table..."
      conn.exec("CREATE TABLE samples (id numeric(11) PRIMARY KEY, name varchar(128))")

      puts "formatting samples_snps join table..."
      conn.exec("CREATE TABLE samples_snps (sample_id numeric(11), snp_id numeric(11))")
    end
  end

  # Creates the sample_metadata table with an id column plus one
  # varchar(128) column per field returned by XlsParser.load_meta_fields.
  #
  # Returns the result of the CREATE TABLE statement.
  def format_metadata
    metadata_fields = XlsParser.load_meta_fields(ConfigData.get_metadata)

    # Each field already starts with ", ", so the definitions can be
    # appended directly after the id column. Built with map so the strings
    # returned by the parser are not mutated.
    column_definitions = metadata_fields.map { |name| "#{name} varchar(128)" }.join

    with_builder_connection do |conn|
      puts "formatting sample metadata table..."
      conn.exec("CREATE TABLE sample_metadata (id numeric (11) PRIMARY KEY#{column_definitions})")
    end
  end

  private

  # Opens a connection from ConfigData, yields it, and always closes it.
  # Returns the value of the block.
  def with_builder_connection
    host = ConfigData.get_connection
    conn = PG.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
    begin
      yield conn
    ensure
      conn.close
    end
  end
end
@@ -0,0 +1,42 @@
1
# Process-wide configuration shared by the table builder and the loaders.
#
# The setters are instance methods (available on any object whose class
# includes this module); the getters are module methods, e.g.
# ConfigData.get_connection. All values live in module-level class
# variables, so there is a single configuration per process.
module ConfigData
  @@host = {}
  @@metadata_file = ''
  @@matrix_file = ''

  # Stores the database connection settings.
  #
  # connection_hash - Hash read with the keys :host, :port, :dbname, :user
  #                   and :password; every value is stored as a String
  #                   (a missing key becomes "").
  #
  # Prints the stored settings with the password redacted, so credentials
  # do not end up in terminal scrollback or captured logs. Returns nil.
  def set_connection(connection_hash)
    @@host = {
      :host => connection_hash[:host].to_s,
      :port => connection_hash[:port].to_s,
      :dbname => connection_hash[:dbname].to_s,
      :user => connection_hash[:user].to_s,
      :password => connection_hash[:password].to_s
    }
    puts "connection details: #{@@host.merge(:password => '[FILTERED]')}"
  end

  # Returns the Hash stored by set_connection ({} until it has been called).
  def self.get_connection
    @@host
  end

  # Stores the path of the metadata spreadsheet and echoes it. Returns nil.
  def set_metadata(file)
    @@metadata_file = file
    puts "metadata file set to: #{@@metadata_file}"
  end

  # Returns the metadata spreadsheet path ('' until set_metadata is called).
  def self.get_metadata
    @@metadata_file
  end

  # Stores the path of the matrix file and echoes it. Returns nil.
  def set_matrix(file)
    @@matrix_file = file
    puts "matrix file set to: #{@@matrix_file}"
  end

  # Returns the matrix file path ('' until set_matrix is called).
  def self.get_matrix
    @@matrix_file
  end
end
@@ -0,0 +1,34 @@
1
+ require 'roo'
2
+
3
# Reads column names from the header row of an Excel metadata sheet.
module XlsParser
  ## A few rules about Excel files:
  ## 1. .xls only, this can't accept .xlsx
  ## 2. There should be no empty cells in the header row of a sheet (first row)
  ## 3. For good form, there shouldn't be any subsequent rows that are
  ##    longer than the header row
  ## 4. There should not be any duplicate cell names in the header row

  # Builds the list of metadata field names from row 1 of the first sheet.
  #
  # file - path of the .xls file, passed to Roo::Excel.new.
  #
  # Returns an Array of Strings, each prefixed with ", " so the entries can
  # be appended directly to an SQL column list. Header text has whitespace
  # and the characters - ( ) . / removed. Non-String header cells (numbers,
  # dates) are converted with to_s first instead of raising NoMethodError.
  #
  # NOTE(review): the header search starts at column 1 but the fields are
  # generated from index 0, so the first entry is ", empty0" whenever
  # cell(1, 0) is nil. This is kept on purpose: the table builder and the
  # metadata loader both consume this list, and the loader reads row cells
  # with the same 0-based index.
  def XlsParser.load_meta_fields(file)
    sheet = Roo::Excel.new(file)
    sheet.default_sheet = sheet.sheets.first

    # Index of the first empty header cell, scanning from column 1.
    columns = 1
    columns += 1 until sheet.cell(1, columns).nil?

    Array.new(columns) do |index|
      header = sheet.cell(1, index)
      if header.nil?
        ", empty#{index}"
      else
        ", #{header.to_s.gsub(/[\s\-().\/]/, '')}"
      end
    end
  end
end
data/lib/vardb.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'vardb/snp_db_build'
2
+ require 'vardb/database_populator'
3
+ require 'vardb/snpscript_configdata'
4
+
5
# Entry point of the gem: a single object that exposes the whole workflow
# by mixing in the three modules required above.
#
# * ConfigData - set_connection(connection_hash), set_metadata(file) and
#                set_matrix(file) store the settings the other steps read.
# * Builder    - format_matrix and format_metadata create the tables.
# * Populator  - populate_matrix and populate_metadata fill them.
#
# All of these are instance methods, so they are called on Vardb.new.
# Earlier class-level wrapper stubs were removed: they were commented out
# and referred to methods (Builder.format_database,
# Populator.populate_database) that the mixed-in modules do not define.
class Vardb
  include Builder
  include Populator
  include ConfigData
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vardb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Peter McCaffrey
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.17.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.17.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: roo
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.13.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 1.13.0
41
+ description: This gem builds PostgreSQL databases from .matrix files and metadata
42
+ spreadsheets
43
+ email:
44
+ - peter@accetia.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - lib/vardb.rb
50
+ - lib/vardb/snp_db_build.rb
51
+ - lib/vardb/database_populator.rb
52
+ - lib/vardb/xls_parser.rb
53
+ - lib/vardb/snpscript_configdata.rb
54
+ homepage: ''
55
+ licenses:
56
+ - ''
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project:
74
+ rubygems_version: 2.0.3
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Variant database builder
78
+ test_files: []