insertica 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f9cfab140cfaee0294ccda564b13de12adc6841a
4
+ data.tar.gz: ae3cab639e68a0967ca46eb27a74283d9d343e92
5
+ SHA512:
6
+ metadata.gz: 10b4ecf013350727c8de2c800725dd3542b54ca23848ab078f17c01c8020b4e5918f023407cb05d994837d8b24ece745093215dc5b99b575fde2635a2569d29f
7
+ data.tar.gz: 8c77d2545e14e2e649aa7ac527e2bae2b8fd5ec25effc426be1e35ac1e2abea1f269a75852cb304706510d14dd77e5b6ac9c84e05b53620e33ab6f9d4bada9ed
data/bin/insertica ADDED
@@ -0,0 +1,20 @@
1
#!/usr/bin/env ruby

# Command-line entry point for insertica.
#
# Resolve lib/ relative to this script (not the current working directory) so
# the command can be started from anywhere.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))

require 'insertica'
require 'thor'

# Thor command-line interface exposing a single `load` command.
class InserticaExecutable < Thor
  package_name "insertica"

  desc "load", "Load a delimited file into Vertica."
  method_option :username, aliases: "-u", required: true, type: :string, desc: "Specifies the username to use with Vertica."
  method_option :password, aliases: "-p", required: true, type: :string, desc: "Specifies the password to use with Vertica."
  method_option :host, aliases: "-h", default: 'localhost', desc: "Specifies the host of the Vertica database."
  # "-p" is already the password alias, so the port gets its own short flag.
  method_option :port, aliases: "-P", default: 5433, type: :numeric, desc: "Specifies the port of the Vertica database."
  method_option :schema, aliases: "-s", default: 'public', type: :string, desc: "Specifies the schema the table is created in."
  # Builds a table from +filename+ and loads it into Vertica.
  #
  # @param filename [String] path of the file to load; its basename (without
  #   extension) becomes the table name.
  def load(filename)
    table = Insertica::Table.new(options[:schema], filename)
    # Table#insert takes keyword options and reads :user, so translate the
    # command-line option names explicitly instead of passing the Thor hash.
    table.insert(
      host: options[:host],
      user: options[:username],
      password: options[:password],
      port: options[:port]
    )
  end
end

InserticaExecutable.start
@@ -0,0 +1,42 @@
1
+ require 'insertica/column_type'
2
+
3
module Insertica
  # A single named column of a table that is being loaded.
  #
  # Collects the raw values for the column and, once #finalize has run, holds
  # the guessed column type used by the SQL fragment builders below.
  class Column
    # Characters that receive a backslash prefix in string columns. The
    # backslash itself is included; otherwise a backslash already present in
    # the data could not be told apart from one added by the escaping.
    ESCAPE_CHARACTERS = ["\\", "\n", "\t", "\""].freeze
    # Matches any single character from ESCAPE_CHARACTERS.
    ESCAPE_PATTERN = Regexp.union(ESCAPE_CHARACTERS)

    attr_accessor :name
    attr_accessor :values
    attr_accessor :type

    # @param name [String] column name, used verbatim in the SQL fragments
    # @param values [Array] raw values of the column, one entry per row
    def initialize(name, values = [])
      @name = name
      @type = nil
      @values = values
    end

    # Guesses the column type from the collected values and, when the type
    # asks for it, escapes the values in place.
    #
    # @return [Column] self, so calls can be chained
    def finalize
      @type = ColumnType.new(@values)
      escape_strings if @type.needs_escaping?

      self
    end

    # @return [String] "<name> <type>" fragment for a CREATE TABLE statement
    def definition
      "#{@name} #{@type}"
    end

    # @return [String] declaration of the intermediate "<name>_filler" column
    def filler_definition
      "#{@name}_filler FILLER VARCHAR"
    end

    # @return [String] expression that maps an empty filler value to NULL and
    #   casts every other filler value to the column type
    def fix_nulls_definition
      "#{@name} AS CASE WHEN #{@name}_filler = '' THEN NULL ELSE #{@name}_filler::#{@type} END"
    end

    private

    # Backslash-escapes every ESCAPE_CHARACTERS occurrence in a single pass.
    # nil entries are kept as nil. Other values are converted with to_s first,
    # because a column typed as string may still hold non-String values
    # (for example a mix of numbers and text).
    def escape_strings
      @values.map! do |value|
        next nil if value.nil?

        # Block form of gsub: the replacement text is used literally.
        value.to_s.gsub(ESCAPE_PATTERN) { |character| "\\#{character}" }
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'date'
2
+
3
module Insertica
  # Picks a Vertica column type name for a list of values.
  #
  # The checks run in a fixed order (all-nil, timestamp, boolean, integer,
  # float) and the first one that accepts every non-nil value wins; anything
  # else is treated as a string column.
  class ColumnType
    attr_accessor :vertica_type

    DATETIME_TYPE = 'TIMESTAMP'
    TRUECLASS_TYPE = 'BOOLEAN'
    INTEGER_TYPE = 'INTEGER'
    FLOAT_TYPE = 'FLOAT'
    STRING_TYPE = 'VARCHAR'

    # @param values [Array] the values whose common type should be guessed
    def initialize(values = [])
      @vertica_type = guess_type(values)
    end

    # @return [String] the guessed type name
    def to_s
      @vertica_type
    end

    # @return [Boolean] true when the guessed type is the string type
    def needs_escaping?
      STRING_TYPE == @vertica_type
    end

    private

    # Runs the type checks in priority order and returns the first match.
    def guess_type(values)
      return STRING_TYPE if all_nil?(values)
      return DATETIME_TYPE if datetime?(values)
      return TRUECLASS_TYPE if trueclass?(values)
      return INTEGER_TYPE if integer?(values)
      return FLOAT_TYPE if float?(values)

      STRING_TYPE
    end

    # True when there is no non-nil value at all (including an empty list).
    def all_nil?(values)
      values.all?(&:nil?)
    end

    # True when every non-nil value parses as an ISO 8601 date-time; any
    # parse error means the column is not a timestamp column.
    def datetime?(values)
      values.each do |candidate|
        next if candidate.nil?

        DateTime.iso8601(candidate)
      end
      true
    rescue
      false
    end

    # True when every value is nil, true or false.
    def trueclass?(values)
      values.all? do |candidate|
        candidate.nil? || [FalseClass, TrueClass].include?(candidate.class)
      end
    end

    # True when every non-nil value converts to an Integer that equals its
    # Float conversion; a failed conversion rejects the whole column.
    def integer?(values)
      values.all? do |candidate|
        next true if candidate.nil?

        Integer(candidate) == Float(candidate)
      end
    rescue
      false
    end

    # True when every non-nil value converts to a Float.
    def float?(values)
      values.each do |candidate|
        Float(candidate) unless candidate.nil?
      end
      true
    rescue
      false
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'insertica/column'
2
+
3
+ require 'json/ext'
4
+ require 'vertica'
5
+
6
module Insertica
  # A table built from a file that holds one JSON object per line.
  #
  # The table name is the file's basename without extension. Every key seen in
  # any line becomes a column; lines that lack a key get nil for that column.
  class Table
    attr_accessor :schema_name
    attr_accessor :table_name

    # Reads +filename+ and eagerly builds the columns and rows.
    #
    # @param schema_name [String] schema the table is created in
    # @param filename [String] path of the file to read
    def initialize(schema_name, filename)
      @filename = filename
      @schema_name = schema_name
      @table_name = File.basename(filename, '.*')
      @file = File.read(@filename)

      # Columns first: building them records the row count used for the rows.
      columns
      rows
    end

    # @return [String] "<schema>.<table>"
    def full_name
      "#{schema_name}.#{table_name}"
    end

    # @return [String] column definitions for CREATE TABLE, one per line
    def definition
      @definition ||= columns.values.map(&:definition).join(",\n")
    end

    # @return [String] column list for COPY: all filler declarations followed
    #   by all expressions that derive the real columns from the fillers
    def filler_definition
      @filler_definition ||= [
        columns.values.map(&:filler_definition).join(",\n"),
        columns.values.map(&:fix_nulls_definition).join(",\n")
      ].join(",\n")
    end

    # Drops and recreates the table, copies all rows into it and commits.
    #
    # @param options [Hash] connection settings; reads :host, :user,
    #   :password and :port
    # @return the result of the final COMMIT query
    def insert(**options)
      vertica_connection = Vertica.connect({
        host: options[:host],
        user: options[:user],
        password: options[:password],
        port: options[:port]
      })

      drop_statement = "DROP TABLE IF EXISTS #{full_name} CASCADE"
      create_statement = "CREATE TABLE #{full_name} (#{definition})"
      copy_statement = "COPY #{full_name} (#{filler_definition}) FROM STDIN DELIMITER '\t' ENCLOSED BY '\"' NULL AS 'NULL' ABORT ON ERROR"

      begin
        vertica_connection.query(drop_statement)
        vertica_connection.query(create_statement)
        vertica_connection.copy(copy_statement) do |stdin|
          stdin << to_tsv
        end
        vertica_connection.query("COMMIT")
      ensure
        # Release the connection whether or not the load succeeded.
        vertica_connection.close
      end
    end

    # @return [Array<Array>] one array per input line, values in column order
    def rows
      @rows ||= generate_rows
    end

    # @return [Hash] column name => Column, in order of first appearance
    def columns
      @columns ||= generate_columns
    end

    # @return [String] the rows as tab-separated text; every value is wrapped
    #   in double quotes and every row ends with a newline
    def to_tsv
      rows.map do |row|
        "\"#{row.join("\"\t\"")}\"\n"
      end.join("")
    end

    private

    # Parses the file and distributes each record's values over the columns.
    def generate_columns
      # Blank lines carry no record; skipping them keeps a stray empty line
      # from aborting the whole load with a JSON parse error.
      records = @file.each_line
                     .reject { |line| line.strip.empty? }
                     .map { |line| JSON.parse(line) }
      @number_of_rows = records.length

      columns = {}
      records.each_with_index do |record, index|
        record.each_pair do |key, value|
          columns[key] ||= Column.new(key)
          # Assigning by row index leaves nil in rows that lack this key.
          columns[key].values[index] = value
        end
      end
      columns.each_value(&:finalize)
      columns
    end

    # Transposes the per-column value lists into per-row arrays.
    def generate_rows
      column_list = columns.values
      Array.new(@number_of_rows) do |i|
        column_list.map { |column| column.values[i] }
      end
    end
  end
end
data/lib/insertica.rb ADDED
@@ -0,0 +1,3 @@
1
# Put this file's own directory (the gem's lib/) on the load path, so the
# require below does not depend on the current working directory.
lib_dir = File.expand_path('..', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require 'insertica/table'
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: insertica
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Nick Evans
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: vertica
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.11.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.11.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: thor
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.18.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 0.18.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: A simple tool to insert data into Vertica. Currently only supports JSON
84
+ data.
85
+ email:
86
+ executables:
87
+ - insertica
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - lib/insertica/column.rb
92
+ - lib/insertica/column_type.rb
93
+ - lib/insertica/table.rb
94
+ - lib/insertica.rb
95
+ - bin/insertica
96
+ homepage:
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 2.0.14
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: A simple tool to insert data into Vertica.
120
+ test_files: []
121
+ has_rdoc: