insertica 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f9cfab140cfaee0294ccda564b13de12adc6841a
4
+ data.tar.gz: ae3cab639e68a0967ca46eb27a74283d9d343e92
5
+ SHA512:
6
+ metadata.gz: 10b4ecf013350727c8de2c800725dd3542b54ca23848ab078f17c01c8020b4e5918f023407cb05d994837d8b24ece745093215dc5b99b575fde2635a2569d29f
7
+ data.tar.gz: 8c77d2545e14e2e649aa7ac527e2bae2b8fd5ec25effc426be1e35ac1e2abea1f269a75852cb304706510d14dd77e5b6ac9c84e05b53620e33ab6f9d4bada9ed
data/bin/insertica ADDED
@@ -0,0 +1,20 @@
1
#!/usr/bin/env ruby

# Load the library relative to this script rather than the current working
# directory, so the executable works no matter where it is invoked from.
require File.expand_path('../../lib/insertica', __FILE__)
require 'thor'

# Command-line interface for insertica.
#
# Exposes a single `load` command that reads a file, builds an
# Insertica::Table from it and inserts that table into Vertica.
class InserticaExecutable < Thor
  package_name "insertica"

  desc "load", "Load a delimited file into Vertica."
  method_option :username, aliases: "-u", required: true, type: :string, desc: "Specifies the username to use with Vertica."
  method_option :password, aliases: "-p", required: true, type: :string, desc: "Specifies the password to use with Vertica."
  method_option :host, aliases: "-h", default: 'localhost', desc: "Specifies the host of the Vertica database."
  # "-p" already belongs to --password, so the port uses the upper-case alias.
  method_option :port, aliases: "-P", type: :numeric, default: 5433, desc: "Specifies the port of the Vertica database."
  # Insertica::Table requires a schema name; 'public' is assumed to be a
  # sensible default here -- NOTE(review): confirm for your Vertica setup.
  method_option :schema, aliases: "-s", type: :string, default: 'public', desc: "Specifies the schema the table is created in."
  # Builds a table from +filename+ and loads it into Vertica.
  #
  # @param filename [String] path of the file to load; its basename (without
  #   extension) becomes the table name
  def load(filename)
    table = Insertica::Table.new(options[:schema], filename)
    # Table#insert reads :host, :user, :password and :port, so the CLI's
    # --username is mapped onto :user explicitly.
    table.insert(
      host: options[:host],
      user: options[:username],
      password: options[:password],
      port: options[:port]
    )
  end
end

InserticaExecutable.start
@@ -0,0 +1,42 @@
1
+ require 'insertica/column_type'
2
+
3
module Insertica
  # A single named column of a table that is being loaded.
  #
  # Collects the raw values seen for the column, infers a column type from
  # them (see #finalize) and renders the SQL fragments used when the table is
  # created and populated.
  class Column
    # Characters that are prefixed with a backslash in VARCHAR values. The
    # backslash itself is included so that a literal backslash in the data is
    # not mistaken for the start of an escape sequence.
    ESCAPE_CHARACTERS = ["\\", "\n", "\t", "\""].freeze

    # Matches any single character listed in ESCAPE_CHARACTERS.
    ESCAPE_PATTERN = Regexp.union(ESCAPE_CHARACTERS)

    attr_accessor :name
    attr_accessor :values
    attr_accessor :type

    # @param name [String] the column name
    # @param values [Array] raw values, one slot per row (nil for missing)
    def initialize(name, values = [])
      @name = name
      @type = nil
      @values = values
    end

    # Infers the column type from the collected values and, when the type
    # requires it, escapes the values in place.
    #
    # @return [Column] self, to allow chaining
    def finalize
      @type = ColumnType.new(@values)
      escape_strings if @type.needs_escaping?

      self
    end

    # @return [String] "<name> <type>" fragment for a CREATE TABLE statement
    def definition
      "#{@name} #{@type}"
    end

    # @return [String] declaration of the intermediate filler column that
    #   receives the raw text for this column
    def filler_definition
      "#{@name}_filler FILLER VARCHAR"
    end

    # @return [String] expression that turns an empty filler value into NULL
    #   and casts anything else to the column's type
    def fix_nulls_definition
      "#{@name} AS CASE WHEN #{@name}_filler = '' THEN NULL ELSE #{@name}_filler::#{@type} END"
    end

    private

    # Backslash-escapes every character in ESCAPE_CHARACTERS, in place.
    #
    # nil stays nil. Any other value is converted with #to_s first, because a
    # column that falls back to VARCHAR may hold non-String values (numbers,
    # booleans, ...) that do not respond to #gsub. All characters are escaped
    # in one pass so that backslashes added for one character are never
    # escaped again for another.
    def escape_strings
      @values.map! do |value|
        next nil if value.nil?

        value.to_s.gsub(ESCAPE_PATTERN) { |character| "\\#{character}" }
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'date'
2
+
3
module Insertica
  # Infers a column type name from a list of raw values.
  #
  # nil values are ignored. The remaining values must all satisfy a check for
  # the corresponding type to be chosen; VARCHAR is the fallback.
  class ColumnType
    attr_accessor :vertica_type

    DATETIME_TYPE = 'TIMESTAMP'
    TRUECLASS_TYPE = 'BOOLEAN'
    INTEGER_TYPE = 'INTEGER'
    FLOAT_TYPE = 'FLOAT'
    STRING_TYPE = 'VARCHAR'

    # Plain decimal integers only. Kernel#Integer would also accept Ruby
    # literal syntax such as "0x1A" or "1_000", which is not plain numeric
    # text.
    INTEGER_PATTERN = /\A\s*[+-]?\d+\s*\z/

    # Plain decimal numbers with an optional fraction and exponent.
    FLOAT_PATTERN = /\A\s*[+-]?(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?\s*\z/

    # @param values [Array] raw values of one column (nil for missing)
    def initialize(values = [])
      @vertica_type = guess_type(values)
    end

    # @return [String] the inferred type name
    def to_s
      @vertica_type
    end

    # @return [Boolean] true when the inferred type is VARCHAR
    def needs_escaping?
      STRING_TYPE == @vertica_type
    end

    private

    # Picks the first type whose check accepts every non-nil value.
    #
    # Numeric checks run before the timestamp check: DateTime.iso8601 appears
    # to accept some digit-only strings (basic-format times / ordinal dates),
    # which would otherwise turn numeric text into TIMESTAMP columns.
    # NOTE(review): confirm this ordering matches the data you load.
    def guess_type(values)
      present = values.compact
      return STRING_TYPE if present.empty?

      if present.all? { |value| boolean_value?(value) }
        TRUECLASS_TYPE
      elsif present.all? { |value| integer_value?(value) }
        INTEGER_TYPE
      elsif present.all? { |value| float_value?(value) }
        FLOAT_TYPE
      elsif present.all? { |value| datetime_value?(value) }
        DATETIME_TYPE
      else
        STRING_TYPE
      end
    end

    # True only for the literal values true and false.
    def boolean_value?(value)
      case value
      when TrueClass, FalseClass then true
      else false
      end
    end

    # True for Integer objects and for strings holding a plain decimal
    # integer. Float objects are rejected even when integral (1.0), because
    # they are rendered as "1.0" when the rows are serialized.
    def integer_value?(value)
      case value
      when Integer then true
      when String then !INTEGER_PATTERN.match(value).nil?
      else false
      end
    end

    # True for any Numeric and for strings holding a plain decimal number.
    def float_value?(value)
      case value
      when Numeric then true
      when String then !FLOAT_PATTERN.match(value).nil?
      else false
      end
    end

    # True for strings that DateTime.iso8601 can parse.
    def datetime_value?(value)
      return false unless value.is_a?(String)

      DateTime.iso8601(value)
      true
    rescue ArgumentError
      # Raised for text that is not a valid ISO 8601 date/time.
      false
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'insertica/column'
2
+
3
+ require 'json/ext'
4
+ require 'vertica'
5
+
6
module Insertica
  # A table built from a file of JSON objects (one object per line) that can
  # be created and populated in Vertica.
  #
  # The table name is the file's basename without its extension; every key
  # seen in any JSON object becomes a column.
  class Table
    attr_accessor :schema_name
    attr_accessor :table_name

    # Reads and parses the whole file eagerly.
    #
    # @param schema_name [String] schema the table is created in
    # @param filename [String] path of the file to load
    def initialize(schema_name, filename)
      @filename = filename
      @schema_name = schema_name
      @table_name = File.basename(filename, '.*')
      @file = File.read(@filename)

      # Parse up front so malformed input fails at construction time.
      columns
      rows
    end

    # @return [String] "<schema>.<table>"
    def full_name
      "#{schema_name}.#{table_name}"
    end

    # @return [String] column definitions for the CREATE TABLE statement
    def definition
      @definition ||= columns.values.map { |column| "#{column.definition}" }.join(",\n")
    end

    # @return [String] column list for the COPY statement: all filler
    #   declarations followed by the expressions deriving the real columns
    def filler_definition
      @filler_definition ||= [
        columns.values.map { |column| "#{column.filler_definition}" }.join(",\n"),
        columns.values.map { |column| "#{column.fix_nulls_definition}" }.join(",\n")
      ].join(",\n")
    end

    # Drops and recreates the table, then copies all rows into it.
    #
    # Connection settings may be passed as keywords or as a single hash with
    # symbol or string keys. Recognized keys are :host, :user (or :username),
    # :password and :port. The connection is closed when the load finishes or
    # fails.
    #
    # @param options [Hash] connection settings as a positional hash
    # @param keyword_options [Hash] connection settings as keywords
    # @return the result of the final COMMIT query
    def insert(options = {}, **keyword_options)
      sources = [keyword_options, options]
      vertica_connection = Vertica.connect({
        host: connection_option(sources, :host),
        user: connection_option(sources, :user, :username),
        password: connection_option(sources, :password),
        port: connection_option(sources, :port)
      })

      begin
        drop_statement = "DROP TABLE IF EXISTS #{full_name} CASCADE"
        create_statement = "CREATE TABLE #{full_name} (#{definition})"
        copy_statement = "COPY #{full_name} (#{filler_definition}) FROM STDIN DELIMITER '\t' ENCLOSED BY '\"' NULL AS 'NULL' ABORT ON ERROR"

        vertica_connection.query(drop_statement)
        vertica_connection.query(create_statement)
        vertica_connection.copy(copy_statement) do |stdin|
          stdin << to_tsv
        end
        vertica_connection.query("COMMIT")
      ensure
        # Release the connection even when one of the statements fails.
        vertica_connection.close
      end
    end

    # @return [Array<Array>] one array per input record, values ordered like
    #   #columns; nil where a record lacks the key
    def rows
      @rows ||= generate_rows
    end

    # @return [Hash{String => Column}] columns keyed by name, in order of
    #   first appearance
    def columns
      @columns ||= generate_columns
    end

    # @return [String] all rows as tab-separated text with every field
    #   wrapped in double quotes; nil becomes an empty quoted field
    def to_tsv
      rows.map do |row|
        "\"#{row.join("\"\t\"")}\"\n"
      end.join("")
    end

    private

    # Returns the first non-nil value stored under any of +keys+ (symbol or
    # string form) in any of +sources+, or nil when none is set.
    def connection_option(sources, *keys)
      sources.each do |source|
        keys.each do |key|
          value = source[key]
          value = source[key.to_s] if value.nil?
          return value unless value.nil?
        end
      end
      nil
    end

    # Parses every non-blank line of the file as JSON.
    def parsed_records
      @file.each_line.reject { |line| line.strip.empty? }.map { |line| JSON.parse(line) }
    end

    # Builds one Column per distinct key and records how many records were
    # read, so that #generate_rows can pad missing trailing values with nil.
    def generate_columns
      columns = {}
      records = parsed_records
      @number_of_rows = records.length

      records.each_with_index do |record, index|
        record.each_pair do |key, value|
          columns[key] ||= Column.new(key)
          columns[key].values[index] = value
        end
      end
      columns.each_value { |column| column.finalize }
      columns
    end

    # Transposes the column-wise values into row-wise arrays.
    def generate_rows
      # Calling #columns first guarantees @number_of_rows has been set.
      column_list = columns.values
      Array.new(@number_of_rows) do |i|
        column_list.map { |column| column.values[i] }
      end
    end
  end
end
data/lib/insertica.rb ADDED
@@ -0,0 +1,3 @@
1
# Entry point of the insertica library.
#
# Puts the directory containing this file on the load path using an absolute
# path, so the library loads regardless of the current working directory.
lib_directory = File.expand_path('..', __FILE__)
$LOAD_PATH.unshift(lib_directory) unless $LOAD_PATH.include?(lib_directory)

require 'insertica/table'
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: insertica
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Nick Evans
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: vertica
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.11.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.11.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: thor
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.18.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 0.18.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: A simple tool to insert data into Vertica. Currently only supports JSON
84
+ data.
85
+ email:
86
+ executables:
87
+ - insertica
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - lib/insertica/column.rb
92
+ - lib/insertica/column_type.rb
93
+ - lib/insertica/table.rb
94
+ - lib/insertica.rb
95
+ - bin/insertica
96
+ homepage:
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 2.0.14
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: A simple tool to insert data into Vertica.
120
+ test_files: []
121
+ has_rdoc: