embulk-output-vertica 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +17 -2
- data/embulk-output-vertica.gemspec +1 -1
- data/example.csv +9 -2
- data/example.yml +24 -8
- data/lib/embulk/output/vertica.rb +78 -38
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81d94a9a3a707573b6eaed08fcd3b64f2890963d
|
4
|
+
data.tar.gz: 56035fc8679597c91ac57eab7ddba7cab8938710
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2a54c45a4407fa37a2e1fb2fbb7326d3091c3ceb8d6dd30d6bea701be34c6cb75743e48bae9ad6ef6247a85f0f31f3fc736fe12be7444c29a4eb181e0f066ae
|
7
|
+
data.tar.gz: 1a192bde5bdfd73e3988500cfb198462a86cd2979b4df688fe189f7f1fc36ab1c158e95f5c67067545524bff1edb0bd85fcf1d43734686430fcd86f618722e1c
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -16,10 +16,25 @@
|
|
16
16
|
- **database**: database name (string, default: vdb)
|
17
17
|
- **schema**: schema name (string, default: public)
|
18
18
|
- **table**: table name (string, required)
|
19
|
-
- **
|
19
|
+
- **mode**: "insert" or "replace". See below. (string, default: insert)
|
20
|
+
- **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO) See the Vertica documentation for details.
|
20
21
|
- **abort_on_error**: Stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
|
21
22
|
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
22
|
-
- **type**: type of a column when this plugin creates new tables
|
23
|
+
- **type**: type of a column when this plugin creates new tables, such as `VARCHAR(255)` or `INTEGER NOT NULL UNIQUE`. This is used when creating intermediate tables (insert and replace modes) and when creating a new target table. (string, default: depends on input column type, see below)
|
24
|
+
- `INT` (same as `BIGINT` in Vertica) for `long`
|
25
|
+
- `BOOLEAN` for `boolean`
|
26
|
+
- `FLOAT` (same as `DOUBLE PRECISION` in Vertica) for `double`
|
27
|
+
- `VARCHAR` for `string`
|
28
|
+
- `TIMESTAMP` for `timestamp`
|
29
|
+
|
30
|
+
### Modes
|
31
|
+
|
32
|
+
* **insert**:
|
33
|
+
* Behavior: This mode copies rows to an intermediate table first. If all tasks run correctly, it then runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table>`.
|
34
|
+
* Transactional: Yes if `abort_on_error` option is used
|
35
|
+
* **replace**:
|
36
|
+
* Behavior: Same as insert mode, except that it drops the target table first.
|
37
|
+
* Transactional: Yes if `abort_on_error` option is used
|
23
38
|
|
24
39
|
## Example
|
25
40
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.1.
|
3
|
+
spec.version = "0.1.5"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
data/example.csv
CHANGED
@@ -1,2 +1,9 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
date,foo,bar,id,name,score
|
2
|
+
2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
3
|
+
2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
4
|
+
2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
5
|
+
2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
|
6
|
+
2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
|
7
|
+
2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
|
8
|
+
2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
|
9
|
+
2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
|
data/example.yml
CHANGED
@@ -1,10 +1,27 @@
|
|
1
|
+
# in:
|
2
|
+
# type: random
|
3
|
+
# rows: 100
|
4
|
+
# schema:
|
5
|
+
# id: primary_key
|
6
|
+
# name: string
|
7
|
+
# score: integer
|
1
8
|
in:
|
2
|
-
type:
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
9
|
+
type: file
|
10
|
+
path_prefix: example.csv
|
11
|
+
parser:
|
12
|
+
type: csv
|
13
|
+
charset: UTF-8
|
14
|
+
newline: CRLF
|
15
|
+
null_string: 'NULL'
|
16
|
+
skip_header_lines: 1
|
17
|
+
comment_line_marker: '#'
|
18
|
+
columns:
|
19
|
+
- {name: date, type: timestamp, format: "%Y-%m-%d"}
|
20
|
+
- {name: foo, type: string}
|
21
|
+
- {name: bar, type: string}
|
22
|
+
- {name: id, type: long}
|
23
|
+
- {name: name, type: string}
|
24
|
+
- {name: score, type: double}
|
8
25
|
out:
|
9
26
|
type: vertica
|
10
27
|
host: 127.0.0.1
|
@@ -15,7 +32,6 @@ out:
|
|
15
32
|
table: embulk_test
|
16
33
|
copy_mode: direct
|
17
34
|
column_options:
|
35
|
+
date: {type: DATE}
|
18
36
|
id: {type: INT}
|
19
37
|
name: {type: VARCHAR}
|
20
|
-
score: {type: INT}
|
21
|
-
|
@@ -17,12 +17,17 @@ module Embulk
|
|
17
17
|
'database' => config.param('database', :string, :default => 'vdb'),
|
18
18
|
'schema' => config.param('schema', :string, :default => 'public'),
|
19
19
|
'table' => config.param('table', :string),
|
20
|
+
'mode' => config.param('mode', :string, :default => 'insert'),
|
20
21
|
'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
|
21
22
|
'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
|
22
23
|
'column_options' => config.param('column_options', :hash, :default => {}),
|
23
24
|
}
|
24
25
|
|
25
|
-
unless %w[
|
26
|
+
unless %w[INSERT REPLACE].include?(task['mode'].upcase!)
|
27
|
+
raise ConfigError, "`mode` must be one of INSERT, REPLACE"
|
28
|
+
end
|
29
|
+
|
30
|
+
unless %w[AUTO DIRECT TRICKLE].include?(task['copy_mode'].upcase!)
|
26
31
|
raise ConfigError, "`copy_mode` must be one of AUTO, DIRECT, TRICKLE"
|
27
32
|
end
|
28
33
|
|
@@ -37,30 +42,59 @@ module Embulk
|
|
37
42
|
quoted_temp_table = ::Jvertica.quote_identifier(task['temp_table'])
|
38
43
|
|
39
44
|
connect(task) do |jv|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
jv
|
45
|
+
if task['mode'] == 'REPLACE'
|
46
|
+
query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_table}])
|
47
|
+
end
|
48
|
+
query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
|
49
|
+
query(jv, %[CREATE TABLE #{quoted_schema}.#{quoted_temp_table} (#{sql_schema})])
|
44
50
|
end
|
45
51
|
|
46
52
|
begin
|
47
53
|
yield(task)
|
48
54
|
connect(task) do |jv|
|
49
|
-
|
50
|
-
|
51
|
-
jv.query %[create table if not exists #{quoted_schema}.#{quoted_table} (#{sql_schema})]
|
52
|
-
jv.query %[insert into #{quoted_schema}.#{quoted_table} select * from #{quoted_schema}.#{quoted_temp_table}]
|
55
|
+
query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema})])
|
56
|
+
query(jv, %[INSERT INTO #{quoted_schema}.#{quoted_table} SELECT * FROM #{quoted_schema}.#{quoted_temp_table}])
|
53
57
|
jv.commit
|
54
58
|
end
|
55
59
|
ensure
|
56
60
|
connect(task) do |jv|
|
57
|
-
|
58
|
-
|
61
|
+
query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
|
62
|
+
Embulk.logger.debug { query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_table} LIMIT 10]).map {|row| row.to_h }.join("\n") }
|
59
63
|
end
|
60
64
|
end
|
61
65
|
return {}
|
62
66
|
end
|
63
67
|
|
68
|
+
def initialize(task, schema, index)
|
69
|
+
super
|
70
|
+
@jv = self.class.connect(task)
|
71
|
+
end
|
72
|
+
|
73
|
+
def close
|
74
|
+
@jv.close
|
75
|
+
end
|
76
|
+
|
77
|
+
def add(page)
|
78
|
+
copy(@jv, copy_sql) do |stdin|
|
79
|
+
page.each do |record|
|
80
|
+
stdin << to_json(record) << "\n"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
@jv.commit
|
84
|
+
end
|
85
|
+
|
86
|
+
def finish
|
87
|
+
end
|
88
|
+
|
89
|
+
def abort
|
90
|
+
end
|
91
|
+
|
92
|
+
def commit
|
93
|
+
{}
|
94
|
+
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
64
98
|
def self.connect(task)
|
65
99
|
jv = ::Jvertica.connect({
|
66
100
|
host: task['host'],
|
@@ -85,8 +119,11 @@ module Embulk
|
|
85
119
|
# @return [String] sql schema used to CREATE TABLE
|
86
120
|
def self.to_sql_schema(schema, column_options)
|
87
121
|
schema.names.zip(schema.types).map do |column_name, type|
|
88
|
-
|
89
|
-
column_options[column_name]['type']
|
122
|
+
if column_options[column_name] and column_options[column_name]['type']
|
123
|
+
sql_type = column_options[column_name]['type']
|
124
|
+
else
|
125
|
+
sql_type = to_sql_type(type)
|
126
|
+
end
|
90
127
|
"#{::Jvertica.quote_identifier(column_name)} #{sql_type}"
|
91
128
|
end.join(',')
|
92
129
|
end
|
@@ -102,45 +139,48 @@ module Embulk
|
|
102
139
|
end
|
103
140
|
end
|
104
141
|
|
105
|
-
def
|
106
|
-
|
107
|
-
|
142
|
+
def self.query(conn, sql)
|
143
|
+
Embulk.logger.debug sql
|
144
|
+
conn.query(sql)
|
108
145
|
end
|
109
146
|
|
110
|
-
def
|
111
|
-
|
147
|
+
def query(conn, sql)
|
148
|
+
self.class.query(conn, sql)
|
112
149
|
end
|
113
150
|
|
114
|
-
def
|
115
|
-
|
116
|
-
|
117
|
-
stdin << record.map {|v| ::Jvertica.quote(v) }.join(",") << "\n"
|
118
|
-
end
|
119
|
-
end
|
120
|
-
@jv.commit
|
151
|
+
def copy(conn, sql, &block)
|
152
|
+
Embulk.logger.debug sql
|
153
|
+
conn.copy(sql, &block)
|
121
154
|
end
|
122
155
|
|
123
|
-
def
|
156
|
+
def copy_sql
|
157
|
+
@copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN PARSER fjsonparser() #{copy_mode}#{abort_on_error} NO COMMIT"
|
124
158
|
end
|
125
159
|
|
126
|
-
def
|
160
|
+
def to_json(record)
|
161
|
+
Hash[*(schema.names.zip(record).flatten!(1))].to_json
|
127
162
|
end
|
128
163
|
|
129
|
-
def
|
130
|
-
|
164
|
+
def quoted_schema
|
165
|
+
::Jvertica.quote_identifier(@task['schema'])
|
131
166
|
end
|
132
167
|
|
133
|
-
|
168
|
+
def quoted_table
|
169
|
+
::Jvertica.quote_identifier(@task['table'])
|
170
|
+
end
|
134
171
|
|
135
|
-
def
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
Embulk.logger.debug sql
|
142
|
-
sql
|
172
|
+
def quoted_temp_table
|
173
|
+
::Jvertica.quote_identifier(@task['temp_table'])
|
174
|
+
end
|
175
|
+
|
176
|
+
def copy_mode
|
177
|
+
@task['copy_mode']
|
143
178
|
end
|
179
|
+
|
180
|
+
def abort_on_error
|
181
|
+
@task['abort_on_error'] ? ' ABORT ON ERROR' : ''
|
182
|
+
end
|
183
|
+
|
144
184
|
end
|
145
185
|
end
|
146
186
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- eiji.sekiya
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-07-
|
12
|
+
date: 2015-07-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|