embulk-output-vertica 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +5 -4
- data/embulk-output-vertica.gemspec +1 -1
- data/example.csv +1 -1
- data/example.yml +4 -2
- data/lib/embulk/output/vertica.rb +14 -12
- data/lib/embulk/output/vertica/value_converter_factory.rb +5 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f73299beb951f703ae4b6cab3dee30f85ac7314
|
4
|
+
data.tar.gz: 720bc9331a6dba8460f0053563592fd9e3be5071
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ec6d31b3b118e1697aa4e6ead1750f44ce2e1c15fc8be33d3c61fa2b476d991124a32c9a14e4f27cc8f8a674028c172d22a9149c2eddff2ba53a33ac83a58a5
|
7
|
+
data.tar.gz: ede9307074e8d52d932614d0d0c9338dae8abbbc829fe7754ddf4a88074b7475f2d83384f36e1ea4c29ef8f8506f595302ca28996566779555607510b831d842
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
- **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO) See vertica documents for details.
|
21
21
|
- **abort_on_error**: Stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
|
22
22
|
- **reject_on_materialized_type_error**: Use the `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any column type and value type do not fit, e.g., a double value into an INT column fails. See vertica documents for details. (bool, default: false)
|
23
|
+
- **default_timezone**: the default timezone for column_options (string, default is "UTC")
|
23
24
|
- **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
|
24
25
|
- **type**: type of a column when this plugin creates new tables such as `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`. This is used on creating intermediate tables (insert and truncate_insert modes) and on creating a new target table. (string, default: depends on input column type, see below)
|
25
26
|
- boolean: `BOOLEAN`
|
@@ -33,8 +34,8 @@
|
|
33
34
|
- double: `boolean` (true), `long` (to\_i), `double`, `string` (to\_s), `timestamp` (Time.at)
|
34
35
|
- string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
|
35
36
|
- timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
|
36
|
-
- **timestamp_format**:
|
37
|
-
- **timezone**:
|
37
|
+
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is "%Y-%m-%d %H:%M:%S %z")
|
38
|
+
- **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
|
38
39
|
|
39
40
|
### Modes
|
40
41
|
|
@@ -59,9 +60,9 @@ out:
|
|
59
60
|
copy_mode: DIRECT
|
60
61
|
abort_on_error: true
|
61
62
|
column_options:
|
62
|
-
id:
|
63
|
+
id: {type: INT}
|
63
64
|
name: {type: VARCHAR(255)}
|
64
|
-
date: {type: DATE, value_type:
|
65
|
+
date: {type: DATE, value_type: timestamp, timezone: "+09:00"}
|
65
66
|
```
|
66
67
|
|
67
68
|
## Development
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.
|
3
|
+
spec.version = "0.2.0"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
data/example.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
timestamp_date,string_date,foo,bar,id,name,score
|
2
2
|
2015-07-13,2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
3
3
|
2015-07-13,2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
4
4
|
2015-07-13,2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
data/example.yml
CHANGED
@@ -35,9 +35,11 @@ out:
|
|
35
35
|
copy_mode: direct
|
36
36
|
abort_on_error: true
|
37
37
|
reject_on_materialized_type_error: true
|
38
|
+
default_timezone: 'Asia/Tokyo'
|
38
39
|
column_options:
|
40
|
+
timestamp_date: {type: DATE}
|
41
|
+
string_date: {type: TIMESTAMPTZ, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: '+08:00'}
|
39
42
|
id: {type: INT}
|
40
43
|
name: {type: VARCHAR}
|
41
|
-
timestamp_date: {type: DATE, timezone: "+09:00"}
|
42
|
-
string_date: {type: DATE, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
|
43
44
|
score: {type: INT, value_type: long}
|
45
|
+
|
@@ -11,18 +11,19 @@ module Embulk
|
|
11
11
|
|
12
12
|
def self.transaction(config, schema, processor_count, &control)
|
13
13
|
task = {
|
14
|
-
'host'
|
15
|
-
'port'
|
16
|
-
'username'
|
17
|
-
'password'
|
18
|
-
'database'
|
19
|
-
'schema'
|
20
|
-
'table'
|
21
|
-
'mode'
|
22
|
-
'copy_mode'
|
23
|
-
'abort_on_error'
|
14
|
+
'host' => config.param('host', :string, :default => 'localhost'),
|
15
|
+
'port' => config.param('port', :integer, :default => 5433),
|
16
|
+
'username' => config.param('username', :string),
|
17
|
+
'password' => config.param('password', :string, :default => ''),
|
18
|
+
'database' => config.param('database', :string, :default => 'vdb'),
|
19
|
+
'schema' => config.param('schema', :string, :default => 'public'),
|
20
|
+
'table' => config.param('table', :string),
|
21
|
+
'mode' => config.param('mode', :string, :default => 'insert'),
|
22
|
+
'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
|
23
|
+
'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
|
24
|
+
'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
|
25
|
+
'column_options' => config.param('column_options', :hash, :default => {}),
|
24
26
|
'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
|
25
|
-
'column_options' => config.param('column_options', :hash, :default => {}),
|
26
27
|
}
|
27
28
|
|
28
29
|
unless %w[INSERT REPLACE].include?(task['mode'].upcase!)
|
@@ -69,7 +70,7 @@ module Embulk
|
|
69
70
|
|
70
71
|
def initialize(task, schema, index)
|
71
72
|
super
|
72
|
-
@converters = ValueConverterFactory.create_converters(schema, task['column_options'])
|
73
|
+
@converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
|
73
74
|
Embulk.logger.debug { @converters.to_s }
|
74
75
|
@jv = self.class.connect(task)
|
75
76
|
end
|
@@ -84,6 +85,7 @@ module Embulk
|
|
84
85
|
copy(@jv, copy_sql) do |stdin|
|
85
86
|
page.each do |record|
|
86
87
|
json = to_json(record)
|
88
|
+
Embulk.logger.debug { "embulk-output-vertica: #{json}" }
|
87
89
|
stdin << json << "\n"
|
88
90
|
end
|
89
91
|
end
|
@@ -8,17 +8,16 @@ module Embulk
|
|
8
8
|
attr_reader :schema_type, :value_type, :timestamp_format, :timezone, :zone_offset
|
9
9
|
|
10
10
|
DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
|
11
|
-
DEFAULT_TIMEZONE = "+00:00"
|
12
11
|
|
13
|
-
def self.create_converters(schema, column_options)
|
12
|
+
def self.create_converters(schema, default_timezone, column_options)
|
14
13
|
# @param [Schema] schema embulk defined column types
|
15
14
|
# @param [Hash] column_options user defined column types
|
16
15
|
# @return [Array] value converters (array of Proc)
|
17
16
|
Hash[*(schema.names.zip(schema.types).map do |column_name, schema_type|
|
18
17
|
if column_options[column_name]
|
19
18
|
value_type = column_options[column_name]['value_type']
|
20
|
-
timestamp_format = column_options[column_name]['timestamp_format']
|
21
|
-
timezone = column_options[column_name]['timezone']
|
19
|
+
timestamp_format = column_options[column_name]['timestamp_format'] || DEFAULT_TIMESTAMP_FORMAT
|
20
|
+
timezone = column_options[column_name]['timezone'] || default_timezone
|
22
21
|
[column_name, self.new(schema_type, value_type, timestamp_format, timezone).create_converter]
|
23
22
|
else
|
24
23
|
[column_name, Proc.new {|val| val }]
|
@@ -29,8 +28,8 @@ module Embulk
|
|
29
28
|
def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
|
30
29
|
@schema_type = schema_type
|
31
30
|
@value_type = value_type || schema_type.to_s
|
32
|
-
@timestamp_format = timestamp_format
|
33
|
-
@timezone = timezone
|
31
|
+
@timestamp_format = timestamp_format
|
32
|
+
@timezone = timezone
|
34
33
|
@zone_offset = get_zone_offset(@timezone)
|
35
34
|
end
|
36
35
|
|