embulk-output-vertica 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +5 -4
- data/embulk-output-vertica.gemspec +1 -1
- data/example.csv +1 -1
- data/example.yml +4 -2
- data/lib/embulk/output/vertica.rb +14 -12
- data/lib/embulk/output/vertica/value_converter_factory.rb +5 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f73299beb951f703ae4b6cab3dee30f85ac7314
|
4
|
+
data.tar.gz: 720bc9331a6dba8460f0053563592fd9e3be5071
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ec6d31b3b118e1697aa4e6ead1750f44ce2e1c15fc8be33d3c61fa2b476d991124a32c9a14e4f27cc8f8a674028c172d22a9149c2eddff2ba53a33ac83a58a5
|
7
|
+
data.tar.gz: ede9307074e8d52d932614d0d0c9338dae8abbbc829fe7754ddf4a88074b7475f2d83384f36e1ea4c29ef8f8506f595302ca28996566779555607510b831d842
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
- **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO) See vertica documents for details.
|
21
21
|
- **abort_on_error**: Stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
|
22
22
|
- **reject_on_materialized_type_error**: Use the `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any column type and value type do not match, e.g. loading a double value into an INT column fails. See the Vertica documentation for details. (bool, default: false)
|
23
|
+
- **default_timezone**: the default timezone for column_options (string, default is "UTC")
|
23
24
|
- **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
|
24
25
|
- **type**: type of a column when this plugin creates new tables such as `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`. This is used on creating intermediate tables (insert and truncate_insert modes) and on creating a new target table. (string, default: depends on input column type, see below)
|
25
26
|
- boolean: `BOOLEAN`
|
@@ -33,8 +34,8 @@
|
|
33
34
|
- double: `boolean` (true), `long` (to\_i), `double`, `string` (to\_s), `timestamp` (Time.at)
|
34
35
|
- string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
|
35
36
|
- timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
|
36
|
-
- **timestamp_format**:
|
37
|
-
- **timezone**:
|
37
|
+
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is "%Y-%m-%d %H:%M:%S %z")
|
38
|
+
- **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
|
38
39
|
|
39
40
|
### Modes
|
40
41
|
|
@@ -59,9 +60,9 @@ out:
|
|
59
60
|
copy_mode: DIRECT
|
60
61
|
abort_on_error: true
|
61
62
|
column_options:
|
62
|
-
id:
|
63
|
+
id: {type: INT}
|
63
64
|
name: {type: VARCHAR(255)}
|
64
|
-
date: {type: DATE, value_type:
|
65
|
+
date: {type: DATE, value_type: timestamp, timezone: "+09:00"}
|
65
66
|
```
|
66
67
|
|
67
68
|
## Development
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.
|
3
|
+
spec.version = "0.2.0"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
data/example.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
timestamp_date,string_date,foo,bar,id,name,score
|
2
2
|
2015-07-13,2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
3
3
|
2015-07-13,2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
4
4
|
2015-07-13,2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
data/example.yml
CHANGED
@@ -35,9 +35,11 @@ out:
|
|
35
35
|
copy_mode: direct
|
36
36
|
abort_on_error: true
|
37
37
|
reject_on_materialized_type_error: true
|
38
|
+
default_timezone: 'Asia/Tokyo'
|
38
39
|
column_options:
|
40
|
+
timestamp_date: {type: DATE}
|
41
|
+
string_date: {type: TIMESTAMPTZ, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: '+08:00'}
|
39
42
|
id: {type: INT}
|
40
43
|
name: {type: VARCHAR}
|
41
|
-
timestamp_date: {type: DATE, timezone: "+09:00"}
|
42
|
-
string_date: {type: DATE, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
|
43
44
|
score: {type: INT, value_type: long}
|
45
|
+
|
@@ -11,18 +11,19 @@ module Embulk
|
|
11
11
|
|
12
12
|
def self.transaction(config, schema, processor_count, &control)
|
13
13
|
task = {
|
14
|
-
'host'
|
15
|
-
'port'
|
16
|
-
'username'
|
17
|
-
'password'
|
18
|
-
'database'
|
19
|
-
'schema'
|
20
|
-
'table'
|
21
|
-
'mode'
|
22
|
-
'copy_mode'
|
23
|
-
'abort_on_error'
|
14
|
+
'host' => config.param('host', :string, :default => 'localhost'),
|
15
|
+
'port' => config.param('port', :integer, :default => 5433),
|
16
|
+
'username' => config.param('username', :string),
|
17
|
+
'password' => config.param('password', :string, :default => ''),
|
18
|
+
'database' => config.param('database', :string, :default => 'vdb'),
|
19
|
+
'schema' => config.param('schema', :string, :default => 'public'),
|
20
|
+
'table' => config.param('table', :string),
|
21
|
+
'mode' => config.param('mode', :string, :default => 'insert'),
|
22
|
+
'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
|
23
|
+
'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
|
24
|
+
'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
|
25
|
+
'column_options' => config.param('column_options', :hash, :default => {}),
|
24
26
|
'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
|
25
|
-
'column_options' => config.param('column_options', :hash, :default => {}),
|
26
27
|
}
|
27
28
|
|
28
29
|
unless %w[INSERT REPLACE].include?(task['mode'].upcase!)
|
@@ -69,7 +70,7 @@ module Embulk
|
|
69
70
|
|
70
71
|
def initialize(task, schema, index)
|
71
72
|
super
|
72
|
-
@converters = ValueConverterFactory.create_converters(schema, task['column_options'])
|
73
|
+
@converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
|
73
74
|
Embulk.logger.debug { @converters.to_s }
|
74
75
|
@jv = self.class.connect(task)
|
75
76
|
end
|
@@ -84,6 +85,7 @@ module Embulk
|
|
84
85
|
copy(@jv, copy_sql) do |stdin|
|
85
86
|
page.each do |record|
|
86
87
|
json = to_json(record)
|
88
|
+
Embulk.logger.debug { "embulk-output-vertica: #{json}" }
|
87
89
|
stdin << json << "\n"
|
88
90
|
end
|
89
91
|
end
|
@@ -8,17 +8,16 @@ module Embulk
|
|
8
8
|
attr_reader :schema_type, :value_type, :timestamp_format, :timezone, :zone_offset
|
9
9
|
|
10
10
|
DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
|
11
|
-
DEFAULT_TIMEZONE = "+00:00"
|
12
11
|
|
13
|
-
def self.create_converters(schema, column_options)
|
12
|
+
def self.create_converters(schema, default_timezone, column_options)
|
14
13
|
# @param [Schema] schema embulk defined column types
|
15
14
|
# @param [Hash] column_options user defined column types
|
16
15
|
# @return [Array] value converters (array of Proc)
|
17
16
|
Hash[*(schema.names.zip(schema.types).map do |column_name, schema_type|
|
18
17
|
if column_options[column_name]
|
19
18
|
value_type = column_options[column_name]['value_type']
|
20
|
-
timestamp_format = column_options[column_name]['timestamp_format']
|
21
|
-
timezone = column_options[column_name]['timezone']
|
19
|
+
timestamp_format = column_options[column_name]['timestamp_format'] || DEFAULT_TIMESTAMP_FORMAT
|
20
|
+
timezone = column_options[column_name]['timezone'] || default_timezone
|
22
21
|
[column_name, self.new(schema_type, value_type, timestamp_format, timezone).create_converter]
|
23
22
|
else
|
24
23
|
[column_name, Proc.new {|val| val }]
|
@@ -29,8 +28,8 @@ module Embulk
|
|
29
28
|
def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
|
30
29
|
@schema_type = schema_type
|
31
30
|
@value_type = value_type || schema_type.to_s
|
32
|
-
@timestamp_format = timestamp_format
|
33
|
-
@timezone = timezone
|
31
|
+
@timestamp_format = timestamp_format
|
32
|
+
@timezone = timezone
|
34
33
|
@zone_offset = get_zone_offset(@timezone)
|
35
34
|
end
|
36
35
|
|