embulk-output-vertica 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -3
- data/README.md +16 -4
- data/embulk-output-vertica.gemspec +1 -1
- data/lib/embulk/output/vertica/value_converter_factory.rb +98 -0
- data/lib/embulk/output/vertica.rb +6 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9514ee560a9a138d1280efcb9f106d0689c3a3c9
|
4
|
+
data.tar.gz: 02dd9130ce7cc4b0eccd2d6d4af59e7900627fb8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd6b079069cfd1e5ff9d39362c084c6564690557dc4fd2cb8f179ed794d952a632810bddf07e74bf5597859604ddaf8385768211e01c2a1fcde7417757058ba6
|
7
|
+
data.tar.gz: 8de6ec75256fd77d9346c486957593d46bd983c5bd0302d9d2569759836ebf5c91cfb3d59eb9bc2b6fd53fadeb642d4e82c30b0097d0b9c534786af1ab1f64f3
|
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,22 @@
|
|
1
|
-
# 0.1.
|
1
|
+
# 0.1.8 (2015/07/24)
|
2
|
+
|
3
|
+
Enhancements:
|
4
|
+
|
5
|
+
* Support value_type, timestamp_format, timezone options for column_options
|
6
|
+
|
7
|
+
# 0.1.7 (2015/07/24)
|
2
8
|
|
3
9
|
Enhancements:
|
4
10
|
|
5
11
|
* Add reject_on_materialized_type_error option
|
6
12
|
|
7
|
-
# 0.1.6 (2015/07/
|
13
|
+
# 0.1.6 (2015/07/23)
|
8
14
|
|
9
15
|
Enhancements:
|
10
16
|
|
11
17
|
* Enhancement of debug log
|
12
18
|
|
13
|
-
# 0.1.5 (2015/07/
|
19
|
+
# 0.1.5 (2015/07/23)
|
14
20
|
|
15
21
|
Fixes:
|
16
22
|
|
data/README.md
CHANGED
@@ -22,11 +22,19 @@
|
|
22
22
|
- **reject_on_materialized_type_error**: Use `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any of the column types and value types do not fit. ex) double value into INT column fails. See vertica documents for details. (bool, default: false)
|
23
23
|
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
24
24
|
- **type**: type of a column when this plugin creates new tables such as `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`. This is used on creating intermediate tables (insert and truncate_insert modes) and on creating a new target table. (string, default: depends on input column type, see below)
|
25
|
-
-
|
26
|
-
-
|
27
|
-
- double:
|
28
|
-
- string:
|
25
|
+
- boolean: `BOOLEAN`
|
26
|
+
- long: `INT` (same as `BIGINT` in vertica)
|
27
|
+
- double: `FLOAT` (same as `DOUBLE PRECISION` in vertica)
|
28
|
+
- string: `VARCHAR`
|
29
29
|
- timestamp: `TIMESTAMP`
|
30
|
+
- **value_type**: The types (embulk types) of values to convert (string, default: no conversion. See below for available types)
|
31
|
+
- boolean: `boolean`, `string` (to\_s)
|
32
|
+
- long: `boolean` (true), `long`, `double` (to\_f), `string` (to\_s), `timestamp` (Time.at)
|
33
|
+
- double: `boolean` (true), `long` (to\_i), `double`, `string` (to\_s), `timestamp` (Time.at)
|
34
|
+
- string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
|
35
|
+
- timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
|
36
|
+
- **timestamp_format**: If input column type (embulk type) is string and value_type is timestamp or date, this plugin needs the timestamp format of the string. Also, if input column type (embulk type) is timestamp and value_type is string, this plugin needs the timestamp format of the string. (string, default is `%Y-%m-%d %H:%M:%S %z`)
|
37
|
+
- **timezone**: Timezone offset in the format "+HH:MM" or "-HH:MM". `timestamp` column uses this (string, default is "+00:00").
|
30
38
|
|
31
39
|
### Modes
|
32
40
|
|
@@ -53,8 +61,12 @@ out:
|
|
53
61
|
column_options:
|
54
62
|
id: {type: INT}
|
55
63
|
name: {type: VARCHAR(255)}
|
64
|
+
date: {type: DATE, value_type: Date, timezone: "+09:00"}
|
56
65
|
```
|
57
66
|
|
67
|
+
## ToDo
|
68
|
+
|
69
|
+
* Use timezone for string => timestamp conversion
|
58
70
|
|
59
71
|
## Development
|
60
72
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.1.
|
3
|
+
spec.version = "0.1.8"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
@@ -0,0 +1,98 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Output
|
3
|
+
class Vertica < OutputPlugin
|
4
|
+
# Builds per-column value converters (Procs) that turn a value of an embulk
# column type (+schema_type+) into the type requested by the user through
# the +value_type+ column option.
#
# NOTE(review): NotSupportedType is not defined in this file; it is assumed
# to be provided by the enclosing plugin -- confirm.
class ValueConverterFactory
  attr_reader :schema_type, :value_type, :timestamp_format, :timezone

  DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
  DEFAULT_TIMEZONE         = "+00:00"

  # Builds one converter per schema column.
  #
  # Columns without an entry in +column_options+ get an identity converter.
  #
  # @param [Schema] schema embulk defined column types (responds to #names and #types)
  # @param [Hash, nil] column_options user defined column options keyed by column name
  # @return [Hash] column name => value converter (Proc)
  def self.create_converters(schema, column_options)
    # Treat a missing column_options as "no per-column options".
    column_options ||= {}

    pairs = schema.names.zip(schema.types).map do |column_name, schema_type|
      options = column_options[column_name]
      converter =
        if options
          new(schema_type, options['value_type'], options['timestamp_format'], options['timezone']).create_converter
        else
          Proc.new {|val| val }
        end
      [column_name, converter]
    end

    # Hash[pairs] avoids splatting every element as a method argument and
    # does not depend on flatten! (which returns nil when nothing changes).
    Hash[pairs]
  end

  # @param [Symbol] schema_type embulk column type (:boolean, :long, :double, :string, :timestamp)
  # @param [String, nil] value_type target type name; defaults to schema_type (no conversion)
  # @param [String, nil] timestamp_format strptime/strftime format; defaults to DEFAULT_TIMESTAMP_FORMAT
  # @param [String, nil] timezone "+HH:MM"/"-HH:MM" offset; defaults to DEFAULT_TIMEZONE
  def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
    @schema_type      = schema_type
    @value_type       = value_type || schema_type.to_s
    # This must be @timestamp_format so the attr_reader used by the
    # converters sees the configured (or default) format.
    @timestamp_format = timestamp_format || DEFAULT_TIMESTAMP_FORMAT
    @timezone         = timezone || DEFAULT_TIMEZONE
  end

  # @return [Proc] converter for the configured schema_type/value_type pair
  # @raise [NotSupportedType] if schema_type or value_type is not supported
  def create_converter
    case schema_type
    when :boolean   then boolean_converter
    when :long      then long_converter
    when :double    then double_converter
    when :string    then string_converter
    when :timestamp then timestamp_converter
    else raise NotSupportedType, "embulk-output-vertica cannot take column type #{schema_type}"
    end
  end

  # Converter for boolean input columns.
  def boolean_converter
    case value_type
    when 'boolean' then Proc.new {|val| val }
    when 'string'  then Proc.new {|val| val.to_s }
    else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for boolean column"
    end
  end

  # Converter for long input columns. A long converted to a timestamp is
  # passed to Time.at and shifted to the configured timezone.
  def long_converter
    case value_type
    when 'boolean'   then Proc.new {|val| !!val }
    when 'long'      then Proc.new {|val| val }
    when 'double'    then Proc.new {|val| val.to_f }
    when 'string'    then Proc.new {|val| val.to_s }
    # nil (NULL) is passed through instead of raising in Time.at
    when 'timestamp' then Proc.new {|val| Time.at(val).localtime(timezone) unless val.nil? }
    else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for long column"
    end
  end

  # Converter for double input columns.
  def double_converter
    case value_type
    when 'boolean'   then Proc.new {|val| !!val }
    when 'long'      then Proc.new {|val| val.to_i }
    when 'double'    then Proc.new {|val| val }
    when 'string'    then Proc.new {|val| val.to_s }
    # nil (NULL) is passed through instead of raising in Time.at
    when 'timestamp' then Proc.new {|val| Time.at(val).localtime(timezone) unless val.nil? }
    else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for double column"
    end
  end

  # Converter for string input columns. String => timestamp parses with
  # timestamp_format; the timezone option is not applied yet.
  def string_converter
    case value_type
    when 'boolean'   then Proc.new {|val| !!val }
    when 'long'      then Proc.new {|val| val.to_i }
    when 'double'    then Proc.new {|val| val.to_f }
    when 'string'    then Proc.new {|val| val }
    # nil (NULL) is passed through instead of raising in Time.strptime
    when 'timestamp' then Proc.new {|val| Time.strptime(val, timestamp_format) unless val.nil? } # ToDo: timezone
    else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for string column"
    end
  end

  # Converter for timestamp input columns.
  def timestamp_converter
    case value_type
    when 'boolean'   then Proc.new {|val| !!val }
    when 'long'      then Proc.new {|val| val.to_i }
    when 'double'    then Proc.new {|val| val.to_f }
    # nil (NULL) is passed through instead of raising NoMethodError
    when 'string'    then Proc.new {|val| val.localtime(timezone).strftime(timestamp_format) unless val.nil? }
    when 'timestamp' then Proc.new {|val| val.localtime(timezone) unless val.nil? }
    else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for timestamp column"
    end
  end
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'jvertica'
|
2
|
+
require_relative 'vertica/value_converter_factory'
|
2
3
|
|
3
4
|
module Embulk
|
4
5
|
module Output
|
@@ -68,6 +69,8 @@ module Embulk
|
|
68
69
|
|
69
70
|
def initialize(task, schema, index)
|
70
71
|
super
|
72
|
+
@converters = ValueConverterFactory.create_converters(schema, task['column_options'])
|
73
|
+
Embulk.logger.debug { @converters.to_s }
|
71
74
|
@jv = self.class.connect(task)
|
72
75
|
end
|
73
76
|
|
@@ -172,7 +175,9 @@ module Embulk
|
|
172
175
|
end
|
173
176
|
|
174
177
|
def to_json(record)
|
175
|
-
Hash[*(schema.names.zip(record).
|
178
|
+
Hash[*(schema.names.zip(record).map do |column_name, value|
|
179
|
+
[column_name, @converters[column_name].call(value)]
|
180
|
+
end.flatten!(1))].to_json
|
176
181
|
end
|
177
182
|
|
178
183
|
def quoted_schema
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- eiji.sekiya
|
@@ -71,6 +71,7 @@ files:
|
|
71
71
|
- example.csv
|
72
72
|
- example.yml
|
73
73
|
- lib/embulk/output/vertica.rb
|
74
|
+
- lib/embulk/output/vertica/value_converter_factory.rb
|
74
75
|
homepage: https://github.com/eratostennis/embulk-output-vertica
|
75
76
|
licenses:
|
76
77
|
- MIT
|