embulk-output-vertica 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -5
- data/embulk-output-vertica.gemspec +2 -1
- data/example.csv +9 -9
- data/example.yml +7 -4
- data/lib/embulk/output/vertica/value_converter_factory.rb +36 -7
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 16396802fde10566e9de22321179028ab3401763
|
|
4
|
+
data.tar.gz: d04b85afa6ce9e401f5c8caa193fcc28ff171470
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0a739f9edde7e3166cf9992b11328a4b2a86e6187bd1a6a66ccfa82937aebba5e392f62dd2f2a1e8137fa4f2989b542c149f7e2b8ed10f850ab15d9fa8e3bcd3
|
|
7
|
+
data.tar.gz: 0e1063afeaad0955cd4811f1558c0ad64d6b0c84848257050cb6327aa6d4143a99ccb90d236086bac9833f8acb3237499da0149b4d2a4be4d65b2bc87a253413
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
- string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
|
|
35
35
|
- timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
|
|
36
36
|
- **timestamp_format**: If input column type (embulk type) is string and value_type is timestamp or date, this plugin needs the timestamp format of the string. Also, if input column type (embulk type) is timestamp and value_type is string, this plugin needs the timestamp format of the string.
|
|
37
|
-
- **timezone**:
|
|
37
|
+
- **timezone**: Specify timezone to convert into `timestamp` or from `timestamp` (string, default is "UTC").
|
|
38
38
|
|
|
39
39
|
### Modes
|
|
40
40
|
|
|
@@ -64,10 +64,6 @@ out:
|
|
|
64
64
|
date: {type: DATE, value_type: Date, timezone: "+09:00"}
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
-
## ToDo
|
|
68
|
-
|
|
69
|
-
* Use timezone for string => timezone conversion
|
|
70
|
-
|
|
71
67
|
## Development
|
|
72
68
|
|
|
73
69
|
Run example:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |spec|
|
|
2
2
|
spec.name = "embulk-output-vertica"
|
|
3
|
-
spec.version = "0.1.8"
|
|
3
|
+
spec.version = "0.1.9"
|
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
|
@@ -14,6 +14,7 @@ Gem::Specification.new do |spec|
|
|
|
14
14
|
spec.require_paths = ["lib"]
|
|
15
15
|
|
|
16
16
|
spec.add_dependency "jvertica", "~> 0.2"
|
|
17
|
+
spec.add_dependency "tzinfo"
|
|
17
18
|
spec.add_development_dependency "bundler", "~> 1.7"
|
|
18
19
|
spec.add_development_dependency "rake", "~> 10.0"
|
|
19
20
|
end
|
data/example.csv
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
date,foo,bar,id,name,score
|
|
2
|
-
2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
|
3
|
-
2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
|
4
|
-
2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
|
5
|
-
2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
|
|
6
|
-
2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
|
|
7
|
-
2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
|
|
8
|
-
2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
|
|
9
|
-
2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
|
|
1
|
+
date,test,foo,bar,id,name,score
|
|
2
|
+
2015-07-13,2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
|
3
|
+
2015-07-13,2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
|
4
|
+
2015-07-13,2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
|
5
|
+
2015-07-13,2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
|
|
6
|
+
2015-07-13,2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
|
|
7
|
+
2015-07-13,2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
|
|
8
|
+
2015-07-13,2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
|
|
9
|
+
2015-07-13,2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
|
data/example.yml
CHANGED
|
@@ -16,7 +16,8 @@ in:
|
|
|
16
16
|
skip_header_lines: 1
|
|
17
17
|
comment_line_marker: '#'
|
|
18
18
|
columns:
|
|
19
|
-
- {name:
|
|
19
|
+
- {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
|
20
|
+
- {name: string_date, type: string}
|
|
20
21
|
- {name: foo, type: string}
|
|
21
22
|
- {name: bar, type: string}
|
|
22
23
|
- {name: id, type: long}
|
|
@@ -35,6 +36,8 @@ out:
|
|
|
35
36
|
abort_on_error: true
|
|
36
37
|
reject_on_materialized_type_error: true
|
|
37
38
|
column_options:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
id: {type: INT}
|
|
40
|
+
name: {type: VARCHAR}
|
|
41
|
+
timestamp_date: {type: DATE, timezone: "+09:00"}
|
|
42
|
+
string_date: {type: DATE, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
|
|
43
|
+
score: {type: INT, value_type: long}
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
require 'tzinfo'
|
|
3
|
+
|
|
1
4
|
module Embulk
|
|
2
5
|
module Output
|
|
3
6
|
class Vertica < OutputPlugin
|
|
4
7
|
class ValueConverterFactory
|
|
5
|
-
attr_reader :schema_type, :value_type, :timestamp_format, :timezone
|
|
8
|
+
attr_reader :schema_type, :value_type, :timestamp_format, :timezone, :zone_offset
|
|
6
9
|
|
|
7
10
|
DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
|
|
8
11
|
DEFAULT_TIMEZONE = "+00:00"
|
|
@@ -26,8 +29,9 @@ module Embulk
|
|
|
26
29
|
def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
|
|
27
30
|
@schema_type = schema_type
|
|
28
31
|
@value_type = value_type || schema_type.to_s
|
|
29
|
-
@
|
|
32
|
+
@timestamp_format = timestamp_format || DEFAULT_TIMESTAMP_FORMAT
|
|
30
33
|
@timezone = timezone || DEFAULT_TIMEZONE
|
|
34
|
+
@zone_offset = get_zone_offset(@timezone)
|
|
31
35
|
end
|
|
32
36
|
|
|
33
37
|
def create_converter
|
|
@@ -55,7 +59,7 @@ module Embulk
|
|
|
55
59
|
when 'long' then Proc.new {|val| val }
|
|
56
60
|
when 'double' then Proc.new {|val| val.to_f }
|
|
57
61
|
when 'string' then Proc.new {|val| val.to_s }
|
|
58
|
-
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(
|
|
62
|
+
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(zone_offset) }
|
|
59
63
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for long column"
|
|
60
64
|
end
|
|
61
65
|
end
|
|
@@ -66,7 +70,7 @@ module Embulk
|
|
|
66
70
|
when 'long' then Proc.new {|val| val.to_i }
|
|
67
71
|
when 'double' then Proc.new {|val| val }
|
|
68
72
|
when 'string' then Proc.new {|val| val.to_s }
|
|
69
|
-
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(
|
|
73
|
+
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(zone_offset) }
|
|
70
74
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for double column"
|
|
71
75
|
end
|
|
72
76
|
end
|
|
@@ -77,7 +81,7 @@ module Embulk
|
|
|
77
81
|
when 'long' then Proc.new {|val| val.to_i }
|
|
78
82
|
when 'double' then Proc.new {|val| val.to_f }
|
|
79
83
|
when 'string' then Proc.new {|val| val }
|
|
80
|
-
when 'timestamp' then Proc.new {|val|
|
|
84
|
+
when 'timestamp' then Proc.new {|val| strptime_with_zone(val, timestamp_format, zone_offset) }
|
|
81
85
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for string column"
|
|
82
86
|
end
|
|
83
87
|
end
|
|
@@ -87,11 +91,36 @@ module Embulk
|
|
|
87
91
|
when 'boolean' then Proc.new {|val| !!val }
|
|
88
92
|
when 'long' then Proc.new {|val| val.to_i }
|
|
89
93
|
when 'double' then Proc.new {|val| val.to_f }
|
|
90
|
-
when 'string' then Proc.new {|val| val.localtime(
|
|
91
|
-
when 'timestamp' then Proc.new {|val| val.localtime(
|
|
94
|
+
when 'string' then Proc.new {|val| val.localtime(zone_offset).strftime(timestamp_format) }
|
|
95
|
+
when 'timestamp' then Proc.new {|val| val.localtime(zone_offset) }
|
|
92
96
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for timesatmp column"
|
|
93
97
|
end
|
|
94
98
|
end
|
|
99
|
+
|
|
100
|
+
private
|
|
101
|
+
|
|
102
|
+
# [+-]HH:MM, [+-]HHMM, [+-]HH
|
|
103
|
+
NUMERIC_PATTERN = %r{\A[+-]\d\d(:?\d\d)?\z}
|
|
104
|
+
|
|
105
|
+
# Region/Zone, Region/Zone/Zone
|
|
106
|
+
NAME_PATTERN = %r{\A[^/]+/[^/]+(/[^/]+)?\z}
|
|
107
|
+
|
|
108
|
+
def strptime_with_zone(date, timestamp_format, zone_offset)
|
|
109
|
+
time = Time.strptime(date, timestamp_format)
|
|
110
|
+
utc_offset = time.utc_offset
|
|
111
|
+
time.localtime(zone_offset) + utc_offset - zone_offset
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def get_zone_offset(timezone)
|
|
115
|
+
if NUMERIC_PATTERN === timezone
|
|
116
|
+
Time.zone_offset(timezone)
|
|
117
|
+
elsif NAME_PATTERN === timezone
|
|
118
|
+
tz = TZInfo::Timezone.get(timezone)
|
|
119
|
+
tz.period_for_utc(Time.now).utc_total_offset
|
|
120
|
+
else
|
|
121
|
+
raise ArgumentError, "timezone format is invalid: #{timezone}"
|
|
122
|
+
end
|
|
123
|
+
end
|
|
95
124
|
end
|
|
96
125
|
end
|
|
97
126
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-output-vertica
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.8
|
|
4
|
+
version: 0.1.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- eiji.sekiya
|
|
@@ -25,6 +25,20 @@ dependencies:
|
|
|
25
25
|
- - ~>
|
|
26
26
|
- !ruby/object:Gem::Version
|
|
27
27
|
version: '0.2'
|
|
28
|
+
- !ruby/object:Gem::Dependency
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - '>='
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
name: tzinfo
|
|
35
|
+
prerelease: false
|
|
36
|
+
type: :runtime
|
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
38
|
+
requirements:
|
|
39
|
+
- - '>='
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
version: '0'
|
|
28
42
|
- !ruby/object:Gem::Dependency
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
|
30
44
|
requirements:
|