embulk-output-vertica 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -5
- data/embulk-output-vertica.gemspec +2 -1
- data/example.csv +9 -9
- data/example.yml +7 -4
- data/lib/embulk/output/vertica/value_converter_factory.rb +36 -7
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 16396802fde10566e9de22321179028ab3401763
|
4
|
+
data.tar.gz: d04b85afa6ce9e401f5c8caa193fcc28ff171470
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a739f9edde7e3166cf9992b11328a4b2a86e6187bd1a6a66ccfa82937aebba5e392f62dd2f2a1e8137fa4f2989b542c149f7e2b8ed10f850ab15d9fa8e3bcd3
|
7
|
+
data.tar.gz: 0e1063afeaad0955cd4811f1558c0ad64d6b0c84848257050cb6327aa6d4143a99ccb90d236086bac9833f8acb3237499da0149b4d2a4be4d65b2bc87a253413
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -34,7 +34,7 @@
|
|
34
34
|
- string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
|
35
35
|
- timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
|
36
36
|
- **timestamp_format**: If input column type (embulk type) is string and value_type is timestamp or date, this plugin needs the timestamp format of the string. Also, if input column type (embulk type) is timestamp and value_type is string, this plugin needs the timestamp format of the string.
|
37
|
-
- **timezone**:
|
37
|
+
- **timezone**: Specify timezone to convert into `timestamp` or from `timestamp` (string, default is "UTC").
|
38
38
|
|
39
39
|
### Modes
|
40
40
|
|
@@ -64,10 +64,6 @@ out:
|
|
64
64
|
date: {type: DATE, value_type: Date, timezone: "+09:00"}
|
65
65
|
```
|
66
66
|
|
67
|
-
## ToDo
|
68
|
-
|
69
|
-
* Use timezone for string => timezone conversion
|
70
|
-
|
71
67
|
## Development
|
72
68
|
|
73
69
|
Run example:
|
data/embulk-output-vertica.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.1.8"
|
3
|
+
spec.version = "0.1.9"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
@@ -14,6 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.require_paths = ["lib"]
|
15
15
|
|
16
16
|
spec.add_dependency "jvertica", "~> 0.2"
|
17
|
+
spec.add_dependency "tzinfo"
|
17
18
|
spec.add_development_dependency "bundler", "~> 1.7"
|
18
19
|
spec.add_development_dependency "rake", "~> 10.0"
|
19
20
|
end
|
data/example.csv
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
date,foo,bar,id,name,score
|
2
|
-
2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
3
|
-
2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
4
|
-
2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
5
|
-
2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
|
6
|
-
2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
|
7
|
-
2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
|
8
|
-
2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
|
9
|
-
2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
|
1
|
+
date,test,foo,bar,id,name,score
|
2
|
+
2015-07-13,2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
|
3
|
+
2015-07-13,2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
|
4
|
+
2015-07-13,2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
|
5
|
+
2015-07-13,2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
|
6
|
+
2015-07-13,2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
|
7
|
+
2015-07-13,2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
|
8
|
+
2015-07-13,2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
|
9
|
+
2015-07-13,2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
|
data/example.yml
CHANGED
@@ -16,7 +16,8 @@ in:
|
|
16
16
|
skip_header_lines: 1
|
17
17
|
comment_line_marker: '#'
|
18
18
|
columns:
|
19
|
-
- {name:
|
19
|
+
- {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
20
|
+
- {name: string_date, type: string}
|
20
21
|
- {name: foo, type: string}
|
21
22
|
- {name: bar, type: string}
|
22
23
|
- {name: id, type: long}
|
@@ -35,6 +36,8 @@ out:
|
|
35
36
|
abort_on_error: true
|
36
37
|
reject_on_materialized_type_error: true
|
37
38
|
column_options:
|
38
|
-
|
39
|
-
|
40
|
-
|
39
|
+
id: {type: INT}
|
40
|
+
name: {type: VARCHAR}
|
41
|
+
timestamp_date: {type: DATE, timezone: "+09:00"}
|
42
|
+
string_date: {type: DATE, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
|
43
|
+
score: {type: INT, value_type: long}
|
data/lib/embulk/output/vertica/value_converter_factory.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'tzinfo'
|
3
|
+
|
1
4
|
module Embulk
|
2
5
|
module Output
|
3
6
|
class Vertica < OutputPlugin
|
4
7
|
class ValueConverterFactory
|
5
|
-
attr_reader :schema_type, :value_type, :timestamp_format, :timezone
|
8
|
+
attr_reader :schema_type, :value_type, :timestamp_format, :timezone, :zone_offset
|
6
9
|
|
7
10
|
DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
|
8
11
|
DEFAULT_TIMEZONE = "+00:00"
|
@@ -26,8 +29,9 @@ module Embulk
|
|
26
29
|
def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
|
27
30
|
@schema_type = schema_type
|
28
31
|
@value_type = value_type || schema_type.to_s
|
29
|
-
@
|
32
|
+
@timestamp_format = timestamp_format || DEFAULT_TIMESTAMP_FORMAT
|
30
33
|
@timezone = timezone || DEFAULT_TIMEZONE
|
34
|
+
@zone_offset = get_zone_offset(@timezone)
|
31
35
|
end
|
32
36
|
|
33
37
|
def create_converter
|
@@ -55,7 +59,7 @@ module Embulk
|
|
55
59
|
when 'long' then Proc.new {|val| val }
|
56
60
|
when 'double' then Proc.new {|val| val.to_f }
|
57
61
|
when 'string' then Proc.new {|val| val.to_s }
|
58
|
-
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(
|
62
|
+
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(zone_offset) }
|
59
63
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for long column"
|
60
64
|
end
|
61
65
|
end
|
@@ -66,7 +70,7 @@ module Embulk
|
|
66
70
|
when 'long' then Proc.new {|val| val.to_i }
|
67
71
|
when 'double' then Proc.new {|val| val }
|
68
72
|
when 'string' then Proc.new {|val| val.to_s }
|
69
|
-
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(
|
73
|
+
when 'timestamp' then Proc.new {|val| Time.at(val).localtime(zone_offset) }
|
70
74
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for double column"
|
71
75
|
end
|
72
76
|
end
|
@@ -77,7 +81,7 @@ module Embulk
|
|
77
81
|
when 'long' then Proc.new {|val| val.to_i }
|
78
82
|
when 'double' then Proc.new {|val| val.to_f }
|
79
83
|
when 'string' then Proc.new {|val| val }
|
80
|
-
when 'timestamp' then Proc.new {|val|
|
84
|
+
when 'timestamp' then Proc.new {|val| strptime_with_zone(val, timestamp_format, zone_offset) }
|
81
85
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for string column"
|
82
86
|
end
|
83
87
|
end
|
@@ -87,11 +91,36 @@ module Embulk
|
|
87
91
|
when 'boolean' then Proc.new {|val| !!val }
|
88
92
|
when 'long' then Proc.new {|val| val.to_i }
|
89
93
|
when 'double' then Proc.new {|val| val.to_f }
|
90
|
-
when 'string' then Proc.new {|val| val.localtime(
|
91
|
-
when 'timestamp' then Proc.new {|val| val.localtime(
|
94
|
+
when 'string' then Proc.new {|val| val.localtime(zone_offset).strftime(timestamp_format) }
|
95
|
+
when 'timestamp' then Proc.new {|val| val.localtime(zone_offset) }
|
92
96
|
else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for timesatmp column"
|
93
97
|
end
|
94
98
|
end
|
99
|
+
|
100
|
+
private
|
101
|
+
|
102
|
+
# [+-]HH:MM, [+-]HHMM, [+-]HH
|
103
|
+
NUMERIC_PATTERN = %r{\A[+-]\d\d(:?\d\d)?\z}
|
104
|
+
|
105
|
+
# Region/Zone, Region/Zone/Zone
|
106
|
+
NAME_PATTERN = %r{\A[^/]+/[^/]+(/[^/]+)?\z}
|
107
|
+
|
108
|
+
def strptime_with_zone(date, timestamp_format, zone_offset)
|
109
|
+
time = Time.strptime(date, timestamp_format)
|
110
|
+
utc_offset = time.utc_offset
|
111
|
+
time.localtime(zone_offset) + utc_offset - zone_offset
|
112
|
+
end
|
113
|
+
|
114
|
+
def get_zone_offset(timezone)
|
115
|
+
if NUMERIC_PATTERN === timezone
|
116
|
+
Time.zone_offset(timezone)
|
117
|
+
elsif NAME_PATTERN === timezone
|
118
|
+
tz = TZInfo::Timezone.get(timezone)
|
119
|
+
tz.period_for_utc(Time.now).utc_total_offset
|
120
|
+
else
|
121
|
+
raise ArgumentError, "timezone format is invalid: #{timezone}"
|
122
|
+
end
|
123
|
+
end
|
95
124
|
end
|
96
125
|
end
|
97
126
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- eiji.sekiya
|
@@ -25,6 +25,20 @@ dependencies:
|
|
25
25
|
- - ~>
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0.2'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
name: tzinfo
|
35
|
+
prerelease: false
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
28
42
|
- !ruby/object:Gem::Dependency
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|