embulk-output-vertica 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9514ee560a9a138d1280efcb9f106d0689c3a3c9
4
- data.tar.gz: 02dd9130ce7cc4b0eccd2d6d4af59e7900627fb8
3
+ metadata.gz: 16396802fde10566e9de22321179028ab3401763
4
+ data.tar.gz: d04b85afa6ce9e401f5c8caa193fcc28ff171470
5
5
  SHA512:
6
- metadata.gz: bd6b079069cfd1e5ff9d39362c084c6564690557dc4fd2cb8f179ed794d952a632810bddf07e74bf5597859604ddaf8385768211e01c2a1fcde7417757058ba6
7
- data.tar.gz: 8de6ec75256fd77d9346c486957593d46bd983c5bd0302d9d2569759836ebf5c91cfb3d59eb9bc2b6fd53fadeb642d4e82c30b0097d0b9c534786af1ab1f64f3
6
+ metadata.gz: 0a739f9edde7e3166cf9992b11328a4b2a86e6187bd1a6a66ccfa82937aebba5e392f62dd2f2a1e8137fa4f2989b542c149f7e2b8ed10f850ab15d9fa8e3bcd3
7
+ data.tar.gz: 0e1063afeaad0955cd4811f1558c0ad64d6b0c84848257050cb6327aa6d4143a99ccb90d236086bac9833f8acb3237499da0149b4d2a4be4d65b2bc87a253413
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.9 (2015/07/24)
2
+
3
+ Enhancements:
4
+
5
+ * Support timezone for string to timestamp conversion
6
+
1
7
  # 0.1.8 (2015/07/24)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -34,7 +34,7 @@
34
34
  - string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
35
35
  - timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
36
36
  - **timestamp_format**: If input column type (embulk type) is string and value_type is timestamp or date, this plugin needs the timestamp format of the string. Also, if input column type (embulk type) is timestamp and value_type is string, this plugin needs the timestamp format of the string.
37
- - **timezone**: With format of "+HH:MM" "-HH:MM". `timestamp` column uses this (string, default is "+00:00").
37
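+ - **timezone**: Specify timezone to convert into `timestamp` or from `timestamp`. Accepts a numeric offset ("+HH:MM", "+HHMM" or "+HH", with either sign) or a zone name such as "Asia/Tokyo" (string, default is "+00:00").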
38
38
 
39
39
  ### Modes
40
40
 
@@ -64,10 +64,6 @@ out:
64
64
  date: {type: DATE, value_type: Date, timezone: "+09:00"}
65
65
  ```
66
66
 
67
- ## ToDo
68
-
69
- * Use timezone for string => timezone conversion
70
-
71
67
  ## Development
72
68
 
73
69
  Run example:
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.1.8"
3
+ spec.version = "0.1.9"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
@@ -14,6 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.require_paths = ["lib"]
15
15
 
16
16
  spec.add_dependency "jvertica", "~> 0.2"
17
+ spec.add_dependency "tzinfo"
17
18
  spec.add_development_dependency "bundler", "~> 1.7"
18
19
  spec.add_development_dependency "rake", "~> 10.0"
19
20
  end
data/example.csv CHANGED
@@ -1,9 +1,9 @@
1
- date,foo,bar,id,name,score
2
- 2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
3
- 2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
4
- 2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
5
- 2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
6
- 2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
7
- 2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
8
- 2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
9
- 2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
1
+ date,test,foo,bar,id,name,score
2
+ 2015-07-13,2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
3
+ 2015-07-13,2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
4
+ 2015-07-13,2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
5
+ 2015-07-13,2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
6
+ 2015-07-13,2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
7
+ 2015-07-13,2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
8
+ 2015-07-13,2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
9
+ 2015-07-13,2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
data/example.yml CHANGED
@@ -16,7 +16,8 @@ in:
16
16
  skip_header_lines: 1
17
17
  comment_line_marker: '#'
18
18
  columns:
19
- - {name: date, type: timestamp, format: "%Y-%m-%d"}
19
+ - {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
20
+ - {name: string_date, type: string}
20
21
  - {name: foo, type: string}
21
22
  - {name: bar, type: string}
22
23
  - {name: id, type: long}
@@ -35,6 +36,8 @@ out:
35
36
  abort_on_error: true
36
37
  reject_on_materialized_type_error: true
37
38
  column_options:
38
- date: {type: DATE}
39
- id: {type: INT}
40
- name: {type: VARCHAR}
39
+ id: {type: INT}
40
+ name: {type: VARCHAR}
41
+ timestamp_date: {type: DATE, timezone: "+09:00"}
42
+ string_date: {type: DATE, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
43
+ score: {type: INT, value_type: long}
@@ -1,8 +1,11 @@
1
+ require 'time'
2
+ require 'tzinfo'
3
+
1
4
  module Embulk
2
5
  module Output
3
6
  class Vertica < OutputPlugin
4
7
  class ValueConverterFactory
5
- attr_reader :schema_type, :value_type, :timestamp_format, :timezone
8
+ attr_reader :schema_type, :value_type, :timestamp_format, :timezone, :zone_offset
6
9
 
7
10
  DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
8
11
  DEFAULT_TIMEZONE = "+00:00"
@@ -26,8 +29,9 @@ module Embulk
26
29
  def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
27
30
  @schema_type = schema_type
28
31
  @value_type = value_type || schema_type.to_s
29
- @timestampt_format = timestamp_format || DEFAULT_TIMESTAMP_FORMAT
32
+ @timestamp_format = timestamp_format || DEFAULT_TIMESTAMP_FORMAT
30
33
  @timezone = timezone || DEFAULT_TIMEZONE
34
+ @zone_offset = get_zone_offset(@timezone)
31
35
  end
32
36
 
33
37
  def create_converter
@@ -55,7 +59,7 @@ module Embulk
55
59
  when 'long' then Proc.new {|val| val }
56
60
  when 'double' then Proc.new {|val| val.to_f }
57
61
  when 'string' then Proc.new {|val| val.to_s }
58
- when 'timestamp' then Proc.new {|val| Time.at(val).localtime(timezone) }
62
+ when 'timestamp' then Proc.new {|val| Time.at(val).localtime(zone_offset) }
59
63
  else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for long column"
60
64
  end
61
65
  end
@@ -66,7 +70,7 @@ module Embulk
66
70
  when 'long' then Proc.new {|val| val.to_i }
67
71
  when 'double' then Proc.new {|val| val }
68
72
  when 'string' then Proc.new {|val| val.to_s }
69
- when 'timestamp' then Proc.new {|val| Time.at(val).localtime(timezone) }
73
+ when 'timestamp' then Proc.new {|val| Time.at(val).localtime(zone_offset) }
70
74
  else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for double column"
71
75
  end
72
76
  end
@@ -77,7 +81,7 @@ module Embulk
77
81
  when 'long' then Proc.new {|val| val.to_i }
78
82
  when 'double' then Proc.new {|val| val.to_f }
79
83
  when 'string' then Proc.new {|val| val }
80
- when 'timestamp' then Proc.new {|val| Time.strptime(val, timestamp_format) } # ToDo: timezone
84
+ when 'timestamp' then Proc.new {|val| strptime_with_zone(val, timestamp_format, zone_offset) }
81
85
  else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for string column"
82
86
  end
83
87
  end
@@ -87,11 +91,36 @@ module Embulk
87
91
  when 'boolean' then Proc.new {|val| !!val }
88
92
  when 'long' then Proc.new {|val| val.to_i }
89
93
  when 'double' then Proc.new {|val| val.to_f }
90
- when 'string' then Proc.new {|val| val.localtime(timezone).strftime(timestamp_format) }
91
- when 'timestamp' then Proc.new {|val| val.localtime(timezone) }
94
+ when 'string' then Proc.new {|val| val.localtime(zone_offset).strftime(timestamp_format) }
95
+ when 'timestamp' then Proc.new {|val| val.localtime(zone_offset) }
92
96
  else raise NotSupportedType, "embulk-output-vertica cannot take column value_type #{value_type} for timesatmp column"
93
97
  end
94
98
  end
99
+
100
+ private
101
+
102
+ # [+-]HH:MM, [+-]HHMM, [+-]HH
103
+ NUMERIC_PATTERN = %r{\A[+-]\d\d(:?\d\d)?\z}
104
+
105
+ # Region/Zone, Region/Zone/Zone
106
+ NAME_PATTERN = %r{\A[^/]+/[^/]+(/[^/]+)?\z}
107
+
108
+ def strptime_with_zone(date, timestamp_format, zone_offset)
109
+ time = Time.strptime(date, timestamp_format)
110
+ utc_offset = time.utc_offset
111
+ time.localtime(zone_offset) + utc_offset - zone_offset
112
+ end
113
+
114
+ def get_zone_offset(timezone)
115
+ if NUMERIC_PATTERN === timezone
116
+ Time.zone_offset(timezone)
117
+ elsif NAME_PATTERN === timezone
118
+ tz = TZInfo::Timezone.get(timezone)
119
+ tz.period_for_utc(Time.now).utc_total_offset
120
+ else
121
+ raise ArgumentError, "timezone format is invalid: #{timezone}"
122
+ end
123
+ end
95
124
  end
96
125
  end
97
126
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya
@@ -25,6 +25,20 @@ dependencies:
25
25
  - - ~>
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0.2'
28
+ - !ruby/object:Gem::Dependency
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ name: tzinfo
35
+ prerelease: false
36
+ type: :runtime
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
28
42
  - !ruby/object:Gem::Dependency
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements: