embulk-output-vertica 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 16396802fde10566e9de22321179028ab3401763
4
- data.tar.gz: d04b85afa6ce9e401f5c8caa193fcc28ff171470
3
+ metadata.gz: 4f73299beb951f703ae4b6cab3dee30f85ac7314
4
+ data.tar.gz: 720bc9331a6dba8460f0053563592fd9e3be5071
5
5
  SHA512:
6
- metadata.gz: 0a739f9edde7e3166cf9992b11328a4b2a86e6187bd1a6a66ccfa82937aebba5e392f62dd2f2a1e8137fa4f2989b542c149f7e2b8ed10f850ab15d9fa8e3bcd3
7
- data.tar.gz: 0e1063afeaad0955cd4811f1558c0ad64d6b0c84848257050cb6327aa6d4143a99ccb90d236086bac9833f8acb3237499da0149b4d2a4be4d65b2bc87a253413
6
+ metadata.gz: 1ec6d31b3b118e1697aa4e6ead1750f44ce2e1c15fc8be33d3c61fa2b476d991124a32c9a14e4f27cc8f8a674028c172d22a9149c2eddff2ba53a33ac83a58a5
7
+ data.tar.gz: ede9307074e8d52d932614d0d0c9338dae8abbbc829fe7754ddf4a88074b7475f2d83384f36e1ea4c29ef8f8506f595302ca28996566779555607510b831d842
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.2.0 (2015/07/24)
2
+
3
+ Enhancements:
4
+
5
+ * Add `default_timezone` option
6
+
1
7
  # 0.1.9 (2015/07/24)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -20,6 +20,7 @@
20
20
  - **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO) See vertica documents for details.
21
21
  - **abort_on_error**: Stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
22
22
  - **reject_on_materialized_type_error**: Use `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any of the column types and value types do not fit. ex) double value into INT column fails. See vertica documents for details. (bool, default: false)
23
+ - **default_timezone**: the default timezone for column_options (string, default is "UTC")
23
24
  - **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
24
25
  - **type**: type of a column when this plugin creates new tables such as `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`. This is used on creating intermediate tables (insert and truncate_insert modes) and on creating a new target table. (string, default: depends on input column type, see below)
25
26
  - boolean: `BOOLEAN`
@@ -33,8 +34,8 @@
33
34
  - double: `boolean` (true), `long` (to\_i), `double`, `string` (to\_s), `timestamp` (Time.at)
34
35
  - string: `boolean` (true), `long` (to\_i), `double` (to\_f), `string`, `timestamp` (Time.strptime)
35
36
  - timestamp: `boolean` (true), `long` (to\_i), `double` (to\_f), `string` (strftime), `timestamp`
36
- - **timestamp_format**: If input column type (embulk type) is string and value_type is timestamp or date, this plugin needs the timestamp format of the string. Also, if input column type (embulk type) is timestamp and value_type is string, this plugin needs the timestamp format of the string.
37
- - **timezone**: Specify timezone to convert into `timestamp` or from `timestamp` (string, default is "UTC").
37
+ - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is "%Y-%m-%d %H:%M:%S %z")
38
+ - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
38
39
 
39
40
  ### Modes
40
41
 
@@ -59,9 +60,9 @@ out:
59
60
  copy_mode: DIRECT
60
61
  abort_on_error: true
61
62
  column_options:
62
- id: {type: INT}
63
+ id: {type: INT}
63
64
  name: {type: VARCHAR(255)}
64
- date: {type: DATE, value_type: Date, timezone: "+09:00"}
65
+ date: {type: DATE, value_type: timezone, timezone: "+09:00"}
65
66
  ```
66
67
 
67
68
  ## Development
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.1.9"
3
+ spec.version = "0.2.0"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
data/example.csv CHANGED
@@ -1,4 +1,4 @@
1
- date,test,foo,bar,id,name,score
1
+ timestamp_date,string_date,foo,bar,id,name,score
2
2
  2015-07-13,2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
3
3
  2015-07-13,2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
4
4
  2015-07-13,2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
data/example.yml CHANGED
@@ -35,9 +35,11 @@ out:
35
35
  copy_mode: direct
36
36
  abort_on_error: true
37
37
  reject_on_materialized_type_error: true
38
+ default_timezone: 'Asia/Tokyo'
38
39
  column_options:
40
+ timestamp_date: {type: DATE}
41
+ string_date: {type: TIMESTAMPTZ, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: '+08:00'}
39
42
  id: {type: INT}
40
43
  name: {type: VARCHAR}
41
- timestamp_date: {type: DATE, timezone: "+09:00"}
42
- string_date: {type: DATE, value_type: timestamp, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
43
44
  score: {type: INT, value_type: long}
45
+
@@ -11,18 +11,19 @@ module Embulk
11
11
 
12
12
  def self.transaction(config, schema, processor_count, &control)
13
13
  task = {
14
- 'host' => config.param('host', :string, :default => 'localhost'),
15
- 'port' => config.param('port', :integer, :default => 5433),
16
- 'username' => config.param('username', :string),
17
- 'password' => config.param('password', :string, :default => ''),
18
- 'database' => config.param('database', :string, :default => 'vdb'),
19
- 'schema' => config.param('schema', :string, :default => 'public'),
20
- 'table' => config.param('table', :string),
21
- 'mode' => config.param('mode', :string, :default => 'insert'),
22
- 'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
23
- 'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
14
+ 'host' => config.param('host', :string, :default => 'localhost'),
15
+ 'port' => config.param('port', :integer, :default => 5433),
16
+ 'username' => config.param('username', :string),
17
+ 'password' => config.param('password', :string, :default => ''),
18
+ 'database' => config.param('database', :string, :default => 'vdb'),
19
+ 'schema' => config.param('schema', :string, :default => 'public'),
20
+ 'table' => config.param('table', :string),
21
+ 'mode' => config.param('mode', :string, :default => 'insert'),
22
+ 'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
23
+ 'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
24
+ 'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
25
+ 'column_options' => config.param('column_options', :hash, :default => {}),
24
26
  'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
25
- 'column_options' => config.param('column_options', :hash, :default => {}),
26
27
  }
27
28
 
28
29
  unless %w[INSERT REPLACE].include?(task['mode'].upcase!)
@@ -69,7 +70,7 @@ module Embulk
69
70
 
70
71
  def initialize(task, schema, index)
71
72
  super
72
- @converters = ValueConverterFactory.create_converters(schema, task['column_options'])
73
+ @converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
73
74
  Embulk.logger.debug { @converters.to_s }
74
75
  @jv = self.class.connect(task)
75
76
  end
@@ -84,6 +85,7 @@ module Embulk
84
85
  copy(@jv, copy_sql) do |stdin|
85
86
  page.each do |record|
86
87
  json = to_json(record)
88
+ Embulk.logger.debug { "embulk-output-vertica: #{json}" }
87
89
  stdin << json << "\n"
88
90
  end
89
91
  end
@@ -8,17 +8,16 @@ module Embulk
8
8
  attr_reader :schema_type, :value_type, :timestamp_format, :timezone, :zone_offset
9
9
 
10
10
  DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
11
- DEFAULT_TIMEZONE = "+00:00"
12
11
 
13
- def self.create_converters(schema, column_options)
12
+ def self.create_converters(schema, default_timezone, column_options)
14
13
  # @param [Schema] schema embulk defined column types
15
14
  # @param [Hash] column_options user defined column types
16
15
  # @return [Array] value converters (array of Proc)
17
16
  Hash[*(schema.names.zip(schema.types).map do |column_name, schema_type|
18
17
  if column_options[column_name]
19
18
  value_type = column_options[column_name]['value_type']
20
- timestamp_format = column_options[column_name]['timestamp_format']
21
- timezone = column_options[column_name]['timezone']
19
+ timestamp_format = column_options[column_name]['timestamp_format'] || DEFAULT_TIMESTAMP_FORMAT
20
+ timezone = column_options[column_name]['timezone'] || default_timezone
22
21
  [column_name, self.new(schema_type, value_type, timestamp_format, timezone).create_converter]
23
22
  else
24
23
  [column_name, Proc.new {|val| val }]
@@ -29,8 +28,8 @@ module Embulk
29
28
  def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
30
29
  @schema_type = schema_type
31
30
  @value_type = value_type || schema_type.to_s
32
- @timestamp_format = timestamp_format || DEFAULT_TIMESTAMP_FORMAT
33
- @timezone = timezone || DEFAULT_TIMEZONE
31
+ @timestamp_format = timestamp_format
32
+ @timezone = timezone
34
33
  @zone_offset = get_zone_offset(@timezone)
35
34
  end
36
35
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya