embulk-output-vertica 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -3
- data/README.md +11 -11
- data/embulk-output-vertica.gemspec +1 -1
- data/lib/embulk/output/vertica.rb +23 -8
- data/lib/embulk/output/vertica/value_converter_factory.rb +6 -5
- metadata +28 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ab7bcf92a11a6a2899ca833c6a8c07677d5b1e5
|
4
|
+
data.tar.gz: d05503ff66756df167a83e70da5ca7cd6f7ba2f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2c2dd21ae409cbcfcf40428bdcc6e2ad67f9911935145d69f05a7b534546523ae4618e879f60b6ca7a60ada7496ee9754da5f909b7b50773a8474c52a5f08787
|
7
|
+
data.tar.gz: e3e962118458e4412370e5efa25227f801d5986a4bced0f29a67ab490608d2e83f15d3a9e3c1ea769530cabc7006019b4f16b935819dbf560d32ac899322b9ca
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
# 0.2.3 (2015/09/16)
|
2
|
+
|
3
|
+
Changes:
|
4
|
+
|
5
|
+
* Commit all pages at burst (in each task)
|
6
|
+
|
7
|
+
Enhancements:
|
8
|
+
|
9
|
+
* Return task_reports
|
10
|
+
|
1
11
|
# 0.2.2 (2015/07/24)
|
2
12
|
|
3
13
|
Changes:
|
@@ -20,19 +30,19 @@ Enhancements:
|
|
20
30
|
|
21
31
|
Enhancements:
|
22
32
|
|
23
|
-
* Support timezone for string converter
|
33
|
+
* Support `timezone` for string converter
|
24
34
|
|
25
35
|
# 0.1.8 (2015/07/24)
|
26
36
|
|
27
37
|
Enhancements:
|
28
38
|
|
29
|
-
* Support value_type
|
39
|
+
* Support `value_type`, `timezone_format`, `timezone` option for column_options
|
30
40
|
|
31
41
|
# 0.1.7 (2015/07/24)
|
32
42
|
|
33
43
|
Enhancements:
|
34
44
|
|
35
|
-
* Add reject_on_materialized_type_error option
|
45
|
+
* Add `reject_on_materialized_type_error` option
|
36
46
|
|
37
47
|
# 0.1.6 (2015/07/23)
|
38
48
|
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
* **Plugin type**: output
|
6
6
|
* **Resume supported**: no
|
7
|
-
* **Cleanup supported**:
|
7
|
+
* **Cleanup supported**: yes
|
8
8
|
* **Dynamic table creating**: yes
|
9
9
|
|
10
10
|
## Configuration
|
@@ -17,9 +17,9 @@
|
|
17
17
|
- **schema**: schema name (string, default: public)
|
18
18
|
- **table**: table name (string, required)
|
19
19
|
- **mode**: "insert", or "replace". See below. (string, default: insert)
|
20
|
-
- **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO)
|
21
|
-
- **abort_on_error**:
|
22
|
-
- **reject_on_materialized_type_error**:
|
20
|
+
- **copy_mode**: specifies how data is loaded into the database. See vertica documents for details. (`AUTO`, `DIRECT`, or `TRICKLE`. default: `AUTO`)
|
21
|
+
- **abort_on_error**: stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
|
22
|
+
- **reject_on_materialized_type_error**: uses `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any of column types and value types do not fit, ex) double value into INT column fails. See vertica documents for details. (bool, default: false)
|
23
23
|
- **default_timezone**: the default timezone for column_options (string, default is "UTC")
|
24
24
|
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
25
25
|
- **type**: type of a column when this plugin creates new tables such as `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`. This is used on creating intermediate tables (insert and truncate_insert modes) and on creating a new target table. (string, default: depends on input column type, see below)
|
@@ -29,11 +29,11 @@
|
|
29
29
|
- string: `VARCHAR`
|
30
30
|
- timestamp: `TIMESTAMP`
|
31
31
|
- **value_type**: The types (embulk types) of values to convert (string, default: no conversion. See below for available types)
|
32
|
-
- boolean: `boolean`, `string`
|
33
|
-
- long: `boolean
|
34
|
-
- double: `boolean
|
35
|
-
- string: `boolean
|
36
|
-
- timestamp: `boolean
|
32
|
+
- boolean: `boolean`, `string`
|
33
|
+
- long: `boolean`, `long`, `double`, `string`, `timestamp`
|
34
|
+
- double: `boolean`, `long`, `double`, `string`, `timestamp`
|
35
|
+
- string: `boolean`, `long`, `double`, `string`, `timestamp`
|
36
|
+
- timestamp: `boolean`, `long`, `double`, `string`, `timestamp`
|
37
37
|
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is "%Y-%m-%d %H:%M:%S %z")
|
38
38
|
- **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
|
39
39
|
|
@@ -43,7 +43,7 @@
|
|
43
43
|
* Behavior: This mode copies rows to some intermediate tables first. If all those tasks run correctly, it runs INSERT INTO <target_table> SELECT * FROM <intermediate_table>
|
44
44
|
* Transactional: Yes if `abort_on_error` option is used
|
45
45
|
* **replace**:
|
46
|
-
* Behavior: Same with insert mode excepting that it
|
46
|
+
* Behavior: Same as insert mode, except that it drops the target table first.
|
47
47
|
* Transactional: Yes if `abort_on_error` option is used
|
48
48
|
|
49
49
|
## Example
|
@@ -62,7 +62,7 @@ out:
|
|
62
62
|
column_options:
|
63
63
|
id: {type: INT}
|
64
64
|
name: {type: VARCHAR(255)}
|
65
|
-
date: {type: DATE, value_type:
|
65
|
+
date: {type: DATE, value_type: timestamp, timezone: "+09:00"}
|
66
66
|
```
|
67
67
|
|
68
68
|
## Development
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.2.
|
3
|
+
spec.version = "0.2.3"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
@@ -53,7 +53,9 @@ module Embulk
|
|
53
53
|
end
|
54
54
|
|
55
55
|
begin
|
56
|
-
|
56
|
+
# obtain an array of task_reports where one report is of a task
|
57
|
+
task_reports = yield(task)
|
58
|
+
Embulk.logger.info { "embulk-output-vertica: task_reports: #{task_reports}" }
|
57
59
|
connect(task) do |jv|
|
58
60
|
query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema})])
|
59
61
|
query(jv, %[INSERT INTO #{quoted_schema}.#{quoted_table} SELECT * FROM #{quoted_schema}.#{quoted_temp_table}])
|
@@ -65,7 +67,9 @@ module Embulk
|
|
65
67
|
Embulk.logger.debug { "embulk-output-vertica: #{query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_table} LIMIT 10]).map {|row| row.to_h }.join("\n") rescue nil}" }
|
66
68
|
end
|
67
69
|
end
|
68
|
-
|
70
|
+
# this is for the -o next_config option; add parameters for the next execution here if needed
|
71
|
+
next_config_diff = {}
|
72
|
+
return next_config_diff
|
69
73
|
end
|
70
74
|
|
71
75
|
def initialize(task, schema, index)
|
@@ -73,6 +77,9 @@ module Embulk
|
|
73
77
|
@converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
|
74
78
|
Embulk.logger.debug { @converters.to_s }
|
75
79
|
@jv = self.class.connect(task)
|
80
|
+
@num_input_rows = 0
|
81
|
+
@num_output_rows = 0
|
82
|
+
@num_rejected_rows = 0
|
76
83
|
end
|
77
84
|
|
78
85
|
def close
|
@@ -82,15 +89,17 @@ module Embulk
|
|
82
89
|
def add(page)
|
83
90
|
json = nil # for log
|
84
91
|
begin
|
85
|
-
copy(@jv, copy_sql) do |stdin|
|
92
|
+
num_output_rows, rejects = copy(@jv, copy_sql) do |stdin|
|
86
93
|
page.each do |record|
|
87
94
|
json = to_json(record)
|
88
95
|
Embulk.logger.debug { "embulk-output-vertica: #{json}" }
|
89
96
|
stdin << json << "\n"
|
97
|
+
@num_input_rows += 1
|
90
98
|
end
|
91
99
|
end
|
92
|
-
|
93
|
-
@
|
100
|
+
num_rejected_rows = rejects.size
|
101
|
+
@num_output_rows += num_output_rows
|
102
|
+
@num_rejected_rows += num_rejected_rows
|
94
103
|
rescue java.sql.SQLDataException => e
|
95
104
|
@jv.rollback
|
96
105
|
if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
|
@@ -98,7 +107,7 @@ module Embulk
|
|
98
107
|
else
|
99
108
|
Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
|
100
109
|
end
|
101
|
-
raise e
|
110
|
+
raise e # die transaction
|
102
111
|
end
|
103
112
|
end
|
104
113
|
|
@@ -109,7 +118,13 @@ module Embulk
|
|
109
118
|
end
|
110
119
|
|
111
120
|
def commit
|
112
|
-
|
121
|
+
@jv.commit
|
122
|
+
Embulk.logger.info { "embulk-output-vertica: COMMIT! #{@num_output_rows} rows" }
|
123
|
+
task_report = {
|
124
|
+
"num_input_rows" => @num_input_rows,
|
125
|
+
"num_output_rows" => @num_output_rows,
|
126
|
+
"num_rejected_rows" => @num_rejected_rows,
|
127
|
+
}
|
113
128
|
end
|
114
129
|
|
115
130
|
private
|
@@ -169,7 +184,7 @@ module Embulk
|
|
169
184
|
|
170
185
|
def copy(conn, sql, &block)
|
171
186
|
Embulk.logger.info "embulk-output-vertica: #{sql}"
|
172
|
-
conn.copy(sql, &block)
|
187
|
+
results, rejects = conn.copy(sql, &block)
|
173
188
|
end
|
174
189
|
|
175
190
|
def copy_sql
|
@@ -9,11 +9,12 @@ module Embulk
|
|
9
9
|
|
10
10
|
DEFAULT_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %z"
|
11
11
|
|
12
|
+
# @param [Schema] schema embulk defined column types
|
13
|
+
# @param [String] default_timezone
|
14
|
+
# @param [Hash] column_options user defined column types
|
15
|
+
# @return [Hash] hash whose key is column_name, and value is its converter (Proc)
|
12
16
|
def self.create_converters(schema, default_timezone, column_options)
|
13
|
-
|
14
|
-
# @param [Hash] column_options user defined column types
|
15
|
-
# @return [Array] value converters (array of Proc)
|
16
|
-
Hash[*(schema.names.zip(schema.types).map do |column_name, schema_type|
|
17
|
+
Hash[schema.names.zip(schema.types).map do |column_name, schema_type|
|
17
18
|
if column_options[column_name]
|
18
19
|
value_type = column_options[column_name]['value_type']
|
19
20
|
timestamp_format = column_options[column_name]['timestamp_format'] || DEFAULT_TIMESTAMP_FORMAT
|
@@ -22,7 +23,7 @@ module Embulk
|
|
22
23
|
else
|
23
24
|
[column_name, Proc.new {|val| val }]
|
24
25
|
end
|
25
|
-
end
|
26
|
+
end]
|
26
27
|
end
|
27
28
|
|
28
29
|
def initialize(schema_type, value_type = nil, timestamp_format = nil, timezone = nil)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- eiji.sekiya
|
@@ -9,64 +9,64 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-09-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
+
name: jvertica
|
16
|
+
version_requirements: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0.2'
|
15
21
|
requirement: !ruby/object:Gem::Requirement
|
16
22
|
requirements:
|
17
|
-
- - ~>
|
23
|
+
- - "~>"
|
18
24
|
- !ruby/object:Gem::Version
|
19
25
|
version: '0.2'
|
20
|
-
name: jvertica
|
21
26
|
prerelease: false
|
22
27
|
type: :runtime
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: tzinfo
|
23
30
|
version_requirements: !ruby/object:Gem::Requirement
|
24
31
|
requirements:
|
25
|
-
- -
|
32
|
+
- - ">="
|
26
33
|
- !ruby/object:Gem::Version
|
27
|
-
version: '0
|
28
|
-
- !ruby/object:Gem::Dependency
|
34
|
+
version: '0'
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
|
-
- -
|
37
|
+
- - ">="
|
32
38
|
- !ruby/object:Gem::Version
|
33
39
|
version: '0'
|
34
|
-
name: tzinfo
|
35
40
|
prerelease: false
|
36
41
|
type: :runtime
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: bundler
|
37
44
|
version_requirements: !ruby/object:Gem::Requirement
|
38
45
|
requirements:
|
39
|
-
- -
|
46
|
+
- - "~>"
|
40
47
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
42
|
-
- !ruby/object:Gem::Dependency
|
48
|
+
version: '1.7'
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
|
-
- - ~>
|
51
|
+
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
53
|
version: '1.7'
|
48
|
-
name: bundler
|
49
54
|
prerelease: false
|
50
55
|
type: :development
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
51
58
|
version_requirements: !ruby/object:Gem::Requirement
|
52
59
|
requirements:
|
53
|
-
- - ~>
|
60
|
+
- - "~>"
|
54
61
|
- !ruby/object:Gem::Version
|
55
|
-
version: '
|
56
|
-
- !ruby/object:Gem::Dependency
|
62
|
+
version: '10.0'
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
58
64
|
requirements:
|
59
|
-
- - ~>
|
65
|
+
- - "~>"
|
60
66
|
- !ruby/object:Gem::Version
|
61
67
|
version: '10.0'
|
62
|
-
name: rake
|
63
68
|
prerelease: false
|
64
69
|
type: :development
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '10.0'
|
70
70
|
description: Dump records to vertica
|
71
71
|
email:
|
72
72
|
- eiji.sekiya.0326@gmail.com
|
@@ -75,7 +75,7 @@ executables: []
|
|
75
75
|
extensions: []
|
76
76
|
extra_rdoc_files: []
|
77
77
|
files:
|
78
|
-
- .gitignore
|
78
|
+
- ".gitignore"
|
79
79
|
- CHANGELOG.md
|
80
80
|
- Gemfile
|
81
81
|
- LICENSE.txt
|
@@ -96,17 +96,17 @@ require_paths:
|
|
96
96
|
- lib
|
97
97
|
required_ruby_version: !ruby/object:Gem::Requirement
|
98
98
|
requirements:
|
99
|
-
- -
|
99
|
+
- - ">="
|
100
100
|
- !ruby/object:Gem::Version
|
101
101
|
version: '0'
|
102
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
103
|
requirements:
|
104
|
-
- -
|
104
|
+
- - ">="
|
105
105
|
- !ruby/object:Gem::Version
|
106
106
|
version: '0'
|
107
107
|
requirements: []
|
108
108
|
rubyforge_project:
|
109
|
-
rubygems_version: 2.4.
|
109
|
+
rubygems_version: 2.4.8
|
110
110
|
signing_key:
|
111
111
|
specification_version: 4
|
112
112
|
summary: Vertica output plugin for Embulk
|