embulk-output-vertica 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 30466810c9c7e57dec6627ad52ae406ecd1b4dd1
4
- data.tar.gz: fbf6ca1e55f6f739cf4afbcafd5e440ecd1e72b1
3
+ metadata.gz: 81d94a9a3a707573b6eaed08fcd3b64f2890963d
4
+ data.tar.gz: 56035fc8679597c91ac57eab7ddba7cab8938710
5
5
  SHA512:
6
- metadata.gz: 859c52e8b2cf51c6eb9b5357d51c575c7df38d9583cbd5ffa88f203e511ecb1973bdf8cf73254e41d808bffa6d0c08381da025d8709788fd3ccadd2acaac5d40
7
- data.tar.gz: 2746be9c7f2d6d63f0c2adde70c52a0272acae0c6f2bdbc29cd6a52fd4f268762edcab93a06dde532003b46949f0e4e0eda5d254e445632ed3deca01d84cbf3c
6
+ metadata.gz: f2a54c45a4407fa37a2e1fb2fbb7326d3091c3ceb8d6dd30d6bea701be34c6cb75743e48bae9ad6ef6247a85f0f31f3fc736fe12be7444c29a4eb181e0f066ae
7
+ data.tar.gz: 1a192bde5bdfd73e3988500cfb198462a86cd2979b4df688fe189f7f1fc36ab1c158e95f5c67067545524bff1edb0bd85fcf1d43734686430fcd86f618722e1c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.5 (2015/07/24)
2
+
3
+ Fixes:
4
+
5
+ * Use PARSER fjsonparser() instead of DELIMITER ',', because escaping values correctly is too difficult otherwise
6
+
1
7
  # 0.1.4 (2015/07/10)
2
8
 
3
9
  Fixes:
data/README.md CHANGED
@@ -16,10 +16,25 @@
16
16
  - **database**: database name (string, default: vdb)
17
17
  - **schema**: schema name (string, default: public)
18
18
  - **table**: table name (string, required)
19
- - **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO)
19
+ - **mode**: "insert", or "replace". See below. (string, default: insert)
20
+ - **copy_mode**: specifies how data is loaded into the database. (`AUTO`, `DIRECT`, or `TRICKLE`. default: AUTO) See the Vertica documentation for details.
20
21
  - **abort_on_error**: Stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
21
22
  - **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
22
- - **type**: type of a column when this plugin creates new tables (e.g. VARCHAR(255), INTEGER NOT NULL UNIQUE). This used when this plugin creates intermediate tables (insert and truncate_insert modes), and when it creates nonexistent target table automatically. (string, default: depends on input column type. INT (same with BIGINT in vertica) if input column type is long, BOOLEAN if boolean, FLOAT (same with DOUBLE PRECISION in vertica) if double, VARCHAR if string, TIMESTAMP if timestamp)
23
+ - **type**: type of a column when this plugin creates new tables such as `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`. This is used on creating intermediate tables (insert and truncate_insert modes) and on creating a new target table. (string, default: depends on input column type, see below)
24
+ - `INT` (same with `BIGINT` in vertica) for `long`
25
+ - `BOOLEAN` for `boolean`
26
+ - `FLOAT` (same with `DOUBLE PRECISION` in vertica) for `double`
27
+ - `VARCHAR` for `string`
28
+ - `TIMESTAMP` for `timestamp`
29
+
30
+ ### Modes
31
+
32
+ * **insert**:
33
+ * Behavior: This mode copies rows to some intermediate tables first. If all those tasks run correctly, it runs INSERT INTO <target_table> SELECT * FROM <intermediate_table>
34
+ * Transactional: Yes if `abort_on_error` option is used
35
+ * **replace**:
36
+ * Behavior: Same as insert mode, except that it drops the target table first.
37
+ * Transactional: Yes if `abort_on_error` option is used
23
38
 
24
39
  ## Example
25
40
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.1.4"
3
+ spec.version = "0.1.5"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
data/example.csv CHANGED
@@ -1,2 +1,9 @@
1
- 1,foo
2
- 2,bar
1
+ date,foo,bar,id,name,score
2
+ 2015-07-13,,bar,90,l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY,903.4
3
+ 2015-07-13,,bar,91,XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY,394.5
4
+ 2015-07-13,,bar,92,0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw,810.9
5
+ 2015-07-13,,bar,93,KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc,477.4
6
+ 2015-07-13,,bar,94,fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8,725.3
7
+ 2015-07-13,,bar,95,FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8,316.6
8
+ 2015-07-13,,bar,96,9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4,369.5
9
+ 2015-07-13,,bar,97,RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc,506.5
data/example.yml CHANGED
@@ -1,10 +1,27 @@
1
+ # in:
2
+ # type: random
3
+ # rows: 100
4
+ # schema:
5
+ # id: primary_key
6
+ # name: string
7
+ # score: integer
1
8
  in:
2
- type: random
3
- rows: 100
4
- schema:
5
- id: primary_key
6
- name: string
7
- score: integer
9
+ type: file
10
+ path_prefix: example.csv
11
+ parser:
12
+ type: csv
13
+ charset: UTF-8
14
+ newline: CRLF
15
+ null_string: 'NULL'
16
+ skip_header_lines: 1
17
+ comment_line_marker: '#'
18
+ columns:
19
+ - {name: date, type: timestamp, format: "%Y-%m-%d"}
20
+ - {name: foo, type: string}
21
+ - {name: bar, type: string}
22
+ - {name: id, type: long}
23
+ - {name: name, type: string}
24
+ - {name: score, type: double}
8
25
  out:
9
26
  type: vertica
10
27
  host: 127.0.0.1
@@ -15,7 +32,6 @@ out:
15
32
  table: embulk_test
16
33
  copy_mode: direct
17
34
  column_options:
35
+ date: {type: DATE}
18
36
  id: {type: INT}
19
37
  name: {type: VARCHAR}
20
- score: {type: INT}
21
-
@@ -17,12 +17,17 @@ module Embulk
17
17
  'database' => config.param('database', :string, :default => 'vdb'),
18
18
  'schema' => config.param('schema', :string, :default => 'public'),
19
19
  'table' => config.param('table', :string),
20
+ 'mode' => config.param('mode', :string, :default => 'insert'),
20
21
  'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
21
22
  'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
22
23
  'column_options' => config.param('column_options', :hash, :default => {}),
23
24
  }
24
25
 
25
- unless %w[AUTO DIRECT TRICKLE].include?(task['copy_mode'].upcase)
26
+ unless %w[INSERT REPLACE].include?(task['mode'].upcase!)
27
+ raise ConfigError, "`mode` must be one of INSERT, REPLACE"
28
+ end
29
+
30
+ unless %w[AUTO DIRECT TRICKLE].include?(task['copy_mode'].upcase!)
26
31
  raise ConfigError, "`copy_mode` must be one of AUTO, DIRECT, TRICKLE"
27
32
  end
28
33
 
@@ -37,30 +42,59 @@ module Embulk
37
42
  quoted_temp_table = ::Jvertica.quote_identifier(task['temp_table'])
38
43
 
39
44
  connect(task) do |jv|
40
- # drop table if exists "DEST"
41
- # 'create table if exists "TEMP" ("COL" json)'
42
- jv.query %[drop table if exists #{quoted_schema}.#{quoted_temp_table}]
43
- jv.query %[create table #{quoted_schema}.#{quoted_temp_table} (#{sql_schema})]
45
+ if task['mode'] == 'REPLACE'
46
+ query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_table}])
47
+ end
48
+ query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
49
+ query(jv, %[CREATE TABLE #{quoted_schema}.#{quoted_temp_table} (#{sql_schema})])
44
50
  end
45
51
 
46
52
  begin
47
53
  yield(task)
48
54
  connect(task) do |jv|
49
- # create table if not exists "DEST" ("COL" json)
50
- # 'insert into "DEST" ("COL") select "COL" from "TEMP"'
51
- jv.query %[create table if not exists #{quoted_schema}.#{quoted_table} (#{sql_schema})]
52
- jv.query %[insert into #{quoted_schema}.#{quoted_table} select * from #{quoted_schema}.#{quoted_temp_table}]
55
+ query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema})])
56
+ query(jv, %[INSERT INTO #{quoted_schema}.#{quoted_table} SELECT * FROM #{quoted_schema}.#{quoted_temp_table}])
53
57
  jv.commit
54
58
  end
55
59
  ensure
56
60
  connect(task) do |jv|
57
- # 'drop table if exists TEMP'
58
- jv.query %[drop table if exists #{quoted_schema}.#{quoted_temp_table}]
61
+ query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
62
+ Embulk.logger.debug { query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_table} LIMIT 10]).map {|row| row.to_h }.join("\n") }
59
63
  end
60
64
  end
61
65
  return {}
62
66
  end
63
67
 
68
+ def initialize(task, schema, index)
69
+ super
70
+ @jv = self.class.connect(task)
71
+ end
72
+
73
+ def close
74
+ @jv.close
75
+ end
76
+
77
+ def add(page)
78
+ copy(@jv, copy_sql) do |stdin|
79
+ page.each do |record|
80
+ stdin << to_json(record) << "\n"
81
+ end
82
+ end
83
+ @jv.commit
84
+ end
85
+
86
+ def finish
87
+ end
88
+
89
+ def abort
90
+ end
91
+
92
+ def commit
93
+ {}
94
+ end
95
+
96
+ private
97
+
64
98
  def self.connect(task)
65
99
  jv = ::Jvertica.connect({
66
100
  host: task['host'],
@@ -85,8 +119,11 @@ module Embulk
85
119
  # @return [String] sql schema used to CREATE TABLE
86
120
  def self.to_sql_schema(schema, column_options)
87
121
  schema.names.zip(schema.types).map do |column_name, type|
88
- sql_type = (column_options[column_name] and column_options[column_name]['type']) ?
89
- column_options[column_name]['type'] : to_sql_type(type)
122
+ if column_options[column_name] and column_options[column_name]['type']
123
+ sql_type = column_options[column_name]['type']
124
+ else
125
+ sql_type = to_sql_type(type)
126
+ end
90
127
  "#{::Jvertica.quote_identifier(column_name)} #{sql_type}"
91
128
  end.join(',')
92
129
  end
@@ -102,45 +139,48 @@ module Embulk
102
139
  end
103
140
  end
104
141
 
105
- def initialize(task, schema, index)
106
- super
107
- @jv = self.class.connect(task)
142
+ def self.query(conn, sql)
143
+ Embulk.logger.debug sql
144
+ conn.query(sql)
108
145
  end
109
146
 
110
- def close
111
- @jv.close
147
+ def query(conn, sql)
148
+ self.class.query(conn, sql)
112
149
  end
113
150
 
114
- def add(page)
115
- @jv.copy(copy_sql) do |stdin|
116
- page.each_with_index do |record, idx|
117
- stdin << record.map {|v| ::Jvertica.quote(v) }.join(",") << "\n"
118
- end
119
- end
120
- @jv.commit
151
+ def copy(conn, sql, &block)
152
+ Embulk.logger.debug sql
153
+ conn.copy(sql, &block)
121
154
  end
122
155
 
123
- def finish
156
+ def copy_sql
157
+ @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN PARSER fjsonparser() #{copy_mode}#{abort_on_error} NO COMMIT"
124
158
  end
125
159
 
126
- def abort
160
+ def to_json(record)
161
+ Hash[*(schema.names.zip(record).flatten!(1))].to_json
127
162
  end
128
163
 
129
- def commit
130
- {}
164
+ def quoted_schema
165
+ ::Jvertica.quote_identifier(@task['schema'])
131
166
  end
132
167
 
133
- private
168
+ def quoted_table
169
+ ::Jvertica.quote_identifier(@task['table'])
170
+ end
134
171
 
135
- def copy_sql
136
- quoted_schema = ::Jvertica.quote_identifier(@task['schema'])
137
- quoted_temp_table = ::Jvertica.quote_identifier(@task['temp_table'])
138
- copy_mode = @task['copy_mode']
139
- abort_on_error = @task['abort_on_error'] ? ' ABORT ON ERROR' : ''
140
- sql = "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN DELIMITER ',' #{copy_mode}#{abort_on_error} NO COMMIT"
141
- Embulk.logger.debug sql
142
- sql
172
+ def quoted_temp_table
173
+ ::Jvertica.quote_identifier(@task['temp_table'])
174
+ end
175
+
176
+ def copy_mode
177
+ @task['copy_mode']
143
178
  end
179
+
180
+ def abort_on_error
181
+ @task['abort_on_error'] ? ' ABORT ON ERROR' : ''
182
+ end
183
+
144
184
  end
145
185
  end
146
186
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-07-10 00:00:00.000000000 Z
12
+ date: 2015-07-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement