embulk-output-vertica 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fb8733ef4ad44731139ff6f8cb63aa7dfb6aa1c
4
- data.tar.gz: b3d80ae9599a03762ac7e65cc2bcfb033f51ecae
3
+ metadata.gz: daddcdfee4c5483f5844d5d1f5b88e6550d482ab
4
+ data.tar.gz: 359f2050bb2fc49c6a34d51f4328e17dd17a16cf
5
5
  SHA512:
6
- metadata.gz: c84bb2d342b70fbbfbbd5656e41e3942115ed25fa2e786bdf7c9d10bf4540cabd0370cfc5d339c107657f71bb8b4b5365eac390d7d104f9f2e905ba3411867e9
7
- data.tar.gz: 92df6faa9d4b0b2be4e6085cf6b2f87986c9f6636ce622f4c901d7e691133d691f5f286578ed48a87a40bfffce49653477abeb26849d272f9e48dea4cebfb148
6
+ metadata.gz: 834eaf89a88c7eface737474dded51f134577b10c3891564f710a9e3f0c86b473b6f7a185ad24b01b5e2a89954e3a7344f0177a7a06ed67c156eb3323eba34ce
7
+ data.tar.gz: 3a435a2d1f8c0a3c3c4307b38e35ce728e99b5ae9beb78b1ebf208e62f9fd4d6c8f58759adc4b33cb423ffc88b4b3a235ae231aa5ad5a26d66aba7f5ed9c7c40
@@ -1,3 +1,9 @@
1
+ # 0.5.7 (2016/01/22)
2
+
3
+ Enhancements:
4
+
5
+ * Add json_payload option, which improves performance by avoiding construction of JSON in this JRuby plugin
6
+
1
7
  # 0.5.6 (2016/01/22)
2
8
 
3
9
  Enhancements:
data/Gemfile CHANGED
@@ -5,3 +5,4 @@ gem 'embulk-input-random'
5
5
  gem 'embulk-filter-stdout'
6
6
  gem 'pry'
7
7
  gem 'pry-nav'
8
+ gem 'embulk-parser-none'
data/README.md CHANGED
@@ -38,6 +38,7 @@
38
38
  - timestamp: `boolean`, `long`, `double`, `string`, `timestamp`
39
39
  - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is "%Y-%m-%d %H:%M:%S %z")
40
40
  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
41
+ - **json_payload**: Assumes the first column of each record is a JSON string, and COPYs that JSON into Vertica directly using fjsonparser. This improves performance by avoiding construction of JSON in this JRuby plugin. ToDo: auto table creation is not supported in this mode yet (bool, default is false)
41
42
 
42
43
  ### Modes
43
44
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.5.6"
3
+ spec.version = "0.5.7"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
@@ -0,0 +1,8 @@
1
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"90","name":"l6lTsvxdlcTfcqx2c0lQSd9HejVQg40W25f0wGNQViY","score":"903.4"}
2
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"91","name":"XoALSEQg9ycuGqrEWHOb8vdrLbheZSgFO53Wr3mciXY","score":"394.5"}
3
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"92","name":"0hgDRI_mijs5w7rkiLIe__LEayOOLxL0qVT1IHa5QBw","score":"810.9"}
4
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"93","name":"KjCRAc-AVcS-R13toBUR6pK_7d9Y8Gl4TRdYYMaSirc","score":"477.4"}
5
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"94","name":"fyQVGlT8Bqmu_LiajPlgfbmavoNyAqXaBsBP_e4OnN8","score":"725.3"}
6
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"95","name":"FpBYRPWKu6DmLpx5tsB25URWfj3sNCbcydNAXULaiD8","score":"316.6"}
7
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"96","name":"9ikvnUqp1Rf2yVwLvs5bBvxQP-KyqxGi4gZRSZ8c1d4","score":"369.5"}
8
+ {"timestamp_date":"2015-07-13","string_date":"2015-07-13","foo":null,"bar":"bar","id":"97","name":"RRNYDAzKaq4Trtt96Bxgk3N0fXLIV8hXoK0qQ7uw_Wc","score":"506.5"}
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.jsonl
4
+ parser:
5
+ type: none
6
+ out:
7
+ type: vertica
8
+ host: 127.0.0.1
9
+ user: dbadmin
10
+ password: xxxxxxx
11
+ database: vdb
12
+ schema: sandbox
13
+ table: embulk_test
14
+ copy_mode: DIRECT
15
+ abort_on_error: true
16
+ reject_on_materialized_type_error: true
17
+ default_timezone: 'Asia/Tokyo'
18
+ pool: 3
19
+ compress: GZIP
20
+ json_payload: true
@@ -30,6 +30,7 @@ module Embulk
30
30
  'compress' => config.param('compress', :string, :default => 'UNCOMPRESSED'),
31
31
  'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
32
32
  'column_options' => config.param('column_options', :hash, :default => {}),
33
+ 'json_payload' => config.param('json_payload', :bool, :default => false),
33
34
  'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
34
35
  'pool' => config.param('pool', :integer, :default => processor_count),
35
36
  }
@@ -67,12 +68,14 @@ module Embulk
67
68
  quoted_table = ::Jvertica.quote_identifier(task['table'])
68
69
  quoted_temp_table = ::Jvertica.quote_identifier(task['temp_table'])
69
70
 
70
- sql_schema_table = self.sql_schema_from_embulk_schema(schema, task['column_options'])
71
+ unless task['json_payload'] # ToDo: auto table creation is not supported to json_payload mode yet
72
+ sql_schema_table = self.sql_schema_from_embulk_schema(schema, task['column_options'])
71
73
 
72
- # create the target table
73
- connect(task) do |jv|
74
- query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_table}]) if task['mode'] == 'REPLACE'
75
- query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema_table})])
74
+ # create the target table
75
+ connect(task) do |jv|
76
+ query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_table}]) if task['mode'] == 'REPLACE'
77
+ query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema_table})])
78
+ end
76
79
  end
77
80
 
78
81
  sql_schema_temp_table = self.sql_schema_from_table(task)
@@ -5,6 +5,7 @@ module Embulk
5
5
  class Vertica < OutputPlugin
6
6
  class OutputThreadPool
7
7
  def initialize(task, schema, size)
8
+ @task = task
8
9
  @size = size
9
10
  @schema = schema
10
11
  @converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
@@ -33,9 +34,13 @@ module Embulk
33
34
  end
34
35
 
35
36
  def to_json(record)
36
- Hash[*(@schema.names.zip(record).map do |column_name, value|
37
- [column_name, @converters[column_name].call(value)]
38
- end.flatten!(1))].to_json
37
+ if @task['json_payload']
38
+ record.first
39
+ else
40
+ Hash[*(@schema.names.zip(record).map do |column_name, value|
41
+ [column_name, @converters[column_name].call(value)]
42
+ end.flatten!(1))].to_json
43
+ end
39
44
  end
40
45
  end
41
46
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.6
4
+ version: 0.5.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya
@@ -84,7 +84,9 @@ files:
84
84
  - embulk-output-vertica.gemspec
85
85
  - example/abort.yml
86
86
  - example/example.csv
87
+ - example/example.jsonl
87
88
  - example/example.yml
89
+ - example/json_payload.yml
88
90
  - lib/embulk/output/vertica.rb
89
91
  - lib/embulk/output/vertica/output_thread.rb
90
92
  - lib/embulk/output/vertica/value_converter_factory.rb