embulk-output-amazon_cloudsearch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8179993373e5f421335547da4e2e9a6c97ece4ae
4
+ data.tar.gz: d3c25cd409761f73a31704813fffc526a1ca6ef9
5
+ SHA512:
6
+ metadata.gz: 4b09e9c367106ecc2f3dd918ebe163df6d92381eb268368532426f71261735d140d8ce72ab6e9a3ec7a26a439b31d783d81dce810794cd05b5c18888b69bb569
7
+ data.tar.gz: 7319f3688377320336278a126369f9b616ea41222bfaad1217d4727021c9f82e15c67fee1146d7cc1ad0d2c1bfe9d9e7762f061ffe2da369cf978d42c4550b99
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
@@ -0,0 +1 @@
1
+ jruby-9.1.15.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ # Amazon CloudSearch output plugin for Embulk
2
+
3
+ Embulk output plugin to insert data into Amazon CloudSearch
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: output
8
+ * **Load all or nothing**: no
9
+ * **Resume supported**: no
10
+ * **Cleanup supported**: no
11
+
12
+ ## Configuration
13
+
14
+ - **endpoint**: Amazon CloudSearch Document Endpoint URL (string, required)
15
+ - **id_column**: document id column (string, required)
16
+ - **upload_columns**: names of the columns to upload as document fields (array of strings, required)
17
+ - **batch_size**: number of records in one bulk request (int, default: 1000)
18
+ - **stub_response**: passed to the CloudSearch API client as its `stub_responses` option, so it returns stubbed responses; intended for testing (boolean, default: `false`)
19
+
20
+ ## Example
21
+
22
+ ```yaml
23
+ out:
24
+ type: amazon_cloudsearch
25
+ endpoint: https://cloudsearch.example.com/endpoint
26
+ id_column: id
27
+ upload_columns:
28
+ - title
29
+ - timestamp
30
+ ```
31
+
32
+ ## Build
33
+
34
+ ```
35
+ $ rake
36
+ ```
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,19 @@
1
# Gem specification for the Embulk Amazon CloudSearch output plugin.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name          = "embulk-output-amazon_cloudsearch"
  gem.version       = "0.1.0"
  gem.authors       = ["Kenichi Takahashi"]
  gem.summary       = "Amazon Cloudsearch output plugin for Embulk"
  gem.description   = "Dumps records to Amazon Cloudsearch."
  gem.email         = ["kenichi.taka@gmail.com"]
  gem.licenses      = ["MIT"]
  gem.homepage      = "https://github.com/kenchan/embulk-output-amazon_cloudsearch"

  # Package every file tracked by git plus any jars found under classpath/.
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir["classpath/*.jar"]

  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # Runtime dependency: the AWS SDK client for CloudSearch document endpoints.
  gem.add_dependency 'aws-sdk-cloudsearchdomain', ['~> 1.9']

  # Development-only dependencies.
  gem.add_development_dependency 'embulk', ['>= 0.8.39']
  gem.add_development_dependency 'bundler', ['>= 1.10.6']
  gem.add_development_dependency 'rake', ['>= 10.0']
end
@@ -0,0 +1,3 @@
1
+ id,title,timestamp
2
+ 1,Hello embulk,2019-05-01 00:00:00.0
3
+ 2,Hello CloudSearch,2019-05-31 23:59:59.0
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.csv
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: 'NULL'
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: id, type: string}
13
+ - {name: title, type: string}
14
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
15
+ out:
16
+ type: amazon_cloudsearch
17
+ stub_response: true
18
+ endpoint: https://example.com
19
+ id_column: id
20
+ upload_columns:
21
+ - title
22
+ - timestamp
@@ -0,0 +1,102 @@
1
+ require 'json'
2
+ require 'aws-sdk-cloudsearchdomain'
3
+
4
module Embulk
  module Output

    # Embulk output plugin that uploads records to an Amazon CloudSearch
    # document endpoint as "add" operations.
    #
    # Every page is cut into slices of at most `batch_size` records and each
    # slice is sent with a single upload_documents call. A failed upload is
    # logged and the plugin carries on with the next slice; it is not re-raised.
    class AmazonCloudsearch < OutputPlugin
      Plugin.register_output("amazon_cloudsearch", self)

      # Reads the plugin configuration into a task hash and hands it to the
      # given block.
      #
      # `endpoint`, `id_column` and `upload_columns` have no default;
      # `stub_response` defaults to false and `batch_size` to 1000.
      #
      # @param config plugin configuration, read through `config.param`
      # @param schema not used by this method
      # @param count not used by this method
      # @yield [task] the task hash built from the configuration
      # @return [Hash] an empty next-config diff
      def self.transaction(config, schema, count, &control)
        task = {
          'endpoint' => config.param('endpoint', :string),
          'id_column' => config.param('id_column', :string),
          'upload_columns' => config.param('upload_columns', :array),
          'stub_response' => config.param('stub_response', :bool, default: false),
          'batch_size' => config.param('batch_size', :integer, default: 1000)
        }

        # Non-resumable output. A resumable plugin would instead call
        # resume(task, schema, count, &control) and define self.resume.
        yield(task)

        {}
      end

      # Copies the task settings into instance variables.
      def init
        @endpoint = task['endpoint']
        @id_column = task['id_column']
        @upload_columns = task['upload_columns']
        @stub_response = task['stub_response']
        @batch_size = task['batch_size']
      end

      # No-op: this plugin has nothing to release.
      def close
      end

      # Uploads the records of one page in slices of `batch_size`.
      #
      # @param page enumerable of records; each record is zipped against
      #   `schema.names` to obtain column values
      def add(page)
        page.each_slice(@batch_size) do |records|
          upload_batch(records.map { |record| build_document(record) })
        end
      end

      # No-op: every slice is uploaded as soon as it is received in #add.
      def finish
      end

      # No-op: nothing is rolled back.
      def abort
      end

      # @return [Hash] an empty task report
      def commit
        {}
      end

      private

      # Lazily builds and memoizes the CloudSearch domain client.
      def client
        @_client ||= Aws::CloudSearchDomain::Client.new(endpoint: @endpoint, stub_responses: @stub_response)
      end

      # Turns one record into an "add" operation hash.
      #
      # Upload columns whose value is nil or false are left out of `fields`.
      def build_document(record)
        row = Hash[schema.names.zip(record)]

        fields = @upload_columns.each_with_object({}) do |column, acc|
          acc[column] = row[column] if row[column]
        end

        {
          type: "add",
          id: row[@id_column].to_s,
          fields: fields
        }
      end

      # Sends one batch of documents and logs the outcome.
      #
      # An exception raised by the upload call is logged and swallowed so the
      # remaining batches are still attempted.
      def upload_batch(documents)
        id_range = "#{documents.first[:id]}-#{documents.last[:id]}"

        begin
          res = client.upload_documents(
            documents: documents.to_json,
            content_type: 'application/json'
          )
        rescue => e
          # No response object exists when the call itself raised, so only the
          # error and the affected id range are reported.
          Embulk.logger.error { "embulk-output-amazon_cloudsearch: #{e}" }
          Embulk.logger.error { "embulk-output-amazon_cloudsearch: id #{id_range}, upload failed" }
          return
        end

        Embulk.logger.debug { "embulk-output-amazon_cloudsearch: response #{res}" }

        unless res.status == 'success'
          Embulk.logger.error { "embulk-output-amazon_cloudsearch: id #{id_range}, response #{res}" }
        end

        # An empty array is truthy in Ruby, so check for actual entries before
        # reporting warnings.
        warnings = res.warnings
        if warnings && !warnings.empty?
          Embulk.logger.warn { "embulk-output-amazon_cloudsearch: id #{id_range}, response #{res}" }
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-amazon_cloudsearch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kenichi Takahashi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-06-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.9'
19
+ name: aws-sdk-cloudsearchdomain
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.39
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.39
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Dumps records to Amazon Cloudsearch.
70
+ email:
71
+ - kenichi.taka@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-output-amazon_cloudsearch.gemspec
83
+ - example/example.csv
84
+ - example/example.yml
85
+ - lib/embulk/output/amazon_cloudsearch.rb
86
+ homepage: https://github.com/kenchan/embulk-output-amazon_cloudsearch
87
+ licenses:
88
+ - MIT
89
+ metadata: {}
90
+ post_install_message:
91
+ rdoc_options: []
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 2.6.14
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Amazon Cloudsearch output plugin for Embulk
110
+ test_files: []