embulk-output-amazon_cloudsearch 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.ruby-version +1 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +36 -0
- data/Rakefile +3 -0
- data/embulk-output-amazon_cloudsearch.gemspec +19 -0
- data/example/example.csv +3 -0
- data/example/example.yml +22 -0
- data/lib/embulk/output/amazon_cloudsearch.rb +102 -0
- metadata +110 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8179993373e5f421335547da4e2e9a6c97ece4ae
|
4
|
+
data.tar.gz: d3c25cd409761f73a31704813fffc526a1ca6ef9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4b09e9c367106ecc2f3dd918ebe163df6d92381eb268368532426f71261735d140d8ce72ab6e9a3ec7a26a439b31d783d81dce810794cd05b5c18888b69bb569
|
7
|
+
data.tar.gz: 7319f3688377320336278a126369f9b616ea41222bfaad1217d4727021c9f82e15c67fee1146d7cc1ad0d2c1bfe9d9e7762f061ffe2da369cf978d42c4550b99
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
jruby-9.1.15.0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Amazon CloudSearch output plugin for Embulk
|
2
|
+
|
3
|
+
Embulk output plugin to insert data into Amazon CloudSearch
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
* **Plugin type**: output
|
8
|
+
* **Load all or nothing**: no
|
9
|
+
* **Resume supported**: no
|
10
|
+
* **Cleanup supported**: no
|
11
|
+
|
12
|
+
## Configuration
|
13
|
+
|
14
|
+
- **endpoint**: Amazon CloudSearch Document Endpoint URL (string, required)
|
15
|
+
- **id_column**: document id column (string, required)
|
16
|
+
- **upload_columns**: names of the columns to upload as document fields (array of strings, required)
|
17
|
+
- **batch_size**: number of records in one bulk request (int, default: 1000)
|
18
|
+
- **stub_response**: make the CloudSearch API client return stubbed responses instead of sending requests to the endpoint, e.g. for local testing (boolean, default: `false`)
|
19
|
+
|
20
|
+
## Example
|
21
|
+
|
22
|
+
```yaml
|
23
|
+
out:
|
24
|
+
type: amazon_cloudsearch
|
25
|
+
endpoint: https://cloudsearch.example.com/endpoint
|
26
|
+
id_column: id
|
27
|
+
upload_columns:
|
28
|
+
- title
|
29
|
+
- timestamp
|
30
|
+
```
|
31
|
+
|
32
|
+
## Build
|
33
|
+
|
34
|
+
```
|
35
|
+
$ rake
|
36
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Gem specification for the embulk-output-amazon_cloudsearch plugin.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name          = "embulk-output-amazon_cloudsearch"
  gem.version       = "0.1.0"
  gem.authors       = ["Kenichi Takahashi"]
  gem.summary       = "Amazon Cloudsearch output plugin for Embulk"
  gem.description   = "Dumps records to Amazon Cloudsearch."
  gem.email         = ["kenichi.taka@gmail.com"]
  gem.licenses      = ["MIT"]
  gem.homepage      = "https://github.com/kenchan/embulk-output-amazon_cloudsearch"

  # Packaged files: everything tracked by git plus any jars under classpath/.
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir["classpath/*.jar"]
  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # Runtime dependency.
  gem.add_dependency 'aws-sdk-cloudsearchdomain', ['~> 1.9']

  # Development-only dependencies.
  gem.add_development_dependency 'embulk', ['>= 0.8.39']
  gem.add_development_dependency 'bundler', ['>= 1.10.6']
  gem.add_development_dependency 'rake', ['>= 10.0']
end
|
data/example/example.csv
ADDED
data/example/example.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: id, type: string}
|
13
|
+
- {name: title, type: string}
|
14
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
15
|
+
out:
|
16
|
+
type: amazon_cloudsearch
|
17
|
+
stub_response: true
|
18
|
+
endpoint: https://example.com
|
19
|
+
id_column: id
|
20
|
+
upload_columns:
|
21
|
+
- title
|
22
|
+
- timestamp
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'aws-sdk-cloudsearchdomain'
|
3
|
+
|
4
|
+
module Embulk
  module Output

    # Embulk output plugin that sends records to an Amazon CloudSearch document
    # endpoint as batches of "add" operations.
    #
    # Resume is not implemented (there is no self.resume); .transaction always
    # runs the tasks from scratch.
    class AmazonCloudsearch < OutputPlugin
      Plugin.register_output("amazon_cloudsearch", self)

      # Reads the plugin configuration into a task hash and runs the output tasks.
      #
      # @param config  plugin configuration; must provide endpoint, id_column
      #   and upload_columns, and may provide stub_response (default false) and
      #   batch_size (default 1000)
      # @param schema  not used by this method
      # @param count   not used by this method
      # @yield [task]  the task hash is handed to the caller-supplied block
      # @return [Hash] the next-config diff, always empty
      def self.transaction(config, schema, count, &control)
        task = {
          'endpoint'       => config.param('endpoint', :string),
          'id_column'      => config.param('id_column', :string),
          'upload_columns' => config.param('upload_columns', :array),
          'stub_response'  => config.param('stub_response', :bool, default: false),
          'batch_size'     => config.param('batch_size', :integer, default: 1000)
        }

        # Non-resumable output: the value returned by the block is not used.
        yield(task)

        {}
      end

      # Copies the task settings into instance variables used by #add and #client.
      def init
        @endpoint       = task['endpoint']
        @id_column      = task['id_column']
        @upload_columns = task['upload_columns']
        @stub_response  = task['stub_response']
        @batch_size     = task['batch_size']
      end

      # No-op.
      def close
      end

      # Converts the records of +page+ into CloudSearch documents and uploads
      # them in slices of at most @batch_size records.
      #
      # @param page  enumerable of records; each record is zipped against
      #   schema.names, so values are expected in schema column order
      def add(page)
        page.each_slice(@batch_size) do |records|
          upload(records.map { |record| build_document(record) })
        end
      end

      # No-op.
      def finish
      end

      # No-op.
      def abort
      end

      # @return [Hash] the task report, always empty
      def commit
        {}
      end

      private

      # Builds one "add" operation for +record+.
      #
      # The document id is the string form of the id column. Only the configured
      # upload columns are included as fields; nil values are left out, while
      # +false+ is kept so boolean-like values are not silently dropped.
      #
      # NOTE(review): values are serialized later with #to_json as-is; confirm
      # that timestamp columns come out in the date format CloudSearch expects.
      def build_document(record)
        row = schema.names.zip(record).to_h

        fields = @upload_columns.each_with_object({}) do |column, acc|
          value = row[column]
          acc[column] = value unless value.nil?
        end

        { type: "add", id: row[@id_column].to_s, fields: fields }
      end

      # Uploads one batch of documents as JSON and logs the outcome.
      #
      # Errors raised by the request are logged and swallowed so that the
      # remaining batches are still attempted (the plugin does not promise
      # all-or-nothing loading).
      def upload(documents)
        res = client.upload_documents(
          documents: documents.to_json,
          content_type: 'application/json'
        )
      rescue => e
        # No response object exists when the call raised, so only the error
        # and the affected id range are reported.
        Embulk.logger.error { "embulk-output-amazon_cloudsearch: #{e}" }
        Embulk.logger.error { "embulk-output-amazon_cloudsearch: id #{id_range(documents)}, upload failed" }
      else
        Embulk.logger.debug { "embulk-output-amazon_cloudsearch: response #{res}" }

        unless res.status == 'success'
          Embulk.logger.error { "embulk-output-amazon_cloudsearch: id #{id_range(documents)}, response #{res}" }
        end

        # An empty warnings list is truthy in Ruby; warn only when there is
        # at least one warning.
        warnings = res.warnings
        if warnings && !warnings.empty?
          Embulk.logger.warn { "embulk-output-amazon_cloudsearch: id #{id_range(documents)}, response #{res}" }
        end
      end

      # Formats the first and last document ids of a batch for log messages.
      def id_range(documents)
        "#{documents.first[:id]}-#{documents.last[:id]}"
      end

      # Lazily creates and memoizes the CloudSearch domain client.
      def client
        @_client ||= Aws::CloudSearchDomain::Client.new(endpoint: @endpoint, stub_responses: @stub_response)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-output-amazon_cloudsearch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kenichi Takahashi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-06-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '1.9'
|
19
|
+
name: aws-sdk-cloudsearchdomain
|
20
|
+
prerelease: false
|
21
|
+
type: :runtime
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.8.39
|
33
|
+
name: embulk
|
34
|
+
prerelease: false
|
35
|
+
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.8.39
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.10.6
|
47
|
+
name: bundler
|
48
|
+
prerelease: false
|
49
|
+
type: :development
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.10.6
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '10.0'
|
61
|
+
name: rake
|
62
|
+
prerelease: false
|
63
|
+
type: :development
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
description: Dumps records to Amazon Cloudsearch.
|
70
|
+
email:
|
71
|
+
- kenichi.taka@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".ruby-version"
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- embulk-output-amazon_cloudsearch.gemspec
|
83
|
+
- example/example.csv
|
84
|
+
- example/example.yml
|
85
|
+
- lib/embulk/output/amazon_cloudsearch.rb
|
86
|
+
homepage: https://github.com/kenchan/embulk-output-amazon_cloudsearch
|
87
|
+
licenses:
|
88
|
+
- MIT
|
89
|
+
metadata: {}
|
90
|
+
post_install_message:
|
91
|
+
rdoc_options: []
|
92
|
+
require_paths:
|
93
|
+
- lib
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
requirements: []
|
105
|
+
rubyforge_project:
|
106
|
+
rubygems_version: 2.6.14
|
107
|
+
signing_key:
|
108
|
+
specification_version: 4
|
109
|
+
summary: Amazon Cloudsearch output plugin for Embulk
|
110
|
+
test_files: []
|