metacrunch-elasticsearch 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +46 -0
- data/Gemfile +4 -2
- data/License.txt +1 -1
- data/Readme.md +104 -0
- data/lib/metacrunch/elasticsearch.rb +2 -0
- data/lib/metacrunch/elasticsearch/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54f127cb7af8ebc5f9b7246d0241994a499fad0d
|
4
|
+
data.tar.gz: f7fca4e9e60e2d2c6951147bb57f177dc21350e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 198b86f27ce9a318ea975201f95f837be6b56408d6c302d08ba29ba9ec3469868ff367959bcab670d147180a2cba9dae89a0c594ec0551ea807be938513aaa48
|
7
|
+
data.tar.gz: ef8828c5ff6eac6002edaaa0beca1cc7eb29125ca5251c53eab3a35ca21b25910dfd5a63afc3d2af70332f28a7142e8e5aa0b303720c932cd757d826031cfbd6
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Ruby CircleCI 2.0 configuration file
|
2
|
+
#
|
3
|
+
# Check https://circleci.com/docs/2.0/language-ruby/ for more details
|
4
|
+
#
|
5
|
+
version: 2
|
6
|
+
jobs:
|
7
|
+
build:
|
8
|
+
docker:
|
9
|
+
- image: circleci/ruby:2.4.1-node-browsers
|
10
|
+
- image: docker.elastic.co/elasticsearch/elasticsearch:5.6.3
|
11
|
+
environment:
|
12
|
+
- "discovery.type=single-node"
|
13
|
+
- "xpack.security.enabled=false"
|
14
|
+
- "http.host=0.0.0.0"
|
15
|
+
- "transport.host=127.0.0.1"
|
16
|
+
|
17
|
+
working_directory: ~/repo
|
18
|
+
|
19
|
+
steps:
|
20
|
+
- checkout
|
21
|
+
|
22
|
+
- run:
|
23
|
+
name: Waiting for elasticsearch to start up (30 sec.)
|
24
|
+
command: sleep 30
|
25
|
+
|
26
|
+
- run:
|
27
|
+
name: Install dependencies
|
28
|
+
command: bundle install --jobs=4 --retry=3 --path vendor/bundle
|
29
|
+
|
30
|
+
- run:
|
31
|
+
name: Install CodeClimate test coverage reporter
|
32
|
+
command: |
|
33
|
+
curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
34
|
+
chmod +x ./cc-test-reporter
|
35
|
+
./cc-test-reporter before-build
|
36
|
+
|
37
|
+
- run:
|
38
|
+
name: Run tests
|
39
|
+
command: |
|
40
|
+
mkdir /tmp/test-results
|
41
|
+
bundle exec rspec --format progress --format RspecJunitFormatter --out /tmp/test-results/rspec.xml
|
42
|
+
|
43
|
+
- run:
|
44
|
+
name: Upload test coverage report to CodeClimate
|
45
|
+
command: ./cc-test-reporter after-build --exit-code $?
|
46
|
+
|
data/Gemfile
CHANGED
@@ -5,7 +5,6 @@ gemspec
|
|
5
5
|
group :development do
|
6
6
|
gem "bundler", ">= 1.15"
|
7
7
|
gem "rake", ">= 12.1"
|
8
|
-
gem "rspec", ">= 3.5.0", "< 4.0.0"
|
9
8
|
|
10
9
|
if !ENV["CI"]
|
11
10
|
gem "pry-byebug", ">= 3.5.0"
|
@@ -13,5 +12,8 @@ group :development do
|
|
13
12
|
end
|
14
13
|
|
15
14
|
group :test do
|
16
|
-
gem "
|
15
|
+
gem "faker", ">= 1.8.4"
|
16
|
+
gem "rspec", ">= 3.5.0", "< 4.0.0"
|
17
|
+
gem "rspec_junit_formatter", ">= 0.3.0"
|
18
|
+
gem "simplecov", ">= 0.15.0"
|
17
19
|
end
|
data/License.txt
CHANGED
data/Readme.md
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
metacrunch-elasticsearch
|
2
|
+
========================
|
3
|
+
|
4
|
+
[](http://badge.fury.io/rb/metacrunch-elasticsearch)
|
5
|
+
[](https://codeclimate.com/github/ubpb/metacrunch-elasticsearch)
|
6
|
+
[](https://codeclimate.com/github/ubpb/metacrunch-elasticsearch/coverage)
|
7
|
+
[](https://circleci.com/gh/ubpb/metacrunch-elasticsearch)
|
8
|
+
|
9
|
+
This is the official [Elasticsearch](https://www.elastic.co) package for the [metacrunch ETL toolkit](https://github.com/ubpb/metacrunch).
|
10
|
+
|
11
|
+
Installation
|
12
|
+
------------
|
13
|
+
|
14
|
+
Include the gem in your `Gemfile`
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem "metacrunch-elasticsearch", "~> 4.0.1"
|
18
|
+
```
|
19
|
+
|
20
|
+
and run `$ bundle install` to install it.
|
21
|
+
|
22
|
+
Or install it manually
|
23
|
+
|
24
|
+
```
|
25
|
+
$ gem install metacrunch-elasticsearch
|
26
|
+
```
|
27
|
+
|
28
|
+
Usage
|
29
|
+
-----
|
30
|
+
|
31
|
+
*Note: For working examples on how to use this package check out our [demo repository](https://github.com/ubpb/metacrunch-demo).*
|
32
|
+
|
33
|
+
### `Metacrunch::Elasticsearch::Source`
|
34
|
+
|
35
|
+
This class provides a metacrunch `source` implementation that can be used to read data from Elasticsearch into a metacrunch job.
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
# my_job.metacrunch
|
39
|
+
|
40
|
+
# Create an Elasticsearch connection
|
41
|
+
elasticsearch = Elasticsearch::Client.new(...)
|
42
|
+
|
43
|
+
# Set the source
|
44
|
+
source Metacrunch::Elasticsearch::Source.new(elasticsearch, OPTIONS)
|
45
|
+
```
|
46
|
+
|
47
|
+
**Options**
|
48
|
+
|
49
|
+
* `:search_options`: A hash with search options (including your query) as described [here](https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/search.rb). We have set some meaningful defaults, though: `size: 100`, `scroll: 1m`, `sort: ["_doc"]`. Depending on your use case, you may need to adjust `:size` and `:scroll` for optimal performance.
|
50
|
+
* `:total_hits_callback`: You can set a `Proc` that gets called with the total number of hits your query will match. You can use this callback to set up a progress bar, for example. Defaults to `nil`.
|
51
|
+
|
52
|
+
|
53
|
+
### `Metacrunch::Elasticsearch::Destination`
|
54
|
+
|
55
|
+
This class provides a metacrunch `destination` implementation that can be used to write data from a metacrunch job to Elasticsearch.
|
56
|
+
|
57
|
+
The data that gets passed to the destination must be in a proper format. You can use a transformation to transform your data before it reaches the destination.
|
58
|
+
|
59
|
+
As `Metacrunch::Elasticsearch::Destination` utilizes the Elasticsearch bulk API, the expected format must match one of the available options for the `body` parameter described [here](https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb). Please note that you can use the bulk API not only to index records. You can update or delete records as well.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
# my_job.metacrunch
|
63
|
+
|
64
|
+
# Transform data into a format that the destination can understand.
|
65
|
+
# In this example `data` is some hash.
|
66
|
+
transformation ->(data) do
|
67
|
+
{
|
68
|
+
index: {
|
69
|
+
_index: "my-index",
|
70
|
+
_type: "my-type",
|
71
|
+
_id: data.delete(:id),
|
72
|
+
data: data
|
73
|
+
}
|
74
|
+
}
|
75
|
+
end
|
76
|
+
```
|
77
|
+
|
78
|
+
It is not efficient to call Elasticsearch for every single record. Therefore we can use a transformation with a buffer, to create bulks of records. In this example we use a buffer size of 10. In production environments and depending on your data, larger buffers may be useful.
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
# my_job.metacrunch
|
82
|
+
|
83
|
+
transformation ->(data) { data }, buffer: 10
|
84
|
+
```
|
85
|
+
|
86
|
+
If these transformations are in place you can now use the `Metacrunch::Elasticsearch::Destination` class as a destination.
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
# my_job.metacrunch
|
90
|
+
|
91
|
+
# Write data into elasticsearch
|
92
|
+
destination Metacrunch::Elasticsearch::Destination.new(elasticsearch [, OPTIONS])
|
93
|
+
```
|
94
|
+
|
95
|
+
**Options**
|
96
|
+
|
97
|
+
* `:raise_on_result_errors`: If set to `true`, an error is raised if one of the bulk operations returns with an error. Defaults to `false`.
|
98
|
+
* `:result_callback`: You can set a `Proc` that gets called with the result from the bulk operation. Defaults to `nil`.
|
99
|
+
* `:bulk_options`: A hash of options for the Elasticsearch bulk API as described [here](https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb). Setting `body` here will be ignored. Defaults to `{}`.
|
100
|
+
|
101
|
+
License
|
102
|
+
-------
|
103
|
+
|
104
|
+
metacrunch-elasticsearch is available at [github](https://github.com/ubpb/metacrunch-elasticsearch) under [MIT license](https://github.com/ubpb/metacrunch-elasticsearch/blob/master/License.txt).
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -44,11 +44,13 @@ executables: []
|
|
44
44
|
extensions: []
|
45
45
|
extra_rdoc_files: []
|
46
46
|
files:
|
47
|
+
- ".circleci/config.yml"
|
47
48
|
- ".gitignore"
|
48
49
|
- ".rspec"
|
49
50
|
- Gemfile
|
50
51
|
- License.txt
|
51
52
|
- Rakefile
|
53
|
+
- Readme.md
|
52
54
|
- bin/console
|
53
55
|
- lib/metacrunch/elasticsearch.rb
|
54
56
|
- lib/metacrunch/elasticsearch/destination.rb
|