metacrunch-elasticsearch 3.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c8e449f5c84fd5b22946dc75c52b1693830e6c4
4
- data.tar.gz: b698c9cc5904719a37d6902e4cee9528ea03577e
3
+ metadata.gz: 7fcdad44003a195fca6541d8c9156523dcde02ad
4
+ data.tar.gz: 964c93ec7c91b54ea76bf338c5997920546f3840
5
5
  SHA512:
6
- metadata.gz: 396ae218ce4e493aa8d96bc7a5061e5941a65ea56c9d8cd791a9bcaeb0de11f5f83fc1df5f97379f967d969c86626b084dd9d39ed6fbeb3e6dbed7aacf88f2ae
7
- data.tar.gz: 4254f096d00cb3d113e43f5b5b9e215699002972f6661a335a1c4683bdcaf76bcbe12128d2a45b30e8a4bb5043138a16887f6c049d6c52db9fe3e1af23e5cebe
6
+ metadata.gz: 29bdc05cebb697ee8fbf6a434d2ad3861cf205c0a35324a77f1c1ef145599c72032f4adf2e5cc6e2cb395ac5bc744493942f87ddeb6f0310b342ac8285c04074
7
+ data.tar.gz: 60afb62d71c16d17404b5fd2b1bf735bce2011398a9bc014f0cb2c0e398df13d668c1601b9caa3e6beda1ed3f0d946da4bd37c0a59548159410182e86d3faf12
data/Gemfile CHANGED
@@ -3,19 +3,15 @@ source "https://rubygems.org"
3
3
  gemspec
4
4
 
5
5
  group :development do
6
- gem "bundler", ">= 1.7"
7
- gem "rake", ">= 11.1"
8
- gem "rspec", ">= 3.0.0", "< 4.0.0"
9
- gem "simplecov", ">= 0.11.0"
6
+ gem "bundler", ">= 1.15"
7
+ gem "rake", ">= 12.1"
8
+ gem "rspec", ">= 3.5.0", "< 4.0.0"
10
9
 
11
10
  if !ENV["CI"]
12
- gem "hashdiff", ">= 0.3.0", platform: :ruby
13
- gem "pry-byebug", ">= 3.3.0", platform: :ruby
14
- gem "pry-rescue", ">= 1.4.2", platform: :ruby
15
- gem "pry-state", ">= 0.1.7", platform: :ruby
11
+ gem "pry-byebug", ">= 3.5.0"
16
12
  end
17
13
  end
18
14
 
19
15
  group :test do
20
- gem "codeclimate-test-reporter", ">= 0.5.0", require: nil
16
+ gem "simplecov", ">= 0.15.0"
21
17
  end
data/Rakefile CHANGED
@@ -1,5 +1,5 @@
1
- require "bundler/gem_tasks"
2
1
  require "rspec/core/rake_task"
2
+ require "bundler/gem_tasks"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
data/bin/console CHANGED
@@ -1,11 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
-
3
2
  require "bundler/setup"
4
- require "metascrunch/elasticsearch"
3
+ require "metacrunch/elasticsearch"
5
4
 
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
5
+ begin
6
+ require "pry"
7
+ rescue LoadError ; end
8
8
 
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- require "pry"
11
- Pry.start
9
+ if defined?(Pry)
10
+ Pry.start
11
+ else
12
+ require "irb"
13
+ IRB.start
14
+ end
@@ -2,16 +2,7 @@ require "elasticsearch"
2
2
 
3
3
  module Metacrunch
4
4
  module Elasticsearch
5
- require_relative "./elasticsearch/index_creator"
6
- require_relative "./elasticsearch/indexer"
7
- require_relative "./elasticsearch/reader"
8
- require_relative "./elasticsearch/searcher"
9
- require_relative "./elasticsearch/uri"
10
- require_relative "./elasticsearch/writer"
11
-
12
- #
13
- # error class are inline to not clutter source files unnecessarily
14
- #
15
- class IndexAlreadyExistsError < StandardError; end
5
+ require_relative "elasticsearch/destination"
6
+ require_relative "elasticsearch/source"
16
7
  end
17
8
  end
@@ -0,0 +1,47 @@
1
+ require "metacrunch/elasticsearch"
2
+
3
+ module Metacrunch
4
module Elasticsearch
  # Destination that sends data to Elasticsearch through the client's bulk API.
  #
  # The class is nested inside `module Elasticsearch` (instead of the compact
  # `class Elasticsearch::Destination` form) so that sibling constants such as
  # DestinationError are reachable through lexical constant lookup.
  class Destination
    # Defaults applied when the caller does not override them.
    #
    # raise_on_result_errors - raise DestinationError when the bulk response
    #                          has a truthy "errors" entry.
    # result_callback        - callable invoked with every bulk response.
    # bulk_options           - extra arguments passed to the client's `bulk` call.
    #
    # Frozen because `deep_merge` only shallow-copies subtrees it does not
    # touch, so instances share these nested objects.
    DEFAULT_OPTIONS = {
      raise_on_result_errors: false,
      result_callback: nil,
      bulk_options: {}.freeze
    }.freeze

    # @param elasticsearch_client [#bulk] client object used for the bulk request
    # @param options [Hash] overrides for DEFAULT_OPTIONS (deep-merged)
    def initialize(elasticsearch_client, options = {})
      @client = elasticsearch_client
      @options = DEFAULT_OPTIONS.deep_merge(options)
    end

    # Sends `data` as the body of one bulk request.
    #
    # Does nothing when `data` is blank. After the request, raises if the
    # response reports errors and `raise_on_result_errors` is set; otherwise
    # hands the response to `result_callback` when it is callable.
    #
    # @param data [Object] bulk request body
    # @raise [DestinationError] when the response's "errors" entry is truthy
    #   and `raise_on_result_errors` is enabled
    def write(data)
      return if data.blank?

      # Build a fresh argument hash per call; writing :body into the
      # configured hash would leak it into shared (possibly default) state.
      request = (@options[:bulk_options] || {}).merge(body: data)
      result = @client.bulk(request)

      if result["errors"] && @options[:raise_on_result_errors]
        raise DestinationError.new("Elasticsearch bulk request reported errors", errors: result["errors"])
      end

      callback = @options[:result_callback]
      callback.call(result) if callback.respond_to?(:call)
    end

    # Nothing to release: every write is sent immediately.
    def close
      # noop
    end
  end
end
37
+
38
module Elasticsearch
  # Error carrying the error information of a failed write.
  #
  # Besides the usual exception message it exposes the `errors` value it was
  # constructed with.
  class DestinationError < StandardError
    # @return [Object] the value given as `errors:` on construction
    attr_reader :errors

    # @param msg [String, nil] optional message; when nil the exception falls
    #   back to Ruby's default message (the class name)
    # @param errors [Object] error information to expose via #errors
    def initialize(msg = nil, errors:)
      # Pass the message on explicitly: without this call `msg` is dropped,
      # and a bare `super` would also forward the `errors:` keyword.
      super(msg)
      @errors = errors
    end
  end
end
47
+ end
@@ -0,0 +1,56 @@
1
+ require "metacrunch/elasticsearch"
2
+
3
+ module Metacrunch
4
module Elasticsearch
  # Source that runs a search and yields every hit, following the scroll
  # until a page comes back empty.
  class Source
    # Defaults applied when the caller does not override them.
    #
    # total_hits_callback - callable invoked once with result["hits"]["total"]
    #                       of the initial search response.
    # search_options      - arguments passed to the client's `search` call.
    #
    # The hashes are frozen because `deep_merge` only shallow-copies subtrees
    # it does not touch, so instances share these nested objects.
    DEFAULT_OPTIONS = {
      total_hits_callback: nil,
      search_options: {
        size: 100,
        scroll: "1m",
        sort: ["_doc"]
      }.freeze
    }.freeze

    # @param elasticsearch_client [#search, #scroll, #clear_scroll] client object
    # @param options [Hash] overrides for DEFAULT_OPTIONS (deep-merged)
    def initialize(elasticsearch_client, options = {})
      @client = elasticsearch_client
      @options = DEFAULT_OPTIONS.deep_merge(options)
    end

    # Yields each hit of the search, page by page.
    #
    # Without a block an Enumerator is returned and no request is made until
    # it is consumed. The scroll is cleared when iteration ends, including
    # when the block breaks out early or an error is raised.
    #
    # @yieldparam hit [Object] one entry of result["hits"]["hits"]
    # @return [Enumerator] when no block is given
    def each(&block)
      return enum_for(__method__) unless block_given?

      configured = @options[:search_options] || {}
      # Remember the scroll setting up front and give the client its own copy
      # of the arguments, so nothing the client does to them reaches the
      # configured (possibly shared default) hash.
      scroll_ttl = configured[:scroll]

      # Perform the search request and yield the first page, if any.
      result = @client.search(configured.dup)
      call_total_hits_callback(result)
      yield_hits(result, &block)

      # Follow the scroll until a page without hits is returned. A response
      # without a scroll id cannot be continued.
      while result["_scroll_id"] &&
            (page = @client.scroll(scroll_id: result["_scroll_id"], scroll: scroll_ttl))
        result = page
        hits = result["hits"]["hits"]
        break if hits.nil? || hits.empty?

        yield_hits(result, &block)
      end
    ensure
      # Clear the scroll to free up resources; skipped when no request
      # succeeded or the last response carried no scroll id.
      scroll_id = result && result["_scroll_id"]
      @client.clear_scroll(scroll_id: scroll_id) if scroll_id
    end

    private

    # Reports the total from the search response to the configured callback,
    # when the callback is callable and the response contains a total.
    def call_total_hits_callback(result)
      callback = @options[:total_hits_callback]
      return unless callback.respond_to?(:call)

      total = result["hits"]["total"]
      callback.call(total) if total
    end

    # Passes every hit of one response page to the caller's block.
    def yield_hits(result, &block)
      result["hits"]["hits"].each(&block)
    end
  end
end
56
+ end
@@ -1,5 +1,5 @@
1
1
  module Metacrunch
2
2
  module Elasticsearch
3
- VERSION = "3.0.0"
3
+ VERSION = "4.0.0"
4
4
  end
5
5
  end
@@ -6,17 +6,16 @@ require "metacrunch/elasticsearch/version"
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "metacrunch-elasticsearch"
8
8
  spec.version = Metacrunch::Elasticsearch::VERSION
9
- spec.authors = ["René Sprotte", "Michael Sievers"]
10
- spec.summary = %q{Metacrunch elasticsearch package}
9
+ spec.authors = ["René Sprotte"]
10
+ spec.summary = %q{Elasticsearch package for the metacrunch ETL toolkit.}
11
11
  spec.homepage = "http://github.com/ubpb/metacrunch-elasticsearch"
12
- spec.licenses = "MIT"
12
+ spec.license = "MIT"
13
13
 
14
14
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
15
- spec.bindir = "exe"
16
15
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
17
16
  spec.require_paths = ["lib"]
18
17
 
19
- spec.add_dependency "activesupport", ">= 4.0.0"
20
- spec.add_dependency "elasticsearch", "~> 1.0"
18
+ spec.add_dependency "activesupport", ">= 5.1.0"
19
+ spec.add_dependency "elasticsearch", ">= 5.0.4"
21
20
  end
22
21
 
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
8
- - Michael Sievers
9
8
  autorequire:
10
- bindir: exe
9
+ bindir: bin
11
10
  cert_chain: []
12
- date: 2016-05-18 00:00:00.000000000 Z
11
+ date: 2017-09-27 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: activesupport
@@ -17,28 +16,28 @@ dependencies:
17
16
  requirements:
18
17
  - - ">="
19
18
  - !ruby/object:Gem::Version
20
- version: 4.0.0
19
+ version: 5.1.0
21
20
  type: :runtime
22
21
  prerelease: false
23
22
  version_requirements: !ruby/object:Gem::Requirement
24
23
  requirements:
25
24
  - - ">="
26
25
  - !ruby/object:Gem::Version
27
- version: 4.0.0
26
+ version: 5.1.0
28
27
  - !ruby/object:Gem::Dependency
29
28
  name: elasticsearch
30
29
  requirement: !ruby/object:Gem::Requirement
31
30
  requirements:
32
- - - "~>"
31
+ - - ">="
33
32
  - !ruby/object:Gem::Version
34
- version: '1.0'
33
+ version: 5.0.4
35
34
  type: :runtime
36
35
  prerelease: false
37
36
  version_requirements: !ruby/object:Gem::Requirement
38
37
  requirements:
39
- - - "~>"
38
+ - - ">="
40
39
  - !ruby/object:Gem::Version
41
- version: '1.0'
40
+ version: 5.0.4
42
41
  description:
43
42
  email:
44
43
  executables: []
@@ -47,23 +46,14 @@ extra_rdoc_files: []
47
46
  files:
48
47
  - ".gitignore"
49
48
  - ".rspec"
50
- - ".travis.yml"
51
49
  - Gemfile
52
50
  - License.txt
53
51
  - Rakefile
54
- - Readme.md
55
52
  - bin/console
56
- - bin/setup
57
53
  - lib/metacrunch/elasticsearch.rb
58
- - lib/metacrunch/elasticsearch/client_factory.rb
59
- - lib/metacrunch/elasticsearch/index_creator.rb
60
- - lib/metacrunch/elasticsearch/indexer.rb
61
- - lib/metacrunch/elasticsearch/options_helpers.rb
62
- - lib/metacrunch/elasticsearch/reader.rb
63
- - lib/metacrunch/elasticsearch/searcher.rb
64
- - lib/metacrunch/elasticsearch/uri.rb
54
+ - lib/metacrunch/elasticsearch/destination.rb
55
+ - lib/metacrunch/elasticsearch/source.rb
65
56
  - lib/metacrunch/elasticsearch/version.rb
66
- - lib/metacrunch/elasticsearch/writer.rb
67
57
  - metacrunch-elasticsearch.gemspec
68
58
  homepage: http://github.com/ubpb/metacrunch-elasticsearch
69
59
  licenses:
@@ -85,8 +75,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
85
75
  version: '0'
86
76
  requirements: []
87
77
  rubyforge_project:
88
- rubygems_version: 2.5.1
78
+ rubygems_version: 2.6.11
89
79
  signing_key:
90
80
  specification_version: 4
91
- summary: Metacrunch elasticsearch package
81
+ summary: Elasticsearch package for the metacrunch ETL toolkit.
92
82
  test_files: []
data/.travis.yml DELETED
@@ -1,5 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - "2.0"
4
- - "2.1"
5
- - "2.2"
data/Readme.md DELETED
@@ -1,3 +0,0 @@
1
- # metacrunch-elasticsearch
2
-
3
- Dokumentation folgt in Kürze...
data/bin/setup DELETED
@@ -1,7 +0,0 @@
1
- #!/bin/bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
-
5
- bundle install
6
-
7
- # Do any other automated setup that you need to do here
@@ -1,15 +0,0 @@
1
- require "elasticsearch"
2
- require_relative "../elasticsearch"
3
-
4
- module Metacrunch::Elasticsearch::ClientFactory
5
- def client_factory
6
- client_options = {
7
- host: @host,
8
- hosts: @hosts,
9
- url: @url,
10
- urls: @urls
11
- }.compact
12
-
13
- Elasticsearch::Client.new(client_options)
14
- end
15
- end
@@ -1,77 +0,0 @@
1
- require "elasticsearch"
2
- require_relative "../elasticsearch"
3
- require_relative "./client_factory"
4
- require_relative "./options_helpers"
5
-
6
- class Metacrunch::Elasticsearch::IndexCreator
7
- include Metacrunch::Elasticsearch::ClientFactory
8
- include Metacrunch::Elasticsearch::OptionsHelpers
9
-
10
- attr_accessor :default_mapping
11
- attr_accessor :delete_existing_index
12
- attr_accessor :logger
13
- attr_accessor :settings
14
-
15
- def initialize(options = {})
16
- (@client_args = options).deep_symbolize_keys!
17
- extract_options!(@client_args, :_client_options_, :default_mapping, :delete_existing_index, :logger, :number_of_shards, :number_of_replicas, :settings)
18
- raise ArgumentError.new("You have to supply an index name!") if @client_args[:index].blank?
19
- end
20
-
21
- def call(items = [])
22
- client = client_factory
23
- logger = @logger
24
-
25
- if client.indices.exists?(@client_args)
26
- if @delete_existing_index == true
27
- client.indices.delete(@client_args)
28
- log_index_deleted(logger, @client_args[:index], client) if logger
29
- elsif @delete_existing_index == false
30
- return
31
- else
32
- raise Metacrunch::Elasticsearch::IndexAlreadyExistsError
33
- end
34
- end
35
-
36
- client.indices.create(@client_args.merge(
37
- {
38
- body: {
39
- number_of_shards: @number_of_shards,
40
- number_of_replicas: @number_of_replicas,
41
- settings: @settings
42
- }.compact
43
- }
44
- ))
45
-
46
- log_index_created(logger, @client_args[:index], client) if logger
47
-
48
- if @default_mapping
49
- client.indices.put_mapping(
50
- @client_args.merge(
51
- type: "_default_",
52
- body: {
53
- _default_: @default_mapping
54
- }
55
- )
56
- )
57
- end
58
- end
59
-
60
- private
61
-
62
- def log_index_created(logger, index, client)
63
- paths = client.transport.hosts.map do |_host|
64
- "#{_host[:host]}:#{_host[:port]}"
65
- end
66
-
67
- logger.info("Index #{index} created at #{paths}")
68
- end
69
-
70
- def log_index_deleted(logger, index, client)
71
- paths = client.transport.hosts.map do |_host|
72
- "#{_host[:host]}:#{_host[:port]}"
73
- end
74
-
75
- logger.info("Index #{index} deleted at #{paths}")
76
- end
77
- end
@@ -1,90 +0,0 @@
1
- require "elasticsearch"
2
- require_relative "../elasticsearch"
3
- require_relative "./client_factory"
4
- require_relative "./options_helpers"
5
-
6
- class Metacrunch::Elasticsearch::Indexer
7
- include Metacrunch::Elasticsearch::ClientFactory
8
- include Metacrunch::Elasticsearch::OptionsHelpers
9
-
10
- attr_accessor :bulk_size
11
- attr_accessor :callbacks
12
- attr_accessor :id_accessor
13
- attr_accessor :index
14
- attr_accessor :logger
15
- attr_accessor :type
16
-
17
- def initialize(options = {})
18
- (@client_args = options).deep_symbolize_keys!
19
- extract_options!(@client_args, :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type)
20
- raise ArgumentError.new("You have to supply an index name!") if @index.blank?
21
- end
22
-
23
- def call(items = [])
24
- logger = @logger
25
-
26
- if (slice_size = @bulk_size || items.length) > 0
27
- client = client_factory
28
-
29
- items.each_slice(slice_size) do |_item_slice|
30
- # bodies is an array to allow slicing in case of HTTP content length exceed
31
- bodies = [_item_slice.inject([]) { |_memo, _item| _memo.concat bulk_item_factory(_item) }]
32
-
33
- bulk_responses =
34
- begin
35
- bodies.map do |_body|
36
- client.bulk body: _body
37
- end
38
- rescue
39
- logger.info "Bulk index failed. Decreasing bulk size temporary and trying again." if logger
40
-
41
- bodies = bodies.inject([]) do |_memo, _body|
42
- # Since we have to work with the bulk request body instead if the original items
43
- # the bodys length has to be a multiple of 2 in any case. .fdiv(2).fdiv(2).ceil * 2
44
- # ensures this. Example 3698.fdiv(2).fdiv(2).fdiv(2).ceil * 2 == 1850
45
- _memo.concat(_body.each_slice(_body.length.fdiv(2).fdiv(2).ceil * 2).to_a)
46
- end
47
-
48
- retry
49
- end
50
-
51
- bulk_responses.each do |_bulk_response|
52
- log_items_indexed(logger, _bulk_response["items"].length, client) if logger
53
-
54
- if after_indexed_callback = (@callbacks || {})[:after_indexed]
55
- _item_slice.zip(_bulk_response["items"]).each do |_item, _item_response|
56
- after_indexed_callback.call(_item, _item_response)
57
- end
58
- end
59
- end
60
- end
61
- end
62
- end
63
-
64
- private
65
-
66
- def bulk_item_factory(item)
67
- [
68
- { index: { _index: @index, _type: @type, _id: id(item) }.compact },
69
- item.to_h
70
- ]
71
- end
72
-
73
- def id(item)
74
- if @id_accessor
75
- if @id_accessor.respond_to?(:call)
76
- @id_accessor.call(item)
77
- else
78
- item[@id_accessor]
79
- end
80
- end
81
- end
82
-
83
- def log_items_indexed(logger, amount, client)
84
- paths = client.transport.hosts.map do |_host|
85
- "#{_host[:host]}:#{_host[:port]}/#{@index}/#{@type}"
86
- end
87
-
88
- logger.info("Indexed #{amount} items to #{paths}")
89
- end
90
- end
@@ -1,30 +0,0 @@
1
- require_relative "../elasticsearch"
2
-
3
- module Metacrunch::Elasticsearch::OptionsHelpers
4
- def extract_options!(options, *keys)
5
- keys = keys
6
- .map do |_key|
7
- _key == :_client_options_ ? [:host, :hosts, :url, :urls] : _key
8
- end
9
- .flatten
10
-
11
- options
12
- .delete_if do |_key, _value|
13
- if keys.include?(_key)
14
- instance_variable_set("@#{_key}", _value)
15
- true # else if _value is falsy, the key does not get deleted
16
- end
17
- end
18
- end
19
-
20
- def normalize_options!(options)
21
- {
22
- index: options[:index],
23
- body: options.select { |_key, _| _key != :index }
24
- }
25
- .tap(&:compact!)
26
- .try do |_result|
27
- options.clear.merge!(_result)
28
- end
29
- end
30
- end
@@ -1,63 +0,0 @@
1
- require "elasticsearch"
2
- require_relative "../elasticsearch"
3
-
4
- module Metacrunch
5
- module Elasticsearch
6
- class Reader
7
-
8
- DEFAULT_SCAN_SIZE = 250
9
- DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
10
-
11
-
12
- def initialize(uri, body, log: false)
13
- unless uri.starts_with?("elasticsearch://")
14
- raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
15
- end
16
-
17
- @uri = URI(uri)
18
- @body = body
19
- @log = log
20
- end
21
-
22
- def each(&block)
23
- return enum_for(__method__) unless block_given?
24
-
25
- search_result = client.search({
26
- body: @body,
27
- index: @uri.index,
28
- type: @uri.type,
29
- scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
30
- search_type: "scan",
31
- size: DEFAULT_SCAN_SIZE
32
- })
33
-
34
- while (
35
- search_result = client.scroll(
36
- scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
37
- scroll_id: search_result["_scroll_id"]
38
- ) and # don't use &&, the semantic of 'and' is important here
39
- search_result["hits"]["hits"].present?
40
- ) do
41
- search_result["hits"]["hits"].each do |_hit|
42
- yield(_hit)
43
- end
44
- end
45
- end
46
-
47
- def count
48
- client.count({
49
- body: { query: @body[:query] },
50
- index: @uri.index,
51
- type: @uri.type
52
- })["count"]
53
- end
54
-
55
- private
56
-
57
- def client
58
- @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
59
- end
60
-
61
- end
62
- end
63
- end
@@ -1,56 +0,0 @@
1
- require "elasticsearch"
2
- require_relative "../elasticsearch"
3
- require_relative "./client_factory"
4
- require_relative "./options_helpers"
5
-
6
- class Metacrunch::Elasticsearch::Searcher
7
- include Enumerable
8
- include Metacrunch::Elasticsearch::ClientFactory
9
- include Metacrunch::Elasticsearch::OptionsHelpers
10
-
11
- DEFAULT_BODY = { query: { match_all: {} } }
12
- DEFAULT_SCAN_SIZE = 200 # per shard
13
- DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
14
-
15
- attr_accessor :bulk_size
16
- attr_accessor :index
17
- attr_accessor :scan_size
18
- attr_accessor :scroll_expiry_time
19
- attr_accessor :type
20
-
21
- def initialize(options = {})
22
- options.deep_symbolize_keys!
23
- extract_options!(options, :_client_options_, :bulk_size, :index, :scan_size, :scroll_expiry_time, :type)
24
- @body = options.presence || DEFAULT_BODY
25
- end
26
-
27
- def call(items = [])
28
- @docs_enumerator ||= @bulk_size ? each_slice(@bulk_size) : [each.to_a].to_enum
29
- items.concat(@docs_enumerator.next)
30
- end
31
-
32
- def each
33
- return enum_for(__method__) unless block_given?
34
- client = client_factory
35
-
36
- search_result = client.search({
37
- body: @body,
38
- index: @index,
39
- scroll: "#{@scroll_expiry_time || DEFAULT_SCROLL_EXPIRY_TIME}s",
40
- search_type: "scan",
41
- size: @scan_size || DEFAULT_SCAN_SIZE
42
- })
43
-
44
- while (
45
- search_result = client.scroll(
46
- scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
47
- scroll_id: search_result["_scroll_id"]
48
- ) and # don't use &&, the semantic of and is important here
49
- search_result["hits"]["hits"].present?
50
- ) do
51
- search_result["hits"]["hits"].each do |_hit|
52
- yield _hit
53
- end
54
- end
55
- end
56
- end
@@ -1,31 +0,0 @@
1
- require "uri"
2
- require_relative "../elasticsearch"
3
-
4
-
5
- module Metacrunch
6
- module Elasticsearch
7
- class URI < URI::Generic
8
-
9
- DEFAULT_PORT = 9200
10
-
11
- def index
12
- splitted_path[0]
13
- end
14
-
15
- def type
16
- splitted_path[1]
17
- end
18
-
19
- private
20
-
21
- def splitted_path
22
- path.split("/").map(&:presence).compact
23
- end
24
-
25
- end
26
- end
27
- end
28
-
29
- module URI
30
- @@schemes['ELASTICSEARCH'] = Metacrunch::Elasticsearch::URI
31
- end
@@ -1,59 +0,0 @@
1
- require "elasticsearch"
2
- require_relative "../elasticsearch"
3
-
4
- module Metacrunch
5
- module Elasticsearch
6
- class Writer
7
-
8
- def initialize(uri, log: false, bulk_size: 250, autoflush: true)
9
- unless uri.starts_with?("elasticsearch://")
10
- raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
11
- end
12
-
13
- @uri = URI(uri)
14
- @log = log
15
- @bulk_size = bulk_size
16
- @buffer = []
17
- @autoflush = autoflush
18
- end
19
-
20
- def write(data, options = {})
21
- id = data.delete(:id) || data.delete(:_id)
22
- raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id
23
-
24
- @buffer << {
25
- _index: @uri.index,
26
- _type: @uri.type,
27
- _id: id,
28
- data: data
29
- }
30
-
31
- flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size
32
-
33
- true
34
- end
35
-
36
- def flush
37
- if @buffer.length > 0
38
- result = client.bulk(body: @buffer.inject([]){ |_body, _data| _body << { index: _data } })
39
- raise RuntimeError if result["errors"]
40
- end
41
-
42
- true
43
- ensure
44
- @buffer = []
45
- end
46
-
47
- def close
48
- flush
49
- end
50
-
51
- private
52
-
53
- def client
54
- @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
55
- end
56
-
57
- end
58
- end
59
- end