metacrunch-elasticsearch 3.0.0 → 4.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 3c8e449f5c84fd5b22946dc75c52b1693830e6c4
-   data.tar.gz: b698c9cc5904719a37d6902e4cee9528ea03577e
+   metadata.gz: 7fcdad44003a195fca6541d8c9156523dcde02ad
+   data.tar.gz: 964c93ec7c91b54ea76bf338c5997920546f3840
  SHA512:
-   metadata.gz: 396ae218ce4e493aa8d96bc7a5061e5941a65ea56c9d8cd791a9bcaeb0de11f5f83fc1df5f97379f967d969c86626b084dd9d39ed6fbeb3e6dbed7aacf88f2ae
-   data.tar.gz: 4254f096d00cb3d113e43f5b5b9e215699002972f6661a335a1c4683bdcaf76bcbe12128d2a45b30e8a4bb5043138a16887f6c049d6c52db9fe3e1af23e5cebe
+   metadata.gz: 29bdc05cebb697ee8fbf6a434d2ad3861cf205c0a35324a77f1c1ef145599c72032f4adf2e5cc6e2cb395ac5bc744493942f87ddeb6f0310b342ac8285c04074
+   data.tar.gz: 60afb62d71c16d17404b5fd2b1bf735bce2011398a9bc014f0cb2c0e398df13d668c1601b9caa3e6beda1ed3f0d946da4bd37c0a59548159410182e86d3faf12
data/Gemfile CHANGED
@@ -3,19 +3,15 @@ source "https://rubygems.org"
  gemspec

  group :development do
-   gem "bundler", ">= 1.7"
-   gem "rake", ">= 11.1"
-   gem "rspec", ">= 3.0.0", "< 4.0.0"
-   gem "simplecov", ">= 0.11.0"
+   gem "bundler", ">= 1.15"
+   gem "rake", ">= 12.1"
+   gem "rspec", ">= 3.5.0", "< 4.0.0"

    if !ENV["CI"]
-     gem "hashdiff", ">= 0.3.0", platform: :ruby
-     gem "pry-byebug", ">= 3.3.0", platform: :ruby
-     gem "pry-rescue", ">= 1.4.2", platform: :ruby
-     gem "pry-state", ">= 0.1.7", platform: :ruby
+     gem "pry-byebug", ">= 3.5.0"
    end
  end

  group :test do
-   gem "codeclimate-test-reporter", ">= 0.5.0", require: nil
+   gem "simplecov", ">= 0.15.0"
  end
data/Rakefile CHANGED
@@ -1,5 +1,5 @@
- require "bundler/gem_tasks"
  require "rspec/core/rake_task"
+ require "bundler/gem_tasks"

  RSpec::Core::RakeTask.new(:spec)

data/bin/console CHANGED
@@ -1,11 +1,14 @@
  #!/usr/bin/env ruby
-
  require "bundler/setup"
- require "metascrunch/elasticsearch"
+ require "metacrunch/elasticsearch"

- # You can add fixtures and/or initialization code here to make experimenting
- # with your gem easier. You can also use a different console, if you like.
+ begin
+   require "pry"
+ rescue LoadError ; end

- # (If you use this, don't forget to add pry to your Gemfile!)
- require "pry"
- Pry.start
+ if defined?(Pry)
+   Pry.start
+ else
+   require "irb"
+   IRB.start
+ end
data/lib/metacrunch/elasticsearch.rb CHANGED
@@ -2,16 +2,7 @@ require "elasticsearch"

  module Metacrunch
    module Elasticsearch
-     require_relative "./elasticsearch/index_creator"
-     require_relative "./elasticsearch/indexer"
-     require_relative "./elasticsearch/reader"
-     require_relative "./elasticsearch/searcher"
-     require_relative "./elasticsearch/uri"
-     require_relative "./elasticsearch/writer"
-
-     #
-     # error class are inline to not clutter source files unnecessarily
-     #
-     class IndexAlreadyExistsError < StandardError; end
+     require_relative "elasticsearch/destination"
+     require_relative "elasticsearch/source"
    end
  end
data/lib/metacrunch/elasticsearch/destination.rb ADDED
@@ -0,0 +1,47 @@
+ require "metacrunch/elasticsearch"
+
+ module Metacrunch
+   class Elasticsearch::Destination
+
+     DEFAULT_OPTIONS = {
+       raise_on_result_errors: false,
+       result_callback: nil,
+       bulk_options: {}
+     }
+
+     def initialize(elasticsearch_client, options = {})
+       @client = elasticsearch_client
+       @options = DEFAULT_OPTIONS.deep_merge(options)
+     end
+
+     def write(data)
+       return if data.blank?
+
+       # Call elasticsearch bulk api
+       bulk_options = @options[:bulk_options]
+       bulk_options[:body] = data
+       result = @client.bulk(bulk_options)
+
+       # Raise an exception if one of the results produced an error and the user wants to know about it
+       raise DestinationError.new(errors: result["errors"]) if result["errors"] && @options[:raise_on_result_errors]
+
+       # if the user provided a callback proc, call it
+       @options[:result_callback].call(result) if @options[:result_callback]&.respond_to?(:call)
+     end
+
+     def close
+       # noop
+     end
+
+   end
+
+   class Elasticsearch::DestinationError < StandardError
+
+     attr_reader :errors
+
+     def initialize(msg = nil, errors:)
+       @errors = errors
+     end
+
+   end
+ end
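
Taken together with the removal of Writer and Indexer further down, the new Destination moves responsibility to the caller: you construct the Elasticsearch client yourself and pass write a payload that is already in bulk-API format. A minimal usage sketch (not part of the diff; the URL, index, and documents are made up):

# Hypothetical usage of Metacrunch::Elasticsearch::Destination in 4.0.0
require "elasticsearch"
require "metacrunch/elasticsearch"

client = Elasticsearch::Client.new(url: "http://localhost:9200")

destination = Metacrunch::Elasticsearch::Destination.new(
  client,
  raise_on_result_errors: true,  # raise DestinationError if the bulk response reports errors
  result_callback: ->(result) { puts "bulk took #{result["took"]}ms" }
)

# write expects entries in the format accepted by Elasticsearch::Client#bulk
destination.write([
  { index: { _index: "books", _type: "book", _id: 1, data: { title: "Faust" } } },
  { index: { _index: "books", _type: "book", _id: 2, data: { title: "Effi Briest" } } }
])

destination.close
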
data/lib/metacrunch/elasticsearch/source.rb ADDED
@@ -0,0 +1,56 @@
+ require "metacrunch/elasticsearch"
+
+ module Metacrunch
+   class Elasticsearch::Source
+
+     DEFAULT_OPTIONS = {
+       total_hits_callback: nil,
+       search_options: {
+         size: 100,
+         scroll: "1m",
+         sort: ["_doc"]
+       }
+     }
+
+     def initialize(elasticsearch_client, options = {})
+       @client = elasticsearch_client
+       @options = DEFAULT_OPTIONS.deep_merge(options)
+     end
+
+     def each(&block)
+       return enum_for(__method__) unless block_given?
+
+       # Perform search request and yield the first results if any
+       search_options = @options[:search_options]
+       result = @client.search(search_options)
+       call_total_hits_callback(result)
+       yield_hits(result, &block)
+
+       # Scroll over the rest of result set and yield the results until the set is empty.
+       while (
+         # Note: semantic of 'and' is important here. Do not use '&&'.
+         result = @client.scroll(scroll_id: result["_scroll_id"], scroll: search_options[:scroll]) and result["hits"]["hits"].present?
+       ) do
+         yield_hits(result, &block)
+       end
+     ensure
+       # Clear scroll to free up resources.
+       @client.clear_scroll(scroll_id: result["_scroll_id"]) if result
+     end
+
+     private
+
+     def call_total_hits_callback(result)
+       if @options[:total_hits_callback]&.respond_to?(:call) && result["hits"]["total"]
+         @options[:total_hits_callback].call(result["hits"]["total"])
+       end
+     end
+
+     def yield_hits(result, &block)
+       result["hits"]["hits"].each do |hit|
+         yield(hit)
+       end
+     end
+
+   end
+ end
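
Source is the reading counterpart: it wraps the search/scroll/clear_scroll cycle and yields raw hits. A minimal usage sketch (not part of the diff; the URL, index, and query are made up):

# Hypothetical usage of Metacrunch::Elasticsearch::Source in 4.0.0
require "elasticsearch"
require "metacrunch/elasticsearch"

client = Elasticsearch::Client.new(url: "http://localhost:9200")

source = Metacrunch::Elasticsearch::Source.new(
  client,
  total_hits_callback: ->(total) { puts "#{total} documents to stream" },
  search_options: {
    index: "books",
    body: { query: { match_all: {} } }
    # size (100), scroll ("1m") and sort (["_doc"]) fall back to DEFAULT_OPTIONS
  }
)

# each scrolls through the whole result set; called without a block it returns an Enumerator
source.each do |hit|
  puts hit["_source"]
end
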
data/lib/metacrunch/elasticsearch/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Metacrunch
    module Elasticsearch
-     VERSION = "3.0.0"
+     VERSION = "4.0.0"
    end
  end
data/metacrunch-elasticsearch.gemspec CHANGED
@@ -6,17 +6,16 @@ require "metacrunch/elasticsearch/version"
  Gem::Specification.new do |spec|
    spec.name = "metacrunch-elasticsearch"
    spec.version = Metacrunch::Elasticsearch::VERSION
-   spec.authors = ["René Sprotte", "Michael Sievers"]
-   spec.summary = %q{Metacrunch elasticsearch package}
+   spec.authors = ["René Sprotte"]
+   spec.summary = %q{Elasticsearch package for the metacrunch ETL toolkit.}
    spec.homepage = "http://github.com/ubpb/metacrunch-elasticsearch"
-   spec.licenses = "MIT"
+   spec.license = "MIT"

    spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
-   spec.bindir = "exe"
    spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
    spec.require_paths = ["lib"]

-   spec.add_dependency "activesupport", ">= 4.0.0"
-   spec.add_dependency "elasticsearch", "~> 1.0"
+   spec.add_dependency "activesupport", ">= 5.1.0"
+   spec.add_dependency "elasticsearch", ">= 5.0.4"
  end

metadata CHANGED
@@ -1,15 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: metacrunch-elasticsearch
  version: !ruby/object:Gem::Version
-   version: 3.0.0
+   version: 4.0.0
  platform: ruby
  authors:
  - René Sprotte
- - Michael Sievers
  autorequire:
- bindir: exe
+ bindir: bin
  cert_chain: []
- date: 2016-05-18 00:00:00.000000000 Z
+ date: 2017-09-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: activesupport
@@ -17,28 +16,28 @@ dependencies:
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: 4.0.0
+         version: 5.1.0
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: 4.0.0
+         version: 5.1.0
  - !ruby/object:Gem::Dependency
    name: elasticsearch
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
+     - - ">="
        - !ruby/object:Gem::Version
-         version: '1.0'
+         version: 5.0.4
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
+     - - ">="
        - !ruby/object:Gem::Version
-         version: '1.0'
+         version: 5.0.4
  description:
  email:
  executables: []
@@ -47,23 +46,14 @@ extra_rdoc_files: []
  files:
  - ".gitignore"
  - ".rspec"
- - ".travis.yml"
  - Gemfile
  - License.txt
  - Rakefile
- - Readme.md
  - bin/console
- - bin/setup
  - lib/metacrunch/elasticsearch.rb
- - lib/metacrunch/elasticsearch/client_factory.rb
- - lib/metacrunch/elasticsearch/index_creator.rb
- - lib/metacrunch/elasticsearch/indexer.rb
- - lib/metacrunch/elasticsearch/options_helpers.rb
- - lib/metacrunch/elasticsearch/reader.rb
- - lib/metacrunch/elasticsearch/searcher.rb
- - lib/metacrunch/elasticsearch/uri.rb
+ - lib/metacrunch/elasticsearch/destination.rb
+ - lib/metacrunch/elasticsearch/source.rb
  - lib/metacrunch/elasticsearch/version.rb
- - lib/metacrunch/elasticsearch/writer.rb
  - metacrunch-elasticsearch.gemspec
  homepage: http://github.com/ubpb/metacrunch-elasticsearch
  licenses:
@@ -85,8 +75,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
        version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.5.1
+ rubygems_version: 2.6.11
  signing_key:
  specification_version: 4
- summary: Metacrunch elasticsearch package
+ summary: Elasticsearch package for the metacrunch ETL toolkit.
  test_files: []
data/.travis.yml DELETED
@@ -1,5 +0,0 @@
- language: ruby
- rvm:
- - "2.0"
- - "2.1"
- - "2.2"
data/Readme.md DELETED
@@ -1,3 +0,0 @@
- # metacrunch-elasticsearch
-
- Documentation coming soon...
data/bin/setup DELETED
@@ -1,7 +0,0 @@
- #!/bin/bash
- set -euo pipefail
- IFS=$'\n\t'
-
- bundle install
-
- # Do any other automated setup that you need to do here
data/lib/metacrunch/elasticsearch/client_factory.rb DELETED
@@ -1,15 +0,0 @@
- require "elasticsearch"
- require_relative "../elasticsearch"
-
- module Metacrunch::Elasticsearch::ClientFactory
-   def client_factory
-     client_options = {
-       host: @host,
-       hosts: @hosts,
-       url: @url,
-       urls: @urls
-     }.compact
-
-     Elasticsearch::Client.new(client_options)
-   end
- end
data/lib/metacrunch/elasticsearch/index_creator.rb DELETED
@@ -1,77 +0,0 @@
- require "elasticsearch"
- require_relative "../elasticsearch"
- require_relative "./client_factory"
- require_relative "./options_helpers"
-
- class Metacrunch::Elasticsearch::IndexCreator
-   include Metacrunch::Elasticsearch::ClientFactory
-   include Metacrunch::Elasticsearch::OptionsHelpers
-
-   attr_accessor :default_mapping
-   attr_accessor :delete_existing_index
-   attr_accessor :logger
-   attr_accessor :settings
-
-   def initialize(options = {})
-     (@client_args = options).deep_symbolize_keys!
-     extract_options!(@client_args, :_client_options_, :default_mapping, :delete_existing_index, :logger, :number_of_shards, :number_of_replicas, :settings)
-     raise ArgumentError.new("You have to supply an index name!") if @client_args[:index].blank?
-   end
-
-   def call(items = [])
-     client = client_factory
-     logger = @logger
-
-     if client.indices.exists?(@client_args)
-       if @delete_existing_index == true
-         client.indices.delete(@client_args)
-         log_index_deleted(logger, @client_args[:index], client) if logger
-       elsif @delete_existing_index == false
-         return
-       else
-         raise Metacrunch::Elasticsearch::IndexAlreadyExistsError
-       end
-     end
-
-     client.indices.create(@client_args.merge(
-       {
-         body: {
-           number_of_shards: @number_of_shards,
-           number_of_replicas: @number_of_replicas,
-           settings: @settings
-         }.compact
-       }
-     ))
-
-     log_index_created(logger, @client_args[:index], client) if logger
-
-     if @default_mapping
-       client.indices.put_mapping(
-         @client_args.merge(
-           type: "_default_",
-           body: {
-             _default_: @default_mapping
-           }
-         )
-       )
-     end
-   end
-
-   private
-
-   def log_index_created(logger, index, client)
-     paths = client.transport.hosts.map do |_host|
-       "#{_host[:host]}:#{_host[:port]}"
-     end
-
-     logger.info("Index #{index} created at #{paths}")
-   end
-
-   def log_index_deleted(logger, index, client)
-     paths = client.transport.hosts.map do |_host|
-       "#{_host[:host]}:#{_host[:port]}"
-     end
-
-     logger.info("Index #{index} deleted at #{paths}")
-   end
- end
data/lib/metacrunch/elasticsearch/indexer.rb DELETED
@@ -1,90 +0,0 @@
- require "elasticsearch"
- require_relative "../elasticsearch"
- require_relative "./client_factory"
- require_relative "./options_helpers"
-
- class Metacrunch::Elasticsearch::Indexer
-   include Metacrunch::Elasticsearch::ClientFactory
-   include Metacrunch::Elasticsearch::OptionsHelpers
-
-   attr_accessor :bulk_size
-   attr_accessor :callbacks
-   attr_accessor :id_accessor
-   attr_accessor :index
-   attr_accessor :logger
-   attr_accessor :type
-
-   def initialize(options = {})
-     (@client_args = options).deep_symbolize_keys!
-     extract_options!(@client_args, :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type)
-     raise ArgumentError.new("You have to supply an index name!") if @index.blank?
-   end
-
-   def call(items = [])
-     logger = @logger
-
-     if (slice_size = @bulk_size || items.length) > 0
-       client = client_factory
-
-       items.each_slice(slice_size) do |_item_slice|
-         # bodies is an array to allow slicing in case of HTTP content length exceed
-         bodies = [_item_slice.inject([]) { |_memo, _item| _memo.concat bulk_item_factory(_item) }]
-
-         bulk_responses =
-           begin
-             bodies.map do |_body|
-               client.bulk body: _body
-             end
-           rescue
-             logger.info "Bulk index failed. Decreasing bulk size temporary and trying again." if logger
-
-             bodies = bodies.inject([]) do |_memo, _body|
-               # Since we have to work with the bulk request body instead if the original items
-               # the bodys length has to be a multiple of 2 in any case. .fdiv(2).fdiv(2).ceil * 2
-               # ensures this. Example 3698.fdiv(2).fdiv(2).fdiv(2).ceil * 2 == 1850
-               _memo.concat(_body.each_slice(_body.length.fdiv(2).fdiv(2).ceil * 2).to_a)
-             end
-
-             retry
-           end
-
-         bulk_responses.each do |_bulk_response|
-           log_items_indexed(logger, _bulk_response["items"].length, client) if logger
-
-           if after_indexed_callback = (@callbacks || {})[:after_indexed]
-             _item_slice.zip(_bulk_response["items"]).each do |_item, _item_response|
-               after_indexed_callback.call(_item, _item_response)
-             end
-           end
-         end
-       end
-     end
-   end
-
-   private
-
-   def bulk_item_factory(item)
-     [
-       { index: { _index: @index, _type: @type, _id: id(item) }.compact },
-       item.to_h
-     ]
-   end
-
-   def id(item)
-     if @id_accessor
-       if @id_accessor.respond_to?(:call)
-         @id_accessor.call(item)
-       else
-         item[@id_accessor]
-       end
-     end
-   end
-
-   def log_items_indexed(logger, amount, client)
-     paths = client.transport.hosts.map do |_host|
-       "#{_host[:host]}:#{_host[:port]}/#{@index}/#{@type}"
-     end
-
-     logger.info("Indexed #{amount} items to #{paths}")
-   end
- end
data/lib/metacrunch/elasticsearch/options_helpers.rb DELETED
@@ -1,30 +0,0 @@
- require_relative "../elasticsearch"
-
- module Metacrunch::Elasticsearch::OptionsHelpers
-   def extract_options!(options, *keys)
-     keys = keys
-       .map do |_key|
-         _key == :_client_options_ ? [:host, :hosts, :url, :urls] : _key
-       end
-       .flatten
-
-     options
-       .delete_if do |_key, _value|
-         if keys.include?(_key)
-           instance_variable_set("@#{_key}", _value)
-           true # else if _value is falsy, the key does not get deleted
-         end
-       end
-   end
-
-   def normalize_options!(options)
-     {
-       index: options[:index],
-       body: options.select { |_key, _| _key != :index }
-     }
-     .tap(&:compact!)
-     .try do |_result|
-       options.clear.merge!(_result)
-     end
-   end
- end
data/lib/metacrunch/elasticsearch/reader.rb DELETED
@@ -1,63 +0,0 @@
- require "elasticsearch"
- require_relative "../elasticsearch"
-
- module Metacrunch
-   module Elasticsearch
-     class Reader
-
-       DEFAULT_SCAN_SIZE = 250
-       DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
-
-
-       def initialize(uri, body, log: false)
-         unless uri.starts_with?("elasticsearch://")
-           raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
-         end
-
-         @uri = URI(uri)
-         @body = body
-         @log = log
-       end
-
-       def each(&block)
-         return enum_for(__method__) unless block_given?
-
-         search_result = client.search({
-           body: @body,
-           index: @uri.index,
-           type: @uri.type,
-           scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
-           search_type: "scan",
-           size: DEFAULT_SCAN_SIZE
-         })
-
-         while (
-           search_result = client.scroll(
-             scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
-             scroll_id: search_result["_scroll_id"]
-           ) and # don't use &&, the semantic of 'and' is important here
-           search_result["hits"]["hits"].present?
-         ) do
-           search_result["hits"]["hits"].each do |_hit|
-             yield(_hit)
-           end
-         end
-       end
-
-       def count
-         client.count({
-           body: { query: @body[:query] },
-           index: @uri.index,
-           type: @uri.type
-         })["count"]
-       end
-
-       private
-
-       def client
-         @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
-       end
-
-     end
-   end
- end
data/lib/metacrunch/elasticsearch/searcher.rb DELETED
@@ -1,56 +0,0 @@
- require "elasticsearch"
- require_relative "../elasticsearch"
- require_relative "./client_factory"
- require_relative "./options_helpers"
-
- class Metacrunch::Elasticsearch::Searcher
-   include Enumerable
-   include Metacrunch::Elasticsearch::ClientFactory
-   include Metacrunch::Elasticsearch::OptionsHelpers
-
-   DEFAULT_BODY = { query: { match_all: {} } }
-   DEFAULT_SCAN_SIZE = 200 # per shard
-   DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
-
-   attr_accessor :bulk_size
-   attr_accessor :index
-   attr_accessor :scan_size
-   attr_accessor :scroll_expiry_time
-   attr_accessor :type
-
-   def initialize(options = {})
-     options.deep_symbolize_keys!
-     extract_options!(options, :_client_options_, :bulk_size, :index, :scan_size, :scroll_expiry_time, :type)
-     @body = options.presence || DEFAULT_BODY
-   end
-
-   def call(items = [])
-     @docs_enumerator ||= @bulk_size ? each_slice(@bulk_size) : [each.to_a].to_enum
-     items.concat(@docs_enumerator.next)
-   end
-
-   def each
-     return enum_for(__method__) unless block_given?
-     client = client_factory
-
-     search_result = client.search({
-       body: @body,
-       index: @index,
-       scroll: "#{@scroll_expiry_time || DEFAULT_SCROLL_EXPIRY_TIME}s",
-       search_type: "scan",
-       size: @scan_size || DEFAULT_SCAN_SIZE
-     })
-
-     while (
-       search_result = client.scroll(
-         scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
-         scroll_id: search_result["_scroll_id"]
-       ) and # don't use &&, the semantic of and is important here
-       search_result["hits"]["hits"].present?
-     ) do
-       search_result["hits"]["hits"].each do |_hit|
-         yield _hit
-       end
-     end
-   end
- end
data/lib/metacrunch/elasticsearch/uri.rb DELETED
@@ -1,31 +0,0 @@
- require "uri"
- require_relative "../elasticsearch"
-
-
- module Metacrunch
-   module Elasticsearch
-     class URI < URI::Generic
-
-       DEFAULT_PORT = 9200
-
-       def index
-         splitted_path[0]
-       end
-
-       def type
-         splitted_path[1]
-       end
-
-       private
-
-       def splitted_path
-         path.split("/").map(&:presence).compact
-       end
-
-     end
-   end
- end
-
- module URI
-   @@schemes['ELASTICSEARCH'] = Metacrunch::Elasticsearch::URI
- end
data/lib/metacrunch/elasticsearch/writer.rb DELETED
@@ -1,59 +0,0 @@
- require "elasticsearch"
- require_relative "../elasticsearch"
-
- module Metacrunch
-   module Elasticsearch
-     class Writer
-
-       def initialize(uri, log: false, bulk_size: 250, autoflush: true)
-         unless uri.starts_with?("elasticsearch://")
-           raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
-         end
-
-         @uri = URI(uri)
-         @log = log
-         @bulk_size = bulk_size
-         @buffer = []
-         @autoflush = autoflush
-       end
-
-       def write(data, options = {})
-         id = data.delete(:id) || data.delete(:_id)
-         raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id
-
-         @buffer << {
-           _index: @uri.index,
-           _type: @uri.type,
-           _id: id,
-           data: data
-         }
-
-         flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size
-
-         true
-       end
-
-       def flush
-         if @buffer.length > 0
-           result = client.bulk(body: @buffer.inject([]){ |_body, _data| _body << { index: _data } })
-           raise RuntimeError if result["errors"]
-         end
-
-         true
-       ensure
-         @buffer = []
-       end
-
-       def close
-         flush
-       end
-
-       private
-
-       def client
-         @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
-       end
-
-     end
-   end
- end
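
For contrast with the Source and Destination sketches above, this is roughly how the removed 3.x classes were driven (a hypothetical sketch reconstructed from the deleted code, not taken from the gem's documentation): Reader and Writer parsed an elasticsearch:// URI and created their own client internally, whereas 4.0.0 expects the caller to hand an Elasticsearch::Client to Source and Destination.

# Hypothetical 3.x-style usage of the removed Reader and Writer (names made up)
reader = Metacrunch::Elasticsearch::Reader.new(
  "elasticsearch://localhost:9200/books/book",
  { query: { match_all: {} } }
)

writer = Metacrunch::Elasticsearch::Writer.new("elasticsearch://localhost:9200/books_copy/book")

reader.each do |hit|
  # Writer#write required an :id or :_id key and buffered bulk requests itself
  writer.write(hit["_source"].merge(_id: hit["_id"]))
end

writer.close  # flushes whatever is left in the buffer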