metacrunch-elasticsearch 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a74d43ab5c9f961f3efd802650e78022fb51e1e8
4
- data.tar.gz: 047c7af302f42d06dc24afa823fa46d2fa67a30a
3
+ metadata.gz: 17ba1fec96c7af64b5cdf8283dfee2fe0b4eadf1
4
+ data.tar.gz: fb673a9a460c7d7f82c96dfedc6a48fd26fdc971
5
5
  SHA512:
6
- metadata.gz: ccbddc5b0846556dbbb7409e1b718e716316f56fd8b1dc46c788c10f11fe7406f46db2d1788360bb7bed5703d94885da15e7ad380da17961b2f508c37a34f84b
7
- data.tar.gz: cb7e01749ab51e45702bfced9967860376bc41ee35ef40bf78f692f8d5df57fb88eb16e954e31d6251650df9ca0a96e3fcd6b1822190e14bbbc2b34b6a7d3157
6
+ metadata.gz: 106cdda72d6b43bf2d52392b1f3d45e68bbd679427ec1ca582a265dc904d286415ce8e35a0464e867eca550661ef65232e983e72812c4bd815dbbb5c050dcf34
7
+ data.tar.gz: 7d3552381f7dbf8fab91d2d1e8a2ca8cc964304bfe2105baa760eeba71cbadd12748ffe6ef03b2a425f343fa37caf51dace45d2534eec8b783f4d30516ee658a
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.0"
4
+ - "2.1"
5
+ - "2.2"
data/Gemfile CHANGED
@@ -1,20 +1,28 @@
1
1
  source "https://rubygems.org"
2
2
 
3
+ # Specify your gem's dependencies in your gemspec
3
4
  gemspec
4
5
 
5
- gem "metacrunch", ">= 2.1.0", github: "ubpb/metacrunch", branch: "master"
6
+ group :development do
7
+ gem "bundler", ">= 1.10"
8
+ gem "rake"
9
+ gem "rspec", ">= 3.0.0", "< 4.0.0"
10
+ gem "simplecov", ">= 0.8.0"
11
+ gem "vcr", ">= 2.9.0", "< 3.0.0"
12
+ gem "webmock", ">= 1.19.0", "< 2.0.0"
6
13
 
7
- gem "rake"
8
- gem "rspec", "~> 3.2.0"
9
-
10
- if !ENV["CI"]
11
- group :development do
14
+ if !ENV["CI"]
12
15
  gem "hashdiff"
13
16
  gem "pry", "~> 0.9.12.6"
14
17
  gem "pry-byebug", "<= 1.3.2"
15
- gem "pry-rescue", "~> 1.4.1", github: "ConradIrwin/pry-rescue", branch: :master
18
+ gem "pry-rescue", "~> 1.4.2"
16
19
  gem "pry-stack_explorer", "~> 0.4.9.1"
17
20
  gem "pry-syntax-hacks", "~> 0.0.6"
18
21
  end
19
22
  end
20
23
 
24
+ group :test do
25
+ gem "codeclimate-test-reporter", require: nil
26
+ end
27
+
28
+ gem "metacrunch", github: "ubpb/metacrunch", branch: :master
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby

# Interactive console with the gem preloaded.
#
# Loads the Bundler environment and the gem's entry file
# (lib/metacrunch/elasticsearch.rb), then drops into a Pry session.
require "bundler/setup"
require "metacrunch/elasticsearch"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
require "pry"
Pry.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
#!/bin/bash
# Development bootstrap script: installs the gems listed in the Gemfile.

# Strict mode: abort on the first failing command, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,15 @@
1
+ require "elasticsearch"
2
+ require_relative "../elasticsearch"
3
+
4
# Mixin that builds an Elasticsearch client from connection settings stored in
# the including object's instance variables.
module Metacrunch::Elasticsearch::ClientFactory
  # Creates a new client on every call.
  #
  # Reads @host, @hosts, @url and @urls from the including object; settings
  # that are nil are left out of the options handed to the client.
  #
  # @return [Elasticsearch::Client]
  def client_factory
    connection_settings = { host: @host, hosts: @hosts, url: @url, urls: @urls }
    configured_settings = connection_settings.reject { |_name, value| value.nil? }

    Elasticsearch::Client.new(configured_settings)
  end
end
@@ -0,0 +1,76 @@
1
+ require "elasticsearch"
2
+ require "metacrunch/processor"
3
+ require_relative "../elasticsearch"
4
+ require_relative "./client_factory"
5
+ require_relative "./options_helpers"
6
+
7
# Pipeline processor that creates an Elasticsearch index and, optionally,
# installs a "_default_" mapping on it.
#
# Recognised options (string or symbol keys):
#   host / hosts / url / urls  - connection settings for the client
#   default_mapping            - mapping body stored under "_default_"
#   delete_existing_index      - true: drop and recreate an existing index;
#                                false: leave an existing index untouched;
#                                anything else: raise if the index exists
#   logger                     - fallback logger when the pipeline has none
#   number_of_shards / number_of_replicas - sent in the create request body
# All remaining options (including the mandatory :index) are passed through
# to the indices API calls.
class Metacrunch::Elasticsearch::IndexCreator < Metacrunch::Processor
  include Metacrunch::Elasticsearch::ClientFactory
  include Metacrunch::Elasticsearch::OptionsHelpers

  attr_accessor :default_mapping
  attr_accessor :delete_existing_index
  attr_accessor :logger

  # @param options [Hash] see the class comment
  # @raise [ArgumentError] if no index name is given
  def initialize(options = {})
    # Work on a symbolized copy: extract_options! deletes keys, and the caller
    # may reuse the same hash to configure another processor.
    @client_args = options.deep_symbolize_keys
    extract_options!(@client_args, :_client_options_, :default_mapping, :delete_existing_index, :logger, :number_of_shards, :number_of_replicas)
    raise ArgumentError, "You have to supply an index name!" if @client_args[:index].blank?
  end

  # Creates the index. The items argument is not used.
  #
  # @param items [Array] ignored
  # @param pipeline [Object, nil] its logger (if any) takes precedence over @logger
  # @raise [Metacrunch::Elasticsearch::IndexAlreadyExistsError] if the index
  #   exists and delete_existing_index is neither true nor false
  def call(items = [], pipeline = nil)
    client = client_factory
    logger = pipeline.try(:logger) || @logger

    if client.indices.exists?(@client_args)
      # Deliberately tri-state: only the literal values true and false select
      # a strategy, everything else (including nil) is treated as an error.
      case @delete_existing_index
      when true
        client.indices.delete(@client_args)
        log_index_deleted(logger, @client_args[:index], client) if logger
      when false
        return
      else
        raise Metacrunch::Elasticsearch::IndexAlreadyExistsError
      end
    end

    index_settings = {
      number_of_shards: @number_of_shards,
      number_of_replicas: @number_of_replicas
    }.compact

    client.indices.create(@client_args.merge(body: index_settings))
    log_index_created(logger, @client_args[:index], client) if logger

    if @default_mapping
      client.indices.put_mapping(
        @client_args.merge(type: "_default_", body: { _default_: @default_mapping })
      )
    end
  end

  private

  # Returns one "host:port" string per host configured on the client's transport.
  def host_paths(client)
    client.transport.hosts.map { |host| "#{host[:host]}:#{host[:port]}" }
  end

  def log_index_created(logger, index, client)
    logger.info("Index #{index} created at #{host_paths(client)}")
  end

  def log_index_deleted(logger, index, client)
    logger.info("Index #{index} deleted at #{host_paths(client)}")
  end
end
@@ -0,0 +1,91 @@
1
+ require "elasticsearch"
2
+ require "metacrunch/processor"
3
+ require_relative "../elasticsearch"
4
+ require_relative "./client_factory"
5
+ require_relative "./options_helpers"
6
+
7
# Pipeline processor that writes items to Elasticsearch through the bulk API.
#
# Recognised options (string or symbol keys):
#   host / hosts / url / urls - connection settings for the client
#   bulk_size    - number of items per bulk request (default: all items at once)
#   callbacks    - hash; :after_indexed is called with (item, item_response)
#   id_accessor  - callable receiving the item, or a key used as item[key]
#   index        - target index name (mandatory)
#   logger       - fallback logger when the pipeline has none
#   type         - document type
class Metacrunch::Elasticsearch::Indexer < Metacrunch::Processor
  include Metacrunch::Elasticsearch::ClientFactory
  include Metacrunch::Elasticsearch::OptionsHelpers

  attr_accessor :bulk_size
  attr_accessor :callbacks
  attr_accessor :id_accessor
  attr_accessor :index
  attr_accessor :logger
  attr_accessor :type

  # @param options [Hash] see the class comment
  # @raise [ArgumentError] if no index name is given
  def initialize(options = {})
    # Work on a symbolized copy: extract_options! deletes keys, and the caller
    # may reuse the same hash to configure another processor.
    @client_args = options.deep_symbolize_keys
    extract_options!(@client_args, :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type)
    raise ArgumentError, "You have to supply an index name!" if @index.blank?
  end

  # Indexes the given items in slices of bulk_size.
  #
  # @param items [Array] items responding to #to_h
  # @param pipeline [Object, nil] its logger (if any) takes precedence over @logger
  # @raise [StandardError] the client's error, once a failing bulk request
  #   cannot be split into smaller requests any more
  def call(items = [], pipeline = nil)
    logger = pipeline.try(:logger) || @logger
    slice_size = @bulk_size || items.length
    return unless slice_size > 0 # nothing to do; each_slice(0) would raise

    client = client_factory
    after_indexed = (@callbacks || {})[:after_indexed]

    items.each_slice(slice_size) do |item_slice|
      body = item_slice.flat_map { |item| bulk_item_factory(item) }

      # A slice may have been sent as several smaller requests. The responses
      # come back in item order, so a running offset maps every response back
      # to the items it belongs to.
      offset = 0

      bulk_index(client, body, logger).each do |bulk_response|
        response_items = bulk_response["items"]
        log_items_indexed(logger, response_items.length, client) if logger

        if after_indexed
          item_slice[offset, response_items.length].to_a.zip(response_items).each do |item, item_response|
            after_indexed.call(item, item_response)
          end
        end

        offset += response_items.length
      end
    end
  end

  private

  # Sends one bulk request. If it fails (e.g. because the HTTP content length
  # was exceeded), the body is cut in half and both halves are sent
  # separately, recursively. Only the failing part is re-sent; parts that
  # already succeeded are not repeated.
  #
  # @param body [Array] alternating action / document entries (even length)
  # @return [Array<Hash>] bulk responses in item order
  def bulk_index(client, body, logger)
    [client.bulk(body: body)]
  rescue
    # A single action/document pair cannot be split any further, so trying
    # again would loop forever. Surface the original error instead.
    raise if body.length <= 2

    logger.info "Bulk index failed. Decreasing bulk size temporary and trying again." if logger

    # The body consists of action/document pairs, so every part has to keep
    # an even number of entries: half of the pairs (rounded up) times two.
    pair_count = body.length / 2
    entries_per_part = ((pair_count + 1) / 2) * 2

    body.each_slice(entries_per_part).flat_map { |part| bulk_index(client, part, logger) }
  end

  # Builds the two bulk body entries (action line and document) for one item.
  def bulk_item_factory(item)
    [
      { index: { _index: @index, _type: @type, _id: id(item) }.compact },
      item.to_h
    ]
  end

  # Determines the document id for an item; nil when no id_accessor is set.
  def id(item)
    return unless @id_accessor

    @id_accessor.respond_to?(:call) ? @id_accessor.call(item) : item[@id_accessor]
  end

  def log_items_indexed(logger, amount, client)
    paths = client.transport.hosts.map do |host|
      "#{host[:host]}:#{host[:port]}/#{@index}/#{@type}"
    end

    logger.info("Indexed #{amount} items to #{paths}")
  end
end
@@ -0,0 +1,30 @@
1
+ require_relative "../elasticsearch"
2
+
3
# Helpers for splitting an options hash into instance variables and
# pass-through arguments.
module Metacrunch::Elasticsearch::OptionsHelpers
  # Moves the requested keys out of +options+ into instance variables of the
  # same name (key :foo becomes @foo). The pseudo key :_client_options_ stands
  # for :host, :hosts, :url and :urls.
  #
  # @param options [Hash] modified in place; extracted keys are removed
  # @param keys [Array<Symbol>] keys to extract
  # @return [Hash] the same +options+ hash, without the extracted keys
  def extract_options!(options, *keys)
    wanted = keys.map { |key| key == :_client_options_ ? %i[host hosts url urls] : key }.flatten

    options.delete_if do |name, value|
      next false unless wanted.include?(name)

      instance_variable_set("@#{name}", value)
      true # the block result decides deletion, so it must not depend on value
    end
  end

  # Rewrites +options+ in place into the shape { index: ..., body: ... }, where
  # body holds every entry except :index. The :index entry is omitted when nil.
  #
  # @param options [Hash] modified in place
  # @return [Hash] the same +options+ hash
  def normalize_options!(options)
    normalized = {
      index: options[:index],
      body: options.reject { |key, _value| key == :index }
    }
    normalized.delete_if { |_key, value| value.nil? }

    options.clear.merge!(normalized)
  end
end
@@ -0,0 +1,62 @@
1
+ require "elasticsearch"
2
+ require "metacrunch/processor"
3
+ require_relative "../elasticsearch"
4
+ require_relative "./client_factory"
5
+ require_relative "./options_helpers"
6
+
7
# Pipeline processor (and Enumerable) that reads documents from Elasticsearch
# with a scan/scroll search and feeds them into the pipeline.
#
# Recognised options (string or symbol keys):
#   host / hosts / url / urls - connection settings for the client
#   bulk_size          - number of hits appended per #call (default: all hits)
#   index              - index to search
#   scan_size          - size parameter of the scan request
#   scroll_expiry_time - scroll keep-alive, in seconds
#   type               - stored in @type
# All remaining options form the search body; without any, DEFAULT_BODY is used.
#
# NOTE(review): @type is extracted but not passed to the search request --
# confirm whether the search is meant to be restricted to that type.
class Metacrunch::Elasticsearch::Searcher < Metacrunch::Processor
  include Enumerable
  include Metacrunch::Elasticsearch::ClientFactory
  include Metacrunch::Elasticsearch::OptionsHelpers

  DEFAULT_BODY = { query: { match_all: {} } }
  DEFAULT_SCAN_SIZE = 200 # per shard
  DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes

  attr_accessor :bulk_size
  attr_accessor :index
  attr_accessor :scan_size
  attr_accessor :scroll_expiry_time
  attr_accessor :type

  # @param options [Hash] see the class comment
  def initialize(options = {})
    # Work on a symbolized copy: extract_options! deletes keys, and the caller
    # may reuse the same hash to configure another processor.
    search_options = options.deep_symbolize_keys
    extract_options!(search_options, :_client_options_, :bulk_size, :index, :scan_size, :scroll_expiry_time, :type)
    @body = search_options.presence || DEFAULT_BODY
  end

  # Appends the next batch of hits to +items+. Once all hits have been
  # delivered, the pipeline (if one was given) is told to terminate.
  #
  # @param items [Array] receives the hits
  # @param pipeline [Object, nil] must respond to #terminate! when given
  def call(items = [], pipeline = nil)
    @docs_enumerator ||= @bulk_size ? each_slice(@bulk_size) : [each.to_a].to_enum

    begin
      items.concat(@docs_enumerator.next)
    rescue StopIteration
      # pipeline defaults to nil, so only signal termination when there is one
      pipeline.terminate! if pipeline
    end
  end

  # Yields every hit of the search. Without a block an Enumerator is returned.
  def each
    return enum_for(__method__) unless block_given?

    client = client_factory

    # The same keep-alive is used for the initial request and for every
    # follow-up scroll request, so a configured scroll_expiry_time applies to
    # the whole scan.
    scroll_keep_alive = "#{@scroll_expiry_time || DEFAULT_SCROLL_EXPIRY_TIME}s"

    search_result = client.search(
      body: @body,
      index: @index,
      scroll: scroll_keep_alive,
      search_type: "scan",
      size: @scan_size || DEFAULT_SCAN_SIZE
    )

    # Each scroll request continues from the scroll id of the previous
    # response; the scan ends with the first response that has no hits.
    while (search_result = client.scroll(scroll: scroll_keep_alive, scroll_id: search_result["_scroll_id"]))
      hits = search_result["hits"]["hits"]
      break if hits.blank?

      hits.each { |hit| yield hit }
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module Metacrunch
2
2
  module Elasticsearch
3
- VERSION = "2.0.1"
3
+ VERSION = "2.1.0"
4
4
  end
5
5
  end
@@ -3,8 +3,16 @@ require "elasticsearch"
3
3
 
4
4
  module Metacrunch
5
5
  module Elasticsearch
6
- require_relative "./elasticsearch/uri"
6
+ require_relative "./elasticsearch/index_creator"
7
+ require_relative "./elasticsearch/indexer"
7
8
  require_relative "./elasticsearch/reader"
9
+ require_relative "./elasticsearch/searcher"
10
+ require_relative "./elasticsearch/uri"
8
11
  require_relative "./elasticsearch/writer"
12
+
13
+ #
14
+ # error classes are defined inline to not clutter source files unnecessarily
15
+ #
16
+ class IndexAlreadyExistsError < StandardError; end
9
17
  end
10
18
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2015-09-25 00:00:00.000000000 Z
12
+ date: 2015-10-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -60,12 +60,21 @@ extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
62
  - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
63
65
  - Gemfile
64
66
  - License.txt
65
67
  - Rakefile
66
68
  - Readme.md
69
+ - bin/console
70
+ - bin/setup
67
71
  - lib/metacrunch/elasticsearch.rb
72
+ - lib/metacrunch/elasticsearch/client_factory.rb
73
+ - lib/metacrunch/elasticsearch/index_creator.rb
74
+ - lib/metacrunch/elasticsearch/indexer.rb
75
+ - lib/metacrunch/elasticsearch/options_helpers.rb
68
76
  - lib/metacrunch/elasticsearch/reader.rb
77
+ - lib/metacrunch/elasticsearch/searcher.rb
69
78
  - lib/metacrunch/elasticsearch/uri.rb
70
79
  - lib/metacrunch/elasticsearch/version.rb
71
80
  - lib/metacrunch/elasticsearch/writer.rb
@@ -96,4 +105,3 @@ signing_key:
96
105
  specification_version: 4
97
106
  summary: Metacrunch elasticsearch package
98
107
  test_files: []
99
- has_rdoc: