elasticated 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +123 -1
  3. data/elasticated.gemspec +1 -0
  4. data/lib/elasticated.rb +18 -3
  5. data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
  6. data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
  7. data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
  8. data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
  9. data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
  10. data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
  11. data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
  12. data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
  13. data/lib/elasticated/aggregations/subaggregated.rb +10 -1
  14. data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
  15. data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
  16. data/lib/elasticated/configurable.rb +13 -2
  17. data/lib/elasticated/configuration.rb +6 -0
  18. data/lib/elasticated/document.rb +2 -1
  19. data/lib/elasticated/helpers.rb +18 -0
  20. data/lib/elasticated/loggers/default_logger.rb +27 -0
  21. data/lib/elasticated/loggers/silent_logger.rb +27 -0
  22. data/lib/elasticated/query.rb +8 -0
  23. data/lib/elasticated/query_aggregations.rb +3 -4
  24. data/lib/elasticated/repository.rb +31 -30
  25. data/lib/elasticated/repository/intelligent_search.rb +46 -0
  26. data/lib/elasticated/repository/normal_search.rb +40 -0
  27. data/lib/elasticated/repository/resumable_search.rb +58 -0
  28. data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
  29. data/lib/elasticated/repository/scroll_search.rb +45 -0
  30. data/lib/elasticated/repository/search.rb +45 -0
  31. data/lib/elasticated/repository/single_page_search.rb +13 -0
  32. data/lib/elasticated/results.rb +43 -25
  33. data/lib/version.rb +11 -1
  34. data/spec/aggregation_spec.rb +58 -32
  35. data/spec/document_spec.rb +4 -4
  36. data/spec/intelligent_search_spec.rb +88 -0
  37. data/spec/query_spec.rb +2 -2
  38. data/spec/results_spec.rb +9 -9
  39. metadata +38 -5
  40. data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
  41. data/lib/elasticated/default_logger.rb +0 -27
@@ -15,7 +15,13 @@ module Elasticated
15
15
  end
16
16
 
17
17
  def parse(response)
18
- response['value'] || 0
18
+ response[result_key] || 0
19
+ end
20
+
21
+ protected
22
+
23
+ def result_key
24
+ 'value'
19
25
  end
20
26
 
21
27
  end
@@ -0,0 +1,13 @@
1
+ module Elasticated
2
+ class StatsAggregation < Aggregation
3
+
4
+ def build
5
+ { stats: { field: field } }
6
+ end
7
+
8
+ def parse(response)
9
+ response
10
+ end
11
+
12
+ end
13
+ end
@@ -20,7 +20,16 @@ module Elasticated
20
20
  end
21
21
 
22
22
  def parse_subaggregations(response)
23
- _subaggregations.parse response
23
+ if compact and _subaggregations.one?
24
+ single_subaggregation = _subaggregations.first
25
+ single_subaggregation.parse(response[single_subaggregation.name.to_s])
26
+ else
27
+ _subaggregations.parse response
28
+ end
29
+ end
30
+
31
+ def compact
32
+ false
24
33
  end
25
34
 
26
35
  end
@@ -16,9 +16,9 @@ module Elasticated
16
16
  def parse(response)
17
17
  # total = response['hits']['total']
18
18
  # max_score = response['hits']['max_score']
19
- # hits = response['hits']['hits'].map{ |hit| Document.from_elasticsearch_hit hit }
19
+ # hits = response['hits']['hits'].map{ |hit| Document.parse hit }
20
20
  # HitsInfo.new total, max_score, hits
21
- response['hits']['hits'].map{ |hit| Document.from_elasticsearch_hit hit }
21
+ response['hits']['hits'].map{ |hit| Document.parse hit }
22
22
  end
23
23
 
24
24
  end
@@ -1,5 +1,5 @@
1
1
  module Elasticated
2
- class CountDistinctAggregation < SingleValueAggregation
2
+ class ValueCountAggregation < SingleValueAggregation
3
3
 
4
4
  # override
5
5
  def default_name
@@ -1,9 +1,20 @@
1
1
  module Elasticated
2
2
  module Configurable
3
3
 
4
- def log
5
- Configuration.logger ||= DefaultLogger.new
4
+ def self.delegated(attr_name)
5
+ define_method attr_name do
6
+ instance_variable_get("@#{attr_name}") || Configuration.send(attr_name)
7
+ end
8
+ define_method "#{attr_name}=" do |value|
9
+ instance_variable_set "@#{attr_name}", value
10
+ end
6
11
  end
7
12
 
13
+ delegated :logger
14
+ alias_method :log, :logger
15
+ delegated :scroll_expiration_time
16
+ delegated :scroll_page_size
17
+ delegated :search_page_size
18
+
8
19
  end
9
20
  end
@@ -6,5 +6,11 @@ module Elasticated
6
6
  attr_accessor :scroll_expiration_time, :scroll_page_size, :search_page_size
7
7
  end
8
8
 
9
+ # defaults
10
+ self.logger = Loggers::SilentLogger.new
11
+ self.scroll_expiration_time = '3m'
12
+ self.scroll_page_size = 500
13
+ self.search_page_size = 1000
14
+
9
15
  end
10
16
  end
@@ -14,7 +14,7 @@ module Elasticated
14
14
  ret
15
15
  end
16
16
 
17
- def from_elasticsearch_hit(hit)
17
+ def parse(hit)
18
18
  document = new hit['_source']
19
19
  document.id = hit['_id']
20
20
  document.type = hit['_type']
@@ -23,6 +23,7 @@ module Elasticated
23
23
  document.version = hit['_version']
24
24
  document
25
25
  end
26
+ alias_method :from_elasticsearch_hit, :parse
26
27
 
27
28
  end
28
29
 
@@ -24,5 +24,23 @@ module Elasticated
24
24
  duplicate
25
25
  end
26
26
 
27
+ def self.unscoped_class_name(klass)
28
+ klass.name.gsub /^.*\:\:/, ''
29
+ end
30
+
31
+ def self.hash_sum(*hashes)
32
+ hashes = hashes.compact
33
+ return 0 if hashes.empty?
34
+ hashes.map(&:keys).reduce(:|).each_with_object({}) do |key, result|
35
+ result[key] =
36
+ if hashes.any? { |h| h[key].is_a?(Hash) }
37
+ new_args = hashes.map { |h| h[key] }
38
+ hash_sum(*new_args)
39
+ else
40
+ hashes.map { |h| h.fetch(key, 0) }.reduce(:+)
41
+ end
42
+ end
43
+ end
44
+
27
45
  end
28
46
  end
@@ -0,0 +1,27 @@
1
+ module Elasticated
2
+ module Loggers
3
+ class DefaultLogger
4
+
5
+ def trace(message)
6
+ puts "TRACE #{message}"
7
+ end
8
+
9
+ def debug(message)
10
+ puts "DEBUG #{message}"
11
+ end
12
+
13
+ def info(message)
14
+ puts "INFO #{message}"
15
+ end
16
+
17
+ def warn(message)
18
+ puts "WARN #{message}"
19
+ end
20
+
21
+ def error(message)
22
+ puts "ERROR #{message}"
23
+ end
24
+
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,27 @@
1
+ module Elasticated
2
+ module Loggers
3
+ class SilentLogger
4
+
5
+ def trace(message)
6
+ # nothing to do
7
+ end
8
+
9
+ def debug(message)
10
+ # nothing to do
11
+ end
12
+
13
+ def info(message)
14
+ # nothing to do
15
+ end
16
+
17
+ def warn(message)
18
+ # nothing to do
19
+ end
20
+
21
+ def error(message)
22
+ # nothing to do
23
+ end
24
+
25
+ end
26
+ end
27
+ end
@@ -76,6 +76,14 @@ module Elasticated
76
76
  !!_size
77
77
  end
78
78
 
79
+ def heavy_for?(repository)
80
+ !_size || _size > repository.search_page_size
81
+ end
82
+
83
+ def has_offset?
84
+ _from && _from > 0
85
+ end
86
+
79
87
  def sorted?
80
88
  !!_sort
81
89
  end
@@ -1,6 +1,9 @@
1
1
  module Elasticated
2
2
  class QueryAggregations
3
3
 
4
+ extend Forwardable
5
+ def_delegators :@_aggregations, :first, :one?, :empty?
6
+
4
7
  include Clonable
5
8
  include BlockEvaluation
6
9
 
@@ -30,10 +33,6 @@ module Elasticated
30
33
  end
31
34
  end
32
35
 
33
- def empty?
34
- _aggregations.empty?
35
- end
36
-
37
36
  private
38
37
 
39
38
  def get_aggregation_class(agg_name)
@@ -1,5 +1,6 @@
1
1
  module Elasticated
2
2
  class Repository
3
+ include Configurable
3
4
 
4
5
  # child can implement 'execute(action, query, opts)'
5
6
  # child can implement 'prepare(action, document, opts)'
@@ -42,6 +43,14 @@ module Elasticated
42
43
  prepare :update, document, opts
43
44
  end
44
45
 
46
+ def prepare_search(query, opts={})
47
+ execute :prepare_search, query, opts
48
+ end
49
+
50
+ def restore_search(scroll_id)
51
+ ResumableSearch.from_scroll_id self, scroll_id
52
+ end
53
+
45
54
  protected
46
55
 
47
56
  # write actions
@@ -90,38 +99,30 @@ module Elasticated
90
99
  end
91
100
 
92
101
  def _exec_paginated_search(query, aggregated, opts={})
93
- body = aggregated ? query.build_for_aggregated_search : query.build_for_search
94
- if query.limited?
95
- response = client.search body, opts
96
- Results.from_elasticsearch_response response, query
97
- elsif query.sorted? || query.aggregated?
98
- # normal pagination
99
- page_size = Configuration.scroll_page_size || 50
100
- current_page = 1
101
- loop do
102
- offset = page_size * (current_page - 1)
103
- response = client.search body, opts.merge(size: page_size, from: offset)
104
- if current_page == 1
105
- results = Results.from_elasticsearch_response response, query
106
- else
107
- results.append_results_from response
108
- end
109
- total_pages = (response['hits']['total'] / page_size.to_f).ceil
110
- break if current_page >= total_pages
111
- current_page += 1
112
- body = query.build_for_search if aggregated
113
- end
114
- results
102
+ raise "The 'size' param is not allowed in opts, it must be setted in the query itself" if opts[:size]
103
+ raise "The 'from' param is not allowed in opts, it must be setted in the query itself" if opts[:from]
104
+ search_method = opts.delete :as
105
+ search = case search_method
106
+ when nil
107
+ IntelligentSearch.new self, query, aggregated, opts
108
+ when :single_page
109
+ SinglePageSearch.new self, query, aggregated, opts
110
+ when :normal
111
+ NormalSearch.new self, query, aggregated, opts
112
+ when :scroll
113
+ ScrollSearch.new self, query, aggregated, opts
114
+ when :scan_and_scroll
115
+ ScanScrollSearch.new self, query, aggregated, opts
115
116
  else
116
- # scan & scroll
117
- scroll_expiration_time = Configuration.scroll_expiration_time || '3m'
118
- page_size = Configuration.scroll_page_size || 1000
119
- response = client.search body, opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: page_size)
120
- while response = client.scroll(response['_scroll_id'], scroll: scroll_expiration_time) and not response['hits']['hits'].empty? do
121
- results = results ? results.append_results_from(response) : Results.from_elasticsearch_response(response, query)
122
- end
123
- results ? results : Results.from_elasticsearch_response(response, query)
117
+ raise "Invalid search method: '#{search_method}'"
124
118
  end
119
+ search.execute
120
+ end
121
+
122
+ def _exec_prepare_search(query, opts={})
123
+ raise "The query for a resumable search should not have the parameter 'size'" if query.limited? || opts[:size]
124
+ raise "The query for a resumable search should not have the parameter 'from'" if query.has_offset? || opts[:from]
125
+ ResumableSearch.new self, query, false, opts
125
126
  end
126
127
 
127
128
  # abstract methods
@@ -0,0 +1,46 @@
1
+ module Elasticated
2
+ class Repository
3
+ class IntelligentSearch < Search
4
+ include Configurable
5
+
6
+ # INTELLIGENT SEARCH CASES
7
+ # ------------ without sorting
8
+ # without size, without offset => scan_scroll
9
+ # with size, without offset => single_page or scan_scroll
10
+ # without size, with offset => normal_pagination # TODO analizar si se puede usar scroll con offset
11
+ # with size, with offset => single_page or normal_pagination # TODO analizar si se puede usar scroll con offset
12
+ # ------------ with sorting
13
+ # without size, without offset => scroll
14
+ # with size, without offset => single_page or scroll
15
+ # without size, with offset => normal_pagination # TODO analizar si se puede usar scroll con offset
16
+ # with size, with offset => single_page or normal_pagination # TODO analizar si se puede usar scroll con offset
17
+
18
+ # if the query is aggregated and the search strategy is use an
19
+ # scroll-like alternative, we must do a 2-step search process
20
+
21
+ def best_search_method
22
+ if query.limited? && !query.heavy_for?(repository)
23
+ SinglePageSearch.new repository, query, aggregated, opts
24
+ else
25
+ if query.has_offset?
26
+ NormalSearch.new repository, query, aggregated, opts
27
+ else
28
+ if query.sorted?
29
+ ScrollSearch.new repository, query, aggregated, opts
30
+ else
31
+ ScanScrollSearch.new repository, query, aggregated, opts
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ def execute
38
+ search_method = best_search_method
39
+ search_method_name = Helpers.unscoped_class_name search_method.class
40
+ log.debug "#{search_method_name} selected as best search method for query #{query.build.to_json}"
41
+ search_method.execute
42
+ end
43
+
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,40 @@
1
+ module Elasticated
2
+ class Repository
3
+ class NormalSearch < Search
4
+
5
+ def execute
6
+ body = aggregated ? query.build_for_aggregated_search : query.build_for_search
7
+
8
+ initial_offset = offset = query._from # TODO do it nicer
9
+ size = repository.search_page_size
10
+
11
+ override! body, size, offset
12
+ response = client.search body, opts
13
+ results = Results.parse response, query
14
+
15
+ target_size = query.limited? ? query._size : (results.hits.total - offset)
16
+ total_pages = (target_size / size.to_f).ceil
17
+
18
+ current_page = 1
19
+ body = query.build_for_search
20
+
21
+ loop do
22
+ break if current_page >= total_pages
23
+ offset = initial_offset + size * current_page
24
+ size = target_size - offset + initial_offset if query.limited? && offset - initial_offset + size > target_size
25
+ override! body, size, offset
26
+
27
+ response = client.search body, opts
28
+ new_results = Results.parse response
29
+ results.append new_results
30
+ break if new_results.count < size
31
+
32
+ current_page += 1
33
+ end
34
+
35
+ results
36
+ end
37
+
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,58 @@
1
+ module Elasticated
2
+ class Repository
3
+ class ResumableSearch < Search
4
+
5
+ def self.from_scroll_id(client, scroll_id)
6
+ search = new client, nil
7
+ search.scroll_id = scroll_id
8
+ search
9
+ end
10
+
11
+ attr_accessor :scroll_id, :completed
12
+
13
+ def start
14
+ # aggregations
15
+ aggregation_results = if aggregated
16
+ body = query.build_for_aggregations
17
+ response = client.search body, opts
18
+ Results.parse response, query
19
+ end
20
+ # search
21
+ body = query.build_for_search
22
+ response = client.search body, opts.merge(scroll: scroll_expiration_time, size: scroll_page_size)
23
+ results = Results.parse response
24
+ results.append aggregation_results if aggregation_results
25
+ self.scroll_id = results.scroll_id
26
+ mark_completed! if results.documents.count < scroll_page_size
27
+ results
28
+ end
29
+
30
+ def fetch
31
+ raise "No scroll_id present" unless scroll_id
32
+ raise "No more information to fetch: scroll completed" if completed?
33
+ response = client.scroll scroll_id, scroll: scroll_expiration_time
34
+ results = Results.parse response
35
+ self.scroll_id = results.scroll_id
36
+ mark_completed! if results.documents.empty?
37
+ results
38
+ end
39
+
40
+ def completed?
41
+ !!completed
42
+ end
43
+
44
+ def execute
45
+ results = start
46
+ results.append fetch while !completed?
47
+ results
48
+ end
49
+
50
+ protected
51
+
52
+ def mark_completed!
53
+ self.completed = true
54
+ end
55
+
56
+ end
57
+ end
58
+ end