elasticated 1.2.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +123 -1
  3. data/elasticated.gemspec +1 -0
  4. data/lib/elasticated.rb +18 -3
  5. data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
  6. data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
  7. data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
  8. data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
  9. data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
  10. data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
  11. data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
  12. data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
  13. data/lib/elasticated/aggregations/subaggregated.rb +10 -1
  14. data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
  15. data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
  16. data/lib/elasticated/configurable.rb +13 -2
  17. data/lib/elasticated/configuration.rb +6 -0
  18. data/lib/elasticated/document.rb +2 -1
  19. data/lib/elasticated/helpers.rb +18 -0
  20. data/lib/elasticated/loggers/default_logger.rb +27 -0
  21. data/lib/elasticated/loggers/silent_logger.rb +27 -0
  22. data/lib/elasticated/query.rb +8 -0
  23. data/lib/elasticated/query_aggregations.rb +3 -4
  24. data/lib/elasticated/repository.rb +31 -30
  25. data/lib/elasticated/repository/intelligent_search.rb +46 -0
  26. data/lib/elasticated/repository/normal_search.rb +40 -0
  27. data/lib/elasticated/repository/resumable_search.rb +58 -0
  28. data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
  29. data/lib/elasticated/repository/scroll_search.rb +45 -0
  30. data/lib/elasticated/repository/search.rb +45 -0
  31. data/lib/elasticated/repository/single_page_search.rb +13 -0
  32. data/lib/elasticated/results.rb +43 -25
  33. data/lib/version.rb +11 -1
  34. data/spec/aggregation_spec.rb +58 -32
  35. data/spec/document_spec.rb +4 -4
  36. data/spec/intelligent_search_spec.rb +88 -0
  37. data/spec/query_spec.rb +2 -2
  38. data/spec/results_spec.rb +9 -9
  39. metadata +38 -5
  40. data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
  41. data/lib/elasticated/default_logger.rb +0 -27
@@ -15,7 +15,13 @@ module Elasticated
15
15
  end
16
16
 
17
17
  def parse(response)
18
- response['value'] || 0
18
+ response[result_key] || 0
19
+ end
20
+
21
+ protected
22
+
23
+ def result_key
24
+ 'value'
19
25
  end
20
26
 
21
27
  end
@@ -0,0 +1,13 @@
module Elasticated
  # Aggregation that asks for the `stats` metric of a single field.
  class StatsAggregation < Aggregation

    # Builds the request fragment for this aggregation.
    #
    # @return [Hash] `{ stats: { field: <field> } }`, where `field` is
    #   whatever the `field` reader (defined outside this file) returns.
    def build
      stats_clause = { field: field }
      { stats: stats_clause }
    end

    # Hands the aggregation's response back exactly as received; no
    # post-processing is applied here.
    #
    # @param response [Object] the raw response for this aggregation
    # @return [Object] the same object that was passed in
    def parse(response)
      response
    end

  end
end
@@ -20,7 +20,16 @@ module Elasticated
20
20
  end
21
21
 
22
22
  def parse_subaggregations(response)
23
- _subaggregations.parse response
23
+ if compact and _subaggregations.one?
24
+ single_subaggregation = _subaggregations.first
25
+ single_subaggregation.parse(response[single_subaggregation.name.to_s])
26
+ else
27
+ _subaggregations.parse response
28
+ end
29
+ end
30
+
31
+ def compact
32
+ false
24
33
  end
25
34
 
26
35
  end
@@ -16,9 +16,9 @@ module Elasticated
16
16
  def parse(response)
17
17
  # total = response['hits']['total']
18
18
  # max_score = response['hits']['max_score']
19
- # hits = response['hits']['hits'].map{ |hit| Document.from_elasticsearch_hit hit }
19
+ # hits = response['hits']['hits'].map{ |hit| Document.parse hit }
20
20
  # HitsInfo.new total, max_score, hits
21
- response['hits']['hits'].map{ |hit| Document.from_elasticsearch_hit hit }
21
+ response['hits']['hits'].map{ |hit| Document.parse hit }
22
22
  end
23
23
 
24
24
  end
@@ -1,5 +1,5 @@
1
1
  module Elasticated
2
- class CountDistinctAggregation < SingleValueAggregation
2
+ class ValueCountAggregation < SingleValueAggregation
3
3
 
4
4
  # override
5
5
  def default_name
@@ -1,9 +1,20 @@
1
1
  module Elasticated
2
2
  module Configurable
3
3
 
4
- def log
5
- Configuration.logger ||= DefaultLogger.new
4
+ def self.delegated(attr_name)
5
+ define_method attr_name do
6
+ instance_variable_get("@#{attr_name}") || Configuration.send(attr_name)
7
+ end
8
+ define_method "#{attr_name}=" do |value|
9
+ instance_variable_set "@#{attr_name}", value
10
+ end
6
11
  end
7
12
 
13
+ delegated :logger
14
+ alias_method :log, :logger
15
+ delegated :scroll_expiration_time
16
+ delegated :scroll_page_size
17
+ delegated :search_page_size
18
+
8
19
  end
9
20
  end
@@ -6,5 +6,11 @@ module Elasticated
6
6
  attr_accessor :scroll_expiration_time, :scroll_page_size, :search_page_size
7
7
  end
8
8
 
9
+ # defaults
10
+ self.logger = Loggers::SilentLogger.new
11
+ self.scroll_expiration_time = '3m'
12
+ self.scroll_page_size = 500
13
+ self.search_page_size = 1000
14
+
9
15
  end
10
16
  end
@@ -14,7 +14,7 @@ module Elasticated
14
14
  ret
15
15
  end
16
16
 
17
- def from_elasticsearch_hit(hit)
17
+ def parse(hit)
18
18
  document = new hit['_source']
19
19
  document.id = hit['_id']
20
20
  document.type = hit['_type']
@@ -23,6 +23,7 @@ module Elasticated
23
23
  document.version = hit['_version']
24
24
  document
25
25
  end
26
+ alias_method :from_elasticsearch_hit, :parse
26
27
 
27
28
  end
28
29
 
@@ -24,5 +24,23 @@ module Elasticated
24
24
  duplicate
25
25
  end
26
26
 
27
+ def self.unscoped_class_name(klass)
28
+ klass.name.gsub /^.*\:\:/, ''
29
+ end
30
+
31
+ def self.hash_sum(*hashes)
32
+ hashes = hashes.compact
33
+ return 0 if hashes.empty?
34
+ hashes.map(&:keys).reduce(:|).each_with_object({}) do |key, result|
35
+ result[key] =
36
+ if hashes.any? { |h| h[key].is_a?(Hash) }
37
+ new_args = hashes.map { |h| h[key] }
38
+ hash_sum(*new_args)
39
+ else
40
+ hashes.map { |h| h.fetch(key, 0) }.reduce(:+)
41
+ end
42
+ end
43
+ end
44
+
27
45
  end
28
46
  end
@@ -0,0 +1,27 @@
module Elasticated
  module Loggers
    # Minimal logger that prints every message on its own line, prefixed
    # with the upper-cased severity ("TRACE", "DEBUG", "INFO", "WARN",
    # "ERROR").
    #
    # By default it writes to whatever `$stdout` is at the moment of the
    # call, which is what a bare `puts` does. An alternative destination can
    # be injected for tests or for redirecting the output.
    class DefaultLogger

      # @param io [#puts, nil] destination for the log lines; when nil the
      #   current `$stdout` is looked up on every call.
      def initialize(io = nil)
        @io = io
      end

      # @param message [#to_s] text to print after the "TRACE" prefix
      # @return [nil]
      def trace(message)
        write 'TRACE', message
      end

      # @param message [#to_s] text to print after the "DEBUG" prefix
      # @return [nil]
      def debug(message)
        write 'DEBUG', message
      end

      # @param message [#to_s] text to print after the "INFO" prefix
      # @return [nil]
      def info(message)
        write 'INFO', message
      end

      # @param message [#to_s] text to print after the "WARN" prefix
      # @return [nil]
      def warn(message)
        write 'WARN', message
      end

      # @param message [#to_s] text to print after the "ERROR" prefix
      # @return [nil]
      def error(message)
        write 'ERROR', message
      end

      private

      # Single place where a log line is formatted and emitted.
      def write(level, message)
        # Resolve $stdout lazily so later reassignments of the global are
        # honoured, exactly as with Kernel#puts.
        (@io || $stdout).puts "#{level} #{message}"
        nil
      end

    end
  end
end
@@ -0,0 +1,27 @@
module Elasticated
  module Loggers
    # Null-object logger: it answers the same five severity methods as
    # DefaultLogger (trace, debug, info, warn, error) but discards every
    # message. Each method takes exactly one argument and returns nil.
    class SilentLogger

      # @param _message [Object] ignored
      # @return [nil]
      def trace(_message)
        # nothing to do
      end

      # @param _message [Object] ignored
      # @return [nil]
      def debug(_message)
        # nothing to do
      end

      # @param _message [Object] ignored
      # @return [nil]
      def info(_message)
        # nothing to do
      end

      # @param _message [Object] ignored
      # @return [nil]
      def warn(_message)
        # nothing to do
      end

      # @param _message [Object] ignored
      # @return [nil]
      def error(_message)
        # nothing to do
      end

    end
  end
end
@@ -76,6 +76,14 @@ module Elasticated
76
76
  !!_size
77
77
  end
78
78
 
79
+ def heavy_for?(repository)
80
+ !_size || _size > repository.search_page_size
81
+ end
82
+
83
+ def has_offset?
84
+ _from && _from > 0
85
+ end
86
+
79
87
  def sorted?
80
88
  !!_sort
81
89
  end
@@ -1,6 +1,9 @@
1
1
  module Elasticated
2
2
  class QueryAggregations
3
3
 
4
+ extend Forwardable
5
+ def_delegators :@_aggregations, :first, :one?, :empty?
6
+
4
7
  include Clonable
5
8
  include BlockEvaluation
6
9
 
@@ -30,10 +33,6 @@ module Elasticated
30
33
  end
31
34
  end
32
35
 
33
- def empty?
34
- _aggregations.empty?
35
- end
36
-
37
36
  private
38
37
 
39
38
  def get_aggregation_class(agg_name)
@@ -1,5 +1,6 @@
1
1
  module Elasticated
2
2
  class Repository
3
+ include Configurable
3
4
 
4
5
  # child can implement 'execute(action, query, opts)'
5
6
  # child can implement 'prepare(action, document, opts)'
@@ -42,6 +43,14 @@ module Elasticated
42
43
  prepare :update, document, opts
43
44
  end
44
45
 
46
+ def prepare_search(query, opts={})
47
+ execute :prepare_search, query, opts
48
+ end
49
+
50
+ def restore_search(scroll_id)
51
+ ResumableSearch.from_scroll_id self, scroll_id
52
+ end
53
+
45
54
  protected
46
55
 
47
56
  # write actions
@@ -90,38 +99,30 @@ module Elasticated
90
99
  end
91
100
 
92
101
  def _exec_paginated_search(query, aggregated, opts={})
93
- body = aggregated ? query.build_for_aggregated_search : query.build_for_search
94
- if query.limited?
95
- response = client.search body, opts
96
- Results.from_elasticsearch_response response, query
97
- elsif query.sorted? || query.aggregated?
98
- # normal pagination
99
- page_size = Configuration.scroll_page_size || 50
100
- current_page = 1
101
- loop do
102
- offset = page_size * (current_page - 1)
103
- response = client.search body, opts.merge(size: page_size, from: offset)
104
- if current_page == 1
105
- results = Results.from_elasticsearch_response response, query
106
- else
107
- results.append_results_from response
108
- end
109
- total_pages = (response['hits']['total'] / page_size.to_f).ceil
110
- break if current_page >= total_pages
111
- current_page += 1
112
- body = query.build_for_search if aggregated
113
- end
114
- results
102
+ raise "The 'size' param is not allowed in opts, it must be setted in the query itself" if opts[:size]
103
+ raise "The 'from' param is not allowed in opts, it must be setted in the query itself" if opts[:from]
104
+ search_method = opts.delete :as
105
+ search = case search_method
106
+ when nil
107
+ IntelligentSearch.new self, query, aggregated, opts
108
+ when :single_page
109
+ SinglePageSearch.new self, query, aggregated, opts
110
+ when :normal
111
+ NormalSearch.new self, query, aggregated, opts
112
+ when :scroll
113
+ ScrollSearch.new self, query, aggregated, opts
114
+ when :scan_and_scroll
115
+ ScanScrollSearch.new self, query, aggregated, opts
115
116
  else
116
- # scan & scroll
117
- scroll_expiration_time = Configuration.scroll_expiration_time || '3m'
118
- page_size = Configuration.scroll_page_size || 1000
119
- response = client.search body, opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: page_size)
120
- while response = client.scroll(response['_scroll_id'], scroll: scroll_expiration_time) and not response['hits']['hits'].empty? do
121
- results = results ? results.append_results_from(response) : Results.from_elasticsearch_response(response, query)
122
- end
123
- results ? results : Results.from_elasticsearch_response(response, query)
117
+ raise "Invalid search method: '#{search_method}'"
124
118
  end
119
+ search.execute
120
+ end
121
+
122
+ def _exec_prepare_search(query, opts={})
123
+ raise "The query for a resumable search should not have the parameter 'size'" if query.limited? || opts[:size]
124
+ raise "The query for a resumable search should not have the parameter 'from'" if query.has_offset? || opts[:from]
125
+ ResumableSearch.new self, query, false, opts
125
126
  end
126
127
 
127
128
  # abstract methods
@@ -0,0 +1,46 @@
module Elasticated
  class Repository
    # Search strategy that inspects the query and delegates to the concrete
    # strategy that fits it best.
    class IntelligentSearch < Search
      include Configurable

      # INTELLIGENT SEARCH CASES
      # ------------ without sorting
      # without size, without offset => scan_scroll
      # with size,    without offset => single_page or scan_scroll
      # without size, with offset    => normal_pagination   # TODO: check whether scroll can be used with an offset
      # with size,    with offset    => single_page or normal_pagination   # TODO: check whether scroll can be used with an offset
      # ------------ with sorting
      # without size, without offset => scroll
      # with size,    without offset => single_page or scroll
      # without size, with offset    => normal_pagination   # TODO: check whether scroll can be used with an offset
      # with size,    with offset    => single_page or normal_pagination   # TODO: check whether scroll can be used with an offset

      # If the query is aggregated and the chosen strategy is a scroll-like
      # alternative, a 2-step search process is required.

      # Picks the strategy class for the current query and instantiates it
      # with the same arguments this search was built with.
      #
      # Checks run in this order: a limited query that is not "heavy" for
      # the repository gets a single page; otherwise an offset forces normal
      # pagination; otherwise a sorted query scrolls; everything else uses
      # scan & scroll.
      def best_search_method
        strategy_class =
          if query.limited? && !query.heavy_for?(repository)
            SinglePageSearch
          elsif query.has_offset?
            NormalSearch
          elsif query.sorted?
            ScrollSearch
          else
            ScanScrollSearch
          end
        strategy_class.new repository, query, aggregated, opts
      end

      # Chooses the strategy, logs the choice at debug level together with
      # the built query as JSON, and returns whatever the chosen strategy's
      # `execute` returns.
      def execute
        chosen = best_search_method
        chosen_name = Helpers.unscoped_class_name chosen.class
        log.debug "#{chosen_name} selected as best search method for query #{query.build.to_json}"
        chosen.execute
      end

    end
  end
end
@@ -0,0 +1,40 @@
module Elasticated
  class Repository
    # Offset-based pagination: issues consecutive searches, each with an
    # explicit size and offset, and accumulates every page into one result.
    class NormalSearch < Search

      # Runs the paginated search.
      #
      # The first request carries the aggregations when `aggregated` is
      # truthy; the following pages use the plain search body. Paging stops
      # when the computed number of pages has been fetched or when a page
      # comes back with fewer hits than were asked for.
      #
      # NOTE(review): `override!` is defined outside this file. It is called
      # for its side effect on `body` and is assumed to apply the given size
      # and offset to the request - confirm against Search.
      #
      # @return [Object] the result of `Results.parse` for the first page,
      #   with every later page appended to it
      def execute
        page_size = repository.search_page_size
        # A query without an explicit offset starts at the first hit. Using
        # 0 instead of nil keeps the arithmetic below from raising when this
        # strategy is forced on a query that has no offset.
        initial_offset = query._from || 0
        limited = query.limited?

        body = aggregated ? query.build_for_aggregated_search : query.build_for_search

        # Never ask for more hits than the query itself allows, even on the
        # very first page.
        first_size = limited ? [page_size, query._size].min : page_size
        override! body, first_size, initial_offset
        results = Results.parse client.search(body, opts), query

        # How many hits are wanted in total: the query's own size when it has
        # one, otherwise everything from the offset to the end.
        target_size = limited ? query._size : results.hits.total - initial_offset
        total_pages = (target_size / page_size.to_f).ceil

        # Aggregations were already collected with the first page.
        body = query.build_for_search

        (1...total_pages).each do |page|
          already_requested = page_size * page
          offset = initial_offset + already_requested
          # The last page of a limited query may be shorter than a full page.
          size = limited ? [page_size, target_size - already_requested].min : page_size
          override! body, size, offset

          new_results = Results.parse client.search(body, opts)
          results.append new_results
          # A short page means the index ran out of hits early.
          break if new_results.count < size
        end

        results
      end

    end
  end
end
@@ -0,0 +1,58 @@
module Elasticated
  class Repository
    # Scroll-based search that can be consumed page by page: `start` opens
    # the scroll, `fetch` pulls the next page, and the current `scroll_id`
    # can be used to rebuild the search later via `from_scroll_id`.
    class ResumableSearch < Search

      attr_accessor :scroll_id, :completed

      # Rebuilds a search from a previously obtained scroll id. The new
      # instance has no query (nil is passed in its place).
      #
      # NOTE(review): the first argument is forwarded as the first
      # constructor argument; Repository#restore_search passes the
      # repository itself here - confirm the parameter name is intended.
      def self.from_scroll_id(client, scroll_id)
        new(client, nil).tap { |search| search.scroll_id = scroll_id }
      end

      # Opens the scroll and returns the first page of results.
      #
      # When the search is aggregated, the aggregations are requested in a
      # separate call and appended to the first page. The search is marked
      # as completed right away if the first page holds fewer documents than
      # the scroll page size.
      def start
        aggregation_results = nil
        if aggregated
          aggregation_body = query.build_for_aggregations
          aggregation_results = Results.parse client.search(aggregation_body, opts), query
        end

        search_body = query.build_for_search
        scroll_opts = opts.merge(scroll: scroll_expiration_time, size: scroll_page_size)
        first_page = Results.parse client.search(search_body, scroll_opts)

        first_page.append aggregation_results if aggregation_results
        self.scroll_id = first_page.scroll_id
        mark_completed! if first_page.documents.count < scroll_page_size
        first_page
      end

      # Pulls the next page of the open scroll and stores the scroll id that
      # comes back with it. An empty page marks the search as completed.
      #
      # Raises a RuntimeError when there is no scroll id or when the search
      # has already been completed.
      def fetch
        raise "No scroll_id present" unless scroll_id
        raise "No more information to fetch: scroll completed" if completed?

        page = Results.parse client.scroll(scroll_id, scroll: scroll_expiration_time)
        self.scroll_id = page.scroll_id
        mark_completed! if page.documents.empty?
        page
      end

      # @return [Boolean] true once the search has been marked as completed
      def completed?
        completed ? true : false
      end

      # Runs the whole scroll in one go: starts it and keeps appending pages
      # until the search is completed.
      def execute
        results = start
        results.append(fetch) until completed?
        results
      end

      protected

      def mark_completed!
        self.completed = true
      end

    end
  end
end