elasticated 1.2.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +123 -1
- data/elasticated.gemspec +1 -0
- data/lib/elasticated.rb +18 -3
- data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
- data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
- data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
- data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
- data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
- data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
- data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
- data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
- data/lib/elasticated/aggregations/subaggregated.rb +10 -1
- data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
- data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
- data/lib/elasticated/configurable.rb +13 -2
- data/lib/elasticated/configuration.rb +6 -0
- data/lib/elasticated/document.rb +2 -1
- data/lib/elasticated/helpers.rb +18 -0
- data/lib/elasticated/loggers/default_logger.rb +27 -0
- data/lib/elasticated/loggers/silent_logger.rb +27 -0
- data/lib/elasticated/query.rb +8 -0
- data/lib/elasticated/query_aggregations.rb +3 -4
- data/lib/elasticated/repository.rb +31 -30
- data/lib/elasticated/repository/intelligent_search.rb +46 -0
- data/lib/elasticated/repository/normal_search.rb +40 -0
- data/lib/elasticated/repository/resumable_search.rb +58 -0
- data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
- data/lib/elasticated/repository/scroll_search.rb +45 -0
- data/lib/elasticated/repository/search.rb +45 -0
- data/lib/elasticated/repository/single_page_search.rb +13 -0
- data/lib/elasticated/results.rb +43 -25
- data/lib/version.rb +11 -1
- data/spec/aggregation_spec.rb +58 -32
- data/spec/document_spec.rb +4 -4
- data/spec/intelligent_search_spec.rb +88 -0
- data/spec/query_spec.rb +2 -2
- data/spec/results_spec.rb +9 -9
- metadata +38 -5
- data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
- data/lib/elasticated/default_logger.rb +0 -27
@@ -20,7 +20,16 @@ module Elasticated
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def parse_subaggregations(response)
|
23
|
-
_subaggregations.
|
23
|
+
if compact and _subaggregations.one?
|
24
|
+
single_subaggregation = _subaggregations.first
|
25
|
+
single_subaggregation.parse(response[single_subaggregation.name.to_s])
|
26
|
+
else
|
27
|
+
_subaggregations.parse response
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def compact
|
32
|
+
false
|
24
33
|
end
|
25
34
|
|
26
35
|
end
|
@@ -16,9 +16,9 @@ module Elasticated
|
|
16
16
|
def parse(response)
|
17
17
|
# total = response['hits']['total']
|
18
18
|
# max_score = response['hits']['max_score']
|
19
|
-
# hits = response['hits']['hits'].map{ |hit| Document.
|
19
|
+
# hits = response['hits']['hits'].map{ |hit| Document.parse hit }
|
20
20
|
# HitsInfo.new total, max_score, hits
|
21
|
-
response['hits']['hits'].map{ |hit| Document.
|
21
|
+
response['hits']['hits'].map{ |hit| Document.parse hit }
|
22
22
|
end
|
23
23
|
|
24
24
|
end
|
@@ -1,9 +1,20 @@
|
|
1
1
|
module Elasticated
|
2
2
|
module Configurable
|
3
3
|
|
4
|
-
def
|
5
|
-
|
4
|
+
def self.delegated(attr_name)
|
5
|
+
define_method attr_name do
|
6
|
+
instance_variable_get("@#{attr_name}") || Configuration.send(attr_name)
|
7
|
+
end
|
8
|
+
define_method "#{attr_name}=" do |value|
|
9
|
+
instance_variable_set "@#{attr_name}", value
|
10
|
+
end
|
6
11
|
end
|
7
12
|
|
13
|
+
delegated :logger
|
14
|
+
alias_method :log, :logger
|
15
|
+
delegated :scroll_expiration_time
|
16
|
+
delegated :scroll_page_size
|
17
|
+
delegated :search_page_size
|
18
|
+
|
8
19
|
end
|
9
20
|
end
|
@@ -6,5 +6,11 @@ module Elasticated
|
|
6
6
|
attr_accessor :scroll_expiration_time, :scroll_page_size, :search_page_size
|
7
7
|
end
|
8
8
|
|
9
|
+
# defaults
|
10
|
+
self.logger = Loggers::SilentLogger.new
|
11
|
+
self.scroll_expiration_time = '3m'
|
12
|
+
self.scroll_page_size = 500
|
13
|
+
self.search_page_size = 1000
|
14
|
+
|
9
15
|
end
|
10
16
|
end
|
data/lib/elasticated/document.rb
CHANGED
@@ -14,7 +14,7 @@ module Elasticated
|
|
14
14
|
ret
|
15
15
|
end
|
16
16
|
|
17
|
-
def
|
17
|
+
def parse(hit)
|
18
18
|
document = new hit['_source']
|
19
19
|
document.id = hit['_id']
|
20
20
|
document.type = hit['_type']
|
@@ -23,6 +23,7 @@ module Elasticated
|
|
23
23
|
document.version = hit['_version']
|
24
24
|
document
|
25
25
|
end
|
26
|
+
alias_method :from_elasticsearch_hit, :parse
|
26
27
|
|
27
28
|
end
|
28
29
|
|
data/lib/elasticated/helpers.rb
CHANGED
@@ -24,5 +24,23 @@ module Elasticated
|
|
24
24
|
duplicate
|
25
25
|
end
|
26
26
|
|
27
|
+
def self.unscoped_class_name(klass)
|
28
|
+
klass.name.gsub /^.*\:\:/, ''
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.hash_sum(*hashes)
|
32
|
+
hashes = hashes.compact
|
33
|
+
return 0 if hashes.empty?
|
34
|
+
hashes.map(&:keys).reduce(:|).each_with_object({}) do |key, result|
|
35
|
+
result[key] =
|
36
|
+
if hashes.any? { |h| h[key].is_a?(Hash) }
|
37
|
+
new_args = hashes.map { |h| h[key] }
|
38
|
+
hash_sum(*new_args)
|
39
|
+
else
|
40
|
+
hashes.map { |h| h.fetch(key, 0) }.reduce(:+)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
27
45
|
end
|
28
46
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Elasticated
|
2
|
+
module Loggers
|
3
|
+
class DefaultLogger
|
4
|
+
|
5
|
+
def trace(message)
|
6
|
+
puts "TRACE #{message}"
|
7
|
+
end
|
8
|
+
|
9
|
+
def debug(message)
|
10
|
+
puts "DEBUG #{message}"
|
11
|
+
end
|
12
|
+
|
13
|
+
def info(message)
|
14
|
+
puts "INFO #{message}"
|
15
|
+
end
|
16
|
+
|
17
|
+
def warn(message)
|
18
|
+
puts "WARN #{message}"
|
19
|
+
end
|
20
|
+
|
21
|
+
def error(message)
|
22
|
+
puts "ERROR #{message}"
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Elasticated
|
2
|
+
module Loggers
|
3
|
+
class SilentLogger
|
4
|
+
|
5
|
+
def trace(message)
|
6
|
+
# nothing to do
|
7
|
+
end
|
8
|
+
|
9
|
+
def debug(message)
|
10
|
+
# nothing to do
|
11
|
+
end
|
12
|
+
|
13
|
+
def info(message)
|
14
|
+
# nothing to do
|
15
|
+
end
|
16
|
+
|
17
|
+
def warn(message)
|
18
|
+
# nothing to do
|
19
|
+
end
|
20
|
+
|
21
|
+
def error(message)
|
22
|
+
# nothing to do
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/elasticated/query.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
module Elasticated
|
2
2
|
class QueryAggregations
|
3
3
|
|
4
|
+
extend Forwardable
|
5
|
+
def_delegators :@_aggregations, :first, :one?, :empty?
|
6
|
+
|
4
7
|
include Clonable
|
5
8
|
include BlockEvaluation
|
6
9
|
|
@@ -30,10 +33,6 @@ module Elasticated
|
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
|
-
def empty?
|
34
|
-
_aggregations.empty?
|
35
|
-
end
|
36
|
-
|
37
36
|
private
|
38
37
|
|
39
38
|
def get_aggregation_class(agg_name)
|
@@ -1,5 +1,6 @@
|
|
1
1
|
module Elasticated
|
2
2
|
class Repository
|
3
|
+
include Configurable
|
3
4
|
|
4
5
|
# child can implement 'execute(action, query, opts)'
|
5
6
|
# child can implement 'prepare(action, document, opts)'
|
@@ -42,6 +43,14 @@ module Elasticated
|
|
42
43
|
prepare :update, document, opts
|
43
44
|
end
|
44
45
|
|
46
|
+
def prepare_search(query, opts={})
|
47
|
+
execute :prepare_search, query, opts
|
48
|
+
end
|
49
|
+
|
50
|
+
def restore_search(scroll_id)
|
51
|
+
ResumableSearch.from_scroll_id self, scroll_id
|
52
|
+
end
|
53
|
+
|
45
54
|
protected
|
46
55
|
|
47
56
|
# write actions
|
@@ -90,38 +99,30 @@ module Elasticated
|
|
90
99
|
end
|
91
100
|
|
92
101
|
def _exec_paginated_search(query, aggregated, opts={})
|
93
|
-
|
94
|
-
if
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
results.append_results_from response
|
108
|
-
end
|
109
|
-
total_pages = (response['hits']['total'] / page_size.to_f).ceil
|
110
|
-
break if current_page >= total_pages
|
111
|
-
current_page += 1
|
112
|
-
body = query.build_for_search if aggregated
|
113
|
-
end
|
114
|
-
results
|
102
|
+
raise "The 'size' param is not allowed in opts, it must be setted in the query itself" if opts[:size]
|
103
|
+
raise "The 'from' param is not allowed in opts, it must be setted in the query itself" if opts[:from]
|
104
|
+
search_method = opts.delete :as
|
105
|
+
search = case search_method
|
106
|
+
when nil
|
107
|
+
IntelligentSearch.new self, query, aggregated, opts
|
108
|
+
when :single_page
|
109
|
+
SinglePageSearch.new self, query, aggregated, opts
|
110
|
+
when :normal
|
111
|
+
NormalSearch.new self, query, aggregated, opts
|
112
|
+
when :scroll
|
113
|
+
ScrollSearch.new self, query, aggregated, opts
|
114
|
+
when :scan_and_scroll
|
115
|
+
ScanScrollSearch.new self, query, aggregated, opts
|
115
116
|
else
|
116
|
-
|
117
|
-
scroll_expiration_time = Configuration.scroll_expiration_time || '3m'
|
118
|
-
page_size = Configuration.scroll_page_size || 1000
|
119
|
-
response = client.search body, opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: page_size)
|
120
|
-
while response = client.scroll(response['_scroll_id'], scroll: scroll_expiration_time) and not response['hits']['hits'].empty? do
|
121
|
-
results = results ? results.append_results_from(response) : Results.from_elasticsearch_response(response, query)
|
122
|
-
end
|
123
|
-
results ? results : Results.from_elasticsearch_response(response, query)
|
117
|
+
raise "Invalid search method: '#{search_method}'"
|
124
118
|
end
|
119
|
+
search.execute
|
120
|
+
end
|
121
|
+
|
122
|
+
def _exec_prepare_search(query, opts={})
|
123
|
+
raise "The query for a resumable search should not have the parameter 'size'" if query.limited? || opts[:size]
|
124
|
+
raise "The query for a resumable search should not have the parameter 'from'" if query.has_offset? || opts[:from]
|
125
|
+
ResumableSearch.new self, query, false, opts
|
125
126
|
end
|
126
127
|
|
127
128
|
# abstract methods
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Elasticated
|
2
|
+
class Repository
|
3
|
+
class IntelligentSearch < Search
|
4
|
+
include Configurable
|
5
|
+
|
6
|
+
# INTELLIGENT SEARCH CASES
|
7
|
+
# ------------ without sorting
|
8
|
+
# without size, without offset => scan_scroll
|
9
|
+
# with size, without offset => single_page or scan_scroll
|
10
|
+
# without size, with offset => normal_pagination # TODO analizar si se puede usar scroll con offset
|
11
|
+
# with size, with offset => single_page or normal_pagination # TODO analizar si se puede usar scroll con offset
|
12
|
+
# ------------ with sorting
|
13
|
+
# without size, without offset => scroll
|
14
|
+
# with size, without offset => single_page or scroll
|
15
|
+
# without size, with offset => normal_pagination # TODO analizar si se puede usar scroll con offset
|
16
|
+
# with size, with offset => single_page or normal_pagination # TODO analizar si se puede usar scroll con offset
|
17
|
+
|
18
|
+
# if the query is aggregated and the search strategy is use an
|
19
|
+
# scroll-like alternative, we must do a 2-step search process
|
20
|
+
|
21
|
+
def best_search_method
|
22
|
+
if query.limited? && !query.heavy_for?(repository)
|
23
|
+
SinglePageSearch.new repository, query, aggregated, opts
|
24
|
+
else
|
25
|
+
if query.has_offset?
|
26
|
+
NormalSearch.new repository, query, aggregated, opts
|
27
|
+
else
|
28
|
+
if query.sorted?
|
29
|
+
ScrollSearch.new repository, query, aggregated, opts
|
30
|
+
else
|
31
|
+
ScanScrollSearch.new repository, query, aggregated, opts
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def execute
|
38
|
+
search_method = best_search_method
|
39
|
+
search_method_name = Helpers.unscoped_class_name search_method.class
|
40
|
+
log.debug "#{search_method_name} selected as best search method for query #{query.build.to_json}"
|
41
|
+
search_method.execute
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Elasticated
|
2
|
+
class Repository
|
3
|
+
class NormalSearch < Search
|
4
|
+
|
5
|
+
def execute
|
6
|
+
body = aggregated ? query.build_for_aggregated_search : query.build_for_search
|
7
|
+
|
8
|
+
initial_offset = offset = query._from # TODO do it nicer
|
9
|
+
size = repository.search_page_size
|
10
|
+
|
11
|
+
override! body, size, offset
|
12
|
+
response = client.search body, opts
|
13
|
+
results = Results.parse response, query
|
14
|
+
|
15
|
+
target_size = query.limited? ? query._size : (results.hits.total - offset)
|
16
|
+
total_pages = (target_size / size.to_f).ceil
|
17
|
+
|
18
|
+
current_page = 1
|
19
|
+
body = query.build_for_search
|
20
|
+
|
21
|
+
loop do
|
22
|
+
break if current_page >= total_pages
|
23
|
+
offset = initial_offset + size * current_page
|
24
|
+
size = target_size - offset + initial_offset if query.limited? && offset - initial_offset + size > target_size
|
25
|
+
override! body, size, offset
|
26
|
+
|
27
|
+
response = client.search body, opts
|
28
|
+
new_results = Results.parse response
|
29
|
+
results.append new_results
|
30
|
+
break if new_results.count < size
|
31
|
+
|
32
|
+
current_page += 1
|
33
|
+
end
|
34
|
+
|
35
|
+
results
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Elasticated
|
2
|
+
class Repository
|
3
|
+
class ResumableSearch < Search
|
4
|
+
|
5
|
+
def self.from_scroll_id(client, scroll_id)
|
6
|
+
search = new client, nil
|
7
|
+
search.scroll_id = scroll_id
|
8
|
+
search
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_accessor :scroll_id, :completed
|
12
|
+
|
13
|
+
def start
|
14
|
+
# aggregations
|
15
|
+
aggregation_results = if aggregated
|
16
|
+
body = query.build_for_aggregations
|
17
|
+
response = client.search body, opts
|
18
|
+
Results.parse response, query
|
19
|
+
end
|
20
|
+
# search
|
21
|
+
body = query.build_for_search
|
22
|
+
response = client.search body, opts.merge(scroll: scroll_expiration_time, size: scroll_page_size)
|
23
|
+
results = Results.parse response
|
24
|
+
results.append aggregation_results if aggregation_results
|
25
|
+
self.scroll_id = results.scroll_id
|
26
|
+
mark_completed! if results.documents.count < scroll_page_size
|
27
|
+
results
|
28
|
+
end
|
29
|
+
|
30
|
+
def fetch
|
31
|
+
raise "No scroll_id present" unless scroll_id
|
32
|
+
raise "No more information to fetch: scroll completed" if completed?
|
33
|
+
response = client.scroll scroll_id, scroll: scroll_expiration_time
|
34
|
+
results = Results.parse response
|
35
|
+
self.scroll_id = results.scroll_id
|
36
|
+
mark_completed! if results.documents.empty?
|
37
|
+
results
|
38
|
+
end
|
39
|
+
|
40
|
+
def completed?
|
41
|
+
!!completed
|
42
|
+
end
|
43
|
+
|
44
|
+
def execute
|
45
|
+
results = start
|
46
|
+
results.append fetch while !completed?
|
47
|
+
results
|
48
|
+
end
|
49
|
+
|
50
|
+
protected
|
51
|
+
|
52
|
+
def mark_completed!
|
53
|
+
self.completed = true
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|