elasticated 1.2.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +123 -1
- data/elasticated.gemspec +1 -0
- data/lib/elasticated.rb +18 -3
- data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
- data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
- data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
- data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
- data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
- data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
- data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
- data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
- data/lib/elasticated/aggregations/subaggregated.rb +10 -1
- data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
- data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
- data/lib/elasticated/configurable.rb +13 -2
- data/lib/elasticated/configuration.rb +6 -0
- data/lib/elasticated/document.rb +2 -1
- data/lib/elasticated/helpers.rb +18 -0
- data/lib/elasticated/loggers/default_logger.rb +27 -0
- data/lib/elasticated/loggers/silent_logger.rb +27 -0
- data/lib/elasticated/query.rb +8 -0
- data/lib/elasticated/query_aggregations.rb +3 -4
- data/lib/elasticated/repository.rb +31 -30
- data/lib/elasticated/repository/intelligent_search.rb +46 -0
- data/lib/elasticated/repository/normal_search.rb +40 -0
- data/lib/elasticated/repository/resumable_search.rb +58 -0
- data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
- data/lib/elasticated/repository/scroll_search.rb +45 -0
- data/lib/elasticated/repository/search.rb +45 -0
- data/lib/elasticated/repository/single_page_search.rb +13 -0
- data/lib/elasticated/results.rb +43 -25
- data/lib/version.rb +11 -1
- data/spec/aggregation_spec.rb +58 -32
- data/spec/document_spec.rb +4 -4
- data/spec/intelligent_search_spec.rb +88 -0
- data/spec/query_spec.rb +2 -2
- data/spec/results_spec.rb +9 -9
- metadata +38 -5
- data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
- data/lib/elasticated/default_logger.rb +0 -27
@@ -0,0 +1,43 @@
module Elasticated
  class Repository
    # Search strategy that opens a scroll with the 'scan' search type and
    # collects documents by scrolling until the scroll is exhausted or the
    # query's size limit is reached.
    class ScanScrollSearch < Search

      # Runs the query's aggregations as a separate request.
      #
      # Returns a Results object parsed from that response (aggregations are
      # parsed through the query).
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Executes the search and returns a single Results object holding every
      # collected document (plus the aggregations when `aggregated` is set).
      #
      # The aggregation request is issued exactly once, through
      # `fetch_aggregations`; an earlier revision also issued it up-front and
      # then discarded that result, costing an extra round trip.
      def execute
        body = query.build_for_search
        scan_opts = opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: scroll_page_size)
        response = client.search body, scan_opts
        # The initial response provides the scroll id used by the loop below.
        results = Results.parse response
        results.append fetch_aggregations if aggregated

        doc_count = 0
        loop do
          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          break if hits.empty?
          # Trim the last page so a limited query never exceeds its size.
          if query.limited? && (doc_count + hits.count > query._size)
            new_results.documents = hits.first(query._size - doc_count)
            hits = new_results.documents
          end
          doc_count += hits.count
          # `append` also carries the newest scroll id over to `results`.
          results = results.append new_results
          break if query.limited? && doc_count >= query._size
        end
        results
      end

    end
  end
end
@@ -0,0 +1,45 @@
module Elasticated
  class Repository
    # Search strategy that pages through results with a regular scroll
    # (no 'scan' search type), fetching `scroll_page_size` documents per page.
    class ScrollSearch < Search

      # Runs the query's aggregations as a separate request.
      #
      # Returns a Results object parsed from that response (aggregations are
      # parsed through the query).
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Executes the search and returns a single Results object holding every
      # collected document (plus the aggregations when `aggregated` is set).
      #
      # For limited queries at most `query._size` documents are returned.
      def execute
        body = query.build_for_search
        size = scroll_page_size

        # Drop the query's own size/from: paging is driven by the scroll.
        override! body
        response = client.search body, opts.merge(scroll: scroll_expiration_time, size: size)
        results = Results.parse response, query
        results.append fetch_aggregations if aggregated

        target_size = query.limited? ? query._size : results.hits.total

        # The first page is requested with the scroll page size, not the
        # query's limit, so it may hold more documents than were asked for.
        if query.limited? && results.documents.count > target_size
          results.documents = results.documents.first(target_size)
        end

        total_pages = (target_size / size.to_f).ceil

        # The initial search already delivered page 1.
        current_page = 1

        loop do
          break if current_page >= total_pages

          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          doc_count = results.documents.count
          # Trim the last page so a limited query never exceeds its size.
          new_results.documents = hits.first(target_size - doc_count) if query.limited? && (doc_count + hits.count > target_size)
          results.append new_results

          # A short page means there is nothing left to fetch.
          break if new_results.documents.count < size

          current_page += 1
        end

        results
      end

    end
  end
end
@@ -0,0 +1,45 @@
module Elasticated
  class Repository
    # Abstract base class for the search strategies.
    # Subclasses must implement 'execute()'.
    class Search

      attr_accessor :repository, :query, :aggregated, :opts

      # repository - object that exposes the client and the paging settings
      # query      - the query to run
      # aggregated - whether aggregations are requested too (default: false)
      # opts       - extra options handed to the client calls (default: {})
      def initialize(repository, query, aggregated=false, opts={})
        self.repository, self.query, self.aggregated, self.opts =
          repository, query, aggregated, opts
      end

      protected

      # Forces the :size and :from entries of a request body, in place.
      # A nil (or false) value removes the corresponding key instead.
      # Returns the same body object.
      def override!(body, size=nil, from=nil)
        if size
          body[:size] = size
        else
          body.delete(:size)
        end

        if from
          body[:from] = from
        else
          body.delete(:from)
        end

        body
      end

      # Shortcuts to the repository's client and paging settings.

      def client
        repository.client
      end

      def search_page_size
        repository.search_page_size
      end

      def scroll_expiration_time
        repository.scroll_expiration_time
      end

      def scroll_page_size
        repository.scroll_page_size
      end

    end
  end
end
@@ -0,0 +1,13 @@
module Elasticated
  class Repository
    # Search strategy that issues exactly one search request and returns
    # whatever that single response contains.
    class SinglePageSearch < Search

      # Builds the request body (including aggregations when `aggregated` is
      # set), runs it once and returns the parsed Results.
      def execute
        request_body =
          if aggregated
            query.build_for_aggregated_search
          else
            query.build_for_search
          end
        raw_response = client.search request_body, opts
        Results.parse raw_response, query
      end

    end
  end
end
data/lib/elasticated/results.rb
CHANGED
@@ -3,41 +3,59 @@ module Elasticated
|
|
3
3
|
ShardsInfo = Struct.new :total, :successful, :failed
|
4
4
|
HitsInfo = Struct.new :total, :max_score
|
5
5
|
|
6
|
-
class Results
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
6
|
+
class Results
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def parse(elasticsearch_response, query=nil)
|
10
|
+
documents = elasticsearch_response['hits']['hits'].map{ |hit| Document.parse hit }
|
11
|
+
results = new
|
12
|
+
results.documents = documents
|
13
|
+
# scroll metadata
|
14
|
+
results.scroll_id = elasticsearch_response['_scroll_id']
|
15
|
+
# cluster metadata
|
16
|
+
results.took = elasticsearch_response['took']
|
17
|
+
results.timed_out = elasticsearch_response['timed_out']
|
18
|
+
# shards metadata
|
19
|
+
shards = elasticsearch_response['_shards']
|
20
|
+
results.shards = ShardsInfo.new shards['total'], shards['successful'], shards['failed']
|
21
|
+
# search metadata
|
22
|
+
hits = elasticsearch_response['hits']
|
23
|
+
results.hits = HitsInfo.new hits['total'], hits['max_score']
|
24
|
+
# aggregations results
|
25
|
+
aggregations = elasticsearch_response['aggregations']
|
26
|
+
results.aggregations = query.parse_aggregations aggregations if query && aggregations
|
27
|
+
results
|
28
|
+
end
|
29
|
+
alias_method :from_elasticsearch_response, :parse
|
25
30
|
end
|
26
31
|
|
32
|
+
attr_accessor :scroll_id
|
27
33
|
attr_accessor :took, :timed_out
|
28
34
|
attr_accessor :shards # methods: total, successful, failed
|
29
35
|
attr_accessor :hits # methods: total, max_score
|
30
36
|
attr_accessor :documents, :aggregations
|
31
37
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
38
|
+
def append(another_results)
|
39
|
+
self.documents = documents + another_results.documents
|
40
|
+
self.aggregations = another_results.aggregations if another_results.aggregations
|
41
|
+
self.scroll_id = another_results.scroll_id if another_results.scroll_id
|
36
42
|
self
|
37
43
|
end
|
38
44
|
|
39
|
-
def sources
|
40
|
-
documents.map
|
45
|
+
def sources(with_ids=true)
|
46
|
+
documents.map do |d|
|
47
|
+
d.source.tap do |s|
|
48
|
+
s[:_id] = d.id if with_ids
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def ids
|
54
|
+
documents.map &:id
|
55
|
+
end
|
56
|
+
|
57
|
+
def count
|
58
|
+
documents.count
|
41
59
|
end
|
42
60
|
|
43
61
|
end
|
data/lib/version.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
module Elasticated
|
2
|
-
VERSION = '
|
2
|
+
VERSION = '2.0.0'
|
3
3
|
end
|
4
4
|
|
5
5
|
# Changelog
|
6
6
|
|
7
|
+
# 2.0.0
|
8
|
+
# Se renombra la aggregation 'count_distinct' por 'value_count'
|
9
|
+
# Se separan los metodos de busqueda (scroll, paginacion normal, scan & scroll) en strategies
|
10
|
+
# El metodo de busqueda ahora puede ser especificado por el usuario
|
11
|
+
# Se agrega ResumableSearch (utiliza un scroll)
|
12
|
+
# Se agregan las opciones 'include_count' y 'compact' a varias aggregations
|
13
|
+
# Se elimina la clase CountAggregation (es reemplazable por GroupAggregation)
|
14
|
+
# Se agregan las aggregations 'stats', 'missing', 'geohash_grid' y 'safe_date_histogram'
|
15
|
+
# Ahora el logger default no imprime por stdout
|
16
|
+
|
7
17
|
# 1.2.1
|
8
18
|
# Ahora se optimiza la construcción de QueryConditions cuando sólo tienen una cláusula should con una condición
|
9
19
|
|
data/spec/aggregation_spec.rb
CHANGED
@@ -144,38 +144,22 @@ module Elasticated
|
|
144
144
|
expect(agg.build).to eq expected_result
|
145
145
|
end
|
146
146
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
end
|
160
|
-
|
161
|
-
end
|
162
|
-
|
163
|
-
describe CountAggregation do
|
164
|
-
|
165
|
-
it "should build a terms aggregation over a field" do
|
166
|
-
agg = CountAggregation.new :user
|
167
|
-
expected_result = { terms: { field: :user, size: 0 } }
|
168
|
-
expect(agg.build).to eq expected_result
|
169
|
-
end
|
170
|
-
|
171
|
-
it "should build a terms aggregation over a field with custom params" do
|
172
|
-
agg = CountAggregation.new :user, script: 'your_script'
|
173
|
-
expected_result = { terms: { field: :user, size: 0, script: 'your_script' } }
|
174
|
-
expect(agg.build).to eq expected_result
|
147
|
+
it "should map a group aggregation response" do
|
148
|
+
agg = GroupAggregation.new :field
|
149
|
+
response = { 'buckets' => [
|
150
|
+
{ 'key' => 'value1', 'doc_count' => 'count1' },
|
151
|
+
{ 'key' => 'value2', 'doc_count' => 'count2' },
|
152
|
+
{ 'key' => 'value3', 'doc_count' => 'count3' }
|
153
|
+
] }
|
154
|
+
expect(agg.parse(response)).to eq({
|
155
|
+
'value1' => { 'count' => 'count1' },
|
156
|
+
'value2' => { 'count' => 'count2' },
|
157
|
+
'value3' => { 'count' => 'count3' }
|
158
|
+
})
|
175
159
|
end
|
176
160
|
|
177
|
-
it "should map a
|
178
|
-
agg =
|
161
|
+
it "should map a group aggregation response compacted" do
|
162
|
+
agg = GroupAggregation.new :field, compact: true
|
179
163
|
response = { 'buckets' => [
|
180
164
|
{ 'key' => 'value1', 'doc_count' => 'count1' },
|
181
165
|
{ 'key' => 'value2', 'doc_count' => 'count2' },
|
@@ -190,16 +174,26 @@ module Elasticated
|
|
190
174
|
|
191
175
|
end
|
192
176
|
|
193
|
-
describe
|
177
|
+
describe ValueCountAggregation do
|
194
178
|
|
195
179
|
it "should build a terms aggregation over a field" do
|
196
|
-
agg =
|
180
|
+
agg = ValueCountAggregation.new :user
|
197
181
|
expected_result = { value_count: { field: :user } }
|
198
182
|
expect(agg.build).to eq expected_result
|
199
183
|
end
|
200
184
|
|
201
185
|
end
|
202
186
|
|
187
|
+
describe MissingAggregation do
|
188
|
+
|
189
|
+
it "should build a missing aggregation over a field" do
|
190
|
+
agg = MissingAggregation.new :country
|
191
|
+
expected_result = { missing: { field: :country } }
|
192
|
+
expect(agg.build).to eq expected_result
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|
196
|
+
|
203
197
|
describe HistogramAggregation do
|
204
198
|
|
205
199
|
it "should build a histogram aggregation" do
|
@@ -474,6 +468,38 @@ module Elasticated
|
|
474
468
|
})
|
475
469
|
end
|
476
470
|
|
471
|
+
it "should map a 'range' response compacted" do
|
472
|
+
agg = RangeAggregation.new :a_field, compact: true do |a|
|
473
|
+
a.ranges do |r|
|
474
|
+
r.greater_equal :max_value, 'high_values'
|
475
|
+
r.less_equal :min_value
|
476
|
+
r.between :min_value, :max_value
|
477
|
+
end
|
478
|
+
end
|
479
|
+
response = {
|
480
|
+
'buckets' => {
|
481
|
+
'high_values' => {
|
482
|
+
'from' => 'max_value',
|
483
|
+
'doc_count' => 5
|
484
|
+
},
|
485
|
+
'less_equal_min_value' => {
|
486
|
+
'to' => 'min_value',
|
487
|
+
'doc_count' => 3
|
488
|
+
},
|
489
|
+
'between_min_value_and_max_value' => {
|
490
|
+
'from' => 'min_value',
|
491
|
+
'to' => 'max_value',
|
492
|
+
'doc_count' => 9
|
493
|
+
}
|
494
|
+
}
|
495
|
+
}
|
496
|
+
expect(agg.parse(response)).to eq({
|
497
|
+
'high_values' => 5,
|
498
|
+
'less_equal_min_value' => 3,
|
499
|
+
'between_min_value_and_max_value' => 9
|
500
|
+
})
|
501
|
+
end
|
502
|
+
|
477
503
|
end
|
478
504
|
|
479
505
|
describe SumDistinctAggregation do
|
data/spec/document_spec.rb
CHANGED
@@ -12,7 +12,7 @@ module Elasticated
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should parse an elasticsearch hit" do
|
15
|
-
document = Document.
|
15
|
+
document = Document.parse hit
|
16
16
|
expect(document.id).to eq 'AU-CLCguwlaKln07OA4x'
|
17
17
|
expect(document.index).to eq 'fbinsights-v6-2015-08'
|
18
18
|
expect(document.type).to eq 'post'
|
@@ -24,17 +24,17 @@ module Elasticated
|
|
24
24
|
context "the document source" do
|
25
25
|
|
26
26
|
it "can be accessed via methods" do
|
27
|
-
document = Document.
|
27
|
+
document = Document.parse hit
|
28
28
|
expect(document.source.page_id).to eq '127735990588679'
|
29
29
|
end
|
30
30
|
|
31
31
|
it "can be accessed via key" do
|
32
|
-
document = Document.
|
32
|
+
document = Document.parse hit
|
33
33
|
expect(document.source['page_id']).to eq '127735990588679'
|
34
34
|
end
|
35
35
|
|
36
36
|
it "can be accessed via indifferent key" do
|
37
|
-
document = Document.
|
37
|
+
document = Document.parse hit
|
38
38
|
expect(document.source[:page_id]).to eq '127735990588679'
|
39
39
|
end
|
40
40
|
|
@@ -0,0 +1,88 @@
require_relative 'spec_helper'

module Elasticated
  class Repository
    # Checks which concrete search strategy IntelligentSearch picks,
    # depending on whether the query is sorted, limited and/or offset.
    describe IntelligentSearch do

      let(:repository){ double }

      before :each do
        # Queries asking for more than this count as "heavy" in the examples below.
        allow(repository).to receive(:search_page_size).and_return 1000
      end

      # Builds a query from the given block and wraps it in an IntelligentSearch.
      def search_for(&query_block)
        query = Query.build &query_block
        IntelligentSearch.new repository, query
      end

      context "on unsorted queries" do

        it "should select 'scan & scroll' for an unlimited query without offset" do
          search = search_for {}
          expect(search.best_search_method).to be_a ScanScrollSearch
        end

        it "should select 'single page' for a limited, light-weight query without offset" do
          search = search_for { size 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'scan & scroll' for a limited, heavy query without offset" do
          search = search_for { size 9999 }
          expect(search.best_search_method).to be_a ScanScrollSearch
        end

        it "should select 'normal pagination' for an unlimited query with offset" do
          search = search_for { from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

        it "should select 'single page' for a limited, light-weight query with offset" do
          search = search_for { size 10; from 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'normal pagination' for a limited, heavy query with offset" do
          search = search_for { size 9999; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

      end

      context "on sorted queries" do

        it "should select 'scroll' for an unlimited query without offset" do
          search = search_for { sort :field }
          expect(search.best_search_method).to be_a ScrollSearch
        end

        it "should select 'single page' for a limited, light-weight query without offset" do
          search = search_for { sort :field; size 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        # The example asserts ScrollSearch, so the description names 'scroll'.
        it "should select 'scroll' for a limited, heavy query without offset" do
          search = search_for { sort :field; size 9999 }
          expect(search.best_search_method).to be_a ScrollSearch
        end

        it "should select 'normal pagination' for an unlimited query with offset" do
          search = search_for { sort :field; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

        it "should select 'single page' for a limited, light-weight query with offset" do
          search = search_for { sort :field; size 10; from 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'normal pagination' for a limited, heavy query with offset" do
          search = search_for { sort :field; size 9999; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

      end

    end
  end
end