elasticated 1.2.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +123 -1
- data/elasticated.gemspec +1 -0
- data/lib/elasticated.rb +18 -3
- data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
- data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
- data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
- data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
- data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
- data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
- data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
- data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
- data/lib/elasticated/aggregations/subaggregated.rb +10 -1
- data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
- data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
- data/lib/elasticated/configurable.rb +13 -2
- data/lib/elasticated/configuration.rb +6 -0
- data/lib/elasticated/document.rb +2 -1
- data/lib/elasticated/helpers.rb +18 -0
- data/lib/elasticated/loggers/default_logger.rb +27 -0
- data/lib/elasticated/loggers/silent_logger.rb +27 -0
- data/lib/elasticated/query.rb +8 -0
- data/lib/elasticated/query_aggregations.rb +3 -4
- data/lib/elasticated/repository.rb +31 -30
- data/lib/elasticated/repository/intelligent_search.rb +46 -0
- data/lib/elasticated/repository/normal_search.rb +40 -0
- data/lib/elasticated/repository/resumable_search.rb +58 -0
- data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
- data/lib/elasticated/repository/scroll_search.rb +45 -0
- data/lib/elasticated/repository/search.rb +45 -0
- data/lib/elasticated/repository/single_page_search.rb +13 -0
- data/lib/elasticated/results.rb +43 -25
- data/lib/version.rb +11 -1
- data/spec/aggregation_spec.rb +58 -32
- data/spec/document_spec.rb +4 -4
- data/spec/intelligent_search_spec.rb +88 -0
- data/spec/query_spec.rb +2 -2
- data/spec/results_spec.rb +9 -9
- metadata +38 -5
- data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
- data/lib/elasticated/default_logger.rb +0 -27
@@ -0,0 +1,43 @@
|
|
1
|
+
module Elasticated
  class Repository
    # Search strategy that opens a scroll using the 'scan' search type and
    # then keeps requesting scroll pages until a page comes back without
    # documents or the query limit (when the query is limited) is reached.
    class ScanScrollSearch < Search

      # Runs the aggregations-only version of the query.
      #
      # @return [Results] the parsed response, with aggregations parsed
      #   through the query
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Executes the search and accumulates every scrolled page into a
      # single Results object.
      #
      # Aggregations, when requested, are fetched exactly once through
      # #fetch_aggregations and merged into the accumulated results.
      #
      # @return [Results] the accumulated results
      def execute
        body = query.build_for_search
        response = client.search body, opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: scroll_page_size)
        results = Results.parse response
        results.append fetch_aggregations if aggregated

        doc_count = 0
        loop do
          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          break if hits.empty?

          # Trim the page when it would push the total past the query limit.
          if query.limited? && (doc_count + hits.count > query._size)
            new_results.documents = hits.first(query._size - doc_count)
            hits = new_results.documents
          end

          doc_count += hits.count
          # Results#append also carries over the scroll id of the new page.
          results = results.append new_results
          break if query.limited? && doc_count >= query._size
        end
        results
      end

    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Elasticated
  class Repository
    # Search strategy that opens a regular scroll (no 'scan' search type)
    # and requests further scroll pages until the target amount of
    # documents has been collected or a short page is received.
    class ScrollSearch < Search

      # Runs the aggregations-only version of the query.
      #
      # @return [Results] the parsed response, with aggregations parsed
      #   through the query
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Executes the search and accumulates every page into a single
      # Results object.
      #
      # @return [Results] the accumulated results; for a limited query it
      #   never holds more than query._size documents from the search pages
      def execute
        body = query.build_for_search
        size = scroll_page_size

        # Drop the query's own size/from: paging is driven by the scroll.
        override! body
        response = client.search body, opts.merge(scroll: scroll_expiration_time, size: size)
        results = Results.parse response, query

        target_size = query.limited? ? query._size : results.hits.total

        # The first page is requested with the scroll page size rather than
        # the query size, so it can exceed the limit of a limited query.
        if query.limited? && results.documents.count > target_size
          results.documents = results.documents.first(target_size)
        end

        results.append fetch_aggregations if aggregated

        total_pages = (target_size / size.to_f).ceil
        current_page = 1

        loop do
          break if current_page >= total_pages

          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          doc_count = results.documents.count
          # Trim the page when it would push the total past the limit.
          new_results.documents = hits.first(target_size - doc_count) if query.limited? && (doc_count + hits.count > target_size)
          results.append new_results

          # A short page means there is nothing left to scroll.
          break if new_results.documents.count < size

          current_page += 1
        end

        results
      end

    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Elasticated
  class Repository
    # Abstract base class for the search strategies.
    # Children must implement 'execute()'.
    class Search

      attr_accessor :repository, :query, :aggregated, :opts

      # @param repository the object that provides the client and the
      #   paging/scroll settings read by the getters below
      # @param query the query to be executed
      # @param aggregated whether aggregations are requested as well
      # @param opts extra options handed to the client by the strategies
      def initialize(repository, query, aggregated=false, opts={})
        self.repository = repository
        self.query = query
        self.aggregated = aggregated
        self.opts = opts
      end

      # Runs the search. Must be overridden by every concrete strategy.
      #
      # @raise [NotImplementedError] always, on the abstract class
      def execute
        raise NotImplementedError, "#{self.class.name} must implement 'execute'"
      end

      protected

      # Forces the :size and :from keys of a request body, in place.
      # A nil value removes the corresponding key instead of setting it.
      #
      # @return the same (mutated) body
      def override!(body, size=nil, from=nil)
        if size
          body.merge!(size: size)
        else
          body.delete(:size)
        end
        if from
          body.merge!(from: from)
        else
          body.delete(:from)
        end
        body
      end

      # misc getters (all delegated to the repository)

      def client
        repository.client
      end

      def search_page_size
        repository.search_page_size
      end

      def scroll_expiration_time
        repository.scroll_expiration_time
      end

      def scroll_page_size
        repository.scroll_page_size
      end

    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Elasticated
  class Repository
    # Search strategy that resolves the query with one single search request.
    class SinglePageSearch < Search

      # Builds the request body (including aggregations when they were
      # requested), sends it and parses the response.
      #
      # @return [Results] the parsed response
      def execute
        request_body =
          if aggregated
            query.build_for_aggregated_search
          else
            query.build_for_search
          end
        raw_response = client.search request_body, opts
        Results.parse raw_response, query
      end

    end
  end
end
|
data/lib/elasticated/results.rb
CHANGED
@@ -3,41 +3,59 @@ module Elasticated
|
|
3
3
|
ShardsInfo = Struct.new :total, :successful, :failed
|
4
4
|
HitsInfo = Struct.new :total, :max_score
|
5
5
|
|
6
|
-
class Results
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
6
|
+
class Results
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def parse(elasticsearch_response, query=nil)
|
10
|
+
documents = elasticsearch_response['hits']['hits'].map{ |hit| Document.parse hit }
|
11
|
+
results = new
|
12
|
+
results.documents = documents
|
13
|
+
# scroll metadata
|
14
|
+
results.scroll_id = elasticsearch_response['_scroll_id']
|
15
|
+
# cluster metadata
|
16
|
+
results.took = elasticsearch_response['took']
|
17
|
+
results.timed_out = elasticsearch_response['timed_out']
|
18
|
+
# shards metadata
|
19
|
+
shards = elasticsearch_response['_shards']
|
20
|
+
results.shards = ShardsInfo.new shards['total'], shards['successful'], shards['failed']
|
21
|
+
# search metadata
|
22
|
+
hits = elasticsearch_response['hits']
|
23
|
+
results.hits = HitsInfo.new hits['total'], hits['max_score']
|
24
|
+
# aggregations results
|
25
|
+
aggregations = elasticsearch_response['aggregations']
|
26
|
+
results.aggregations = query.parse_aggregations aggregations if query && aggregations
|
27
|
+
results
|
28
|
+
end
|
29
|
+
alias_method :from_elasticsearch_response, :parse
|
25
30
|
end
|
26
31
|
|
32
|
+
attr_accessor :scroll_id
|
27
33
|
attr_accessor :took, :timed_out
|
28
34
|
attr_accessor :shards # methods: total, successful, failed
|
29
35
|
attr_accessor :hits # methods: total, max_score
|
30
36
|
attr_accessor :documents, :aggregations
|
31
37
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
38
|
+
def append(another_results)
|
39
|
+
self.documents = documents + another_results.documents
|
40
|
+
self.aggregations = another_results.aggregations if another_results.aggregations
|
41
|
+
self.scroll_id = another_results.scroll_id if another_results.scroll_id
|
36
42
|
self
|
37
43
|
end
|
38
44
|
|
39
|
-
def sources
|
40
|
-
documents.map
|
45
|
+
def sources(with_ids=true)
|
46
|
+
documents.map do |d|
|
47
|
+
d.source.tap do |s|
|
48
|
+
s[:_id] = d.id if with_ids
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def ids
|
54
|
+
documents.map &:id
|
55
|
+
end
|
56
|
+
|
57
|
+
def count
|
58
|
+
documents.count
|
41
59
|
end
|
42
60
|
|
43
61
|
end
|
data/lib/version.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
module Elasticated
|
2
|
-
VERSION = '
|
2
|
+
VERSION = '2.0.0'
|
3
3
|
end
|
4
4
|
|
5
5
|
# Changelog
|
6
6
|
|
7
|
+
# 2.0.0
|
8
|
+
# Se renombra la aggregation 'count_distinct' por 'value_count'
|
9
|
+
# Se separan los metodos de busqueda (scroll, paginacion normal, scan & scroll) en strategies
|
10
|
+
# El metodo de busqueda ahora puede ser especificado por el usuario
|
11
|
+
# Se agrega ResumableSearch (utiliza un scroll)
|
12
|
+
# Se agregan las opciones 'include_count' y 'compact' a varias aggregations
|
13
|
+
# Se elimina la clase CountAggregation (es reemplazable por GroupAggregation)
|
14
|
+
# Se agregan las aggregations 'stats', 'missing', 'geohash_grid' y 'safe_date_histogram'
|
15
|
+
# Ahora el logger default no imprime por stdout
|
16
|
+
|
7
17
|
# 1.2.1
|
8
18
|
# Ahora se optimiza la construcción de QueryConditions cuando sólo tienen una cláusula should con una condición
|
9
19
|
|
data/spec/aggregation_spec.rb
CHANGED
@@ -144,38 +144,22 @@ module Elasticated
|
|
144
144
|
expect(agg.build).to eq expected_result
|
145
145
|
end
|
146
146
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
end
|
160
|
-
|
161
|
-
end
|
162
|
-
|
163
|
-
describe CountAggregation do
|
164
|
-
|
165
|
-
it "should build a terms aggregation over a field" do
|
166
|
-
agg = CountAggregation.new :user
|
167
|
-
expected_result = { terms: { field: :user, size: 0 } }
|
168
|
-
expect(agg.build).to eq expected_result
|
169
|
-
end
|
170
|
-
|
171
|
-
it "should build a terms aggregation over a field with custom params" do
|
172
|
-
agg = CountAggregation.new :user, script: 'your_script'
|
173
|
-
expected_result = { terms: { field: :user, size: 0, script: 'your_script' } }
|
174
|
-
expect(agg.build).to eq expected_result
|
147
|
+
it "should map a group aggregation response" do
|
148
|
+
agg = GroupAggregation.new :field
|
149
|
+
response = { 'buckets' => [
|
150
|
+
{ 'key' => 'value1', 'doc_count' => 'count1' },
|
151
|
+
{ 'key' => 'value2', 'doc_count' => 'count2' },
|
152
|
+
{ 'key' => 'value3', 'doc_count' => 'count3' }
|
153
|
+
] }
|
154
|
+
expect(agg.parse(response)).to eq({
|
155
|
+
'value1' => { 'count' => 'count1' },
|
156
|
+
'value2' => { 'count' => 'count2' },
|
157
|
+
'value3' => { 'count' => 'count3' }
|
158
|
+
})
|
175
159
|
end
|
176
160
|
|
177
|
-
it "should map a
|
178
|
-
agg =
|
161
|
+
it "should map a group aggregation response compacted" do
|
162
|
+
agg = GroupAggregation.new :field, compact: true
|
179
163
|
response = { 'buckets' => [
|
180
164
|
{ 'key' => 'value1', 'doc_count' => 'count1' },
|
181
165
|
{ 'key' => 'value2', 'doc_count' => 'count2' },
|
@@ -190,16 +174,26 @@ module Elasticated
|
|
190
174
|
|
191
175
|
end
|
192
176
|
|
193
|
-
describe
|
177
|
+
describe ValueCountAggregation do
|
194
178
|
|
195
179
|
it "should build a terms aggregation over a field" do
|
196
|
-
agg =
|
180
|
+
agg = ValueCountAggregation.new :user
|
197
181
|
expected_result = { value_count: { field: :user } }
|
198
182
|
expect(agg.build).to eq expected_result
|
199
183
|
end
|
200
184
|
|
201
185
|
end
|
202
186
|
|
187
|
+
describe MissingAggregation do
|
188
|
+
|
189
|
+
it "should build a missing aggregation over a field" do
|
190
|
+
agg = MissingAggregation.new :country
|
191
|
+
expected_result = { missing: { field: :country } }
|
192
|
+
expect(agg.build).to eq expected_result
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|
196
|
+
|
203
197
|
describe HistogramAggregation do
|
204
198
|
|
205
199
|
it "should build a histogram aggregation" do
|
@@ -474,6 +468,38 @@ module Elasticated
|
|
474
468
|
})
|
475
469
|
end
|
476
470
|
|
471
|
+
it "should map a 'range' response compacted" do
|
472
|
+
agg = RangeAggregation.new :a_field, compact: true do |a|
|
473
|
+
a.ranges do |r|
|
474
|
+
r.greater_equal :max_value, 'high_values'
|
475
|
+
r.less_equal :min_value
|
476
|
+
r.between :min_value, :max_value
|
477
|
+
end
|
478
|
+
end
|
479
|
+
response = {
|
480
|
+
'buckets' => {
|
481
|
+
'high_values' => {
|
482
|
+
'from' => 'max_value',
|
483
|
+
'doc_count' => 5
|
484
|
+
},
|
485
|
+
'less_equal_min_value' => {
|
486
|
+
'to' => 'min_value',
|
487
|
+
'doc_count' => 3
|
488
|
+
},
|
489
|
+
'between_min_value_and_max_value' => {
|
490
|
+
'from' => 'min_value',
|
491
|
+
'to' => 'max_value',
|
492
|
+
'doc_count' => 9
|
493
|
+
}
|
494
|
+
}
|
495
|
+
}
|
496
|
+
expect(agg.parse(response)).to eq({
|
497
|
+
'high_values' => 5,
|
498
|
+
'less_equal_min_value' => 3,
|
499
|
+
'between_min_value_and_max_value' => 9
|
500
|
+
})
|
501
|
+
end
|
502
|
+
|
477
503
|
end
|
478
504
|
|
479
505
|
describe SumDistinctAggregation do
|
data/spec/document_spec.rb
CHANGED
@@ -12,7 +12,7 @@ module Elasticated
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should parse an elasticsearch hit" do
|
15
|
-
document = Document.
|
15
|
+
document = Document.parse hit
|
16
16
|
expect(document.id).to eq 'AU-CLCguwlaKln07OA4x'
|
17
17
|
expect(document.index).to eq 'fbinsights-v6-2015-08'
|
18
18
|
expect(document.type).to eq 'post'
|
@@ -24,17 +24,17 @@ module Elasticated
|
|
24
24
|
context "the document source" do
|
25
25
|
|
26
26
|
it "can be accessed via methods" do
|
27
|
-
document = Document.
|
27
|
+
document = Document.parse hit
|
28
28
|
expect(document.source.page_id).to eq '127735990588679'
|
29
29
|
end
|
30
30
|
|
31
31
|
it "can be accessed via key" do
|
32
|
-
document = Document.
|
32
|
+
document = Document.parse hit
|
33
33
|
expect(document.source['page_id']).to eq '127735990588679'
|
34
34
|
end
|
35
35
|
|
36
36
|
it "can be accessed via indifferent key" do
|
37
|
-
document = Document.
|
37
|
+
document = Document.parse hit
|
38
38
|
expect(document.source[:page_id]).to eq '127735990588679'
|
39
39
|
end
|
40
40
|
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
|
3
|
+
module Elasticated
  class Repository
    # Checks which concrete search strategy IntelligentSearch picks,
    # depending on whether the query is sorted, limited and/or has an offset.
    describe IntelligentSearch do

      let(:repository){ double }

      before :each do
        allow(repository).to receive(:search_page_size).and_return 1000
      end

      # Builds an IntelligentSearch over the query described by the block.
      def search_for(&query_block)
        query = Query.build(&query_block)
        IntelligentSearch.new repository, query
      end

      context "on unsorted queries" do

        it "should select 'scan & scroll' for an unlimited query without offset" do
          search = search_for {}
          expect(search.best_search_method).to be_a ScanScrollSearch
        end

        it "should select 'single page' for a limited, light-weight query without offset" do
          search = search_for { size 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'scan & scroll' for a limited, heavy query without offset" do
          search = search_for { size 9999 }
          expect(search.best_search_method).to be_a ScanScrollSearch
        end

        it "should select 'normal pagination' for an unlimited query with offset" do
          search = search_for { from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

        it "should select 'single page' for a limited, light-weight query with offset" do
          search = search_for { size 10; from 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'normal pagination' for a limited, heavy query with offset" do
          search = search_for { size 9999; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

      end

      context "on sorted queries" do

        it "should select 'scroll' for an unlimited query without offset" do
          search = search_for { sort :field }
          expect(search.best_search_method).to be_a ScrollSearch
        end

        it "should select 'single page' for a limited, light-weight query without offset" do
          search = search_for { sort :field; size 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        # Sorted queries expect the plain scroll strategy (ScrollSearch),
        # so the description names 'scroll' rather than 'scan & scroll'.
        it "should select 'scroll' for a limited, heavy query without offset" do
          search = search_for { sort :field; size 9999 }
          expect(search.best_search_method).to be_a ScrollSearch
        end

        it "should select 'normal pagination' for an unlimited query with offset" do
          search = search_for { sort :field; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

        it "should select 'single page' for a limited, light-weight query with offset" do
          search = search_for { sort :field; size 10; from 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'normal pagination' for a limited, heavy query with offset" do
          search = search_for { sort :field; size 9999; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

      end

    end
  end
end
|