elasticated 1.2.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +123 -1
  3. data/elasticated.gemspec +1 -0
  4. data/lib/elasticated.rb +18 -3
  5. data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
  6. data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
  7. data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
  8. data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
  9. data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
  10. data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
  11. data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
  12. data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
  13. data/lib/elasticated/aggregations/subaggregated.rb +10 -1
  14. data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
  15. data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
  16. data/lib/elasticated/configurable.rb +13 -2
  17. data/lib/elasticated/configuration.rb +6 -0
  18. data/lib/elasticated/document.rb +2 -1
  19. data/lib/elasticated/helpers.rb +18 -0
  20. data/lib/elasticated/loggers/default_logger.rb +27 -0
  21. data/lib/elasticated/loggers/silent_logger.rb +27 -0
  22. data/lib/elasticated/query.rb +8 -0
  23. data/lib/elasticated/query_aggregations.rb +3 -4
  24. data/lib/elasticated/repository.rb +31 -30
  25. data/lib/elasticated/repository/intelligent_search.rb +46 -0
  26. data/lib/elasticated/repository/normal_search.rb +40 -0
  27. data/lib/elasticated/repository/resumable_search.rb +58 -0
  28. data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
  29. data/lib/elasticated/repository/scroll_search.rb +45 -0
  30. data/lib/elasticated/repository/search.rb +45 -0
  31. data/lib/elasticated/repository/single_page_search.rb +13 -0
  32. data/lib/elasticated/results.rb +43 -25
  33. data/lib/version.rb +11 -1
  34. data/spec/aggregation_spec.rb +58 -32
  35. data/spec/document_spec.rb +4 -4
  36. data/spec/intelligent_search_spec.rb +88 -0
  37. data/spec/query_spec.rb +2 -2
  38. data/spec/results_spec.rb +9 -9
  39. metadata +38 -5
  40. data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
  41. data/lib/elasticated/default_logger.rb +0 -27
@@ -0,0 +1,43 @@
module Elasticated
  class Repository
    # Search strategy that issues the query with `search_type: 'scan'` and then
    # pulls pages through `client.scroll` until a page comes back empty or the
    # query limit (when the query is limited) has been reached.
    class ScanScrollSearch < Search

      # Runs the aggregations-only request for the query.
      #
      # @return [Results] results parsed from the aggregation response
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Executes the scan request and accumulates every scrolled page.
      #
      # The aggregation request is issued exactly once (through
      # `fetch_aggregations`); its result is merged into the scan results.
      #
      # @return [Results] accumulated documents, plus aggregations when `aggregated` is set
      def execute
        body = query.build_for_search
        scan_opts = opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: scroll_page_size)
        response = client.search body, scan_opts
        results = Results.parse response
        results.append fetch_aggregations if aggregated

        doc_count = 0
        loop do
          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          break if hits.empty?

          # Trim the page so the accumulated total never exceeds the query limit.
          if query.limited? && (doc_count + hits.count > query._size)
            new_results.documents = hits.first(query._size - doc_count)
            hits = new_results.documents
          end

          doc_count += hits.count
          # `append` also carries over the scroll id of the latest page.
          results = results.append new_results
          break if query.limited? && doc_count >= query._size
        end

        results
      end

    end
  end
end
@@ -0,0 +1,45 @@
module Elasticated
  class Repository
    # Search strategy that opens a scroll with the initial search request and
    # keeps calling `client.scroll` until the target number of documents has
    # been collected or a short page signals the end of the scroll.
    class ScrollSearch < Search

      # Runs the aggregations-only request for the query.
      #
      # @return [Results] results parsed from the aggregation response
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Executes the scrolled search.
      #
      # @return [Results] accumulated documents, plus aggregations when `aggregated` is set
      def execute
        body = query.build_for_search
        size = scroll_page_size

        # Drop size/from from the body; the page size is sent as a request option instead.
        override! body
        response = client.search body, opts.merge(scroll: scroll_expiration_time, size: size)
        results = Results.parse response, query
        results.append fetch_aggregations if aggregated

        target_size = query.limited? ? query._size : results.hits.total

        # The first page is fetched with the scroll page size, which may be larger
        # than the query limit: trim it so the limit is honored on page one too.
        if query.limited? && results.documents.count > target_size
          results.documents = results.documents.first(target_size)
        end

        total_pages = (target_size / size.to_f).ceil
        current_page = 1

        while current_page < total_pages
          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          doc_count = results.documents.count

          # Trim the page so the accumulated total never exceeds the query limit.
          if query.limited? && (doc_count + hits.count > target_size)
            new_results.documents = hits.first(target_size - doc_count)
          end
          results.append new_results

          # A page shorter than the page size means there is nothing left to scroll.
          break if new_results.documents.count < size

          current_page += 1
        end

        results
      end

    end
  end
end
@@ -0,0 +1,45 @@
module Elasticated
  class Repository
    # Abstract base class for the repository search strategies.
    # Subclasses must implement `execute`.
    class Search

      attr_accessor :repository, :query, :aggregated, :opts

      # @param repository object that responds to `client`, `search_page_size`,
      #   `scroll_expiration_time` and `scroll_page_size`
      # @param query the query to run
      # @param aggregated whether aggregations were requested along with the search
      # @param opts [Hash, nil] extra options kept for the search requests;
      #   an explicit nil is treated as an empty hash
      def initialize(repository, query, aggregated=false, opts={})
        self.repository = repository
        self.query = query
        self.aggregated = aggregated
        # Normalize nil so the stored options can always be merged into.
        self.opts = opts || {}
      end

      protected

      # Forces the pagination keys of a request body, mutating it in place:
      # each of `:size` / `:from` is set when a value is given and removed otherwise.
      #
      # @param body [Hash] request body to adjust
      # @param size value for `:size`, or nil to delete the key
      # @param from value for `:from`, or nil to delete the key
      # @return [Hash] the same `body` object
      def override!(body, size=nil, from=nil)
        size ? body.merge!(size: size) : body.delete(:size)
        from ? body.merge!(from: from) : body.delete(:from)
        body
      end

      # misc getters (all delegate to the repository)

      def client
        repository.client
      end

      def search_page_size
        repository.search_page_size
      end

      def scroll_expiration_time
        repository.scroll_expiration_time
      end

      def scroll_page_size
        repository.scroll_page_size
      end

    end
  end
end
@@ -0,0 +1,13 @@
module Elasticated
  class Repository
    # Search strategy that resolves the query with a single search request.
    class SinglePageSearch < Search

      # Builds the request body (with aggregations when `aggregated` is set),
      # sends one search request and parses the response.
      #
      # @return [Results] results parsed from the response
      def execute
        request_body =
          if aggregated
            query.build_for_aggregated_search
          else
            query.build_for_search
          end
        raw_response = client.search request_body, opts
        Results.parse raw_response, query
      end

    end
  end
end
@@ -3,41 +3,59 @@ module Elasticated
3
3
  ShardsInfo = Struct.new :total, :successful, :failed
4
4
  HitsInfo = Struct.new :total, :max_score
5
5
 
6
- class Results < Array
7
-
8
- def self.from_elasticsearch_response(elasticsearch_response, query=nil)
9
- documents = elasticsearch_response['hits']['hits'].map{ |hit| Document.from_elasticsearch_hit hit }
10
- results = new documents
11
- results.documents = documents
12
- # cluster metadata
13
- results.took = elasticsearch_response['took']
14
- results.timed_out = elasticsearch_response['timed_out']
15
- # shards metadata
16
- shards = elasticsearch_response['_shards']
17
- results.shards = ShardsInfo.new shards['total'], shards['successful'], shards['failed']
18
- # search metadata
19
- hits = elasticsearch_response['hits']
20
- results.hits = HitsInfo.new hits['total'], hits['max_score']
21
- # aggregations results
22
- aggregations = elasticsearch_response['aggregations']
23
- results.aggregations = query.parse_aggregations aggregations if query && aggregations
24
- results
6
+ class Results
7
+
8
+ class << self
# Builds a results object from a raw Elasticsearch response hash.
#
# @param elasticsearch_response [Hash] response with string keys ('hits', '_shards', 'took', ...)
# @param query optional query; when given and the response carries aggregations,
#   it is asked to parse them
# @return a new results object populated from the response
def parse(elasticsearch_response, query=nil)
  hits_section = elasticsearch_response['hits']
  parsed_documents = hits_section['hits'].map { |hit| Document.parse hit }

  parsed = new
  parsed.documents = parsed_documents
  # scroll metadata
  parsed.scroll_id = elasticsearch_response['_scroll_id']
  # cluster metadata
  parsed.took = elasticsearch_response['took']
  parsed.timed_out = elasticsearch_response['timed_out']
  # shards metadata
  shards_section = elasticsearch_response['_shards']
  parsed.shards = ShardsInfo.new shards_section['total'], shards_section['successful'], shards_section['failed']
  # search metadata
  parsed.hits = HitsInfo.new hits_section['total'], hits_section['max_score']
  # aggregations results
  raw_aggregations = elasticsearch_response['aggregations']
  if query && raw_aggregations
    parsed.aggregations = query.parse_aggregations raw_aggregations
  end
  parsed
end
29
+ alias_method :from_elasticsearch_response, :parse
25
30
  end
26
31
 
32
+ attr_accessor :scroll_id
27
33
  attr_accessor :took, :timed_out
28
34
  attr_accessor :shards # methods: total, successful, failed
29
35
  attr_accessor :hits # methods: total, max_score
30
36
  attr_accessor :documents, :aggregations
31
37
 
32
- def append_results_from(elasticsearch_response)
33
- elasticsearch_response['hits']['hits'].each do |hit|
34
- documents.push Document.from_elasticsearch_hit hit
35
- end
# Merges another results object into this one.
#
# Documents are concatenated; aggregations and scroll id are replaced only
# when the other results object carries a value for them.
#
# @param another_results results object to merge in
# @return self
def append(another_results)
  self.documents = documents + another_results.documents

  incoming_aggregations = another_results.aggregations
  self.aggregations = incoming_aggregations if incoming_aggregations

  incoming_scroll_id = another_results.scroll_id
  self.scroll_id = incoming_scroll_id if incoming_scroll_id

  self
end
38
44
 
39
- def sources
40
- documents.map &:source
# Returns the source of every document.
#
# When `with_ids` is true each returned source is a copy of the document's
# source with the document id stored under `:_id`; the documents themselves
# are left untouched, so a later `sources(false)` does not leak ids.
#
# @param with_ids [Boolean] whether to include the document id in each source
# @return [Array] one source per document
def sources(with_ids=true)
  documents.map do |document|
    next document.source unless with_ids

    document.source.dup.tap { |copy| copy[:_id] = document.id }
  end
end
52
+
# Returns the id of every document, in document order.
def ids
  documents.map { |document| document.id }
end
56
+
# Returns how many documents this results object currently holds.
def count
  held_documents = documents
  held_documents.count
end
42
60
 
43
61
  end
@@ -1,9 +1,19 @@
1
1
  module Elasticated
2
- VERSION = '1.2.1'
2
+ VERSION = '2.0.0'
3
3
  end
4
4
 
5
5
  # Changelog
6
6
 
7
+ # 2.0.0
8
+ # Se renombra la aggregation 'count_distinct' por 'value_count'
9
+ # Se separan los metodos de busqueda (scroll, paginacion normal, scan & scroll) en strategies
10
+ # El metodo de busqueda ahora puede ser especificado por el usuario
11
+ # Se agrega ResumableSearch (utiliza un scroll)
12
+ # Se agregan las opciones 'include_count' y 'compact' a varias aggregations
13
+ # Se elimina la clase CountAggregation (es reemplazable por GroupAggregation)
14
+ # Se agregan las aggregations 'stats', 'missing', 'geohash_grid' y 'safe_date_histogram'
15
+ # Ahora el logger default no imprime por stdout
16
+
7
17
  # 1.2.1
8
18
  # Ahora se optimiza la construcción de QueryConditions cuando sólo tienen una cláusula should con una condición
9
19
 
@@ -144,38 +144,22 @@ module Elasticated
144
144
  expect(agg.build).to eq expected_result
145
145
  end
146
146
 
147
- it "should map a group aggregation response" do
148
- agg = GroupAggregation.new :field
149
- response = { 'buckets' => [
150
- { 'key' => 'value1', 'doc_count' => 'count1' },
151
- { 'key' => 'value2', 'doc_count' => 'count2' },
152
- { 'key' => 'value3', 'doc_count' => 'count3' }
153
- ] }
154
- expect(agg.parse(response)).to eq({
155
- 'value1' => { 'count' => 'count1' },
156
- 'value2' => { 'count' => 'count2' },
157
- 'value3' => { 'count' => 'count3' }
158
- })
159
- end
160
-
161
- end
162
-
163
- describe CountAggregation do
164
-
165
- it "should build a terms aggregation over a field" do
166
- agg = CountAggregation.new :user
167
- expected_result = { terms: { field: :user, size: 0 } }
168
- expect(agg.build).to eq expected_result
169
- end
170
-
171
- it "should build a terms aggregation over a field with custom params" do
172
- agg = CountAggregation.new :user, script: 'your_script'
173
- expected_result = { terms: { field: :user, size: 0, script: 'your_script' } }
174
- expect(agg.build).to eq expected_result
147
+ it "should map a group aggregation response" do
148
+ agg = GroupAggregation.new :field
149
+ response = { 'buckets' => [
150
+ { 'key' => 'value1', 'doc_count' => 'count1' },
151
+ { 'key' => 'value2', 'doc_count' => 'count2' },
152
+ { 'key' => 'value3', 'doc_count' => 'count3' }
153
+ ] }
154
+ expect(agg.parse(response)).to eq({
155
+ 'value1' => { 'count' => 'count1' },
156
+ 'value2' => { 'count' => 'count2' },
157
+ 'value3' => { 'count' => 'count3' }
158
+ })
175
159
  end
176
160
 
177
- it "should map a 'count' aggregation response" do
178
- agg = CountAggregation.new :field
161
+ it "should map a group aggregation response compacted" do
162
+ agg = GroupAggregation.new :field, compact: true
179
163
  response = { 'buckets' => [
180
164
  { 'key' => 'value1', 'doc_count' => 'count1' },
181
165
  { 'key' => 'value2', 'doc_count' => 'count2' },
@@ -190,16 +174,26 @@ module Elasticated
190
174
 
191
175
  end
192
176
 
193
- describe CountDistinctAggregation do
177
+ describe ValueCountAggregation do
194
178
 
195
179
  it "should build a terms aggregation over a field" do
196
- agg = CountDistinctAggregation.new :user
180
+ agg = ValueCountAggregation.new :user
197
181
  expected_result = { value_count: { field: :user } }
198
182
  expect(agg.build).to eq expected_result
199
183
  end
200
184
 
201
185
  end
202
186
 
187
+ describe MissingAggregation do
188
+
189
+ it "should build a missing aggregation over a field" do
190
+ agg = MissingAggregation.new :country
191
+ expected_result = { missing: { field: :country } }
192
+ expect(agg.build).to eq expected_result
193
+ end
194
+
195
+ end
196
+
203
197
  describe HistogramAggregation do
204
198
 
205
199
  it "should build a histogram aggregation" do
@@ -474,6 +468,38 @@ module Elasticated
474
468
  })
475
469
  end
476
470
 
471
+ it "should map a 'range' response compacted" do
472
+ agg = RangeAggregation.new :a_field, compact: true do |a|
473
+ a.ranges do |r|
474
+ r.greater_equal :max_value, 'high_values'
475
+ r.less_equal :min_value
476
+ r.between :min_value, :max_value
477
+ end
478
+ end
479
+ response = {
480
+ 'buckets' => {
481
+ 'high_values' => {
482
+ 'from' => 'max_value',
483
+ 'doc_count' => 5
484
+ },
485
+ 'less_equal_min_value' => {
486
+ 'to' => 'min_value',
487
+ 'doc_count' => 3
488
+ },
489
+ 'between_min_value_and_max_value' => {
490
+ 'from' => 'min_value',
491
+ 'to' => 'max_value',
492
+ 'doc_count' => 9
493
+ }
494
+ }
495
+ }
496
+ expect(agg.parse(response)).to eq({
497
+ 'high_values' => 5,
498
+ 'less_equal_min_value' => 3,
499
+ 'between_min_value_and_max_value' => 9
500
+ })
501
+ end
502
+
477
503
  end
478
504
 
479
505
  describe SumDistinctAggregation do
@@ -12,7 +12,7 @@ module Elasticated
12
12
  end
13
13
 
14
14
  it "should parse an elasticsearch hit" do
15
- document = Document.from_elasticsearch_hit hit
15
+ document = Document.parse hit
16
16
  expect(document.id).to eq 'AU-CLCguwlaKln07OA4x'
17
17
  expect(document.index).to eq 'fbinsights-v6-2015-08'
18
18
  expect(document.type).to eq 'post'
@@ -24,17 +24,17 @@ module Elasticated
24
24
  context "the document source" do
25
25
 
26
26
  it "can be accessed via methods" do
27
- document = Document.from_elasticsearch_hit hit
27
+ document = Document.parse hit
28
28
  expect(document.source.page_id).to eq '127735990588679'
29
29
  end
30
30
 
31
31
  it "can be accessed via key" do
32
- document = Document.from_elasticsearch_hit hit
32
+ document = Document.parse hit
33
33
  expect(document.source['page_id']).to eq '127735990588679'
34
34
  end
35
35
 
36
36
  it "can be accessed via indifferent key" do
37
- document = Document.from_elasticsearch_hit hit
37
+ document = Document.parse hit
38
38
  expect(document.source[:page_id]).to eq '127735990588679'
39
39
  end
40
40
 
@@ -0,0 +1,88 @@
1
+ require_relative 'spec_helper'
2
+
module Elasticated
  class Repository
    # Checks which concrete search strategy IntelligentSearch#best_search_method
    # picks for each combination of sorting, limit (size) and offset (from).
    # The repository double reports a search page size of 1000.
    describe IntelligentSearch do

      let(:repository){ double }

      before :each do
        allow(repository).to receive(:search_page_size).and_return 1000
      end

      # Builds a query from the given block and wraps it in an IntelligentSearch.
      def search_for(&query_block)
        query = Query.build(&query_block)
        IntelligentSearch.new repository, query
      end

      context "on unsorted queries" do

        it "should select 'scan & scroll' for an unlimited query without offset" do
          search = search_for {}
          expect(search.best_search_method).to be_a ScanScrollSearch
        end

        it "should select 'single page' for a limited, light-weight query without offset" do
          search = search_for { size 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'scan & scroll' for a limited, heavy query without offset" do
          search = search_for { size 9999 }
          expect(search.best_search_method).to be_a ScanScrollSearch
        end

        it "should select 'normal pagination' for an unlimited query with offset" do
          search = search_for { from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

        it "should select 'single page' for a limited, light-weight query with offset" do
          search = search_for { size 10; from 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'normal pagination' for a limited, heavy query with offset" do
          search = search_for { size 9999; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

      end

      context "on sorted queries" do

        it "should select 'scroll' for an unlimited query without offset" do
          search = search_for { sort :field }
          expect(search.best_search_method).to be_a ScrollSearch
        end

        it "should select 'single page' for a limited, light-weight query without offset" do
          search = search_for { sort :field; size 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        # Description fixed: the expectation is ScrollSearch, not scan & scroll.
        it "should select 'scroll' for a limited, heavy query without offset" do
          search = search_for { sort :field; size 9999 }
          expect(search.best_search_method).to be_a ScrollSearch
        end

        it "should select 'normal pagination' for an unlimited query with offset" do
          search = search_for { sort :field; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

        it "should select 'single page' for a limited, light-weight query with offset" do
          search = search_for { sort :field; size 10; from 10 }
          expect(search.best_search_method).to be_a SinglePageSearch
        end

        it "should select 'normal pagination' for a limited, heavy query with offset" do
          search = search_for { sort :field; size 9999; from 10 }
          expect(search.best_search_method).to be_a NormalSearch
        end

      end

    end
  end
end