elasticated 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +123 -1
  3. data/elasticated.gemspec +1 -0
  4. data/lib/elasticated.rb +18 -3
  5. data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
  6. data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
  7. data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
  8. data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
  9. data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
  10. data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
  11. data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
  12. data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
  13. data/lib/elasticated/aggregations/subaggregated.rb +10 -1
  14. data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
  15. data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
  16. data/lib/elasticated/configurable.rb +13 -2
  17. data/lib/elasticated/configuration.rb +6 -0
  18. data/lib/elasticated/document.rb +2 -1
  19. data/lib/elasticated/helpers.rb +18 -0
  20. data/lib/elasticated/loggers/default_logger.rb +27 -0
  21. data/lib/elasticated/loggers/silent_logger.rb +27 -0
  22. data/lib/elasticated/query.rb +8 -0
  23. data/lib/elasticated/query_aggregations.rb +3 -4
  24. data/lib/elasticated/repository.rb +31 -30
  25. data/lib/elasticated/repository/intelligent_search.rb +46 -0
  26. data/lib/elasticated/repository/normal_search.rb +40 -0
  27. data/lib/elasticated/repository/resumable_search.rb +58 -0
  28. data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
  29. data/lib/elasticated/repository/scroll_search.rb +45 -0
  30. data/lib/elasticated/repository/search.rb +45 -0
  31. data/lib/elasticated/repository/single_page_search.rb +13 -0
  32. data/lib/elasticated/results.rb +43 -25
  33. data/lib/version.rb +11 -1
  34. data/spec/aggregation_spec.rb +58 -32
  35. data/spec/document_spec.rb +4 -4
  36. data/spec/intelligent_search_spec.rb +88 -0
  37. data/spec/query_spec.rb +2 -2
  38. data/spec/results_spec.rb +9 -9
  39. metadata +38 -5
  40. data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
  41. data/lib/elasticated/default_logger.rb +0 -27
@@ -0,0 +1,43 @@
1
module Elasticated
  class Repository
    # Search strategy that opens a search with search_type 'scan' and then
    # collects the documents through successive scroll requests.
    class ScanScrollSearch < Search

      # Runs the aggregations version of the query and parses the response
      # together with the query.
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Opens the scan, attaches the aggregations when they were requested and
      # keeps scrolling until a page comes back empty or, for limited queries,
      # until query._size documents have been collected.
      #
      # Returns the accumulated Results.
      def execute
        # The aggregations are requested exactly once, through
        # fetch_aggregations below; no separate up-front request is made.
        body = query.build_for_search
        scan_opts = opts.merge(search_type: 'scan', scroll: scroll_expiration_time, size: scroll_page_size)
        response = client.search body, scan_opts
        results = Results.parse response
        results.append fetch_aggregations if aggregated

        doc_count = 0
        loop do
          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          break if hits.empty?

          # Trim the page that would push the total past the requested size.
          if query.limited? && (doc_count + hits.count > query._size)
            new_results.documents = hits.first(query._size - doc_count)
            hits = new_results.documents
          end

          doc_count += hits.count
          results = results.append new_results
          break if query.limited? && doc_count >= query._size
        end
        results
      end

    end
  end
end
@@ -0,0 +1,45 @@
1
module Elasticated
  class Repository
    # Search strategy that pages through the results with a scroll.
    class ScrollSearch < Search

      # Runs the aggregations version of the query and parses the response
      # together with the query.
      def fetch_aggregations
        body = query.build_for_aggregations
        response = client.search body, opts
        Results.parse response, query
      end

      # Runs the search with a scroll and keeps requesting pages until the
      # expected number of pages has been read or a short page shows up.
      # Limited queries never return more than query._size documents from the
      # search itself.
      #
      # Returns the accumulated Results.
      def execute
        body = query.build_for_search
        size = scroll_page_size

        # Called without size/from, override! strips both keys from the body;
        # the page size travels as a request option instead.
        override! body
        response = client.search body, opts.merge(scroll: scroll_expiration_time, size: size)
        results = Results.parse response, query

        target_size = query.limited? ? query._size : results.hits.total

        # The first page holds up to `size` documents, which can exceed the
        # limit of the query; later pages are trimmed inside the loop.
        results.documents = results.documents.first(target_size) if query.limited?

        results.append fetch_aggregations if aggregated

        total_pages = (target_size / size.to_f).ceil
        current_page = 1

        loop do
          break if current_page >= total_pages

          response = client.scroll results.scroll_id, scroll: scroll_expiration_time
          new_results = Results.parse response
          hits = new_results.documents
          doc_count = results.documents.count
          if query.limited? && (doc_count + hits.count > target_size)
            new_results.documents = hits.first(target_size - doc_count)
          end
          results.append new_results

          # A page shorter than the page size is the last one.
          break if new_results.documents.count < size

          current_page += 1
        end

        results
      end

    end
  end
end
@@ -0,0 +1,45 @@
1
module Elasticated
  class Repository
    # Common base of the search strategies.
    #
    # The class is abstract: each subclass has to supply its own 'execute()'.
    class Search

      attr_accessor :repository, :query, :aggregated, :opts

      # repository - object that exposes the client and the paging settings
      # query      - query to run
      # aggregated - whether aggregations were requested (false by default)
      # opts       - extra options handed to the client (empty by default)
      def initialize(repository, query, aggregated=false, opts={})
        self.repository, self.query = repository, query
        self.aggregated, self.opts = aggregated, opts
      end

      protected

      # Sets :size and :from on the body when a value is given and removes the
      # key otherwise. The body is changed in place and returned.
      def override!(body, size=nil, from=nil)
        { size: size, from: from }.each do |key, value|
          if value
            body.merge!(key => value)
          else
            body.delete(key)
          end
        end
        body
      end

      # Readers forwarded to the repository.

      def client
        repository.client
      end

      def search_page_size
        repository.search_page_size
      end

      def scroll_expiration_time
        repository.scroll_expiration_time
      end

      def scroll_page_size
        repository.scroll_page_size
      end

    end
  end
end
@@ -0,0 +1,13 @@
1
module Elasticated
  class Repository
    # Search strategy that resolves the query with a single request.
    class SinglePageSearch < Search

      # Builds the body (including the aggregations when they were requested),
      # sends one search and parses the response together with the query.
      def execute
        body =
          if aggregated
            query.build_for_aggregated_search
          else
            query.build_for_search
          end
        Results.parse(client.search(body, opts), query)
      end

    end
  end
end
@@ -3,41 +3,59 @@ module Elasticated
3
3
  ShardsInfo = Struct.new :total, :successful, :failed
4
4
  HitsInfo = Struct.new :total, :max_score
5
5
 
6
- class Results < Array
7
-
8
- def self.from_elasticsearch_response(elasticsearch_response, query=nil)
9
- documents = elasticsearch_response['hits']['hits'].map{ |hit| Document.from_elasticsearch_hit hit }
10
- results = new documents
11
- results.documents = documents
12
- # cluster metadata
13
- results.took = elasticsearch_response['took']
14
- results.timed_out = elasticsearch_response['timed_out']
15
- # shards metadata
16
- shards = elasticsearch_response['_shards']
17
- results.shards = ShardsInfo.new shards['total'], shards['successful'], shards['failed']
18
- # search metadata
19
- hits = elasticsearch_response['hits']
20
- results.hits = HitsInfo.new hits['total'], hits['max_score']
21
- # aggregations results
22
- aggregations = elasticsearch_response['aggregations']
23
- results.aggregations = query.parse_aggregations aggregations if query && aggregations
24
- results
6
+ class Results
7
+
8
+ class << self
9
# Builds a results object out of a raw elasticsearch response.
#
# elasticsearch_response - response indexed by string keys
# query                  - optional; when given and the response carries
#                          aggregations, they are parsed through
#                          query.parse_aggregations
def parse(elasticsearch_response, query=nil)
  hits_section = elasticsearch_response['hits']
  documents = hits_section['hits'].map { |hit| Document.parse(hit) }

  new.tap do |results|
    results.documents = documents
    # scroll metadata
    results.scroll_id = elasticsearch_response['_scroll_id']
    # cluster metadata
    results.took = elasticsearch_response['took']
    results.timed_out = elasticsearch_response['timed_out']
    # shards metadata
    shards_section = elasticsearch_response['_shards']
    results.shards = ShardsInfo.new(
      shards_section['total'], shards_section['successful'], shards_section['failed']
    )
    # search metadata
    results.hits = HitsInfo.new(hits_section['total'], hits_section['max_score'])
    # aggregations results
    raw_aggregations = elasticsearch_response['aggregations']
    if query && raw_aggregations
      results.aggregations = query.parse_aggregations(raw_aggregations)
    end
  end
end
29
+ alias_method :from_elasticsearch_response, :parse
25
30
  end
26
31
 
32
+ attr_accessor :scroll_id
27
33
  attr_accessor :took, :timed_out
28
34
  attr_accessor :shards # methods: total, successful, failed
29
35
  attr_accessor :hits # methods: total, max_score
30
36
  attr_accessor :documents, :aggregations
31
37
 
32
- def append_results_from(elasticsearch_response)
33
- elasticsearch_response['hits']['hits'].each do |hit|
34
- documents.push Document.from_elasticsearch_hit hit
35
- end
38
# Merges another results object into this one: its documents are added after
# the current ones, and its aggregations and scroll id replace the current
# values only when they are present.
#
# Returns self.
def append(another_results)
  self.documents = documents + another_results.documents

  incoming_aggregations = another_results.aggregations
  self.aggregations = incoming_aggregations if incoming_aggregations

  incoming_scroll_id = another_results.scroll_id
  self.scroll_id = incoming_scroll_id if incoming_scroll_id

  self
end
38
44
 
39
- def sources
40
- documents.map &:source
45
# Returns the source of every document.
#
# with_ids - when truthy (the default) each returned source also carries the
#            id of its document under the :_id key.
#
# The id is written to a copy of the source, so the sources held by the
# documents stay untouched and a later call with with_ids=false does not
# expose ids added by an earlier call.
def sources(with_ids=true)
  documents.map do |document|
    next document.source unless with_ids

    document.source.dup.tap { |copy| copy[:_id] = document.id }
  end
end
52
+
53
# Returns the id of every document, in document order.
def ids
  documents.map { |document| document.id }
end
56
+
57
+ def count
58
+ documents.count
41
59
  end
42
60
 
43
61
  end
@@ -1,9 +1,19 @@
1
1
  module Elasticated
2
- VERSION = '1.2.1'
2
+ VERSION = '2.0.0'
3
3
  end
4
4
 
5
5
  # Changelog
6
6
 
7
+ # 2.0.0
8
+ # Se renombra la aggregation 'count_distinct' por 'value_count'
9
+ # Se separan los metodos de busqueda (scroll, paginacion normal, scan & scroll) en strategies
10
+ # El metodo de busqueda ahora puede ser especificado por el usuario
11
+ # Se agrega ResumableSearch (utiliza un scroll)
12
+ # Se agregan las opciones 'include_count' y 'compact' a varias aggregations
13
+ # Se elimina la clase CountAggregation (es reemplazable por GroupAggregation)
14
+ # Se agregan las aggregations 'stats', 'missing', 'geohash_grid' y 'safe_date_histogram'
15
+ # Ahora el logger default no imprime por stdout
16
+
7
17
  # 1.2.1
8
18
  # Ahora se optimiza la construcción de QueryConditions cuando sólo tienen una cláusula should con una condición
9
19
 
@@ -144,38 +144,22 @@ module Elasticated
144
144
  expect(agg.build).to eq expected_result
145
145
  end
146
146
 
147
- it "should map a group aggregation response" do
148
- agg = GroupAggregation.new :field
149
- response = { 'buckets' => [
150
- { 'key' => 'value1', 'doc_count' => 'count1' },
151
- { 'key' => 'value2', 'doc_count' => 'count2' },
152
- { 'key' => 'value3', 'doc_count' => 'count3' }
153
- ] }
154
- expect(agg.parse(response)).to eq({
155
- 'value1' => { 'count' => 'count1' },
156
- 'value2' => { 'count' => 'count2' },
157
- 'value3' => { 'count' => 'count3' }
158
- })
159
- end
160
-
161
- end
162
-
163
- describe CountAggregation do
164
-
165
- it "should build a terms aggregation over a field" do
166
- agg = CountAggregation.new :user
167
- expected_result = { terms: { field: :user, size: 0 } }
168
- expect(agg.build).to eq expected_result
169
- end
170
-
171
- it "should build a terms aggregation over a field with custom params" do
172
- agg = CountAggregation.new :user, script: 'your_script'
173
- expected_result = { terms: { field: :user, size: 0, script: 'your_script' } }
174
- expect(agg.build).to eq expected_result
147
+ it "should map a group aggregation response" do
148
+ agg = GroupAggregation.new :field
149
+ response = { 'buckets' => [
150
+ { 'key' => 'value1', 'doc_count' => 'count1' },
151
+ { 'key' => 'value2', 'doc_count' => 'count2' },
152
+ { 'key' => 'value3', 'doc_count' => 'count3' }
153
+ ] }
154
+ expect(agg.parse(response)).to eq({
155
+ 'value1' => { 'count' => 'count1' },
156
+ 'value2' => { 'count' => 'count2' },
157
+ 'value3' => { 'count' => 'count3' }
158
+ })
175
159
  end
176
160
 
177
- it "should map a 'count' aggregation response" do
178
- agg = CountAggregation.new :field
161
+ it "should map a group aggregation response compacted" do
162
+ agg = GroupAggregation.new :field, compact: true
179
163
  response = { 'buckets' => [
180
164
  { 'key' => 'value1', 'doc_count' => 'count1' },
181
165
  { 'key' => 'value2', 'doc_count' => 'count2' },
@@ -190,16 +174,26 @@ module Elasticated
190
174
 
191
175
  end
192
176
 
193
- describe CountDistinctAggregation do
177
+ describe ValueCountAggregation do
194
178
 
195
179
  it "should build a terms aggregation over a field" do
196
- agg = CountDistinctAggregation.new :user
180
+ agg = ValueCountAggregation.new :user
197
181
  expected_result = { value_count: { field: :user } }
198
182
  expect(agg.build).to eq expected_result
199
183
  end
200
184
 
201
185
  end
202
186
 
187
+ describe MissingAggregation do
188
+
189
+ it "should build a missing aggregation over a field" do
190
+ agg = MissingAggregation.new :country
191
+ expected_result = { missing: { field: :country } }
192
+ expect(agg.build).to eq expected_result
193
+ end
194
+
195
+ end
196
+
203
197
  describe HistogramAggregation do
204
198
 
205
199
  it "should build a histogram aggregation" do
@@ -474,6 +468,38 @@ module Elasticated
474
468
  })
475
469
  end
476
470
 
471
+ it "should map a 'range' response compacted" do
472
+ agg = RangeAggregation.new :a_field, compact: true do |a|
473
+ a.ranges do |r|
474
+ r.greater_equal :max_value, 'high_values'
475
+ r.less_equal :min_value
476
+ r.between :min_value, :max_value
477
+ end
478
+ end
479
+ response = {
480
+ 'buckets' => {
481
+ 'high_values' => {
482
+ 'from' => 'max_value',
483
+ 'doc_count' => 5
484
+ },
485
+ 'less_equal_min_value' => {
486
+ 'to' => 'min_value',
487
+ 'doc_count' => 3
488
+ },
489
+ 'between_min_value_and_max_value' => {
490
+ 'from' => 'min_value',
491
+ 'to' => 'max_value',
492
+ 'doc_count' => 9
493
+ }
494
+ }
495
+ }
496
+ expect(agg.parse(response)).to eq({
497
+ 'high_values' => 5,
498
+ 'less_equal_min_value' => 3,
499
+ 'between_min_value_and_max_value' => 9
500
+ })
501
+ end
502
+
477
503
  end
478
504
 
479
505
  describe SumDistinctAggregation do
@@ -12,7 +12,7 @@ module Elasticated
12
12
  end
13
13
 
14
14
  it "should parse an elasticsearch hit" do
15
- document = Document.from_elasticsearch_hit hit
15
+ document = Document.parse hit
16
16
  expect(document.id).to eq 'AU-CLCguwlaKln07OA4x'
17
17
  expect(document.index).to eq 'fbinsights-v6-2015-08'
18
18
  expect(document.type).to eq 'post'
@@ -24,17 +24,17 @@ module Elasticated
24
24
  context "the document source" do
25
25
 
26
26
  it "can be accessed via methods" do
27
- document = Document.from_elasticsearch_hit hit
27
+ document = Document.parse hit
28
28
  expect(document.source.page_id).to eq '127735990588679'
29
29
  end
30
30
 
31
31
  it "can be accessed via key" do
32
- document = Document.from_elasticsearch_hit hit
32
+ document = Document.parse hit
33
33
  expect(document.source['page_id']).to eq '127735990588679'
34
34
  end
35
35
 
36
36
  it "can be accessed via indifferent key" do
37
- document = Document.from_elasticsearch_hit hit
37
+ document = Document.parse hit
38
38
  expect(document.source[:page_id]).to eq '127735990588679'
39
39
  end
40
40
 
@@ -0,0 +1,88 @@
1
+ require_relative 'spec_helper'
2
+
3
+ module Elasticated
4
+ class Repository
5
+ describe IntelligentSearch do
6
+
7
+ let(:repository){ double }
8
+
9
+ before :each do
10
+ allow(repository).to receive(:search_page_size).and_return 1000
11
+ end
12
+
13
+ def search_for(&query_block)
14
+ query = Query.build &query_block
15
+ IntelligentSearch.new repository, query
16
+ end
17
+
18
+ context "on unsorted queries" do
19
+
20
+ it "should select 'scan & scroll' for an unlimited query without offset" do
21
+ search = search_for {}
22
+ expect(search.best_search_method).to be_a ScanScrollSearch
23
+ end
24
+
25
+ it "should select 'single page' for a limited, light-weight query without offset" do
26
+ search = search_for { size 10 }
27
+ expect(search.best_search_method).to be_a SinglePageSearch
28
+ end
29
+
30
+ it "should select 'scan & scroll' for a limited, heavy query without offset" do
31
+ search = search_for { size 9999 }
32
+ expect(search.best_search_method).to be_a ScanScrollSearch
33
+ end
34
+
35
+ it "should select 'normal pagination' for an unlimited query with offset" do
36
+ search = search_for { from 10 }
37
+ expect(search.best_search_method).to be_a NormalSearch
38
+ end
39
+
40
+ it "should select 'single page' for a limited, light-weight query with offset" do
41
+ search = search_for { size 10; from 10 }
42
+ expect(search.best_search_method).to be_a SinglePageSearch
43
+ end
44
+
45
+ it "should select 'normal pagination' for a limited, heavy query with offset" do
46
+ search = search_for { size 9999; from 10 }
47
+ expect(search.best_search_method).to be_a NormalSearch
48
+ end
49
+
50
+ end
51
+
52
+ context "on sorted queries" do
53
+
54
+ it "should select 'scroll' for an unlimited query without offset" do
55
+ search = search_for { sort :field }
56
+ expect(search.best_search_method).to be_a ScrollSearch
57
+ end
58
+
59
+ it "should select 'single page' for a limited, light-weight query without offset" do
60
+ search = search_for { sort :field; size 10 }
61
+ expect(search.best_search_method).to be_a SinglePageSearch
62
+ end
63
+
64
+ it "should select 'scan & scroll' for a limited, heavy query without offset" do
65
+ search = search_for { sort :field; size 9999 }
66
+ expect(search.best_search_method).to be_a ScrollSearch
67
+ end
68
+
69
+ it "should select 'normal pagination' for an unlimited query with offset" do
70
+ search = search_for { sort :field; from 10 }
71
+ expect(search.best_search_method).to be_a NormalSearch
72
+ end
73
+
74
+ it "should select 'single page' for a limited, light-weight query with offset" do
75
+ search = search_for { sort :field; size 10; from 10 }
76
+ expect(search.best_search_method).to be_a SinglePageSearch
77
+ end
78
+
79
+ it "should select 'normal pagination' for a limited, heavy query with offset" do
80
+ search = search_for { sort :field; size 9999; from 10 }
81
+ expect(search.best_search_method).to be_a NormalSearch
82
+ end
83
+
84
+ end
85
+
86
+ end
87
+ end
88
+ end