elasticsearch_scanner 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/elasticsearch_scanner.gemspec +1 -1
- data/lib/elasticsearch_facet_scanner.rb +103 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3c22567dd2652dffd476fd40ed23cc43ec73e07edccf0290c650937333a0f76
|
4
|
+
data.tar.gz: e596d443918838c965d52cbc51580f86a850aa398c00cb38685324acecad1815
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5c3f8f2e2b0754f4a6ffbda29eb500731d2ca3a44bb6935f43246bd831f60fd0570863840431df7a82624b12e9a7f042cd9f1a1cf38371bba5a270235e34699
|
7
|
+
data.tar.gz: 6e1c4eae77bbf3dee3994e7fa10ec036fc92e00750e8dca985db84b6a53f26cebac00d08eda26eeb6e758f29bb59be9c1e3da85c624a4d84603765a9606f8661
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true

require 'json'
require 'net/http'
require 'uri'

# Enumerates the buckets of a `terms` aggregation on a single field, one page
# at a time.
#
# Each request asks for up to `size` buckets ordered by term ascending. The key
# of the last bucket received is remembered and the following request is
# restricted to documents whose field value is greater than that key, so the
# aggregation is walked page by page until the response reports no remaining
# documents (`sum_other_doc_count` of zero).
#
# The class is Enumerable: `each` yields individual bucket hashes exactly as
# they appear in the parsed JSON response, and `each_batch` yields one array
# of buckets per request.
class ElasticSearchFacetScanner
  include Enumerable

  # Raised when a response does not contain the expected aggregation
  # (for example an error payload).
  ResponseError = Class.new(StandardError)

  # Not referenced inside this class; kept because it is a public constant.
  SCROLL_PATH = '/_search/scroll'

  # Seconds to sleep before the 1st, 2nd, 3rd and 4th retry; later retries
  # reuse the last entry.
  RETRY_DELAYS = [0.1, 0.2, 0.4, 1.0].freeze

  # total_request_time       - seconds spent inside successful HTTP requests.
  # total_elasticsearch_time - sum of the `took` values from the responses,
  #                            added as reported (Elasticsearch documents
  #                            `took` as milliseconds; no conversion is done).
  # Both accumulate over the lifetime of the scanner, across repeated passes.
  attr_reader :total_request_time,
              :total_elasticsearch_time

  # url         - full search endpoint URL the aggregation request is POSTed to;
  #               `https` URLs are sent over TLS.
  # field       - name of the field to aggregate on.
  # size        - maximum number of buckets requested per page.
  # max_retries - total number of attempts per request when the read times out.
  def initialize(url, field, size = 100, max_retries = 5)
    @url = url
    @field = field
    @aggregation_name = "#{field}_terms"
    @size = size
    @max_retries = max_retries
    @total_request_time = 0.0
    @total_elasticsearch_time = 0.0
    reset_cursor
  end

  # Yields one array of buckets per request, starting from the smallest term.
  # At least one request is always made. Returns nil, or an Enumerator when
  # called without a block.
  def each_batch
    return enum_for(:each_batch) unless block_given?

    # Start over on every call so the scanner can be enumerated repeatedly
    # (e.g. `count` followed by `each`).
    reset_cursor
    yield search
    yield search while has_more?

    nil
  end

  # Yields every bucket hash in term order. Returns nil, or an Enumerator when
  # called without a block.
  def each(&block)
    return enum_for(:each) unless block_given?

    each_batch { |buckets| buckets.each(&block) }
  end

  # True until a response indicates that no further buckets remain.
  def has_more?
    @has_more
  end

  private

  # Forget the paging position so the next search starts at the first term.
  def reset_cursor
    @has_more = true
    @field_max_value = nil
  end

  # Request body for the first page: no hits, only the terms aggregation.
  # NOTE(review): ordering by `_term` was deprecated in favour of `_key` in
  # Elasticsearch 6.0 - confirm against the target cluster version before
  # changing it.
  def search_payload
    {
      size: 0,
      aggs: {
        @aggregation_name => {
          terms: {
            field: @field,
            size: @size,
            order: {
              _term: :asc
            }
          }
        }
      }
    }
  end

  # Request body for subsequent pages: same aggregation, restricted to values
  # strictly greater than the last key already returned.
  def search_payload_with_range
    search_payload.merge(
      query: {
        range: {
          @field => {
            gt: @field_max_value
          }
        }
      }
    )
  end

  # Builds and sends the request for the next page and returns its buckets.
  def search
    uri = URI(@url)
    http = Net::HTTP.new(uri.host, uri.port)
    # Without this an https:// URL would be spoken to in plain HTTP.
    http.use_ssl = uri.scheme == 'https'
    req = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
    req.body = (@field_max_value ? search_payload_with_range : search_payload).to_json

    make_request(http, req)
  end

  # Sends `req`, retrying on Net::ReadTimeout until @max_retries attempts have
  # been made (the last timeout is re-raised), then records timings and the
  # paging position and returns the array of buckets.
  def make_request(http, req)
    attempts = 0
    begin
      # Monotonic clock: unaffected by wall-clock adjustments.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      res = http.request(req)
      @total_request_time += Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    rescue Net::ReadTimeout
      attempts += 1
      raise if attempts >= @max_retries

      sleep(RETRY_DELAYS[attempts - 1] || RETRY_DELAYS.last)
      retry
    end

    record_page(res)
  end

  # Parses the response, updates the accumulated `took` total and the paging
  # cursor, and returns the buckets of this page.
  def record_page(res)
    data = JSON.parse(res.body)
    aggregation = data.dig('aggregations', @aggregation_name) if data.is_a?(Hash)
    unless aggregation
      raise ResponseError,
            "response (HTTP #{res.code}) has no '#{@aggregation_name}' aggregation: #{res.body}"
    end

    @total_elasticsearch_time += data.fetch('took', 0)
    buckets = aggregation['buckets']
    @field_max_value = buckets.empty? ? nil : buckets.last['key']
    # An empty page leaves no key to continue from; stop instead of restarting
    # from the first term forever.
    @has_more = aggregation['sum_other_doc_count'] > 0 && !buckets.empty?
    buckets
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-10-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Iterates over the entire index
|
14
14
|
email: dougyouch@gmail.com
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- LICENSE.txt
|
21
21
|
- README.md
|
22
22
|
- elasticsearch_scanner.gemspec
|
23
|
+
- lib/elasticsearch_facet_scanner.rb
|
23
24
|
- lib/elasticsearch_scanner.rb
|
24
25
|
homepage: https://github.com/dougyouch/elasticsearch_scanner
|
25
26
|
licenses:
|