elasticsearch_scanner 0.1.2 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/elasticsearch_scanner.gemspec +1 -1
- data/lib/elasticsearch_facet_scanner.rb +103 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3c22567dd2652dffd476fd40ed23cc43ec73e07edccf0290c650937333a0f76
|
4
|
+
data.tar.gz: e596d443918838c965d52cbc51580f86a850aa398c00cb38685324acecad1815
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5c3f8f2e2b0754f4a6ffbda29eb500731d2ca3a44bb6935f43246bd831f60fd0570863840431df7a82624b12e9a7f042cd9f1a1cf38371bba5a270235e34699
|
7
|
+
data.tar.gz: 6e1c4eae77bbf3dee3994e7fa10ec036fc92e00750e8dca985db84b6a53f26cebac00d08eda26eeb6e758f29bb59be9c1e3da85c624a4d84603765a9606f8661
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
require 'net/http'
require 'uri'

# Enumerates the buckets of a terms aggregation on a single field, one page
# at a time.
#
# Each request asks for the next +size+ buckets in ascending key order. After
# the first page, a range query (+gt+ the last key seen) is added so the next
# page starts after the previous one. Iteration stops when the response
# reports no remaining documents outside the returned buckets.
#
# NOTE: the paging cursor lives on the instance and is not reset when
# iteration starts, so a second traversal continues from wherever the
# previous one stopped instead of starting over.
class ElasticSearchFacetScanner
  include Enumerable

  # Raised when a response does not contain the expected terms aggregation
  # (for example when the server answered with an error document).
  class RequestError < StandardError; end

  SCROLL_PATH = '/_search/scroll'

  # Seconds to wait before the 1st, 2nd, ... retry after a read timeout.
  # Retries beyond the end of the list reuse the last delay.
  RETRY_DELAYS = [0.1, 0.2, 0.4, 1.0].freeze

  # total_request_time:       seconds spent waiting on successful HTTP requests.
  # total_elasticsearch_time: sum of the 'took' values reported in responses
  #                           (Elasticsearch reports 'took' in milliseconds).
  attr_reader :total_request_time,
              :total_elasticsearch_time

  # @param url [String] full search endpoint URL (http or https)
  # @param field [String] field whose distinct values are enumerated
  # @param size [Integer] number of buckets requested per page
  # @param max_retries [Integer] maximum number of attempts per request when
  #   the read times out (the last failed attempt re-raises)
  # @param order_key [String, Symbol] bucket ordering key sent to the server.
  #   Defaults to the historical '_term'; newer Elasticsearch releases call
  #   this '_key' (check the documentation for your server version).
  def initialize(url, field, size = 100, max_retries = 5, order_key: '_term')
    @url = url
    @field = field
    @aggregation_name = "#{field}_terms"
    @size = size
    @max_retries = max_retries
    @order_key = order_key
    @has_more = true
    @field_max_value = nil
    @total_request_time = 0.0
    @total_elasticsearch_time = 0.0
  end

  # Yields each page of buckets (an Array of Hashes) until the server reports
  # nothing is left. At least one request is always made.
  #
  # @return [nil, Enumerator] an Enumerator when no block is given
  def each_batch
    return enum_for(:each_batch) unless block_given?

    yield search
    yield search while has_more?

    nil
  end

  # Yields every bucket individually, across all pages.
  #
  # @return [nil, Enumerator] an Enumerator when no block is given
  def each(&block)
    return enum_for(:each) unless block_given?

    each_batch { |buckets| buckets.each(&block) }
  end

  # @return [Boolean] whether the last response indicated further buckets
  #   (true before any request has been made)
  def has_more?
    @has_more
  end

  private

  # Request body for the first page: no hits, only the terms aggregation.
  def search_payload
    {
      size: 0,
      aggs: {
        @aggregation_name => {
          terms: {
            field: @field,
            size: @size,
            order: { @order_key => :asc }
          }
        }
      }
    }
  end

  # Request body for follow-up pages: restricts documents to those whose
  # field value is greater than the last bucket key already returned.
  def search_payload_with_range
    search_payload.merge(
      query: {
        range: {
          @field => {
            gt: @field_max_value
          }
        }
      }
    )
  end

  # Builds and sends the request for the next page.
  def search
    uri = URI(@url)
    req = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
    payload = @field_max_value.nil? ? search_payload : search_payload_with_range
    req.body = payload.to_json

    make_request(build_http(uri), req)
  end

  # Creates the HTTP client for +uri+, enabling TLS for https URLs.
  def build_http(uri)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == 'https'
    http
  end

  # Sends +req+, retrying on Net::ReadTimeout with a short back-off, then
  # updates the paging state from the response.
  #
  # @return [Array<Hash>] the buckets of this page
  # @raise [Net::ReadTimeout] when every attempt timed out
  # @raise [RequestError] when the response lacks the aggregation
  def make_request(http, req)
    failures = 0

    begin
      started_at = monotonic_now
      res = http.request(req)
      @total_request_time += monotonic_now - started_at
    rescue Net::ReadTimeout
      failures += 1
      raise if failures >= @max_retries

      sleep(RETRY_DELAYS.fetch(failures - 1, RETRY_DELAYS.last))
      retry
    end

    process_response(res)
  end

  # Parses the response, accumulates the server-side timing and advances the
  # paging cursor.
  def process_response(res)
    data = JSON.parse(res.body)
    aggregation = data.dig('aggregations', @aggregation_name) if data.is_a?(Hash)
    unless aggregation
      raise RequestError,
            "response (HTTP #{res.code}) has no #{@aggregation_name.inspect} aggregation: " \
            "#{res.body.to_s[0, 500]}"
    end

    @total_elasticsearch_time += data.fetch('took', 0)

    buckets = aggregation['buckets']
    @field_max_value = buckets.empty? ? nil : buckets.last['key']
    # Without any bucket there is no key to page from; continuing would
    # restart from the first page and never terminate.
    @has_more = aggregation['sum_other_doc_count'].positive? && !buckets.empty?
    buckets
  end

  # Monotonic timestamp in seconds; unaffected by wall-clock adjustments.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-10-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Iterates over the entire index
|
14
14
|
email: dougyouch@gmail.com
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- LICENSE.txt
|
21
21
|
- README.md
|
22
22
|
- elasticsearch_scanner.gemspec
|
23
|
+
- lib/elasticsearch_facet_scanner.rb
|
23
24
|
- lib/elasticsearch_scanner.rb
|
24
25
|
homepage: https://github.com/dougyouch/elasticsearch_scanner
|
25
26
|
licenses:
|