elasticsearch_scanner 0.1.2 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2bb608be40ca3681cdab9f457f157eef0c0a1e84271ced6a56246425f651a4b2
4
- data.tar.gz: dd0390e0fbe9ca0a44f12c45140590be3204bc11bc31aceffc03529248fa0a3f
3
+ metadata.gz: c3c22567dd2652dffd476fd40ed23cc43ec73e07edccf0290c650937333a0f76
4
+ data.tar.gz: e596d443918838c965d52cbc51580f86a850aa398c00cb38685324acecad1815
5
5
  SHA512:
6
- metadata.gz: 27259fcf1dd1871a13b54b93e4b4f2fbae6755f93569d5085bd1460b48f4e42711c290882c16ef9d9399e3cbf70ede8f61ae1b41833d104e69f9b6e96a72365d
7
- data.tar.gz: de0c6f9087181f1337a28cbf58b06503ad1a3062737b91e602552e815cbc02ae591021f77ff191a02af6362fdb14548ec09a2b29a516cc98e741966d272b0d20
6
+ metadata.gz: a5c3f8f2e2b0754f4a6ffbda29eb500731d2ca3a44bb6935f43246bd831f60fd0570863840431df7a82624b12e9a7f042cd9f1a1cf38371bba5a270235e34699
7
+ data.tar.gz: 6e1c4eae77bbf3dee3994e7fa10ec036fc92e00750e8dca985db84b6a53f26cebac00d08eda26eeb6e758f29bb59be9c1e3da85c624a4d84603765a9606f8661
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'elasticsearch_scanner'
5
- s.version = '0.1.2'
5
+ s.version = '0.1.5'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'ElasticSearch index scanner'
8
8
  s.description = 'Iterates over the entire index'
@@ -0,0 +1,103 @@
# frozen_string_literal: true

require 'json'
require 'net/http'
require 'uri'

# Enumerates the buckets of a terms aggregation on a single field, page by
# page. Each page is fetched by POSTing a terms aggregation (ordered ascending
# by term) to the search URL; subsequent pages add a range query restricting
# the field to values greater than the last bucket key already returned.
#
# Every enumeration (`each` / `each_batch`) starts again from the first term,
# so the scanner can be iterated repeatedly and works with Enumerable helpers
# such as `first`, `map` and `to_a`.
class ElasticSearchFacetScanner
  include Enumerable

  SCROLL_PATH = '/_search/scroll'

  # Seconds to sleep before the 1st, 2nd, ... retry after a read timeout.
  # Retries beyond the end of this list wait 1.0 second.
  RETRY_DELAYS = [0.1, 0.2, 0.4, 1.0].freeze

  # total_request_time: seconds spent waiting on HTTP requests (successful
  # attempts only). total_elasticsearch_time: sum of the `took` values from
  # the responses (Elasticsearch reports `took` in milliseconds).
  attr_reader :total_request_time,
              :total_elasticsearch_time

  # @param url [String] full search endpoint URL (http or https)
  # @param field [String] field whose terms are enumerated
  # @param size [Integer] number of buckets requested per page
  # @param max_retries [Integer] maximum number of attempts per request when
  #   the read times out
  def initialize(url, field, size=100, max_retries = 5)
    @url = url
    @field = field
    @aggregation_name = "#{field}_terms"
    @size = size
    @max_retries = max_retries
    @total_request_time = 0.0
    @total_elasticsearch_time = 0.0
    reset_cursor
  end

  # Yields one array of buckets per page, always issuing at least one request.
  # Restarts from the first term on every call.
  #
  # @return [nil, Enumerator] an Enumerator when no block is given
  def each_batch
    return enum_for(:each_batch) unless block_given?

    # Without this reset a second enumeration (or one following an early
    # `break`) would resume from the previous cursor and could yield nothing.
    reset_cursor

    yield search
    yield search while has_more?

    nil
  end

  # Yields every bucket of every page.
  #
  # @return [nil, Enumerator] an Enumerator when no block is given
  def each(&block)
    return enum_for(:each) unless block_given?

    each_batch { |results| results.each(&block) }
  end

  # @return [Boolean] whether the last response indicated further terms
  #   (true before the first request of an enumeration)
  def has_more?
    @has_more
  end

  private

  # Puts the paging cursor back at the beginning of the term list.
  def reset_cursor
    @field_max_value = nil
    @has_more = true
  end

  # Request body for the first page: no hits, only the terms aggregation.
  # NOTE(review): ordering by `_term` was deprecated in favor of `_key` in
  # newer Elasticsearch releases - confirm against the targeted version.
  def search_payload
    {
      size: 0,
      aggs: {
        @aggregation_name => {
          terms: {
            field: @field,
            size: @size,
            order: {
              _term: :asc
            }
          }
        }
      }
    }
  end

  # Request body for follow-up pages: same aggregation, limited to documents
  # whose field value is greater than the last key already returned.
  def search_payload_with_range
    search_payload.merge(
      query: {
        range: {
          @field => {
            gt: @field_max_value
          }
        }
      }
    )
  end

  # Builds and sends the request for the next page.
  #
  # @return [Array<Hash>] the buckets of that page
  def search
    uri = URI(@url)
    http = Net::HTTP.new(uri.host, uri.port)
    # Net::HTTP speaks plain HTTP unless told otherwise, even on port 443.
    http.use_ssl = uri.scheme == 'https'
    req = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
    req.body = (@field_max_value ? search_payload_with_range : search_payload).to_json

    make_request(http, req)
  end

  # Sends the request, retrying on read timeouts with a short back-off, and
  # updates timing totals and the paging cursor from the response.
  #
  # @raise [Net::ReadTimeout] once @max_retries attempts have timed out
  # @raise [RuntimeError] when the response lacks the expected aggregation
  # @return [Array<Hash>] the buckets of the page
  def make_request(http, req)
    # Monotonic clock: immune to wall-clock adjustments while measuring.
    request_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    res = http.request(req)
    @total_request_time += Process.clock_gettime(Process::CLOCK_MONOTONIC) - request_started_at
    handle_response(res)
  rescue Net::ReadTimeout
    # `attempts` lives in the method frame, so it survives `retry`.
    attempts ||= 0
    attempts += 1
    if attempts < @max_retries
      sleep(RETRY_DELAYS.fetch(attempts - 1, 1.0))
      retry
    end
    raise
  end

  # Parses a response, records Elasticsearch's own timing and advances the
  # paging cursor.
  def handle_response(res)
    data = JSON.parse(res.body)
    aggregation = data.dig('aggregations', @aggregation_name) if data.is_a?(Hash)
    unless aggregation
      error = data.is_a?(Hash) ? data['error'] : data
      raise "Elasticsearch response (HTTP #{res.code}) has no '#{@aggregation_name}' " \
            "aggregation: #{error.inspect}"
    end

    @total_elasticsearch_time += data.fetch('took', 0)
    buckets = aggregation['buckets']
    # An empty page cannot advance the cursor, so never ask for more after one.
    @has_more = !buckets.empty? && aggregation['sum_other_doc_count'] > 0
    @field_max_value = buckets.empty? ? nil : buckets.last['key']
    buckets
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-19 00:00:00.000000000 Z
11
+ date: 2019-10-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Iterates over the entire index
14
14
  email: dougyouch@gmail.com
@@ -20,6 +20,7 @@ files:
20
20
  - LICENSE.txt
21
21
  - README.md
22
22
  - elasticsearch_scanner.gemspec
23
+ - lib/elasticsearch_facet_scanner.rb
23
24
  - lib/elasticsearch_scanner.rb
24
25
  homepage: https://github.com/dougyouch/elasticsearch_scanner
25
26
  licenses: