elasticsearch_scanner 0.1.2 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2bb608be40ca3681cdab9f457f157eef0c0a1e84271ced6a56246425f651a4b2
4
- data.tar.gz: dd0390e0fbe9ca0a44f12c45140590be3204bc11bc31aceffc03529248fa0a3f
3
+ metadata.gz: c3c22567dd2652dffd476fd40ed23cc43ec73e07edccf0290c650937333a0f76
4
+ data.tar.gz: e596d443918838c965d52cbc51580f86a850aa398c00cb38685324acecad1815
5
5
  SHA512:
6
- metadata.gz: 27259fcf1dd1871a13b54b93e4b4f2fbae6755f93569d5085bd1460b48f4e42711c290882c16ef9d9399e3cbf70ede8f61ae1b41833d104e69f9b6e96a72365d
7
- data.tar.gz: de0c6f9087181f1337a28cbf58b06503ad1a3062737b91e602552e815cbc02ae591021f77ff191a02af6362fdb14548ec09a2b29a516cc98e741966d272b0d20
6
+ metadata.gz: a5c3f8f2e2b0754f4a6ffbda29eb500731d2ca3a44bb6935f43246bd831f60fd0570863840431df7a82624b12e9a7f042cd9f1a1cf38371bba5a270235e34699
7
+ data.tar.gz: 6e1c4eae77bbf3dee3994e7fa10ec036fc92e00750e8dca985db84b6a53f26cebac00d08eda26eeb6e758f29bb59be9c1e3da85c624a4d84603765a9606f8661
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'elasticsearch_scanner'
5
- s.version = '0.1.2'
5
+ s.version = '0.1.5'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'ElasticSearch index scanner'
8
8
  s.description = 'Iterates over the entire index'
@@ -0,0 +1,103 @@
1
# frozen_string_literal: true

require 'json'
require 'net/http'
require 'uri'

# Enumerates the buckets of a terms aggregation on a single field, one page
# at a time.
#
# The first request asks for `size` buckets ordered ascending by term. While
# the response reports a positive `sum_other_doc_count`, the next request adds
# a range query (`gt` the last bucket key seen) so the aggregation continues
# after the previous page.
#
# Usage sketch:
#
#   scanner = ElasticSearchFacetScanner.new('http://localhost:9200/my_index/_search', 'color')
#   scanner.each { |bucket| puts bucket['key'] }
#
# Every yielded element is one entry of the response's `buckets` array, as
# parsed by JSON.parse.
class ElasticSearchFacetScanner
  include Enumerable

  # Raised when a response body does not contain the requested aggregation
  # (for example when Elasticsearch answered with an error document).
  class ResponseError < StandardError; end

  SCROLL_PATH = '/_search/scroll'

  # Seconds to sleep before the 1st, 2nd, 3rd and 4th retry after a read
  # timeout; later retries wait 1.0 second.
  RETRY_DELAYS = [0.1, 0.2, 0.4, 1.0].freeze
  private_constant :RETRY_DELAYS

  # total_request_time       - accumulated time spent in HTTP requests, in seconds.
  # total_elasticsearch_time - sum of the `took` values of all responses
  #                            (Elasticsearch documents `took` as milliseconds).
  attr_reader :total_request_time,
              :total_elasticsearch_time

  # url         - full search endpoint URL the aggregation request is POSTed to.
  # field       - name of the field to aggregate on.
  # size        - number of buckets requested per page.
  # max_retries - maximum number of attempts per request when the read times out.
  def initialize(url, field, size = 100, max_retries = 5)
    @url = url
    @field = field
    @aggregation_name = "#{field}_terms"
    @size = size
    @max_retries = max_retries
    @has_more = true
    @field_max_value = nil
    @total_request_time = 0.0
    @total_elasticsearch_time = 0.0
  end

  # Yields one array of buckets per request until no further buckets are
  # reported. Returns nil, or an Enumerator when called without a block.
  def each_batch
    return enum_for(:each_batch) unless block_given?

    yield search
    yield search while has_more?

    nil
  end

  # Yields every bucket of every page in turn. Returns nil, or an Enumerator
  # when called without a block.
  def each
    return enum_for(:each) unless block_given?

    each_batch do |results|
      results.each { |result| yield result }
    end
  end

  # True until a response indicates that no buckets remain beyond the ones
  # already returned.
  def has_more?
    @has_more
  end

  private

  # Request body for the first page: no hits, only the terms aggregation.
  # NOTE(review): `_term` ordering was deprecated in favour of `_key` in newer
  # Elasticsearch releases - confirm against the cluster version in use.
  def search_payload
    {
      size: 0,
      aggs: {
        @aggregation_name => {
          terms: {
            field: @field,
            size: @size,
            order: { _term: :asc }
          }
        }
      }
    }
  end

  # Request body for follow-up pages: the same aggregation restricted to
  # documents whose field value is greater than the last bucket key seen.
  def search_payload_with_range
    search_payload.merge(
      query: {
        range: {
          @field => { gt: @field_max_value }
        }
      }
    )
  end

  # Builds and sends the request for the next page; returns its buckets.
  def search
    uri = URI(@url)
    http = Net::HTTP.new(uri.host, uri.port)
    # Without this an https:// URL would be spoken to in plain HTTP.
    http.use_ssl = uri.scheme == 'https'
    req = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
    req.body = (@field_max_value ? search_payload_with_range : search_payload).to_json

    make_request(http, req)
  end

  # Sends req over http, updates the timing counters and the paging state, and
  # returns the array of buckets from the response.
  #
  # Retries on Net::ReadTimeout with a short backoff; once @max_retries
  # attempts have timed out the Net::ReadTimeout is re-raised.
  # Raises ResponseError when the response lacks the expected aggregation.
  def make_request(http, req)
    attempts = 0
    begin
      started_at = monotonic_now
      res = http.request(req)
      @total_request_time += monotonic_now - started_at
      record_page(res)
    rescue Net::ReadTimeout
      attempts += 1
      raise if attempts >= @max_retries

      sleep(RETRY_DELAYS[attempts - 1] || 1.0)
      retry
    end
  end

  # Parses one response, stores the paging cursor and returns its buckets.
  def record_page(res)
    data = JSON.parse(res.body)
    aggregation = data.dig('aggregations', @aggregation_name)
    unless aggregation
      raise ResponseError,
            "no '#{@aggregation_name}' aggregation in Elasticsearch response: #{res.body.to_s[0, 200]}"
    end

    @total_elasticsearch_time += data['took'].to_f
    buckets = aggregation['buckets'] || []
    # An empty page cannot advance the cursor, so stop instead of requesting
    # the first page again forever.
    @has_more = !buckets.empty? && aggregation['sum_other_doc_count'].to_i > 0
    @field_max_value = buckets.empty? ? nil : buckets.last['key']
    buckets
  end

  # Monotonic timestamp in seconds; unaffected by wall-clock adjustments.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-19 00:00:00.000000000 Z
11
+ date: 2019-10-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Iterates over the entire index
14
14
  email: dougyouch@gmail.com
@@ -20,6 +20,7 @@ files:
20
20
  - LICENSE.txt
21
21
  - README.md
22
22
  - elasticsearch_scanner.gemspec
23
+ - lib/elasticsearch_facet_scanner.rb
23
24
  - lib/elasticsearch_scanner.rb
24
25
  homepage: https://github.com/dougyouch/elasticsearch_scanner
25
26
  licenses: