elasticsearch_scanner 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0c430016b9eba6aecdffecc2da894a83735979d58c74f0a0f6f1142de91db67f
4
+ data.tar.gz: 4d0279810b2b5306524101d7dbd5ee70c2ce63f7ddda5b155019b60d9bd3ded3
5
+ SHA512:
6
+ metadata.gz: 7eb2d0443e03e24ff1d9021ee6e1883069e6281e57a22416b427fa9efc75b7c3ecb443112da319d82b11c44d10a86b1307dc9cdb423512e66c7542ea510709f6
7
+ data.tar.gz: 015d278b62b07893b32383d8b6d3b286ac6e968ca203b01c920588c8832d9a5127eecda4ef5461d9de62d987c46dee55a2186467424840500bb6cf9464cca530
data/.gitignore ADDED
@@ -0,0 +1,51 @@
1
+ # rcov generated
2
+ coverage
3
+ coverage.data
4
+
5
+ # rdoc generated
6
+ rdoc
7
+
8
+ # yard generated
9
+ doc
10
+ .yardoc
11
+
12
+ # bundler
13
+ .bundle
14
+
15
+ # jeweler generated
16
+ pkg
17
+
18
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
19
+ #
20
+ # * Create a file at ~/.gitignore
21
+ # * Include files you want ignored
22
+ # * Run: git config --global core.excludesfile ~/.gitignore
23
+ #
24
+ # After doing this, these files will be ignored in all your git projects,
25
+ # saving you from having to 'pollute' every project you touch with them
26
+ #
27
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember, remove the leading # of the line)
28
+ #
29
+ # For MacOS:
30
+ #
31
+ .DS_Store
32
+
33
+ # For TextMate
34
+ #*.tmproj
35
+ #tmtags
36
+
37
+ # For emacs:
38
+ *~
39
+ \#*
40
+ .\#*
41
+
42
+ # For vim:
43
+ *.swp
44
+
45
+ # For redcar:
46
+ #.redcar
47
+
48
+ # For rubinius:
49
+ #*.rbc
50
+
51
+ *.gem
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Douglas Youch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1 @@
1
+ # elasticsearch_scanner
@@ -0,0 +1,13 @@
# frozen_string_literal: true

# Gem specification for elasticsearch_scanner.
#
# The packaged file list is taken from the files tracked by git, leaving out
# anything that lives under a top-level test/, spec/ or features/ directory.
Gem::Specification.new do |spec|
  spec.name        = 'elasticsearch_scanner'
  spec.version     = '0.1.0'
  spec.licenses    = ['MIT']
  spec.summary     = 'ElasticSearch index scanner'
  spec.description = 'Iterates over the entire index'
  spec.authors     = ['Doug Youch']
  spec.email       = 'dougyouch@gmail.com'
  spec.homepage    = 'https://github.com/dougyouch/elasticsearch_scanner'

  # `git ls-files -z` separates paths with NUL bytes, so split on "\x0".
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.grep_v(%r{^(test|spec|features)/})
end
@@ -0,0 +1,89 @@
# frozen_string_literal: true

require 'json'
require 'net/http'
require 'uri'

# Walks every hit of a search by paging through it with scroll requests.
#
# The first page is fetched by POSTing the search payload to +url+ with a
# +scroll+ query parameter; every following page is fetched by POSTing the
# scroll id returned by the previous response to SCROLL_PATH on the same host.
#
# The class is Enumerable, so +map+, +select+, +first+, etc. work on hits:
#
#   scanner = ElasticSearchScanner.new('http://localhost:9200/idx/_search', { match_all: {} })
#   scanner.each { |hit| puts hit['_id'] }
class ElasticSearchScanner
  include Enumerable

  # Raised when a response is not a 2xx or does not contain a +hits.hits+ array.
  class RequestError < StandardError; end

  SCROLL_PATH = '/_search/scroll'

  # total_request_time:       seconds spent waiting on HTTP requests (Float).
  # total_elasticsearch_time: sum of the +took+ values reported in the responses.
  attr_reader :total_request_time,
              :total_elasticsearch_time

  # Value sent as +_source+ in the search payload (+true+ means all fields).
  attr_writer :fields_to_return

  # @param url [String] full URL of the search endpoint (http or https)
  # @param query [Hash] value sent as +query+ in the search payload
  # @param size [Integer] number of hits requested per page
  # @param scroll_ttl [String] value sent as the +scroll+ parameter
  def initialize(url, query, size = 100, scroll_ttl = '1m')
    @url = url
    @query = query
    @size = size
    @scroll_ttl = scroll_ttl
    @fields_to_return = true # all fields
    @has_more = true
    @total_request_time = 0.0
    @total_elasticsearch_time = 0.0
  end

  # Yields each page of hits (an Array) in turn.
  #
  # @return [nil, Enumerator] nil once every page has been yielded, or an
  #   Enumerator when called without a block (no request is made in that case)
  # @raise [RequestError] when a response cannot be used
  def each_batch
    return enum_for(:each_batch) unless block_given?

    yield search
    yield scroll while has_more?

    nil
  end

  # Yields every hit of every page.
  #
  # @return [nil, Enumerator] see #each_batch
  def each(&block)
    return enum_for(:each) unless block_given?

    each_batch { |results| results.each(&block) }
  end

  # @return [Boolean] whether the last page received was a full page
  def has_more?
    @has_more
  end

  private

  def search_payload
    {
      size: @size,
      _source: @fields_to_return,
      query: @query
    }
  end

  # Issues the initial search request.
  def search
    uri = URI(@url)
    # Append to any query string already present instead of blindly adding
    # '?scroll=...', which would corrupt a URL that already contains '?'.
    scroll_param = URI.encode_www_form(scroll: @scroll_ttl)
    uri.query = [uri.query, scroll_param].reject { |part| part.nil? || part.empty? }.join('&')

    post_json(uri, uri.request_uri, search_payload)
  end

  # Fetches the next page using the scroll id of the previous response.
  def scroll
    post_json(URI(@url), SCROLL_PATH, scroll: @scroll_ttl, scroll_id: @scroll_id)
  end

  # Builds the connection and JSON POST request, then hands them to #make_request.
  def post_json(uri, path, payload)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == 'https' # without this, https URLs are spoken to in plain text
    req = Net::HTTP::Post.new(path, 'Content-Type' => 'application/json')
    req.body = payload.to_json

    make_request(http, req)
  end

  # Sends +req+, updates the timing counters and scroll state, and returns the
  # page of hits.
  #
  # @raise [RequestError] on a non-2xx response or a body without hits.hits
  # @raise [JSON::ParserError] when a 2xx body is not valid JSON
  def make_request(http, req)
    # Monotonic clock: unaffected by wall-clock adjustments during the request.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    res = http.request(req)
    @total_request_time += Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    unless res.is_a?(Net::HTTPSuccess)
      raise RequestError, "Elasticsearch request to #{req.path} failed: #{res.code} #{res.body}"
    end

    data = JSON.parse(res.body)
    hits = data.is_a?(Hash) ? data.dig('hits', 'hits') : nil
    raise RequestError, "Elasticsearch response from #{req.path} has no hits: #{res.body}" unless hits.is_a?(Array)

    @total_elasticsearch_time += data['took'].to_f
    @scroll_id = data['_scroll_id']
    # A short page means the scroll is exhausted. An empty page must also stop
    # the loop, otherwise size == 0 would compare 0 == 0 and never terminate.
    @has_more = !hits.empty? && hits.size == @size
    hits
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch_scanner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Doug Youch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-09-19 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Iterates over the entire index
14
+ email: dougyouch@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - ".gitignore"
20
+ - LICENSE.txt
21
+ - README.md
22
+ - elasticsearch_scanner.gemspec
23
+ - lib/elasticsearch_scanner.rb
24
+ homepage: https://github.com/dougyouch/elasticsearch_scanner
25
+ licenses:
26
+ - MIT
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubygems_version: 3.0.3
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: ElasticSearch index scanner
47
+ test_files: []