elasticsearch-utils 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +88 -0
- data/Rakefile +2 -0
- data/elasticsearch-utils.gemspec +23 -0
- data/lib/elasticsearch/api/actions/stream.rb +38 -0
- data/lib/elasticsearch/utils/version.rb +5 -0
- data/lib/elasticsearch/utils.rb +7 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 10216ce0da878da2324802434785d0c85a38574e
|
4
|
+
data.tar.gz: bb623a5c05cf2c77d760ef63e0ea6c5fa63980b4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bac93d302849099b9f62d4f5287c87299f09e36efa4269385ad8f0e13afdc6140ebd522539d77d43aa9b888d9db1d1789128b85a695d11ccf6594286e82848da
|
7
|
+
data.tar.gz: b165e2e5ed7868e418684e9cfa47e154234748b75bc980612acdb695a455c491f79aaaf9911ada175e15e4184d57f7a827f824b5c9e584c9327cef499c7f60d6
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Bob Breznak
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# Elasticsearch::Utils
|
2
|
+
|
3
|
+
Adds more cool methods to [`Elasticsearch::Client`](https://github.com/elasticsearch/elasticsearch-ruby) clients.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'elasticsearch-utils'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install elasticsearch-utils
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
### Streaming
|
24
|
+
|
25
|
+
Use streaming when you want to map over all results of a search that may be very large (perhaps in a background job) without worrying about paging. This method leverages the [`scroll`](http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/scan-scroll.html) feature of Elasticsearch to maximize server-side efficiency.
|
26
|
+
|
27
|
+
In this example, we run a search for all Bobs in the index and output their last names. There are a ton of Bobs in Bobland, and deep paging would normally tax the server, so we opt to stream.
|
28
|
+
|
29
|
+
```
|
30
|
+
client = Elasticsearch::Client.new my_elasticsearch_config
|
31
|
+
|
32
|
+
search_body = {
|
33
|
+
query: {
|
34
|
+
match: {
|
35
|
+
name_first: 'bob'
|
36
|
+
}
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
search_params = { index: :bobland, type: :person, body: search_body }
|
41
|
+
|
42
|
+
client.stream search_params do |doc|
|
43
|
+
puts doc['name_last']
|
44
|
+
end
|
45
|
+
```
|
46
|
+
|
47
|
+
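You can pass an initial `memo` as the first argument to `stream` to track state across results. The memo is yielded to the block as its second argument, and the block's return value becomes the memo for the next document. `stream` returns the final memo.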
|
48
|
+
|
49
|
+
```
|
50
|
+
bob_families = SortedSet.new
|
51
|
+
bob_families = client.stream bob_families, search_params do |doc, bob_families|
|
52
|
+
bob_families << doc['name_last']
|
53
|
+
end
|
54
|
+
|
55
|
+
puts "There are #{bob_families.count} families of bobs!"
|
56
|
+
```
|
57
|
+
|
58
|
+
To stop streaming, throw `:stop_stream` like so:
|
59
|
+
|
60
|
+
```
|
61
|
+
memo = client.stream 0, search_params do |doc, memo|
|
62
|
+
# If you are not using `memo`, you could also use `break`
|
63
|
+
throw :stop_stream if memo > 10000
|
64
|
+
|
65
|
+
# Use memo to count total results processed
|
66
|
+
memo += 1
|
67
|
+
end
|
68
|
+
|
69
|
+
puts "Streamed #{memo} bobs!"
|
70
|
+
```
|
71
|
+
|
72
|
+
If sorting is not important for your query, even greater efficiency can be achieved by setting the `search_type` to `scan` like so:
|
73
|
+
|
74
|
+
```
|
75
|
+
search_params[:search_type] = :scan
|
76
|
+
|
77
|
+
client.stream search_params do |doc|
|
78
|
+
# handle each bob out of order
|
79
|
+
end
|
80
|
+
```
|
81
|
+
|
82
|
+
## Contributing
|
83
|
+
|
84
|
+
1. Fork it ( https://github.com/evertrue/elasticsearch-utils/fork )
|
85
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
86
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
87
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
88
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for elasticsearch-utils. Puts the gem's own lib/ directory
# on the load path so the version constant can be required, then declares the
# package metadata, file list and development dependencies.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'elasticsearch/utils/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name    = 'elasticsearch-utils'
  gem.version = Elasticsearch::Utils::VERSION
  gem.license = 'MIT'

  # Ownership and contact
  gem.authors  = ['Andrew Hammond', 'Bob Breznak']
  gem.email    = ['andrew@evertrue.com', 'bob@evertrue.com']
  gem.homepage = 'https://github.com/evertrue/elasticsearch-utils'

  # The description simply reuses the summary.
  gem.summary     = 'Simple utilities built ontop of Elasticsearch'
  gem.description = gem.summary

  # Package contents are taken from git's NUL-separated file listing.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Tooling needed only while developing the gem.
  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake', '~> 10.0'
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Elasticsearch
  module API
    module Actions
      # Runs a search and yields the `_source` of every hit to the block,
      # fetching further batches with `scroll` until a batch comes back empty.
      #
      # Relies on the including object providing `search(opts)` and
      # `scroll(opts)` that return a Hash with `'_scroll_id'` and
      # `'hits' => { 'hits' => [...] }`.
      #
      # @example Stream every matching document
      #   client.stream index: 'scrollindex', scroll: '5m', body: { query: { match: { title: 'test' } } } do |doc|
      #     puts doc['title']
      #   end
      #
      # @example Carry a memo between documents (memo comes first, options last)
      #   total = client.stream 0, index: 'scrollindex', body: { query: { match: { title: 'test' } } } do |doc, count|
      #     count + 1
      #   end
      #
      # @param args [Array] either `(opts)` or `(memo, opts)`. `opts` is the
      #   Hash handed to `search`; its `:scroll` / `'scroll'` entry sets the
      #   scroll value and defaults to `'5m'`. `memo` is an optional initial
      #   value handed to the block.
      # @yield [doc_source, memo] the hit's `_source` and the current memo; the
      #   block's return value becomes the memo for the next hit.
      # @return [Object] the last memo (nil when no memo was given and the
      #   block never ran). Throwing `:stop_stream` inside the block ends the
      #   stream early and returns the memo from the last completed iteration.
      # @raise [ArgumentError] unless one or two arguments and a block are given
      def stream(*args, &block)
        # Reject zero arguments too; otherwise `opts` would be nil below.
        unless (1..2).cover?(args.count)
          raise ArgumentError, "wrong number of arguments (#{args.count} for 1..2)"
        end
        raise ArgumentError, 'no block given' unless block_given?

        opts, memo = *args.reverse

        # Work on a copy so the caller's hash is never modified (and may be frozen).
        opts = opts.dup
        opts[:scroll] = opts[:scroll] || opts['scroll'] || '5m'

        scroll_opts = { :scroll => opts[:scroll] }

        # Accept the search type under either key style, like `scroll` above.
        search_type = opts[:search_type] || opts['search_type']

        catch :stop_stream do
          results = search opts
          scroll_opts[:scroll_id] = results['_scroll_id']

          # For scan searches the first batch is fetched with an extra scroll call.
          results = scroll scroll_opts if search_type.to_s =~ /scan/

          until results['hits']['hits'].empty?
            # Each response carries the id to use for the next scroll call.
            scroll_opts[:scroll_id] = results['_scroll_id']
            results['hits']['hits'].each do |doc|
              memo = yield doc['_source'], memo
            end
            results = scroll scroll_opts
          end
        end

        memo
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: elasticsearch-utils
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Hammond
|
8
|
+
- Bob Breznak
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-08-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.6'
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.6'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '10.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '10.0'
|
42
|
+
description: Simple utilities built ontop of Elasticsearch
|
43
|
+
email:
|
44
|
+
- andrew@evertrue.com
|
45
|
+
- bob@evertrue.com
|
46
|
+
executables: []
|
47
|
+
extensions: []
|
48
|
+
extra_rdoc_files: []
|
49
|
+
files:
|
50
|
+
- ".gitignore"
|
51
|
+
- Gemfile
|
52
|
+
- LICENSE.txt
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
55
|
+
- elasticsearch-utils.gemspec
|
56
|
+
- lib/elasticsearch/api/actions/stream.rb
|
57
|
+
- lib/elasticsearch/utils.rb
|
58
|
+
- lib/elasticsearch/utils/version.rb
|
59
|
+
homepage: https://github.com/evertrue/elasticsearch-utils
|
60
|
+
licenses:
|
61
|
+
- MIT
|
62
|
+
metadata: {}
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options: []
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
requirements: []
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 2.2.2
|
80
|
+
signing_key:
|
81
|
+
specification_version: 4
|
82
|
+
summary: Simple utilities built ontop of Elasticsearch
|
83
|
+
test_files: []
|
84
|
+
has_rdoc:
|