elasticsearch-utils 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 10216ce0da878da2324802434785d0c85a38574e
4
+ data.tar.gz: bb623a5c05cf2c77d760ef63e0ea6c5fa63980b4
5
+ SHA512:
6
+ metadata.gz: bac93d302849099b9f62d4f5287c87299f09e36efa4269385ad8f0e13afdc6140ebd522539d77d43aa9b888d9db1d1789128b85a695d11ccf6594286e82848da
7
+ data.tar.gz: b165e2e5ed7868e418684e9cfa47e154234748b75bc980612acdb695a455c491f79aaaf9911ada175e15e4184d57f7a827f824b5c9e584c9327cef499c7f60d6
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems from the public RubyGems index.
source('https://rubygems.org')

# Dependencies are not listed here; they are declared in
# elasticsearch-utils.gemspec and pulled in through `gemspec`.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Bob Breznak
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ # Elasticsearch::Utils
2
+
3
+ Adds more cool methods to [`Elasticsearch::Client`](https://github.com/elasticsearch/elasticsearch-ruby) clients.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'elasticsearch-utils'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install elasticsearch-utils
20
+
21
+ ## Usage
22
+
23
+ ### Streaming
24
+
25
+ Use `stream` when you want to iterate over every result of a search that may be very large (for example, in a background job) without worrying about paging. The method leverages the [`scroll`](http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/scan-scroll.html) feature of Elasticsearch to maximize server-side efficiency.
26
+
27
+ In this example, we search for all Bobs in the index and print their last names. There are a ton of Bobs in Bobland, and deep paging would normally tax the server, so we opt to stream.
28
+
29
+ ```
30
+ client = Elasticsearch::Client.new my_elasticsearch_config
31
+
32
+ search_body = {
33
+ query: {
34
+ match: {
35
+ name_first: 'bob'
36
+ }
37
+ }
38
+ }
39
+
40
+ search_params = { index: :bobland, type: :person, body: search_body }
41
+
42
+ client.stream search_params do |doc|
43
+ puts doc['name_last']
44
+ end
45
+ ```
46
+
47
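+ You can pass an initial `memo` as the first argument to `stream` to track state across results. The memo is yielded to the block along with each document, the block's return value becomes the memo for the next document, and `stream` returns the final memo.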
48
+
49
+ ```
50
+ bob_families = SortedSet.new
51
+ bob_families = client.stream bob_families, search_params do |doc, bob_families|
52
+ bob_families << doc['name_last']
53
+ end
54
+
55
+ puts "There are #{bob_families.count} families of bobs!"
56
+ ```
57
+
58
+ To stop streaming, throw `:stop_stream` like so:
59
+
60
+ ```
61
+ memo = client.stream 0, search_params do |doc, memo|
62
+ # If you are not using `memo`, you could also use `break`
63
+ throw :stop_stream if memo > 10000
64
+
65
+ # Use memo to count total results processed
66
+ memo += 1
67
+ end
68
+
69
+ puts "Streamed #{memo} bobs!"
70
+ ```
71
+
72
+ If sorting is not important for your query, even greater efficiency can be achieved by setting the `search_type` to `scan` like so:
73
+
74
+ ```
75
+ search_params[:search_type] = :scan
76
+
77
+ client.stream search_params do |doc|
78
+ # handle each bob out of order
79
+ end
80
+ ```
81
+
82
+ ## Contributing
83
+
84
+ 1. Fork it ( https://github.com/[my-github-username]/elasticsearch-utils/fork )
85
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
86
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
87
+ 4. Push to the branch (`git push origin my-new-feature`)
88
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for elasticsearch-utils.

# Put this gem's own lib/ directory on the load path so the version file can
# be required straight from the source tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'elasticsearch/utils/version'

Gem::Specification.new do |spec|
  spec.name          = 'elasticsearch-utils'
  spec.version       = Elasticsearch::Utils::VERSION
  spec.authors       = ['Andrew Hammond', 'Bob Breznak']
  spec.email         = ['andrew@evertrue.com', 'bob@evertrue.com']
  spec.summary       = 'Simple utilities built ontop of Elasticsearch'
  spec.description   = spec.summary
  spec.homepage      = 'https://github.com/evertrue/elasticsearch-utils'
  spec.license       = 'MIT'

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # The library reopens Elasticsearch::API::Actions and calls its `search` and
  # `scroll` methods, so the gem providing that API must be installed at
  # runtime. No version constraint is imposed here.
  spec.add_runtime_dependency 'elasticsearch-api'

  spec.add_development_dependency 'bundler', '~> 1.6'
  spec.add_development_dependency 'rake', '~> 10.0'
end
@@ -0,0 +1,38 @@
1
module Elasticsearch
  module API
    module Actions
      # Runs a search and yields the `_source` of every hit to the block,
      # fetching further pages through `scroll` until a page comes back empty.
      #
      # The including object must respond to `search(opts)` and
      # `scroll(opts)`, both returning a response hash with `'_scroll_id'`
      # and `'hits' => { 'hits' => [...] }`.
      #
      # The block receives `(doc_source, memo)`; whatever it returns becomes
      # the memo handed to the next document. Throw `:stop_stream` from the
      # block to stop early; the memo from the last completed call is returned.
      #
      # @example Visit every hit
      #   client.stream index: 'scrollindex', scroll: '5m', body: { query: { title: 'test' } } do |doc|
      #     puts doc['title']
      #   end
      #
      # @example Carry a counter between hits
      #   total = client.stream 0, index: 'scrollindex', body: { query: { title: 'test' } } do |_doc, count|
      #     count + 1
      #   end
      #
      # @overload stream(opts)
      #   @param opts [Hash] options forwarded to `search`. `:scroll` (or
      #     `'scroll'`) sets the scroll keep-alive and defaults to `'5m'`.
      #     The hash is copied, never modified.
      # @overload stream(memo, opts)
      #   @param memo [Object] initial memo yielded with the first document
      #   @param opts [Hash] see above
      # @yield [doc_source, memo] once per hit
      # @return [Object] the memo returned by the last completed block call
      #   (the initial memo, or nil, when nothing was yielded)
      # @raise [ArgumentError] when called with zero or more than two
      #   arguments, or without a block
      def stream(*args, &block)
        unless args.count.between?(1, 2)
          raise ArgumentError, "wrong number of arguments (#{args.count} for 1..2)"
        end
        raise ArgumentError, 'no block given' unless block_given?

        opts, memo = *args.reverse

        # Work on a copy so the caller's hash (possibly frozen or reused for
        # another request) is left untouched.
        opts = opts.dup
        opts[:scroll] = opts[:scroll] || opts['scroll'] || '5m'

        # Accept the search type under either key style, like :scroll above.
        search_type = opts[:search_type] || opts['search_type']

        scroll_opts = { scroll: opts[:scroll] }

        catch :stop_stream do
          results = search opts
          scroll_opts[:scroll_id] = results['_scroll_id']

          # For scan searches the first page of hits is fetched with an extra
          # `scroll` call (the initial `search` response is expected to carry
          # no hits in that mode).
          results = scroll scroll_opts if search_type.to_s =~ /scan/

          until results['hits']['hits'].empty?
            # Each response may carry a new scroll id; always use the latest.
            scroll_opts[:scroll_id] = results['_scroll_id']

            results['hits']['hits'].each do |doc|
              memo = yield doc['_source'], memo
            end

            results = scroll scroll_opts
          end
        end

        memo
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Elasticsearch
  module Utils
    # Release version of the elasticsearch-utils gem. Frozen so the shared
    # constant cannot be modified in place by accident.
    VERSION = '0.0.1'.freeze
  end
end
@@ -0,0 +1,7 @@
1
+ require 'elasticsearch/utils/version'
2
+ require 'elasticsearch/api/actions/stream'
3
+
4
module Elasticsearch
  # Namespace of the elasticsearch-utils gem. Nothing is defined in this
  # body; it only makes sure the constant exists.
  module Utils; end
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch-utils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Hammond
8
+ - Bob Breznak
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-08-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.6'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.6'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '10.0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '10.0'
42
+ description: Simple utilities built ontop of Elasticsearch
43
+ email:
44
+ - andrew@evertrue.com
45
+ - bob@evertrue.com
46
+ executables: []
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - ".gitignore"
51
+ - Gemfile
52
+ - LICENSE.txt
53
+ - README.md
54
+ - Rakefile
55
+ - elasticsearch-utils.gemspec
56
+ - lib/elasticsearch/api/actions/stream.rb
57
+ - lib/elasticsearch/utils.rb
58
+ - lib/elasticsearch/utils/version.rb
59
+ homepage: https://github.com/evertrue/elasticsearch-utils
60
+ licenses:
61
+ - MIT
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.2.2
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: Simple utilities built ontop of Elasticsearch
83
+ test_files: []
84
+ has_rdoc: