eson-more 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Florian Gilcher <florian.gilcher@asquera.de>, Felix Gilcher <felix.gilcher@asquera.de>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,16 @@
1
+ # eson-more. Handy functions for ElasticSearch
2
+
3
+ eson-more implements things you don't necessarily need, but which come in handy from time to time.
4
+
5
+ eson-more is in an experimental stage, so please don't rely on details.
6
+
7
+ ## Usage
8
+
9
+ require 'eson-more'
10
+
11
+ c = Eson::HTTP::Client.new
12
+
13
+ # print the whole index in chunks
14
+ c.all(:index => "default") do |results|
15
+ puts results
16
+ end
@@ -0,0 +1,31 @@
1
# Rake tasks for eson-more: gemspec validation, packaging, tests and seeding.
require 'rake/testtask'
require 'rubygems/package_task'

# Loads eson-more.gemspec (resolved against the current working directory)
# and memoizes the result for the other tasks.
#
# @return [Gem::Specification, nil] the loaded specification
def gemspec
  @gemspec ||= ::Gem::Specification.load(File.expand_path("eson-more.gemspec"))
end

desc "Validates the gemspec"
task :gemspec do
  gemspec.validate
end

Gem::PackageTask.new(gemspec) do |pkg|
  pkg.gem_spec = gemspec
end

# Never package a gem whose specification does not validate.
task :package => :gemspec

Rake::TestTask.new(:test) do |test|
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

desc "Seeds the test node with the fixture indices"
task :seed do
  # The seed script relies on Node::External and the Eson client, which are
  # only set up by test_config; test_config loads ./test/seeds/seeds itself.
  require './test/test_config'
end
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ #require "./lib/elsearch"
4
+
5
# Gem specification for eson-more. File lists are taken from git, so the
# gem has to be built from inside a checkout.
Gem::Specification.new do |spec|
  spec.name        = "eson-more"
  spec.version     = "0.8.0"

  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Florian Gilcher"]
  spec.email       = ["florian.gilcher@asquera.de"]
  spec.homepage    = ""
  spec.summary     = %q{A modular client for ElasticSearch - additional functions}
  # NOTE(review): the continuation lines' leading whitespace is part of the
  # string; confirm it against the repository copy.
  spec.description = %q{A modular client for ElasticSearch. It provides
  an implementation of the Query language as well as multiple client implementations
  for HTTP and native access.}

  tracked_files   = `git ls-files`.split("\n")
  tracked_tests   = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.files      = tracked_files
  spec.test_files = tracked_tests
  #s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "eson-core"
  spec.add_development_dependency "elasticsearch-node"
  spec.add_development_dependency "riot"
end
@@ -0,0 +1,6 @@
1
+ require 'eson-core'
2
+
3
+ require_relative 'eson/more/extract'
4
+ require_relative 'eson/more/all'
5
+ require_relative 'eson/more/transplant'
6
+ require_relative 'eson/more/reindex'
@@ -0,0 +1,46 @@
1
module Eson
  module More
    # Adds #all to a client: a scan/scroll search that walks every matching
    # document. The including object must respond to #search, #scroll and
    # #extract_hits.
    module All
      # Search options used unless the caller of #all overrides them.
      DEFAULT_OPTS = {:search_type => :scan, :scroll => "10m", :size => 50}.freeze

      module Functions
        # Fetches the next page of an open scroll.
        #
        # @param [#scroll, #extract_hits] client the client to query with
        # @param [String] scroll_id the id returned by the previous request
        # @param [String] scroll the keep-alive to send with the request;
        #   defaults to the keep-alive in DEFAULT_OPTS
        # @return [Array] the next scroll id (nil once a page comes back
        #   empty) followed by the hits of this page
        def self.fetch_more_results(client, scroll_id, scroll = DEFAULT_OPTS[:scroll])
          result  = client.scroll(:scroll_id => scroll_id, :scroll => scroll)
          results = client.extract_hits(result)
          # An empty page marks the end of the scroll.
          next_id = results.empty? ? nil : result["_scroll_id"]

          return next_id, results
        end
      end

      # Retrieves all documents matching a given option set. Automatically
      # handles scrolling.
      #
      # With a block, every page of hits is yielded as it arrives, including
      # the final empty page that ends the scroll, and nil is returned.
      # Without a block, all hits are collected and returned as one Array.
      #
      # @param [Hash] opts the options to retrieve documents; merged over
      #   DEFAULT_OPTS. A :scroll value given here is also used for the
      #   follow-up scroll requests.
      # @return [Array, nil] all hits, or nil when a block is given
      def all(opts = {})
        search_opts = DEFAULT_OPTS.merge(opts)
        keep_alive  = search_opts[:scroll] || DEFAULT_OPTS[:scroll]
        scroll_id   = search(search_opts)["_scroll_id"]

        if block_given?
          while scroll_id
            scroll_id, results = Functions.fetch_more_results(self, scroll_id, keep_alive)
            yield results
          end
          nil
        else
          docs = []
          while scroll_id
            scroll_id, results = Functions.fetch_more_results(self, scroll_id, keep_alive)
            # concat appends in place instead of copying the array per page
            docs.concat(results)
          end
          docs
        end
      end
    end
  end
end
43
+
44
# Make #all available on every Eson client.
Eson::Client.send(:include, Eson::More::All)
@@ -0,0 +1,23 @@
1
module Eson
  module More
    # Helpers that pull the interesting parts out of a search response body.
    module Extract
      # Returns the "_source" hash of every hit in a search result.
      #
      # @param [Hash] response the response body as returned by a search request.
      # @return [Array] one "_source" value per hit, in hit order
      def extract_sources(response)
        hits = extract_hits(response)
        hits.map { |hit| hit["_source"] }
      end

      # Returns the raw hit list of a search result.
      #
      # @param [Hash] response the response body as returned by a search request.
      # @return [Array] the value stored under "hits" -> "hits"
      def extract_hits(response)
        envelope = response["hits"]
        envelope["hits"]
      end
    end
  end
end
20
+
21
# Make the extract helpers available on every Eson client.
Eson::Client.send(:include, Eson::More::Extract)
@@ -0,0 +1,29 @@
1
module Eson
  module More
    # Adds #reindex to a client. The including object must provide #all
    # (yielding pages of hits) and #bulk.
    module Reindex
      # Copies every document of one index into another: the source index is
      # scrolled page by page and each non-empty page is written with a
      # single bulk request, keeping type, id and source of every hit.
      #
      # @param [String] from the index to reindex from
      # @param [String] to the index to reindex to
      def reindex(from, to)
        all(:index => from) do |page|
          # The scroll may hand over an empty page; there is nothing to write then.
          next unless page.size > 0

          bulk do |request|
            page.each do |hit|
              request.index(:index => to,
                            :type  => hit["_type"],
                            :id    => hit["_id"],
                            :doc   => hit["_source"])
            end
          end
        end
      end
    end
  end
end
26
+
27
# Make #reindex available on every Eson client.
Eson::Client.send(:include, Eson::More::Reindex)
@@ -0,0 +1,101 @@
1
module Eson
  module More
    # Adds #transplant and #merge_parents to a client. The including object
    # must provide #all, #search, #extract_hits, #bulk and #delete.
    module Transplant
      module Functions
        # Maps each argument to a document id.
        #
        # Strings, Integers and nil are passed through, Hashes are looked up
        # under :id / "id", anything else is asked for #id.
        #
        # @param [Array<String, Integer, Hash, #id, nil>] args
        # @return [Array] one id (or nil) per argument, in order
        def self.extract_ids(*args)
          args.map do |d|
            case d
            # Integer replaces Fixnum, which no longer exists in Ruby >= 3.2.
            when String, Integer, nil
              d
            when Hash
              d[:id] || d["id"]
            else
              d.id
            end
          end
        end

        # Fetches all children of a parent document.
        #
        # @param [#all] client the client to query with
        # @param parent the id of the parent document, also used as routing
        # @return [Array] the child hits
        def self.get_children(client, parent)
          client.all(:type => nil, :q => "_parent:#{parent}", :routing => parent)
        end

        # Fetches a single child document as a list of hits.
        #
        # @param [#search, #extract_hits] client the client to query with
        # @param parent the id of the parent document, used as routing
        # @param doc the id of the document to find
        # @return [Array] the matching hits
        def self.get_doc(client, parent, doc)
          client.extract_hits(
            client.search(:type => nil, :q => "_id:#{doc}", :routing => parent)
          )
        end
      end

      # Transplants a document from one parent to another by deleting the old
      # one and reindexing it with a new parent. If no doc is given, all
      # documents of the old parent will be transplanted.
      #
      # To change the index and parent type this command operates on,
      # use Client#with:
      #
      #   client.with :index => "foo", :type => "blog" do |c|
      #     c.transplant(1, 2, 3)
      #   end
      #
      # Transplant does not refresh automatically.
      #
      # @param [String, Integer, Hash, Object#id] old_parent the old parent
      #   document, or anything its ID can be retrieved from.
      # @param [String, Integer, Hash, Object#id] new_parent the new parent
      #   document, or anything its ID can be retrieved from.
      # @param [String, Integer, Hash, Object#id, nil] doc the document to
      #   transplant, or anything its ID can be retrieved from.
      # @return the result of the bulk request, or nil if nothing matched
      def transplant(old_parent, new_parent, doc = nil)
        old_pid, new_pid, doc_id = Functions.extract_ids(old_parent, new_parent, doc)

        hits = if doc.nil?
                 Functions.get_children(self, old_pid)
               else
                 # Query by the extracted id, not by the raw argument, so
                 # Hash and object documents work as well.
                 Functions.get_doc(self, old_pid, doc_id)
               end

        return if hits.empty?

        bulk do |b|
          hits.each do |hit|
            # Children are routed by parent: delete from the old parent's
            # shard, then index again under the new parent.
            b.delete :type    => hit["_type"],
                     :id      => hit["_id"],
                     :routing => old_pid
            b.index  :type   => hit["_type"],
                     :id     => hit["_id"],
                     :parent => new_pid,
                     :doc    => hit["_source"]
          end
        end
      end

      # Like #transplant, but removes the old parent.
      #
      # @param [String, Integer, Hash, Object#id] old_parent the old parent
      #   document, or anything its ID can be retrieved from.
      # @param [String, Integer, Hash, Object#id] new_parent the new parent
      #   document, or anything its ID can be retrieved from.
      def merge_parents(old_parent, new_parent)
        transplant(old_parent, new_parent)
        # Delete by id, so Hash and object parents work like they do in #transplant.
        delete(:id => Functions.extract_ids(old_parent).first)
      end
    end
  end
end
98
+
99
# Make #transplant and #merge_parents available on every Eson client.
Eson::Client.send(:include, Eson::More::Transplant)
@@ -0,0 +1,18 @@
1
+ log4j.rootLogger=INFO, out
2
+ log4j.logger.jgroups=WARN
3
+
4
+ #log4j.logger.discovery=TRACE
5
+ #log4j.logger.cluster=TRACE
6
+ #log4j.logger.indices.cluster=DEBUG
7
+ #log4j.logger.index=TRACE
8
+ #log4j.logger.index.engine=DEBUG
9
+ #log4j.logger.index.shard=TRACE
10
+ #log4j.logger.index.cache=DEBUG
11
+ #log4j.logger.http=TRACE
12
+ #log4j.logger.monitor.jvm=DEBUG
13
+ #log4j.logger.cluster.action.shard=TRACE
14
+ #log4j.logger.index.gateway=TRACE
15
+
16
+ log4j.appender.out=org.apache.log4j.ConsoleAppender
17
+ log4j.appender.out.layout=org.apache.log4j.PatternLayout
18
+ log4j.appender.out.layout.ConversionPattern=[%d{ABSOLUTE}][%-5p][%-25c] %m%n
@@ -0,0 +1,17 @@
1
+ require './test/test_config'
2
+
3
# Checks that Client#all without a block returns every document of the
# seeded "all" index as one Array.
context "All" do
  helper(:node) { Node::External.instance }

  helper(:client) do
    server_url = "http://#{node.ip}:#{node.port}"
    Eson::HTTP::Client.new(:server => server_url, :logger => 'test/test.log')
  end

  # topic: the collected hits of the whole index
  setup { client.all(:index => "all") }

  asserts(:class).equals(Array)
  asserts(:length).equals(500)
end
@@ -0,0 +1,28 @@
1
+ require './test/test_config'
2
+
3
# Checks the extract helpers against the seeded "extract_source" index.
context "Extract" do
  helper(:node) { Node::External.instance }

  helper(:client) do
    server_url = "http://#{node.ip}:#{node.port}"
    Eson::HTTP::Client.new(:server => server_url, :logger => 'test/test.log')
  end

  describe "extract_sources" do
    # topic: the "_source" hashes of a match-all search
    setup do
      response = client.simple_search(:index => "extract_source", :q => "*")
      client.extract_sources(response)
    end

    asserts(:class).equals(Array)
    asserts(:first).equals("foo" => "bar")
  end

  describe "extract_hits" do
    # topic: the raw hits of a match-all search
    setup do
      response = client.simple_search(:index => "extract_source", :q => "*")
      client.extract_hits(response)
    end

    asserts(:class).equals(Array)
    asserts("index of first element") { topic.first["_index"] }.equals("extract_source")
  end
end
@@ -0,0 +1,19 @@
1
+ require './test/test_config'
2
+
3
# Checks that reindexing "all" into "to_all" copies every document.
context "Reindex" do
  helper(:node) { Node::External.instance }

  helper(:client) do
    server_url = "http://#{node.ip}:#{node.port}"
    Eson::HTTP::Client.new(:server => server_url, :logger => 'test/test.log')
  end

  # topic: every document of the target index after reindex + refresh
  setup do
    client.reindex("all", "to_all")
    client.refresh(:index => "to_all")
    client.all(:index => "to_all")
  end

  asserts(:class).equals(Array)
  asserts(:length).equals(500)
end
@@ -0,0 +1,46 @@
1
+ require './test/test_config'
2
+
3
# Checks #transplant and #merge_parents against the seeded "transplant"
# index (parents 1 and 2 of type "blog", children of type "blog_tag").
context "Transplant" do
  helper(:node) { Node::External.instance }

  helper(:client) do
    server_url = "http://#{node.ip}:#{node.port}"
    Eson::HTTP::Client.new(:server => server_url, :logger => 'test/test.log')
  end

  describe "transplant one document" do
    # topic: document 4, fetched through its new parent's routing
    setup do
      client.with :index => "transplant" do |scoped|
        scoped.transplant(1, 2, 4)
        scoped.refresh
        scoped.get(:type => "blog_tag", :id => 4, :routing => 2)
      end
    end

    asserts(:source) { topic["_source"]}.equals({"foo" => "bar"})
  end

  describe "transplant all" do
    # topic: all children of parent 2 after moving everything over from parent 1
    setup do
      client.with :index => "transplant" do |scoped|
        scoped.transplant(1,2)
        scoped.refresh
        scoped.all(:q => "_parent:2")
      end
    end

    asserts(:class).equals(Array)
    asserts(:length).equals(498)
  end

  describe "merge_parents" do
    # topic: the scoped client itself, so the assertions can query it
    setup do
      client.with :index => "transplant", :type => "blog" do |scoped|
        scoped.merge_parents(1,2)
        scoped.refresh
        scoped
      end
    end

    asserts("retrieving the old parent") { topic.get(:id => 1) }.raises(Eson::NotFoundError)
    asserts("count of all children of the new client") { topic.all(:type => "blog_tag", :q => "_parent:2").length }.equals(498)
  end
end
@@ -0,0 +1,70 @@
1
+ # basically, this is also a test for the bulk interface :)
2
+
3
# Seeds the external test node: wipes all indices, then fills the
# "extract_source", "all" and "transplant" indices used by the tests.
node = Node::External.instance

client = Eson::Client.new(:server   => "http://#{node.ip}:#{node.port}",
                          :protocol => Eson::HTTP,
                          :plugins  => [Eson::ResponseParser],
                          :logger   => 'test/test.log')

# Best effort: a failing wipe is ignored.
begin
  client.delete_index :index => "_all"
rescue
  nil
end

# 500 identical documents each for the extract and the all/reindex tests.
client.bulk do |bulk_request|
  500.times do
    bulk_request.index :index => "extract_source",
                       :type  => "bar",
                       :doc   => {"foo" => "bar"}
  end
end

client.bulk do |bulk_request|
  500.times do
    bulk_request.index :index => "all",
                       :type  => "bar",
                       :doc   => {"foo" => "bar"}
  end
end

# Parent/child setup for the transplant tests: "blog_tag" documents are
# children of "blog" documents.
client.create_index :index => "transplant"
client.put_mapping :index   => "transplant",
                   :type    => 'blog',
                   :mapping => {
                     :blog => {
                       :properties => {
                         :foo => { :type => "string", :index => "not_analyzed" }
                       }
                     }
                   }

client.put_mapping :index   => "transplant",
                   :type    => 'blog_tag',
                   :mapping => {
                     :blog_tag => {
                       :_parent => { :type => "blog" }
                     }
                   }

# The two parent documents.
[1, 2].each do |parent_id|
  client.index :index => "transplant",
               :type  => :blog,
               :id    => parent_id,
               :doc   => {:foo => "bar"}
end

client.refresh :index => "transplant"

# 498 children (ids 3..500), alternating between parent 1 (even ids) and
# parent 2 (odd ids).
client.bulk do |bulk_request|
  (3..500).each do |child_id|
    bulk_request.index :index  => "transplant",
                       :type   => "blog_tag",
                       :id     => child_id,
                       :doc    => {"foo" => "bar"},
                       :parent => ((child_id % 2) + 1)
  end
end

client.refresh(:index => "all")
client.refresh(:index => "transplant")
client.refresh(:index => "extract_source")
@@ -0,0 +1,34 @@
1
+ begin
2
+ # Require the preresolved locked set of gems.
3
+ require File.expand_path('../../.bundle/environment', __FILE__)
4
+ rescue LoadError
5
+ # Fallback on doing the resolve at runtime.
6
+ require 'rubygems'
7
+ require 'bundler'
8
+ Bundler.setup
9
+ end
10
+
11
+ Bundler.require(:test, :default)
12
+
13
+ require 'eson-http'
14
+ require 'eson-more'
15
+
16
+ require 'elasticsearch-node/external'
17
+
18
module Node
  # Holds the single external ElasticSearch node shared by all tests.
  module External
    class << self
      # Returns the shared node, creating it on first use with
      # "gateway.type" => "none" and registering an at_exit hook that
      # closes it.
      #
      # @return [ElasticSearch::Node::External] the memoized node
      def instance
        return @node if @node

        started = ElasticSearch::Node::External.new("gateway.type" => "none")
        at_exit { started.close }
        @node = started
      end
    end
  end
end
31
+
32
+ Node::External.instance
33
+ require 'riot'
34
+ require './test/seeds/seeds'
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: eson-more
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Florian Gilcher
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: eson-core
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: elasticsearch-node
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: riot
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: ! "A modular client for ElasticSearch. It provides\n an implementation
63
+ of the Query language as well as multiple client implementations\n for HTTP and
64
+ native access."
65
+ email:
66
+ - florian.gilcher@asquera.de
67
+ executables: []
68
+ extensions: []
69
+ extra_rdoc_files: []
70
+ files:
71
+ - LICENSE.md
72
+ - README.md
73
+ - Rakefile
74
+ - eson-more.gemspec
75
+ - lib/eson-more.rb
76
+ - lib/eson/more/all.rb
77
+ - lib/eson/more/extract.rb
78
+ - lib/eson/more/reindex.rb
79
+ - lib/eson/more/transplant.rb
80
+ - log4j.properties
81
+ - test/more/all_test.rb
82
+ - test/more/extract_source_test.rb
83
+ - test/more/reindex_test.rb
84
+ - test/more/transplant_test.rb
85
+ - test/seeds/seeds.rb
86
+ - test/test_config.rb
87
+ homepage: ''
88
+ licenses: []
89
+ post_install_message:
90
+ rdoc_options: []
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ none: false
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 1.8.21
108
+ signing_key:
109
+ specification_version: 3
110
+ summary: A modular client for ElasticSearch - additional functions
111
+ test_files:
112
+ - test/more/all_test.rb
113
+ - test/more/extract_source_test.rb
114
+ - test/more/reindex_test.rb
115
+ - test/more/transplant_test.rb
116
+ - test/seeds/seeds.rb
117
+ - test/test_config.rb