metacrunch-elasticsearch 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 33c4fcce820196d49febce7f16730f4958157070
4
+ data.tar.gz: c5c043df51406bbde562ea6fc6ba9e0aba759902
5
+ SHA512:
6
+ metadata.gz: 9ca1e1729c150bc97ee3387926235f0c64bd02b41b561101fbf408fd4e5e2fecb83d9dbef641e8304731e11979f832a674c182ef9fa332dc6b2599e5762a7202
7
+ data.tar.gz: 27c72423491f139c91857cb3dcedf8b3e99e673babf65eea168f68394d0f0185ac32e31835c1703d512ea4dfde8a9a965d3f1bd868e2ad897a3c66a5dc2f7013
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ .DS_Store
2
+ /doc
3
+ *.gem
4
+ *.rbc
5
+ .bundle
6
+ .config
7
+ .yardoc
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ test/tmp
18
+ test/version_tmp
19
+ tmp
20
+ *.bundle
21
+ *.so
22
+ *.o
23
+ *.a
24
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
source "https://rubygems.org"

# Runtime dependencies are declared in the gemspec.
gemspec

gem "metacrunch", ">= 2.1.0", github: "ubpb/metacrunch", branch: "master"

gem "rake"
gem "rspec", "~> 3.2.0"

# The debugging helpers below are only declared when the CI environment
# variable is not set.
unless ENV["CI"]
  group :development do
    gem "hashdiff"
    gem "pry", "~> 0.9.12.6"
    gem "pry-byebug", "<= 1.3.2"
    gem "pry-rescue", "~> 1.4.1", github: "ConradIrwin/pry-rescue", branch: :master
    gem "pry-stack_explorer", "~> 0.4.9.1"
    gem "pry-syntax-hacks", "~> 0.0.6"
  end
end
20
+
data/License.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 René Sprotte, Michael Sievers
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
require "rspec/core/rake_task"

# Set up the RSpec rake task under the name :spec ...
RSpec::Core::RakeTask.new(:spec)

# ... and make it the prerequisite of the default task, so a bare `rake`
# invokes it.
task default: :spec
data/Readme.md ADDED
@@ -0,0 +1,3 @@
1
+ # metacrunch-elasticsearch
2
+
3
+ Dokumentation folgt in Kürze...
@@ -0,0 +1,63 @@
1
+ require "elasticsearch"
2
+ require_relative "../elasticsearch"
3
+
4
module Metacrunch
  module Elasticsearch
    # Iterates over all hits of an Elasticsearch search using the scroll API.
    #
    # The target index and type are taken from the parsed URI (`@uri.index`,
    # `@uri.type`); host and port are used to build the client.
    class Reader

      # Number of hits requested per search request (`size`).
      DEFAULT_SCAN_SIZE = 250

      # Scroll keep-alive in seconds. Interpolated as "<value>s" when talking
      # to Elasticsearch. Kept as a plain Integer so the class does not depend
      # on ActiveSupport's `Integer#minutes` being loaded.
      DEFAULT_SCROLL_EXPIRY_TIME = 10 * 60

      # @param uri  [String] must start with "elasticsearch://"
      # @param body [Hash] search body that is sent unchanged by #each; #count
      #   only uses its `query` entry
      # @param log  [Boolean] forwarded to the Elasticsearch client as `log:`
      # @raise [ArgumentError] if +uri+ does not use the elasticsearch scheme
      def initialize(uri, body, log: false)
        unless uri.start_with?("elasticsearch://")
          raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
        end

        @uri  = URI(uri)
        @body = body
        @log  = log
      end

      # Yields every hit of the search, page by page.
      #
      # The response of the initial `search` call is only used for its
      # `_scroll_id`; hits are read exclusively from the subsequent `scroll`
      # responses. Iteration stops as soon as a scroll response is falsy or
      # carries no hits.
      #
      # @yieldparam hit [Hash] one entry of `response["hits"]["hits"]`
      # @return [Enumerator] when called without a block
      # @return [nil] when called with a block
      def each
        return enum_for(__method__) unless block_given?

        keep_alive = "#{DEFAULT_SCROLL_EXPIRY_TIME}s"

        response = client.search({
          body: @body,
          index: @uri.index,
          type: @uri.type,
          scroll: keep_alive,
          search_type: "scan",
          size: DEFAULT_SCAN_SIZE
        })
        scroll_id = response["_scroll_id"]

        while (response = client.scroll(scroll: keep_alive, scroll_id: scroll_id))
          hits = response["hits"]["hits"]
          break if hits.nil? || hits.empty?

          hits.each { |hit| yield(hit) }

          # Every request continues from the scroll id of the latest response.
          scroll_id = response["_scroll_id"]
        end
      end

      # Number of documents matching the query of the search body.
      #
      # The query is looked up under the symbol key first and under the string
      # key as a fallback, so bodies built with string keys are counted with
      # the same query that #each searches with.
      #
      # @return [Object] the "count" entry of the client's count response
      def count
        query = @body[:query] || @body["query"]

        client.count({
          body: { query: query },
          index: @uri.index,
          type: @uri.type
        })["count"]
      end

      private

      # Lazily created, memoized Elasticsearch client for the URI's host/port.
      def client
        @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
      end

    end
  end
end
@@ -0,0 +1,31 @@
1
+ require "uri"
2
+ require_relative "../elasticsearch"
3
+
4
+
5
module Metacrunch
  module Elasticsearch
    # URI class for the "elasticsearch" scheme:
    #
    #   elasticsearch://host[:port]/index[/type]
    #
    # The superclass is referenced as ::URI::Generic so that it always
    # resolves to the standard library module, even when this file is
    # evaluated again after this class has been defined.
    class URI < ::URI::Generic

      DEFAULT_PORT = 9200

      # @return [String, nil] first non-empty path segment
      def index
        path_segments[0]
      end

      # @return [String, nil] second non-empty path segment
      def type
        path_segments[1]
      end

      private

      # Path split on "/" with empty (or whitespace-only) segments removed,
      # so leading, trailing and doubled slashes are ignored.
      def path_segments
        path.split("/").reject { |segment| segment.strip.empty? }
      end

    end
  end
end

# Make URI("elasticsearch://...") return a Metacrunch::Elasticsearch::URI.
module URI
  if respond_to?(:register_scheme)
    # Public registration API; newer versions of the uri library no longer
    # expose the @@schemes class variable.
    register_scheme "ELASTICSEARCH", Metacrunch::Elasticsearch::URI
  else
    @@schemes["ELASTICSEARCH"] = Metacrunch::Elasticsearch::URI
  end
end
@@ -0,0 +1,5 @@
1
module Metacrunch
  module Elasticsearch
    # Gem version; frozen so the shared constant cannot be mutated in place.
    VERSION = "2.0.0".freeze
  end
end
@@ -0,0 +1,59 @@
1
+ require "elasticsearch"
2
+ require_relative "../elasticsearch"
3
+
4
module Metacrunch
  module Elasticsearch
    # Buffers documents and sends them to Elasticsearch through the client's
    # bulk API.
    #
    # The target index and type are taken from the parsed URI (`@uri.index`,
    # `@uri.type`); host and port are used to build the client.
    class Writer

      # @param uri       [String] must start with "elasticsearch://"
      # @param log       [Boolean] forwarded to the Elasticsearch client as `log:`
      # @param bulk_size [Integer] buffer length at which #write flushes
      #   automatically; values <= 0 disable the automatic flush
      # @param autoflush [Boolean] set to false to flush only on #flush / #close
      # @raise [ArgumentError] if +uri+ does not use the elasticsearch scheme
      def initialize(uri, log: false, bulk_size: 250, autoflush: true)
        unless uri.start_with?("elasticsearch://")
          raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
        end

        @uri       = URI(uri)
        @log       = log
        @bulk_size = bulk_size
        @buffer    = []
        @autoflush = autoflush
      end

      # Queues one document for indexing.
      #
      # The document id is taken from the `:id` key or, if that is absent,
      # from `:_id`, and is removed from the queued document. The hash passed
      # in by the caller is left untouched; a shallow copy is queued instead.
      #
      # @param data     [Hash] document including `:id` or `:_id`
      # @param _options [Hash] accepted for interface compatibility; not used
      # @return [true]
      # @raise [ArgumentError] if neither `:id` nor `:_id` is present
      def write(data, _options = {})
        document = data.dup
        id = document.delete(:id) || document.delete(:_id)
        raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id

        @buffer << {
          _index: @uri.index,
          _type: @uri.type,
          _id: id,
          data: document
        }

        flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size

        true
      end

      # Sends all buffered documents in one bulk request, each wrapped as an
      # `index` action. Does nothing when the buffer is empty.
      #
      # The buffer is emptied in every case, including when the request fails
      # or reports errors.
      #
      # @return [true]
      # @raise [RuntimeError] if the bulk response has a truthy "errors" entry
      def flush
        unless @buffer.empty?
          result = client.bulk(body: @buffer.map { |action| { index: action } })
          raise RuntimeError, "Elasticsearch bulk request reported errors" if result["errors"]
        end

        true
      ensure
        @buffer = []
      end

      # Flushes whatever is still buffered.
      #
      # @return [true]
      def close
        flush
      end

      private

      # Lazily created, memoized Elasticsearch client for the URI's host/port.
      def client
        @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
require "metacrunch"
require "elasticsearch"

# pry is loaded when it is installed; a LoadError is ignored.
begin
  require "pry"
rescue LoadError
end

module Metacrunch
  module Elasticsearch
    # Load the components of this namespace in their original order.
    %w[uri reader writer].each do |component|
      require_relative "./elasticsearch/#{component}"
    end
  end
end
@@ -0,0 +1 @@
1
+ require "metacrunch/elasticsearch"
@@ -0,0 +1,22 @@
1
+ require File.expand_path("../lib/metacrunch/elasticsearch/version", __FILE__)
2
+
3
Gem::Specification.new do |s|
  s.authors       = ["René Sprotte", "Michael Sievers"]
  s.email         = "r.sprotte@ub.uni-paderborn.de"
  s.summary       = %q{Elasticsearch tools for metacrunch}
  s.description   = s.summary
  s.homepage      = "http://github.com/ubpb/metacrunch-elasticsearch"
  s.licenses      = ["MIT"]

  # Ask git for NUL-separated paths and split on NUL. Splitting the plain
  # output on `$\` (nil by default) falls back to splitting on any
  # whitespace, which tears apart file names that contain spaces.
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.name          = "metacrunch-elasticsearch"
  s.require_paths = ["lib"]
  s.version       = Metacrunch::Elasticsearch::VERSION

  s.required_ruby_version = ">= 2.2.0"

  s.add_dependency "elasticsearch", "~> 1.0"
  s.add_dependency "metacrunch",    "~> 2.1"
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: metacrunch-elasticsearch
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - René Sprotte
8
+ - Michael Sievers
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-06-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: elasticsearch
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: metacrunch
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '2.1'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '2.1'
42
+ description: Elasticsearch tools for metacrunch
43
+ email: r.sprotte@ub.uni-paderborn.de
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - License.txt
51
+ - Rakefile
52
+ - Readme.md
53
+ - lib/metacrunch/elasticsearch.rb
54
+ - lib/metacrunch/elasticsearch/reader.rb
55
+ - lib/metacrunch/elasticsearch/uri.rb
56
+ - lib/metacrunch/elasticsearch/version.rb
57
+ - lib/metacrunch/elasticsearch/writer.rb
58
+ - lib/metacrunch_plugin.rb
59
+ - metacrunch-elasticsearch.gemspec
60
+ homepage: http://github.com/ubpb/metacrunch-elasticsearch
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 2.2.0
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.4.6
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Elasticsearch tools for metacrunch
84
+ test_files: []