metacrunch-elasticsearch 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +24 -0
- data/Gemfile +20 -0
- data/License.txt +22 -0
- data/Rakefile +5 -0
- data/Readme.md +3 -0
- data/lib/metacrunch/elasticsearch/reader.rb +63 -0
- data/lib/metacrunch/elasticsearch/uri.rb +31 -0
- data/lib/metacrunch/elasticsearch/version.rb +5 -0
- data/lib/metacrunch/elasticsearch/writer.rb +59 -0
- data/lib/metacrunch/elasticsearch.rb +15 -0
- data/lib/metacrunch_plugin.rb +1 -0
- data/metacrunch-elasticsearch.gemspec +22 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 33c4fcce820196d49febce7f16730f4958157070
|
4
|
+
data.tar.gz: c5c043df51406bbde562ea6fc6ba9e0aba759902
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9ca1e1729c150bc97ee3387926235f0c64bd02b41b561101fbf408fd4e5e2fecb83d9dbef641e8304731e11979f832a674c182ef9fa332dc6b2599e5762a7202
|
7
|
+
data.tar.gz: 27c72423491f139c91857cb3dcedf8b3e99e673babf65eea168f68394d0f0185ac32e31835c1703d512ea4dfde8a9a965d3f1bd868e2ad897a3c66a5dc2f7013
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
.DS_Store
|
2
|
+
/doc
|
3
|
+
*.gem
|
4
|
+
*.rbc
|
5
|
+
.bundle
|
6
|
+
.config
|
7
|
+
.yardoc
|
8
|
+
Gemfile.lock
|
9
|
+
InstalledFiles
|
10
|
+
_yardoc
|
11
|
+
coverage
|
12
|
+
doc/
|
13
|
+
lib/bundler/man
|
14
|
+
pkg
|
15
|
+
rdoc
|
16
|
+
spec/reports
|
17
|
+
test/tmp
|
18
|
+
test/version_tmp
|
19
|
+
tmp
|
20
|
+
*.bundle
|
21
|
+
*.so
|
22
|
+
*.o
|
23
|
+
*.a
|
24
|
+
mkmf.log
|
data/Gemfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Bundler manifest used when developing metacrunch-elasticsearch.
source "https://rubygems.org"

# Pull in the dependencies declared by the gemspec in this directory.
gemspec

gem "metacrunch", ">= 2.1.0", github: "ubpb/metacrunch", branch: "master"

gem "rake"
gem "rspec", "~> 3.2.0"

# The debugging toolchain is only declared when the CI environment
# variable is not set.
unless ENV["CI"]
  group :development do
    gem "hashdiff"
    gem "pry", "~> 0.9.12.6"
    gem "pry-byebug", "<= 1.3.2"
    gem "pry-rescue", "~> 1.4.1", github: "ConradIrwin/pry-rescue", branch: :master
    gem "pry-stack_explorer", "~> 0.4.9.1"
    gem "pry-syntax-hacks", "~> 0.0.6"
  end
end
|
20
|
+
|
data/License.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 René Sprotte, Michael Sievers
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/Readme.md
ADDED
data/lib/metacrunch/elasticsearch/reader.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require "elasticsearch"
|
2
|
+
require_relative "../elasticsearch"
|
3
|
+
|
4
|
+
module Metacrunch
  module Elasticsearch
    # Reads documents from an Elasticsearch index by issuing a scan search
    # and then scrolling through the result pages.
    #
    # Only core Ruby methods are used, so the class no longer depends on
    # ActiveSupport core extensions (starts_with?, present?, 10.minutes)
    # having been loaded by another library.
    class Reader

      # Value sent as the +size+ parameter of the initial scan search.
      DEFAULT_SCAN_SIZE = 250
      # Scroll keep-alive in seconds (10 minutes); sent to the server as "600s".
      DEFAULT_SCROLL_EXPIRY_TIME = 10 * 60

      # @param uri [String, #to_s] must start with "elasticsearch://"; the
      #   parsed URI is expected to respond to index, type, host and port
      # @param body [Hash] search body handed to the client unchanged
      # @param log [Boolean] forwarded to the Elasticsearch client
      # @raise [ArgumentError] if uri does not use the elasticsearch scheme
      def initialize(uri, body, log: false)
        unless uri.to_s.start_with?("elasticsearch://")
          raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
        end

        @uri  = URI(uri)
        @body = body
        @log  = log
      end

      # Yields every hit of the search, page by page.
      #
      # @yieldparam hit [Hash] one entry of the response's hits/hits array
      # @return [Enumerator, nil] an Enumerator when called without a block
      def each(&block)
        return enum_for(__method__) unless block_given?

        keep_alive = "#{DEFAULT_SCROLL_EXPIRY_TIME}s"

        search_result = client.search({
          body: @body,
          index: @uri.index,
          type: @uri.type,
          scroll: keep_alive,
          search_type: "scan",
          size: DEFAULT_SCAN_SIZE
        })

        # Every scroll request uses the scroll id returned by the previous
        # response. Iteration stops on a falsy response or an empty page.
        while (search_result = client.scroll(scroll: keep_alive, scroll_id: search_result["_scroll_id"]))
          hits = search_result["hits"]["hits"]
          break if hits.nil? || hits.empty?

          hits.each(&block)
        end
      end

      # Counts the documents matching the query part of the body.
      #
      # @return [Object] the "count" entry of the client's count response
      def count
        # Accept symbol and string keys so a string-keyed body is not
        # silently counted without its query.
        query = @body[:query] || @body["query"]

        client.count({
          body: { query: query },
          index: @uri.index,
          type: @uri.type
        })["count"]
      end

    private

      # Lazily builds and memoizes the Elasticsearch client.
      def client
        @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
      end

    end
  end
end
|
data/lib/metacrunch/elasticsearch/uri.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "uri"
|
2
|
+
require_relative "../elasticsearch"
|
3
|
+
|
4
|
+
|
5
|
+
module Metacrunch
  module Elasticsearch
    # URI class for elasticsearch://host[:port]/index[/type] addresses.
    #
    # The superclass is written as ::URI::Generic so the lookup always
    # targets the standard library module, even when this class is reopened.
    class URI < ::URI::Generic

      # Port used when the URI does not specify one.
      DEFAULT_PORT = 9200

      # @return [String, nil] first non-empty path segment
      def index
        path_segments[0]
      end

      # @return [String, nil] second non-empty path segment
      def type
        path_segments[1]
      end

    private

      # Path split on "/" with empty segments dropped (core Ruby only, no
      # ActiveSupport `presence`).
      def path_segments
        path.split("/").reject(&:empty?)
      end

    end
  end
end

# Register the scheme so URI("elasticsearch://...") returns the class above.
# Newer versions of the uri library provide URI.register_scheme and no longer
# expose the @@schemes class variable; older versions only offer @@schemes.
module URI
  if respond_to?(:register_scheme)
    register_scheme("ELASTICSEARCH", Metacrunch::Elasticsearch::URI)
  else
    @@schemes["ELASTICSEARCH"] = Metacrunch::Elasticsearch::URI
  end
end
|
data/lib/metacrunch/elasticsearch/writer.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require "elasticsearch"
|
2
|
+
require_relative "../elasticsearch"
|
3
|
+
|
4
|
+
module Metacrunch
  module Elasticsearch
    # Buffers documents and sends them to Elasticsearch with bulk "index"
    # requests.
    class Writer

      # @param uri [String, #to_s] must start with "elasticsearch://"; the
      #   parsed URI is expected to respond to index, type, host and port
      # @param log [Boolean] forwarded to the Elasticsearch client
      # @param bulk_size [Integer] buffer length that triggers an automatic flush
      # @param autoflush [Boolean] when false, the buffer is only sent by
      #   an explicit #flush or #close
      # @raise [ArgumentError] if uri does not use the elasticsearch scheme
      def initialize(uri, log: false, bulk_size: 250, autoflush: true)
        unless uri.to_s.start_with?("elasticsearch://")
          raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
        end

        @uri       = URI(uri)
        @log       = log
        @bulk_size = bulk_size
        @buffer    = []
        @autoflush = autoflush
      end

      # Queues one document for indexing.
      #
      # @param data [Hash] document; must contain :id or :_id
      # @param options [Hash] accepted for interface compatibility, unused
      # @return [true]
      # @raise [ArgumentError] if neither :id nor :_id is present
      def write(data, options = {})
        # Work on a shallow copy so the caller's hash keeps its id.
        document = data.dup
        id = document.delete(:id) || document.delete(:_id)
        raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id

        @buffer << {
          _index: @uri.index,
          _type:  @uri.type,
          _id:    id,
          data:   document
        }

        flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size

        true
      end

      # Sends all buffered documents in one bulk request.
      #
      # The buffer is emptied even when the request fails, so a failed batch
      # is not sent again by the next flush.
      #
      # @return [true]
      # @raise [RuntimeError] if the bulk response has a truthy "errors" entry
      def flush
        unless @buffer.empty?
          result = client.bulk(body: @buffer.map { |document| { index: document } })
          raise RuntimeError, bulk_error_message(result) if result["errors"]
        end

        true
      ensure
        @buffer = []
      end

      # Flushes whatever is still buffered.
      def close
        flush
      end

    private

      # Lazily builds and memoizes the Elasticsearch client.
      def client
        @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
      end

      # Builds a readable message for a failed bulk response.
      # Assumes per-item results are listed under "items", each as a
      # single-key hash whose value may hold an "error" entry; falls back to a
      # generic message when the response has a different shape.
      def bulk_error_message(result)
        message = "Elasticsearch bulk request reported errors"

        failed = Array(result["items"])
          .flat_map { |item| item.is_a?(Hash) ? item.values : [] }
          .select { |info| info.is_a?(Hash) && info["error"] }
        return message if failed.empty?

        first = failed.first
        "#{message} (#{failed.size} failed item(s); first: _id=#{first["_id"].inspect}, error=#{first["error"].inspect})"
      end

    end
  end
end
|
data/lib/metacrunch/elasticsearch.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require "metacrunch"
|
2
|
+
require "elasticsearch"
|
3
|
+
|
4
|
+
# pry is an optional debugging aid: load it when installed, otherwise carry
# on without it.
begin
  require "pry"
rescue LoadError
  nil
end

module Metacrunch
  # Namespace of the Elasticsearch components; loading this file pulls in
  # the URI class, the reader and the writer, in that order.
  module Elasticsearch
    [
      "./elasticsearch/uri",
      "./elasticsearch/reader",
      "./elasticsearch/writer"
    ].each { |component| require_relative component }
  end
end
|
data/lib/metacrunch_plugin.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "metacrunch/elasticsearch"
|
data/metacrunch-elasticsearch.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.expand_path("../lib/metacrunch/elasticsearch/version", __FILE__)
|
2
|
+
|
3
|
+
# Gem specification for metacrunch-elasticsearch.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "metacrunch-elasticsearch"
  spec.version  = Metacrunch::Elasticsearch::VERSION
  spec.authors  = ["René Sprotte", "Michael Sievers"]
  spec.email    = "r.sprotte@ub.uni-paderborn.de"
  spec.homepage = "http://github.com/ubpb/metacrunch-elasticsearch"
  spec.licenses = ["MIT"]

  # The description simply reuses the summary text.
  spec.summary     = "Elasticsearch tools for metacrunch"
  spec.description = spec.summary

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list by path prefix.
  tracked_files      = `git ls-files`.split($\)
  spec.files         = tracked_files
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime requirements
  spec.required_ruby_version = ">= 2.2.0"

  spec.add_dependency "elasticsearch", "~> 1.0"
  spec.add_dependency "metacrunch",    "~> 2.1"
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: metacrunch-elasticsearch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- René Sprotte
|
8
|
+
- Michael Sievers
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-06-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: elasticsearch
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: metacrunch
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '2.1'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '2.1'
|
42
|
+
description: Elasticsearch tools for metacrunch
|
43
|
+
email: r.sprotte@ub.uni-paderborn.de
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- ".gitignore"
|
49
|
+
- Gemfile
|
50
|
+
- License.txt
|
51
|
+
- Rakefile
|
52
|
+
- Readme.md
|
53
|
+
- lib/metacrunch/elasticsearch.rb
|
54
|
+
- lib/metacrunch/elasticsearch/reader.rb
|
55
|
+
- lib/metacrunch/elasticsearch/uri.rb
|
56
|
+
- lib/metacrunch/elasticsearch/version.rb
|
57
|
+
- lib/metacrunch/elasticsearch/writer.rb
|
58
|
+
- lib/metacrunch_plugin.rb
|
59
|
+
- metacrunch-elasticsearch.gemspec
|
60
|
+
homepage: http://github.com/ubpb/metacrunch-elasticsearch
|
61
|
+
licenses:
|
62
|
+
- MIT
|
63
|
+
metadata: {}
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 2.2.0
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 2.4.6
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: Elasticsearch tools for metacrunch
|
84
|
+
test_files: []
|