metacrunch-elasticsearch 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 33c4fcce820196d49febce7f16730f4958157070
4
+ data.tar.gz: c5c043df51406bbde562ea6fc6ba9e0aba759902
5
+ SHA512:
6
+ metadata.gz: 9ca1e1729c150bc97ee3387926235f0c64bd02b41b561101fbf408fd4e5e2fecb83d9dbef641e8304731e11979f832a674c182ef9fa332dc6b2599e5762a7202
7
+ data.tar.gz: 27c72423491f139c91857cb3dcedf8b3e99e673babf65eea168f68394d0f0185ac32e31835c1703d512ea4dfde8a9a965d3f1bd868e2ad897a3c66a5dc2f7013
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ .DS_Store
2
+ /doc
3
+ *.gem
4
+ *.rbc
5
+ .bundle
6
+ .config
7
+ .yardoc
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ test/tmp
18
+ test/version_tmp
19
+ tmp
20
+ *.bundle
21
+ *.so
22
+ *.o
23
+ *.a
24
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
# Gemfile used when developing metacrunch-elasticsearch.
source "https://rubygems.org"

# Pull in the dependencies declared in the gemspec.
gemspec

# metacrunch is taken from the master branch of its GitHub repository.
gem "metacrunch", ">= 2.1.0", github: "ubpb/metacrunch", branch: "master"

gem "rake"
gem "rspec", "~> 3.2.0"

# Debugging helpers are left out whenever the CI environment variable is set.
unless ENV["CI"]
  group :development do
    gem "hashdiff"
    gem "pry", "~> 0.9.12.6"
    gem "pry-byebug", "<= 1.3.2"
    # Branch given as a String, consistent with the metacrunch entry above.
    gem "pry-rescue", "~> 1.4.1", github: "ConradIrwin/pry-rescue", branch: "master"
    gem "pry-stack_explorer", "~> 0.4.9.1"
    gem "pry-syntax-hacks", "~> 0.0.6"
  end
end
20
+
data/License.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 René Sprotte, Michael Sievers
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require "rspec/core/rake_task"
2
+
3
# Register an RSpec rake task named :spec.
RSpec::Core::RakeTask.new(:spec)

# Make :spec the prerequisite of the default task, so plain `rake` runs it.
task default: :spec
data/Readme.md ADDED
@@ -0,0 +1,3 @@
1
+ # metacrunch-elasticsearch
2
+
3
+ Dokumentation folgt in Kürze...
@@ -0,0 +1,63 @@
1
+ require "elasticsearch"
2
+ require_relative "../elasticsearch"
3
+
4
module Metacrunch
  module Elasticsearch
    # Streams the hits of an Elasticsearch query: it issues a search with
    # search_type "scan" and then follows the scroll cursor until a page
    # comes back without hits.
    class Reader

      # Value sent as `size` with the initial search request.
      DEFAULT_SCAN_SIZE = 250

      # Scroll lifetime in seconds; it is interpolated as "<value>s" into
      # every search/scroll request. A plain Integer renders the same "600s"
      # as the former `10.minutes`, without requiring ActiveSupport.
      DEFAULT_SCROLL_EXPIRY_TIME = 10 * 60

      # @param uri [String, #to_s] address of the form elasticsearch://host:port/index/type
      # @param body [Hash] search body passed to the client unchanged
      # @param log [Boolean] forwarded to the Elasticsearch client as `log:`
      # @param scan_size [Integer] `size` of the initial search request
      # @param scroll_expiry_time [Integer] scroll lifetime in seconds
      # @raise [ArgumentError] if the URI does not use the elasticsearch scheme
      def initialize(uri, body, log: false, scan_size: DEFAULT_SCAN_SIZE, scroll_expiry_time: DEFAULT_SCROLL_EXPIRY_TIME)
        # String#start_with? is core Ruby; to_s turns nil or URI objects into
        # something that can be checked instead of raising NoMethodError.
        unless uri.to_s.start_with?("elasticsearch://")
          raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
        end

        @uri                = URI(uri)
        @body               = body
        @log                = log
        @scan_size          = scan_size
        @scroll_expiry_time = scroll_expiry_time
      end

      # Yields every hit of the query, page by page.
      #
      # @yieldparam hit [Object] one element of result["hits"]["hits"]
      # @return [Enumerator, nil] an Enumerator when no block is given
      def each(&block)
        return enum_for(__method__) unless block_given?

        scroll = "#{@scroll_expiry_time}s"

        search_result = client.search(
          body:        @body,
          index:       @uri.index,
          type:        @uri.type,
          scroll:      scroll,
          search_type: "scan",
          size:        @scan_size
        )

        # The hits of the initial scan response are never read; every page is
        # fetched through scroll, always using the most recent scroll id.
        while (search_result = client.scroll(scroll: scroll, scroll_id: search_result["_scroll_id"]))
          hits = search_result["hits"]["hits"]
          break if hits.nil? || hits.empty?

          hits.each(&block)
        end
      end

      # Asks Elasticsearch how many documents match the query of the body.
      #
      # @return [Object] the "count" entry of the client's response
      def count
        # Accept bodies with symbol or string keys; looking up only :query
        # silently dropped the query of a string-keyed body.
        query = @body[:query] || @body["query"]

        client.count(
          body:  { query: query },
          index: @uri.index,
          type:  @uri.type
        )["count"]
      end

      private

      # Lazily built client for the host and port of the URI.
      def client
        @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
      end

    end
  end
end
@@ -0,0 +1,31 @@
1
+ require "uri"
2
+ require_relative "../elasticsearch"
3
+
4
+
5
module Metacrunch
  module Elasticsearch
    # URI class for addresses of the form elasticsearch://host:port/index/type.
    class URI < URI::Generic

      # Port used when the address does not name one.
      DEFAULT_PORT = 9200

      # @return [String, nil] first non-blank path segment
      def index
        splitted_path[0]
      end

      # @return [String, nil] second non-blank path segment
      def type
        splitted_path[1]
      end

      private

      # Path segments without blank entries (leading, trailing or doubled
      # slashes). Uses core String methods instead of ActiveSupport's
      # `presence`, which this file never requires.
      def splitted_path
        path.split("/").reject { |segment| segment.strip.empty? }
      end

    end
  end
end

# Make URI() / URI.parse return Metacrunch::Elasticsearch::URI for the
# elasticsearch scheme. Ruby 3.1+ no longer has the @@schemes class variable
# (touching it raises NameError), so prefer the public registration API and
# fall back to the class variable only on older Rubies.
if URI.respond_to?(:register_scheme)
  URI.register_scheme("ELASTICSEARCH", Metacrunch::Elasticsearch::URI)
else
  URI.class_variable_get(:@@schemes)["ELASTICSEARCH"] = Metacrunch::Elasticsearch::URI
end
@@ -0,0 +1,5 @@
1
module Metacrunch
  module Elasticsearch
    # Version of the metacrunch-elasticsearch gem; frozen so that the shared
    # constant cannot be modified in place.
    VERSION = "2.0.0".freeze
  end
end
@@ -0,0 +1,59 @@
1
+ require "elasticsearch"
2
+ require_relative "../elasticsearch"
3
+
4
module Metacrunch
  module Elasticsearch
    # Buffers documents and sends them to Elasticsearch as bulk "index"
    # operations for the index/type named in the URI.
    class Writer

      # @param uri [String, #to_s] address of the form elasticsearch://host:port/index/type
      # @param log [Boolean] forwarded to the Elasticsearch client as `log:`
      # @param bulk_size [Integer] buffer length at which an automatic flush happens
      # @param autoflush [Boolean] whether #write flushes once bulk_size is reached
      # @raise [ArgumentError] if the URI does not use the elasticsearch scheme
      def initialize(uri, log: false, bulk_size: 250, autoflush: true)
        # String#start_with? is core Ruby; to_s turns nil or URI objects into
        # something that can be checked instead of raising NoMethodError.
        unless uri.to_s.start_with?("elasticsearch://")
          raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
        end

        @uri       = URI(uri)
        @log       = log
        @bulk_size = bulk_size
        @buffer    = []
        @autoflush = autoflush
      end

      # Queues one document for indexing.
      #
      # @param data [Hash] document including its id under :id, :_id, "id" or "_id"
      # @param options [Hash] accepted for interface compatibility; not used
      # @return [true]
      # @raise [ArgumentError] if no id is found in data
      def write(data, options = {})
        # Work on a shallow copy so the caller's hash keeps its id entry.
        document = data.dup
        id = document.delete(:id) || document.delete(:_id) ||
             document.delete("id") || document.delete("_id")
        raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id

        @buffer << {
          _index: @uri.index,
          _type:  @uri.type,
          _id:    id,
          data:   document
        }

        flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size

        true
      end

      # Sends all buffered documents in a single bulk request.
      #
      # The buffer is emptied even when the request fails, so a failed batch
      # is not sent again by a later flush.
      #
      # @return [true]
      # @raise [RuntimeError] if the bulk response has a truthy "errors" entry
      def flush
        unless @buffer.empty?
          operations = @buffer.map { |entry| { index: entry } }
          result = client.bulk(body: operations)

          if result["errors"]
            raise RuntimeError, "Elasticsearch bulk request reported errors (index: #{@uri.index.inspect}, type: #{@uri.type.inspect})"
          end
        end

        true
      ensure
        @buffer = []
      end

      # Flushes whatever is still buffered.
      #
      # @return [true]
      def close
        flush
      end

      private

      # Lazily built client for the host and port of the URI.
      def client
        @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
+ require "metacrunch"
2
+ require "elasticsearch"
3
+
4
# pry is optional: load it when it is installed and carry on silently when
# it is not.
begin
  require "pry"
rescue LoadError
end

# Namespace of the Elasticsearch integration. Evaluating it loads the three
# sibling files in this order: uri, reader, writer.
module Metacrunch
  module Elasticsearch
    require_relative "./elasticsearch/uri"
    require_relative "./elasticsearch/reader"
    require_relative "./elasticsearch/writer"
  end
end
@@ -0,0 +1 @@
1
+ require "metacrunch/elasticsearch"
@@ -0,0 +1,22 @@
1
+ require File.expand_path("../lib/metacrunch/elasticsearch/version", __FILE__)
2
+
3
# Gem specification of metacrunch-elasticsearch.
Gem::Specification.new do |s|
  s.name        = "metacrunch-elasticsearch"
  s.version     = Metacrunch::Elasticsearch::VERSION
  s.authors     = ["René Sprotte", "Michael Sievers"]
  s.email       = "r.sprotte@ub.uni-paderborn.de"
  s.summary     = %q{Elasticsearch tools for metacrunch}
  s.description = s.summary
  s.homepage    = "http://github.com/ubpb/metacrunch-elasticsearch"
  s.licenses    = ["MIT"]

  # `git ls-files` prints one path per line. Split on newlines explicitly:
  # `$\` is the *output* record separator (nil by default), which made
  # String#split break on any whitespace and mangle paths containing spaces.
  s.files         = `git ls-files`.split("\n")
  s.executables   = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  s.required_ruby_version = ">= 2.2.0"

  s.add_dependency "elasticsearch", "~> 1.0"
  s.add_dependency "metacrunch",    "~> 2.1"
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: metacrunch-elasticsearch
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - René Sprotte
8
+ - Michael Sievers
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-06-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: elasticsearch
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: metacrunch
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '2.1'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '2.1'
42
+ description: Elasticsearch tools for metacrunch
43
+ email: r.sprotte@ub.uni-paderborn.de
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - License.txt
51
+ - Rakefile
52
+ - Readme.md
53
+ - lib/metacrunch/elasticsearch.rb
54
+ - lib/metacrunch/elasticsearch/reader.rb
55
+ - lib/metacrunch/elasticsearch/uri.rb
56
+ - lib/metacrunch/elasticsearch/version.rb
57
+ - lib/metacrunch/elasticsearch/writer.rb
58
+ - lib/metacrunch_plugin.rb
59
+ - metacrunch-elasticsearch.gemspec
60
+ homepage: http://github.com/ubpb/metacrunch-elasticsearch
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 2.2.0
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.4.6
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Elasticsearch tools for metacrunch
84
+ test_files: []