metacrunch-elasticsearch 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +24 -0
- data/Gemfile +20 -0
- data/License.txt +22 -0
- data/Rakefile +5 -0
- data/Readme.md +3 -0
- data/lib/metacrunch/elasticsearch/reader.rb +63 -0
- data/lib/metacrunch/elasticsearch/uri.rb +31 -0
- data/lib/metacrunch/elasticsearch/version.rb +5 -0
- data/lib/metacrunch/elasticsearch/writer.rb +59 -0
- data/lib/metacrunch/elasticsearch.rb +15 -0
- data/lib/metacrunch_plugin.rb +1 -0
- data/metacrunch-elasticsearch.gemspec +22 -0
- metadata +84 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 33c4fcce820196d49febce7f16730f4958157070
         | 
| 4 | 
            +
              data.tar.gz: c5c043df51406bbde562ea6fc6ba9e0aba759902
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 9ca1e1729c150bc97ee3387926235f0c64bd02b41b561101fbf408fd4e5e2fecb83d9dbef641e8304731e11979f832a674c182ef9fa332dc6b2599e5762a7202
         | 
| 7 | 
            +
              data.tar.gz: 27c72423491f139c91857cb3dcedf8b3e99e673babf65eea168f68394d0f0185ac32e31835c1703d512ea4dfde8a9a965d3f1bd868e2ad897a3c66a5dc2f7013
         | 
    
        data/.gitignore
    ADDED
    
    | @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            .DS_Store
         | 
| 2 | 
            +
            /doc
         | 
| 3 | 
            +
            *.gem
         | 
| 4 | 
            +
            *.rbc
         | 
| 5 | 
            +
            .bundle
         | 
| 6 | 
            +
            .config
         | 
| 7 | 
            +
            .yardoc
         | 
| 8 | 
            +
            Gemfile.lock
         | 
| 9 | 
            +
            InstalledFiles
         | 
| 10 | 
            +
            _yardoc
         | 
| 11 | 
            +
            coverage
         | 
| 12 | 
            +
            doc/
         | 
| 13 | 
            +
            lib/bundler/man
         | 
| 14 | 
            +
            pkg
         | 
| 15 | 
            +
            rdoc
         | 
| 16 | 
            +
            spec/reports
         | 
| 17 | 
            +
            test/tmp
         | 
| 18 | 
            +
            test/version_tmp
         | 
| 19 | 
            +
            tmp
         | 
| 20 | 
            +
            *.bundle
         | 
| 21 | 
            +
            *.so
         | 
| 22 | 
            +
            *.o
         | 
| 23 | 
            +
            *.a
         | 
| 24 | 
            +
            mkmf.log
         | 
    
        data/Gemfile
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            # Bundler configuration for developing metacrunch-elasticsearch.
            source "https://rubygems.org"

            # Pulls in the dependencies declared in the gemspec.
            gemspec

            # Use the master branch of metacrunch from GitHub.
            gem "metacrunch", ">= 2.1.0", github: "ubpb/metacrunch", branch: "master"

            gem "rake"
            gem "rspec", "~> 3.2.0"

            # Debugging helpers are only installed when the CI environment
            # variable is not set.
            unless ENV["CI"]
              group :development do
                gem "hashdiff"
                gem "pry",                "~> 0.9.12.6"
                gem "pry-byebug",         "<= 1.3.2"
                gem "pry-rescue",         "~> 1.4.1", github: "ConradIrwin/pry-rescue", branch: :master
                gem "pry-stack_explorer", "~> 0.4.9.1"
                gem "pry-syntax-hacks",   "~> 0.0.6"
              end
            end
         | 
| 20 | 
            +
             | 
    
        data/License.txt
    ADDED
    
    | @@ -0,0 +1,22 @@ | |
| 1 | 
            +
            Copyright (c) 2013 René Sprotte, Michael Sievers
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            MIT License
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining
         | 
| 6 | 
            +
            a copy of this software and associated documentation files (the
         | 
| 7 | 
            +
            "Software"), to deal in the Software without restriction, including
         | 
| 8 | 
            +
            without limitation the rights to use, copy, modify, merge, publish,
         | 
| 9 | 
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         | 
| 10 | 
            +
            permit persons to whom the Software is furnished to do so, subject to
         | 
| 11 | 
            +
            the following conditions:
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            The above copyright notice and this permission notice shall be
         | 
| 14 | 
            +
            included in all copies or substantial portions of the Software.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         | 
| 17 | 
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         | 
| 18 | 
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         | 
| 19 | 
            +
            NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
         | 
| 20 | 
            +
            LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
         | 
| 21 | 
            +
            OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
         | 
| 22 | 
            +
            WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         | 
    
        data/Rakefile
    ADDED
    
    
    
        data/Readme.md
    ADDED
    
    
    
        data/lib/metacrunch/elasticsearch/reader.rb
    ADDED
    
| @@ -0,0 +1,63 @@ | |
| 1 | 
            +
            require "elasticsearch"
         | 
| 2 | 
            +
            require_relative "../elasticsearch"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module Metacrunch
         | 
| 5 | 
            +
              module Elasticsearch
         | 
| 6 | 
            +
                # Reads documents from Elasticsearch by running a scan search and
                # scrolling through the result pages.
                #
                # The target is addressed with an `elasticsearch://` URI; the parsed
                # URI object is asked for `host`, `port`, `index` and `type`.
                class Reader

                  # Page size passed as `size` to the initial search request.
                  DEFAULT_SCAN_SIZE          = 250
                  # Scroll lifetime in seconds; sent to Elasticsearch as "600s".
                  DEFAULT_SCROLL_EXPIRY_TIME = 10 * 60

                  # @param uri [String, #to_s] address starting with "elasticsearch://"
                  # @param body [Hash] search request body handed to the client
                  # @param log [Boolean] forwarded to the client's `log` option
                  # @raise [ArgumentError] if the URI does not use the elasticsearch scheme
                  def initialize(uri, body, log: false)
                    # String#start_with? is core Ruby, so no ActiveSupport alias is needed.
                    unless uri.to_s.start_with?("elasticsearch://")
                      raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
                    end

                    @uri  = URI(uri)
                    @body = body
                    @log  = log
                  end

                  # Yields every hit of the scan search, page by page.
                  #
                  # @yieldparam hit [Hash] one entry of a page's `hits.hits` array
                  # @return [Enumerator] when called without a block
                  # @return [nil] when called with a block
                  def each
                    return enum_for(__method__) unless block_given?

                    scroll_ttl = "#{DEFAULT_SCROLL_EXPIRY_TIME}s"

                    search_result = client.search(
                      body: @body,
                      index: @uri.index,
                      type: @uri.type,
                      scroll: scroll_ttl,
                      search_type: "scan",
                      size: DEFAULT_SCAN_SIZE
                    )

                    # The initial response only supplies the first scroll id; each
                    # scroll response supplies the id for the following request.
                    scroll_id = search_result["_scroll_id"]

                    while (page = client.scroll(scroll: scroll_ttl, scroll_id: scroll_id))
                      hits = page["hits"]["hits"]
                      # A missing or empty page marks the end of the scroll.
                      break if hits.nil? || hits.empty?

                      hits.each { |hit| yield(hit) }
                      scroll_id = page["_scroll_id"]
                    end
                  end

                  # Asks Elasticsearch how many documents match the body's query.
                  #
                  # The query is looked up under the `:query` key first and the
                  # "query" string key second.
                  #
                  # @return [Object] the "count" entry of the client's response
                  def count
                    query = @body[:query] || @body["query"]

                    client.count(
                      body: { query: query },
                      index: @uri.index,
                      type: @uri.type
                    )["count"]
                  end

                private

                  # Lazily builds and memoizes the Elasticsearch client.
                  def client
                    @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
                  end

                end
         | 
| 62 | 
            +
              end
         | 
| 63 | 
            +
            end
         | 
    
        data/lib/metacrunch/elasticsearch/uri.rb
    ADDED
    
| @@ -0,0 +1,31 @@ | |
| 1 | 
            +
            require "uri"
         | 
| 2 | 
            +
            require_relative "../elasticsearch"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Metacrunch
              module Elasticsearch
                # URI class for `elasticsearch://host[:port]/index[/type]` addresses.
                #
                # The superclass is written as `::URI::Generic` because a bare `URI`
                # inside this namespace resolves to this class once it is defined.
                class URI < ::URI::Generic

                  # Port used when the URI does not specify one.
                  DEFAULT_PORT = 9200

                  # @return [String, nil] first non-empty path segment
                  def index
                    splitted_path[0]
                  end

                  # @return [String, nil] second non-empty path segment
                  def type
                    splitted_path[1]
                  end

                private

                  # Splits the path on "/" and drops empty segments, so leading,
                  # trailing and doubled slashes are ignored.
                  def splitted_path
                    path.to_s.split("/").reject(&:empty?)
                  end

                end
              end
            end

            # Register the scheme so that URI("elasticsearch://...") returns a
            # Metacrunch::Elasticsearch::URI. Prefer the public registration API;
            # fall back to the scheme table for uri versions that lack it.
            if ::URI.respond_to?(:register_scheme)
              ::URI.register_scheme("ELASTICSEARCH", Metacrunch::Elasticsearch::URI)
            else
              ::URI.scheme_list["ELASTICSEARCH"] = Metacrunch::Elasticsearch::URI
            end
         | 
    
        data/lib/metacrunch/elasticsearch/writer.rb
    ADDED
    
| @@ -0,0 +1,59 @@ | |
| 1 | 
            +
            require "elasticsearch"
         | 
| 2 | 
            +
            require_relative "../elasticsearch"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module Metacrunch
         | 
| 5 | 
            +
              module Elasticsearch
         | 
| 6 | 
            +
                # Buffers documents and sends them to Elasticsearch with bulk
                # "index" requests.
                #
                # The target is addressed with an `elasticsearch://` URI; the parsed
                # URI object is asked for `host`, `port`, `index` and `type`.
                class Writer

                  # @param uri [String, #to_s] address starting with "elasticsearch://"
                  # @param log [Boolean] forwarded to the client's `log` option
                  # @param bulk_size [Integer] buffer length that triggers an automatic flush
                  # @param autoflush [Boolean] whether `write` may flush on its own
                  # @raise [ArgumentError] if the URI does not use the elasticsearch scheme
                  def initialize(uri, log: false, bulk_size: 250, autoflush: true)
                    # String#start_with? is core Ruby, so no ActiveSupport alias is needed.
                    unless uri.to_s.start_with?("elasticsearch://")
                      raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
                    end

                    @uri       = URI(uri)
                    @log       = log
                    @bulk_size = bulk_size
                    @buffer    = []
                    @autoflush = autoflush
                  end

                  # Queues one document for indexing.
                  #
                  # The id is taken from `:id`, or from `:_id` when `:id` is absent or
                  # falsy, and removed from the stored document. The caller's hash is
                  # left untouched.
                  #
                  # @param data [Hash] document including an `:id` or `:_id` entry
                  # @param options [Hash] accepted for compatibility; not used here
                  # @return [true]
                  # @raise [ArgumentError] if neither id key yields a value
                  def write(data, options = {})
                    # Work on a shallow copy so deleting the id does not alter the caller's hash.
                    document = data.dup
                    id = document.delete(:id) || document.delete(:_id)
                    raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id

                    @buffer << {
                      _index: @uri.index,
                      _type: @uri.type,
                      _id: id,
                      data: document
                    }

                    flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size

                    true
                  end

                  # Sends all buffered documents in one bulk request.
                  #
                  # @return [true]
                  # @raise [RuntimeError] if the bulk response has a truthy "errors" entry
                  def flush
                    unless @buffer.empty?
                      actions = @buffer.map { |entry| { index: entry } }
                      result  = client.bulk(body: actions)
                      raise RuntimeError, "Elasticsearch bulk request reported errors" if result["errors"]
                    end

                    true
                  ensure
                    # The buffer is emptied even when the request fails, so a failed
                    # batch is dropped rather than sent again by the next flush.
                    @buffer = []
                  end

                  # Flushes whatever is still buffered.
                  #
                  # @return [true]
                  def close
                    flush
                  end

                private

                  # Lazily builds and memoizes the Elasticsearch client.
                  def client
                    @client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
                  end

                end
         | 
| 58 | 
            +
              end
         | 
| 59 | 
            +
            end
         | 
    
        data/lib/metacrunch/elasticsearch.rb
    ADDED
    
| @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            require "metacrunch"
         | 
| 2 | 
            +
            require "elasticsearch"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            begin
         | 
| 5 | 
            +
              require "pry"
         | 
| 6 | 
            +
            rescue LoadError ; end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
             | 
| 9 | 
            +
            module Metacrunch
         | 
| 10 | 
            +
              module Elasticsearch
         | 
| 11 | 
            +
                require_relative "./elasticsearch/uri"
         | 
| 12 | 
            +
                require_relative "./elasticsearch/reader"
         | 
| 13 | 
            +
                require_relative "./elasticsearch/writer"
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
            end
         | 
    
        data/lib/metacrunch_plugin.rb
    ADDED
    
| @@ -0,0 +1 @@ | |
| 1 | 
            +
            require "metacrunch/elasticsearch"
         | 
    
        data/metacrunch-elasticsearch.gemspec
    ADDED
    
| @@ -0,0 +1,22 @@ | |
| 1 | 
            +
            require File.expand_path("../lib/metacrunch/elasticsearch/version", __FILE__)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Gem specification for metacrunch-elasticsearch.
            Gem::Specification.new do |s|
              s.authors       = ["René Sprotte", "Michael Sievers"]
              s.email         = "r.sprotte@ub.uni-paderborn.de"
              s.summary       = %q{Elasticsearch tools for metacrunch}
              s.description   = s.summary
              s.homepage      = "http://github.com/ubpb/metacrunch-elasticsearch"
              s.licenses      = ["MIT"]

              # List tracked files NUL-separated so that file names containing
              # whitespace stay intact; splitting on the unset `$\` would split on
              # any whitespace.
              s.files         = `git ls-files -z`.split("\x0")
              s.executables   = s.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
              s.test_files    = s.files.grep(%r{^(test|spec|features)/})
              s.name          = "metacrunch-elasticsearch"
              s.require_paths = ["lib"]
              s.version       = Metacrunch::Elasticsearch::VERSION

              s.required_ruby_version = ">= 2.2.0"

              s.add_dependency "elasticsearch", "~> 1.0"
              s.add_dependency "metacrunch",    "~> 2.1"
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,84 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: metacrunch-elasticsearch
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 2.0.0
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - René Sprotte
         | 
| 8 | 
            +
            - Michael Sievers
         | 
| 9 | 
            +
            autorequire: 
         | 
| 10 | 
            +
            bindir: bin
         | 
| 11 | 
            +
            cert_chain: []
         | 
| 12 | 
            +
            date: 2015-06-15 00:00:00.000000000 Z
         | 
| 13 | 
            +
            dependencies:
         | 
| 14 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 15 | 
            +
              name: elasticsearch
         | 
| 16 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 17 | 
            +
                requirements:
         | 
| 18 | 
            +
                - - "~>"
         | 
| 19 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 20 | 
            +
                    version: '1.0'
         | 
| 21 | 
            +
              type: :runtime
         | 
| 22 | 
            +
              prerelease: false
         | 
| 23 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 24 | 
            +
                requirements:
         | 
| 25 | 
            +
                - - "~>"
         | 
| 26 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 27 | 
            +
                    version: '1.0'
         | 
| 28 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 29 | 
            +
              name: metacrunch
         | 
| 30 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 31 | 
            +
                requirements:
         | 
| 32 | 
            +
                - - "~>"
         | 
| 33 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 34 | 
            +
                    version: '2.1'
         | 
| 35 | 
            +
              type: :runtime
         | 
| 36 | 
            +
              prerelease: false
         | 
| 37 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 38 | 
            +
                requirements:
         | 
| 39 | 
            +
                - - "~>"
         | 
| 40 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 41 | 
            +
                    version: '2.1'
         | 
| 42 | 
            +
            description: Elasticsearch tools for metacrunch
         | 
| 43 | 
            +
            email: r.sprotte@ub.uni-paderborn.de
         | 
| 44 | 
            +
            executables: []
         | 
| 45 | 
            +
            extensions: []
         | 
| 46 | 
            +
            extra_rdoc_files: []
         | 
| 47 | 
            +
            files:
         | 
| 48 | 
            +
            - ".gitignore"
         | 
| 49 | 
            +
            - Gemfile
         | 
| 50 | 
            +
            - License.txt
         | 
| 51 | 
            +
            - Rakefile
         | 
| 52 | 
            +
            - Readme.md
         | 
| 53 | 
            +
            - lib/metacrunch/elasticsearch.rb
         | 
| 54 | 
            +
            - lib/metacrunch/elasticsearch/reader.rb
         | 
| 55 | 
            +
            - lib/metacrunch/elasticsearch/uri.rb
         | 
| 56 | 
            +
            - lib/metacrunch/elasticsearch/version.rb
         | 
| 57 | 
            +
            - lib/metacrunch/elasticsearch/writer.rb
         | 
| 58 | 
            +
            - lib/metacrunch_plugin.rb
         | 
| 59 | 
            +
            - metacrunch-elasticsearch.gemspec
         | 
| 60 | 
            +
            homepage: http://github.com/ubpb/metacrunch-elasticsearch
         | 
| 61 | 
            +
            licenses:
         | 
| 62 | 
            +
            - MIT
         | 
| 63 | 
            +
            metadata: {}
         | 
| 64 | 
            +
            post_install_message: 
         | 
| 65 | 
            +
            rdoc_options: []
         | 
| 66 | 
            +
            require_paths:
         | 
| 67 | 
            +
            - lib
         | 
| 68 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 69 | 
            +
              requirements:
         | 
| 70 | 
            +
              - - ">="
         | 
| 71 | 
            +
                - !ruby/object:Gem::Version
         | 
| 72 | 
            +
                  version: 2.2.0
         | 
| 73 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 74 | 
            +
              requirements:
         | 
| 75 | 
            +
              - - ">="
         | 
| 76 | 
            +
                - !ruby/object:Gem::Version
         | 
| 77 | 
            +
                  version: '0'
         | 
| 78 | 
            +
            requirements: []
         | 
| 79 | 
            +
            rubyforge_project: 
         | 
| 80 | 
            +
            rubygems_version: 2.4.6
         | 
| 81 | 
            +
            signing_key: 
         | 
| 82 | 
            +
            specification_version: 4
         | 
| 83 | 
            +
            summary: Elasticsearch tools for metacrunch
         | 
| 84 | 
            +
            test_files: []
         |