elastic-util 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e7e92f652cfa5c6bcf9c4c66c9d11908f8b793ba
4
+ data.tar.gz: 64a823c77a5a7f37f50256f47f800138a1b9d166
5
+ SHA512:
6
+ metadata.gz: 49efe68f26f5b18fa7ee9e5a16b921cf564f3b656abb34e9ff6417387ee60f8d0ac69f3f0f4bb9e2bd2b9a2fab9e2986e9302114b02d969999f022eeab2df788
7
+ data.tar.gz: 93f14448ac08283321f8a66453f2fc7ed8919388a4607b56cef1ea136acce26077a60093b2e3106611ba5e1cb5e1b17476009dcabd81c40edf7895bb693a5e5d
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in elastic-util.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # elastic-util
2
+ ElasticUtil gem to backup and restore elasticsearch indices
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/elastic-util ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env ruby
2
+ require 'elastic-util'
3
+ require 'optparse'
4
+
5
+ prog_name = "elastic-util" # $0.split('/').last
6
+ usage = "Usage: #{prog_name} [backup|restore] [options]"
7
+ args = ARGV.dup
8
+ command_name = args.shift
9
+
10
+ case command_name
11
+
12
+ when "backup"
13
+
14
+ options = {}
15
+ optparse = OptionParser.new do |opts|
16
+ opts.banner = "Usage: #{prog_name} backup [url] [directory] [options]"
17
+ opts.on('--indices x,y,z', Array, "The indices to backup. Default is all.") do |val|
18
+ options[:indices] = val.collect {|it| it.strip }
19
+ end
20
+ opts.on('--exclude-indices x,y,z', Array, "The indices to exclude from backup. Default is none.") do |val|
21
+ options[:exclude_indices] = val.collect {|it| it.strip }
22
+ end
23
+ opts.on( '-s', '--size NUMBER', "The size api parameter. This dictates the size of the files and api payloads. Default is 1000." ) do |val|
24
+ options[:size] = val.to_i
25
+ end
26
+ opts.on( '-S', '--scroll STRING', "The scroll api parameter. Default is '5m'." ) do |val|
27
+ options[:scroll] = val.to_s
28
+ end
29
+ opts.on( '-f', '--force', "Delete existing backup directory instead of erroring. Default is false." ) do |val|
30
+ options[:force] = true
31
+ end
32
+ opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
33
+ options[:quiet] = true
34
+ end
35
+ opts.on('-h', '--help', "Prints this help" ) do
36
+ puts opts
37
+ exit
38
+ end
39
+ end
40
+ optparse.parse!(args)
41
+ url = args[0]
42
+ backup_directory = args[1]
43
+ if url.nil? || url.empty?
44
+ $stderr.puts "#{prog_name}: missing required argument [url]"
45
+ $stderr.puts optparse
46
+ exit 1
47
+ end
48
+ if backup_directory.nil? || backup_directory.empty?
49
+ $stderr.puts "#{prog_name}: missing required argument [directory]"
50
+ $stderr.puts optparse
51
+ exit 1
52
+ end
53
+
54
+ begin
55
+ result = ElasticUtil.backup(url, backup_directory, options)
56
+ exit 0
57
+ rescue ElasticUtil::Error => err
58
+ $stderr.puts "#{prog_name}: #{err.message}"
59
+ # $stderr.puts optparse
60
+ exit 1
61
+ end
62
+
63
+
64
+ when "restore"
65
+
66
+ options = {}
67
+ optparse = OptionParser.new do |opts|
68
+ opts.banner = "Usage: #{prog_name} restore [url] [directory] [options]"
69
+ opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
70
+ options[:quiet] = true
71
+ end
72
+ opts.on('-h', '--help', "Prints this help" ) do
73
+ puts opts
74
+ exit
75
+ end
76
+ end
77
+ optparse.parse!(args)
78
+ url = args[0]
79
+ backup_directory = args[1]
80
+ if url.nil? || url.empty?
81
+ $stderr.puts "#{prog_name}: missing required argument [url]"
82
+ $stderr.puts optparse
83
+ exit 1
84
+ end
85
+ if backup_directory.nil? || backup_directory.empty?
86
+ $stderr.puts "#{prog_name}: missing required argument [directory]"
87
+ $stderr.puts optparse
88
+ exit 1
89
+ end
90
+
91
+ begin
92
+ result = ElasticUtil.restore(url, backup_directory, options)
93
+ exit 0
94
+ rescue ElasticUtil::Error => err
95
+ $stderr.puts "#{prog_name}: #{err.message}"
96
+ # $stderr.puts optparse
97
+ exit 1
98
+ end
99
+
100
+ else
101
+ $stderr.puts usage
102
+ exit 1
103
+ end
104
+
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'elastic_util'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "elastic-util"
8
+ spec.version = ElasticUtil::VERSION
9
+ spec.authors = ["James Dickson"]
10
+ spec.email = ["dickson.james@gmail.com"]
11
+ spec.summary = "Provides backup and restore for ElasticSearch data"
12
+ spec.description = "ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore indices"
13
+ #spec.homepage = "http://www.elastic-util.com"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+
18
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_development_dependency "bundler", "~> 1.6"
23
+ spec.add_development_dependency "rake"
24
+ # spec.add_dependency 'multi_json'
25
+ end
@@ -0,0 +1 @@
1
+ require 'elastic_util'
@@ -0,0 +1,243 @@
1
+ # encoding: utf-8
2
+ require 'net/http'
3
+ require 'json'
4
+ require 'fileutils'
5
+
6
+ # This module provides a way to backup and restore elasticsearch data.
7
+ #
8
+ # @example Backup data from one elasticsearch cluster and restore it to another.
9
+ #
10
+ # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup', {size:5000})
11
+ # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
12
+ #
13
+ module ElasticUtil
14
+
15
+ VERSION = "0.1"
16
+
17
+ # The name of the data directory, relative to the user provided backup directory.
18
+ DUMP_DIR = "es_data"
19
+
20
+ # A class to be raised for any known error condition.
21
+ class Error < StandardError; end
22
+
23
+ # Backup elasticsearch data to a local directory.
24
+ #
25
+ # This uses ElasticSearch's scroll api to fetch all records for indices
26
+ # and write the data to a local directory. The files it generates are given a
27
+ # .json.data extension. They are not valid JSON files, but rather are in the
28
+ # format expected by ElasticSearch's _bulk api.
29
+ #
30
+ # So #restore simply has to POST the contents of each file.
31
+ #
32
+ # Use the :size option to change the number or results to fetch at once,
33
+ # and also the size of the data files generated.
34
+ # The latter correlates to the size of the api requests made in #restore.
35
+ #
36
+ # @example Backup default elasticsearch running locally.
37
+ #
38
+ # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup')
39
+ #
40
+ # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'
41
+ # @param [String] backup_dir The local directory to store data in. eg. '/tmp/es2.4'
42
+ # @param [Hash] opts The options for this backup.
43
+ # @option opts [Array] :indices The indices to backup. Default is all.
44
+ # @option opts [Array] :exclude_indices Exclude certain indexes.
45
+ # @option opts [String] :scroll The scroll api parameter, Default is '5m'.
46
+ # @option opts [Integer] :size The size api parameter. Default is 1000.
47
+ # @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
48
+ # @option opts [true] :quiet Don't print anything. Default is false.
49
+ #
50
+ # @return [true] or raises an error
51
+ #
52
+ def self.backup(url, backup_dir, opts={})
53
+ start_time = Time.now
54
+ backup_dir = backup_dir.strip
55
+ path = File.join(backup_dir.strip, DUMP_DIR)
56
+ indices = []
57
+
58
+ # ping it first
59
+ uri = URI(url)
60
+ response = Net::HTTP.get_response(uri)
61
+ http = Net::HTTP.new(uri.host, uri.port)
62
+ http.read_timeout = 5
63
+ http.open_timeout = 5
64
+ response = http.start() {|http|
65
+ http.get("/")
66
+ }
67
+ if !response.is_a?(Net::HTTPSuccess)
68
+ raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
69
+ end
70
+
71
+ # determine indices to backup, default is everything.
72
+ if opts[:indices]
73
+ indices = opts[:indices]
74
+ else
75
+ uri = URI(url + "/_cat/indices?format=json")
76
+ response = Net::HTTP.get_response(uri)
77
+ if !response.is_a?(Net::HTTPSuccess)
78
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
79
+ end
80
+ json_response = JSON.parse(response.body)
81
+ json_response.each do |record|
82
+ indices.push(record['index'])
83
+ end
84
+ end
85
+ if opts[:exclude_indices]
86
+ indices = indices.reject {|it| opts[:exclude_indices].include?(it) }
87
+ end
88
+
89
+ if indices.empty?
90
+ raise Error, "no indices to back up!"
91
+ end
92
+
93
+ opts[:scroll] ||= '5m'
94
+ opts[:size] ||= 1000
95
+
96
+ # validate backup path
97
+ if File.exists?(path)
98
+ if opts[:force]
99
+ FileUtils.rmtree(path)
100
+ else
101
+ raise Error, "backup path '#{path}' already exists! Delete it first or use --force"
102
+ end
103
+ end
104
+ FileUtils.mkdir_p(path)
105
+
106
+ # dump data
107
+ indices.each_with_index do |index_name, i|
108
+ puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
109
+ # initial request
110
+ file_index = 0
111
+ uri = URI(url + "/#{index_name}/_search")
112
+ params = {
113
+ :format => "json",
114
+ :scroll => opts[:scroll],
115
+ :size => opts[:size],
116
+ :sort => ["_doc"]
117
+ }
118
+ uri.query = URI.encode_www_form(params)
119
+ # puts "HTTP REQUEST #{uri.inspect}"
120
+ response = Net::HTTP.get_response(uri)
121
+ if !response.is_a?(Net::HTTPSuccess)
122
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
123
+ end
124
+ json_response = JSON.parse(response.body)
125
+ raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
126
+ scroll_id = json_response['_scroll_id']
127
+ hits = json_response['hits']['hits']
128
+ save_bulk_data(path, hits)
129
+
130
+ file_index = 1
131
+ # scroll requests
132
+ while !hits.empty?
133
+ uri = URI(url + "/_search/scroll")
134
+ params = {
135
+ :scroll_id => scroll_id,
136
+ :scroll => opts[:scroll]
137
+ }
138
+ uri.query = URI.encode_www_form(params)
139
+ # puts "HTTP REQUEST #{uri.inspect}"
140
+ response = Net::HTTP.get_response(uri)
141
+ if !response.is_a?(Net::HTTPSuccess)
142
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
143
+ end
144
+ json_response = JSON.parse(response.body)
145
+ raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
146
+ scroll_id = json_response['_scroll_id']
147
+ hits = json_response['hits']['hits']
148
+ if file_index > 0
149
+ save_bulk_data(path, hits, file_index)
150
+ else
151
+ save_bulk_data(path, hits)
152
+ end
153
+ file_index += 1
154
+ end
155
+ end
156
+
157
+ puts "Finished backup of elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
158
+ return true
159
+ end
160
+
161
+ # Restore elasticsearch data from a backup.
162
+ # This will do a POST to the _bulk api for each file in the backup directory.
163
+ #
164
+ # @example Restore local cluster with our backup.
165
+ #
166
+ # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
167
+ #
168
+ # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'.
169
+ # @param [String] backup_dir The backup directory.
170
+ # @param [Hash] opts The options for this backup.
171
+ # @option opts [true] :quiet Don't print anything. Default is false.
172
+ #
173
+ # @return [true] or raises an error
174
+ #
175
+ def self.restore(url, backup_dir, opts={})
176
+ start_time = Time.now
177
+ backup_dir = backup_dir.strip
178
+ path = File.join(backup_dir.strip, DUMP_DIR)
179
+
180
+ # validate backup path
181
+ if !Dir.exists?(path)
182
+ raise Error, "backup path '#{backup_dir}' does not exist!"
183
+ end
184
+
185
+ # ping it first
186
+ uri = URI(url)
187
+ response = Net::HTTP.get_response(uri)
188
+ http = Net::HTTP.new(uri.host, uri.port)
189
+ http.read_timeout = 5
190
+ http.open_timeout = 5
191
+ response = http.start() {|http|
192
+ http.get("/")
193
+ }
194
+
195
+ if !response.is_a?(Net::HTTPSuccess)
196
+ raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
197
+ end
198
+
199
+ # find files to import
200
+ found_files = Dir[File.join(path, '**', '*.json.data' )]
201
+ if found_files.empty?
202
+ raise Error, "no .json.data files found in backup path '#{backup_dir}'!"
203
+ else
204
+ puts "Found #{found_files.size} files to import" unless opts[:quiet]
205
+ end
206
+
207
+ # bulk api request for each file
208
+ found_files.each_with_index do |file, i|
209
+ puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
210
+ payload = File.read(file)
211
+ # uri = URI(url)
212
+ http = Net::HTTP.new(uri.host, uri.port)
213
+ response = http.post("/_bulk", payload)
214
+ if !response.is_a?(Net::HTTPSuccess)
215
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
216
+ end
217
+ end
218
+
219
+ puts "Finished restore of elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
220
+ return true
221
+ end
222
+
223
+ def self.save_bulk_data(path, hits, file_index=nil) # :nodoc:
224
+ if hits && !hits.empty?
225
+ hits.each do |hit|
226
+ index_name = hit['_index']
227
+ index_type = hit['_type']
228
+ dir_name = File.join(path, index_name)
229
+ FileUtils.mkdir_p(dir_name)
230
+ file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
231
+ # prepare record for bulk api injection
232
+ action_json = {'index' => {
233
+ '_index' => hit['_index'], '_type' => hit['_type'], '_id' => hit['_id']
234
+ } }
235
+ source_json = hit['_source']
236
+ File.open(file_name, 'a') do |file|
237
+ file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
238
+ end
239
+ end
240
+ end
241
+ end
242
+
243
+ end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elastic-util
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - James Dickson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore
42
+ indices
43
+ email:
44
+ - dickson.james@gmail.com
45
+ executables:
46
+ - elastic-util
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - ".gitignore"
51
+ - Gemfile
52
+ - README.md
53
+ - Rakefile
54
+ - bin/elastic-util
55
+ - elastic-util.gemspec
56
+ - lib/elastic-util.rb
57
+ - lib/elastic_util.rb
58
+ homepage:
59
+ licenses:
60
+ - MIT
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.4.8
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: Provides backup and restore for ElasticSearch data
82
+ test_files: []