elasticsearch-data-cleaner 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/elasticsearch-data-cleaner +157 -0
- data/lib/elasticsearch-data-cleaner.rb +165 -0
- metadata +62 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0a863f431274d88db9cbc0579f597229674981a8
|
4
|
+
data.tar.gz: 6fd30ed2b2c8836a7df9b83bc12c9dd811f258d5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ff3b0b5e9ec742004b1eec12cb81755253625a969d8b54e4a18a1007e684cf7b5c424ecfe0ff80448da5fdda06f560a386e83cbdeff38d681f420ec34be82f03
|
7
|
+
data.tar.gz: ebc636008e9177583833e6ca7d6e8e46ae241e62232beb11289ec4c7f21c2892fb13c96b8738062049684c1156fa56f64ba4b57c94b551d57be8a610790b84f9
|
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
###########################################################
|
3
|
+
### ###
|
4
|
+
### Author: Maksim Podlesnyi <mpodlesnyi@smartling.com> ###
|
5
|
+
### ###
|
6
|
+
###########################################################
|
7
|
+
|
8
|
+
require 'elasticsearch-data-cleaner'
|
9
|
+
require 'optparse'
|
10
|
+
require 'ostruct'
|
11
|
+
require 'yaml'
|
12
|
+
require 'logger'
|
13
|
+
|
14
|
+
# Example configuration printed by `--help`.
#
# The example must itself be a valid config: each top-level key is an index
# name pattern whose value is a mapping of settings, and `types` is a nested
# mapping of type name => settings. The nesting is therefore significant and
# is expressed through indentation. Frozen because it is a shared constant.
::CONFIGFILE_HELP = <<'YAML'.freeze
---
events-%Y%m%d:        # index pattern compatible ruby date format directives
  number: 7           # how many indices script have to keep by pattern above
  future: true        # leave indices with future timestamp. true by default
logstash-%Y%m%d:
  number: 60
  future: false
  types:              # types description
    elb:              # type name. there is no patterns
      number: 30
      optimize: true  # run _optimize for index after removing this type.
                      # false by default
    nginx-pinch-main:
      number: 10
      optimize: true
    nginx-wlb-main:
      number: 60
      optimize: true

# Attention: you have to use number of indices greater or equal numbers
# of indices with types because first of all script deletes indices
# using index setting.
YAML
|
36
|
+
|
37
|
+
# Command-line option parser for the elasticsearch-data-cleaner executable.
class MyOptparse

  # Parses the command line into an OpenStruct of settings.
  #
  # Recognised options are removed from +args+ in place (OptionParser#parse!).
  # Defaults: dry_run=false, timeout=300, url='http://localhost:9200',
  # debug=false, empty=true; config and logfile stay unset unless given.
  #
  # On an invalid option or argument the error and the usage text are
  # printed and the process exits with status -1. `-h/--help` prints the
  # usage plus the config example and exits with status 0.
  #
  # @param args [Array<String>] raw command-line arguments
  # @return [OpenStruct] parsed settings
  def parse(args)
    options = OpenStruct.new
    options.dry_run = false
    options.timeout = 300
    options.url = 'http://localhost:9200'
    options.debug = false
    # NOTE: inverted sense. true means "leave empty indices alone"; the
    # --empty flag flips it to false, and EScleaner#run deletes empty
    # indices only when this value is false.
    options.empty = true

    @opt_parser = OptionParser.new do |opts|
      banner = [
        $0 + ' --dry-run --config data_rotate.yaml',
        "\t\t\tdry run for config\n",
        $0 + ' --debug --config data_rotate.yaml',
        "\t\t\tenable debug mode\n",
        $0 + ' -t 600 --config data_rotate.yaml',
        "\t\t\tincrease http client timeout to 10 minutes\n",
      ]
      opts.banner = "Examples:\n\t" + banner.join("\n\t")

      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-u", "--url URL",
              "Url of ES API. Default is #{options.url}") do |url|
        options.url = url
      end

      # DecimalInteger makes OptionParser reject non-numeric values instead
      # of String#to_i silently turning them into a timeout of 0.
      opts.on("-t", "--timeout SECONDS", OptionParser::DecimalInteger,
              "The timeout for connection to ES API. Default is #{options.timeout}") do |timeout|
        options.timeout = timeout
      end

      opts.on("-c", "--config CONFIG",
              "YAML file with configuration for rotate data") do |config|
        options.config = config
      end

      opts.on("-l", "--logfile LOGFILE",
              "Log file") do |logfile|
        options.logfile = logfile
      end

      opts.on("--dry-run",
              "Run script without applying any changes") do
        options.dry_run = true
      end

      opts.on("--empty",
              "Delete any empty indices") do
        options.empty = false
      end

      opts.on("--debug",
              'Debug mode. More verbose') do
        options.debug = true
      end

      opts.separator "Common options:"

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        puts "\n\n\nConfig file (YAML) example:"
        puts ::CONFIGFILE_HELP
        exit(0)
      end
    end

    @opt_parser.parse!(args)
    options
  rescue OptionParser::ParseError => e
    $stderr.print "Error: " + e.to_s + "\n"
    puts @opt_parser
    exit(-1)
  end

  # Prints the usage text built by the most recent #parse call.
  def help
    puts @opt_parser
  end
end
|
117
|
+
|
118
|
+
# Entry point: parse the command line, configure the global logger, load the
# YAML retention config and hand it to EScleaner. Any failure is reported
# through the logger (or stderr when the logger is not set up yet) and the
# process exits with status 1.
begin
  options = MyOptparse.new.parse(ARGV)

  $logger = Logger.new(options.logfile || STDOUT)
  $logger.level = options.debug ? Logger::DEBUG : Logger::INFO
  if options.logfile
    # Log files get timestamped, levelled lines plus begin/end markers.
    $logger.formatter = proc do |severity, datetime, _progname, msg|
      "[#{datetime}] #{severity} : #{msg}\n"
    end
    $logger.info("begin")
  else
    # Console output is just the bare message.
    $logger.formatter = proc do |_severity, _datetime, _progname, msg|
      "#{msg}\n"
    end
  end

  raise 'please specify path to yaml config file' unless options.config
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise "can not read config file #{options.config}" unless File.exist?(options.config)

  # File.read rather than IO.read: IO.read treats a leading "|" in the path
  # as a command to run.
  # NOTE(review): YAML.load is kept for compatibility with existing configs;
  # consider YAML.safe_load if the config can come from an untrusted source.
  config = YAML.load(File.read(options.config))
  # An empty file loads as nil/false and a scalar or list cannot be iterated
  # as pattern => settings, so fail with a clear message up front.
  unless config.is_a?(Hash)
    raise "config file #{options.config} must contain a YAML mapping of index patterns"
  end

  ess = EScleaner.new options
  ess.run(config)
  $logger.info("end") if options.logfile
rescue StandardError => e
  # StandardError, not Exception: the option parser leaves via `exit`
  # (SystemExit) for --help and for usage errors, and that must keep its own
  # exit status instead of being reported as a failure here.
  if $logger
    $logger.error e.message
  else
    $stderr.puts e.message
  end
  exit(1)
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
###########################################################
|
2
|
+
### ###
|
3
|
+
### Author: Maksim Podlesnyi <mpodlesnyi@smartling.com> ###
|
4
|
+
### ###
|
5
|
+
###########################################################
|
6
|
+
require 'rubygems'
|
7
|
+
require 'json'
|
8
|
+
require 'faraday'
|
9
|
+
require 'date'
|
10
|
+
|
11
|
+
|
12
|
+
# Removes old Elasticsearch indices and types according to a retention config.
#
# All requests go through a Faraday connection built from the options, and
# all logging goes through the global $logger, which the caller must set
# before creating an instance.
class EScleaner

  # Builds the HTTP connection and immediately queries the server (see #info).
  #
  # @param options [#url, #timeout, #dry_run, #empty] runtime settings; the
  #   object is kept and consulted later for dry_run and empty
  def initialize(options)
    @connection = ::Faraday.new options.url, { :request => { :timeout => options.timeout } }
    @options = options
    info
  end

  # Fetches the server's root document and records its version as
  # @es_version with integer 'major', 'minor' and 'patch' entries.
  def info
    resp = @connection.get
    @es_info = parse_response resp.body
    major, minor, patch = @es_info['version']['number'].split('.').map { |part| part.to_i }
    @es_version = { 'major' => major, 'minor' => minor, 'patch' => patch }
    $logger.debug("detected Elasticsearch #{@es_version['major']}.#{@es_version['minor']}")
  end

  # Parses a JSON response body.
  #
  # @param response [String] raw JSON text
  # @return [Object] the decoded document
  # @raise [RuntimeError] when the document carries an 'error' entry; the
  #   message is taken from that entry
  def parse_response(response)
    parsed = ::JSON.parse response
    return parsed unless parsed.is_a?(Hash) && parsed.key?('error')

    raise error_message(parsed['error'])
  end

  # Checks that a response confirms the request.
  #
  # @param response [#body] HTTP response
  # @param answer [String] key that must be present with the value true
  # @raise [RuntimeError] 'request failed' when it is missing or not true
  def acknowledged_test(response, answer)
    result = parse_response response.body
    raise 'request failed' unless result[answer] == true
  end

  # @return [Array<String>] names of all indices (keys of GET _aliases)
  def list
    resp = @connection.get "_aliases"
    parse_response(resp.body).keys
  end

  # Tells whether +type+ exists in +index+ using a HEAD request. The URL
  # layout depends on the detected major version.
  #
  # @return [Boolean] true on HTTP 200, false on HTTP 404
  # @raise [RuntimeError] on any other status code
  def exists(index, type)
    url = if @es_version['major'] >= 5
            "#{index}/_mapping/#{type}"
          else
            "#{index}/#{type}"
          end
    resp = @connection.head url
    case resp.status
    when 200 then true
    when 404 then false
    else
      raise "could not check if type #{index}/#{type} exists. got response code #{resp.status}"
    end
  end

  # Returns the primary document count of +index+ and logs a warning when it
  # is below 10.
  #
  # @raise [RuntimeError] when the stats response carries no count
  def docs(index)
    resp = @connection.get "#{index}/_stats/docs"
    stats = parse_response resp.body
    # Walk the path defensively so an unexpected response shape ends in the
    # descriptive error below rather than a NoMethodError on nil.
    count = %w[_all primaries docs count].inject(stats) do |node, key|
      node.is_a?(Hash) ? node[key] : nil
    end
    raise "could not get count of docs for index #{index}" unless count

    $logger.warn("index #{index} has #{count} docs") if count < 10
    count
  end

  # Deletes an index ("name") or a type ("name/type"). In dry-run mode the
  # deletion is only logged.
  #
  # @raise [RuntimeError] when the server does not acknowledge the request
  def delete(object)
    $logger.info("deleting #{object}#{@options.dry_run ? ' (dry_run)': ''}")
    return if @options.dry_run

    resp = @connection.delete "#{object}"
    acknowledged_test resp, 'acknowledged'
  end

  # Expunges deleted documents from +index+. In dry-run mode the call is
  # only logged.
  def optimize(index)
    $logger.info("starting optimize for index #{index}#{@options.dry_run ? ' (dry_run)': ''}")
    return if @options.dry_run

    # Elasticsearch 5.0 removed _optimize in favour of _forcemerge.
    endpoint = @es_version['major'] >= 5 ? '_forcemerge' : '_optimize'
    resp = @connection.post "#{index}/#{endpoint}?only_expunge_deletes=true"
    $logger.debug(parse_response resp.body)
  end

  # Applies every retention rule of +config+.
  #
  # +config+ maps an index name pattern (strptime directives) to settings:
  # 'number' (how many of the newest non-future indices to keep), 'future'
  # (false deletes indices dated after today) and 'types' (type name =>
  # { 'number' => n, 'optimize' => bool }). The hash is only read: counters
  # are tracked locally, so the same config can be reused afterwards.
  #
  # @param config [Hash] pattern => settings
  def run(config)
    config.each do |pattern, settings|
      rotate(pattern, settings || {})
    end
  end

  private

  # Turns the 'error' entry of a response into a message string. It may be a
  # plain string or a structured object with 'type' and 'reason'.
  def error_message(error)
    return error if error.is_a?(String)

    if error.is_a?(Hash) && error['reason']
      [error['type'], error['reason']].compact.join(': ')
    else
      ::JSON.generate(error)
    end
  end

  # Lists all indices and returns the dates of those whose name matches
  # +pattern+. When empty-index removal is enabled (options.empty is false),
  # indices holding zero documents are deleted instead of being considered.
  def matching_dates(pattern)
    list.each_with_object([]) do |index, dates|
      if !@options.empty && docs(index) == 0
        delete(index)
      else
        begin
          dates << DateTime.strptime(index, pattern)
        rescue StandardError
          # The name does not fit this pattern, so this rule does not manage it.
        end
      end
    end
  end

  # Applies one rule, walking the matching indices from newest to oldest.
  def rotate(pattern, settings)
    dates = matching_dates(pattern)
    today = Date.today
    remaining = settings['number']
    type_rules = settings['types'] || {}
    type_remaining = {}
    type_rules.each { |type, rule| type_remaining[type] = rule && rule['number'] }

    dates.sort.reverse_each do |date|
      index = date.strftime(pattern)

      if date.to_date > today
        # Future indices never count against 'number'.
        if settings['future'] == false
          $logger.debug("going to delete index #{index}. It is future")
          delete(index)
        end
        next
      end

      next unless remaining

      if remaining <= 0
        delete(index)
      elsif prune_types(index, type_rules, type_remaining)
        optimize(index)
      end
      remaining -= 1
    end
  end

  # Applies the per-type quotas to one kept index, consuming +type_remaining+.
  # Returns true when a deleted type asked for an optimize run.
  def prune_types(index, type_rules, type_remaining)
    need_to_optimize = false
    type_rules.each do |type, rule|
      next unless exists(index, type)

      left = type_remaining[type]
      next unless left

      if left <= 0
        delete("#{index}/#{type}")
        need_to_optimize = true if rule['optimize']
      else
        type_remaining[type] = left - 1
      end
    end
    need_to_optimize
  end
end
|
165
|
+
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: elasticsearch-data-cleaner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Maksim Podlesnyi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-11-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.9'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.9'
|
27
|
+
description: Command line tool which helps remove old ES indices and types
|
28
|
+
email:
|
29
|
+
- mpodlesnyi@smartling.com
|
30
|
+
- itops@smartling.com
|
31
|
+
executables:
|
32
|
+
- elasticsearch-data-cleaner
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- bin/elasticsearch-data-cleaner
|
37
|
+
- lib/elasticsearch-data-cleaner.rb
|
38
|
+
homepage: https://github.com/Smartling/elasticsearch-data-cleaner
|
39
|
+
licenses:
|
40
|
+
- GPL-3.0
|
41
|
+
metadata: {}
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
requirements: []
|
57
|
+
rubyforge_project:
|
58
|
+
rubygems_version: 2.5.2
|
59
|
+
signing_key:
|
60
|
+
specification_version: 4
|
61
|
+
summary: Command line tool for removing old Elasticsearch data (indices and types)
|
62
|
+
test_files: []
|