elasticsearch-data-cleaner 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0a863f431274d88db9cbc0579f597229674981a8
4
+ data.tar.gz: 6fd30ed2b2c8836a7df9b83bc12c9dd811f258d5
5
+ SHA512:
6
+ metadata.gz: ff3b0b5e9ec742004b1eec12cb81755253625a969d8b54e4a18a1007e684cf7b5c424ecfe0ff80448da5fdda06f560a386e83cbdeff38d681f420ec34be82f03
7
+ data.tar.gz: ebc636008e9177583833e6ca7d6e8e46ae241e62232beb11289ec4c7f21c2892fb13c96b8738062049684c1156fa56f64ba4b57c94b551d57be8a610790b84f9
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env ruby
2
+ ###########################################################
3
+ ### ###
4
+ ### Author: Maksim Podlesnyi <mpodlesnyi@smartling.com> ###
5
+ ### ###
6
+ ###########################################################
7
+
8
+ require 'elasticsearch-data-cleaner'
9
+ require 'optparse'
10
+ require 'ostruct'
11
+ require 'yaml'
12
+ require 'logger'
13
+
14
# Example YAML configuration printed by `--help`.
#
# Top-level keys are index name patterns written with Ruby date format
# directives; nested keys configure retention for the index and, optionally,
# for individual types inside it. Nesting is expressed with two-space
# indentation so that the example is itself a loadable config.
::CONFIGFILE_HELP = '---
events-%Y%m%d:          # index pattern compatible ruby date format directives
  number: 7             # how many indices script have to keep by pattern above
  future: true          # leave indices with future timestamp. true by default
logstash-%Y%m%d:
  number: 60
  future: false
  types:                # types description
    elb:                # type name. there is no patterns
      number: 30
      optimize: true    # run _optimize for index after removing this type.
                        # false by default
    nginx-pinch-main:
      number: 10
      optimize: true
    nginx-wlb-main:
      number: 60
      optimize: true

# Attention: you have to use number of indices greater or equal numbers
# of indices with types because first of all script deletes indices
# using index setting.'.freeze
36
+
37
# Command-line option parser for the elasticsearch-data-cleaner executable.
class MyOptparse

  # Parses command-line arguments into an options object.
  #
  # Recognized options are removed from +args+ (OptionParser#parse! is used).
  # On a parse error (unknown option, missing or non-integer argument) the
  # error and the usage text are printed and the process exits with -1.
  # `--help` prints usage plus ::CONFIGFILE_HELP and exits with 0.
  #
  # @param args [Array<String>] raw arguments, typically ARGV
  # @return [OpenStruct] with dry_run, timeout, url, debug, empty and, when
  #   given on the command line, config and logfile
  def parse(args)
    options = OpenStruct.new
    options.dry_run = false
    options.timeout = 300
    options.url = 'http://localhost:9200'
    options.debug = false
    # NOTE: the flag is inverted. `empty` stays true unless --empty is passed;
    # EScleaner#run deletes empty indices only when it is false.
    options.empty = true

    @opt_parser = OptionParser.new do |opts|
      banner = [
        $0 + ' --dry-run --config data_rotate.yaml',
        "\t\t\tdry run for config\n",
        $0 + ' --debug --config data_rotate.yaml',
        "\t\t\tenable debug mode\n",
        $0 + ' -t 600 --config data_rotate.yaml',
        "\t\t\tincrease http client timeout to 10 minutes\n",
      ]
      opts.banner = "Examples:\n\t" + banner.join("\n\t")

      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-u", "--url URL",
              "Url of ES API. Default is #{options.url}") do |url|
        options.url = url
      end

      # Integer coercion makes OptionParser reject non-numeric values with a
      # ParseError instead of silently turning them into a timeout of 0.
      opts.on("-t", "--timeout SECONDS", Integer,
              "The timeout for connection to ES API. Default is #{options.timeout}") do |timeout|
        options.timeout = timeout
      end

      opts.on("-c", "--config CONFIG",
              "YAML file with configuration for rotate data") do |config|
        options.config = config
      end

      opts.on("-l", "--logfile LOGFILE",
              "Log file") do |logfile|
        options.logfile = logfile
      end

      opts.on("--dry-run",
              "Run script without applying any changes") do
        options.dry_run = true
      end

      opts.on("--empty",
              "Delete any empty indices") do
        options.empty = false
      end

      opts.on("--debug",
              'Debug mode. More verbose') do
        options.debug = true
      end

      opts.separator "Common options:"

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        puts "\n\n\nConfig file (YAML) example:"
        puts ::CONFIGFILE_HELP
        exit(0)
      end
    end

    @opt_parser.parse!(args)
    options
  rescue OptionParser::ParseError => e
    $stderr.print "Error: #{e}\n"
    puts @opt_parser
    exit(-1)
  end

  # Prints the usage text built by the last call to #parse.
  def help
    puts @opt_parser
  end
end
117
+
118
# Script entry point: parse options, configure the global logger, load the
# YAML config and run the cleaner. Any failure is reported through the logger
# (or stderr if the logger is not set up yet) and the process exits with 1.
begin
  parser = MyOptparse.new
  options = parser.parse(ARGV)

  # Log to the requested file, or to STDOUT when no log file was given.
  $logger = Logger.new(options.logfile || STDOUT)
  $logger.level = options.debug ? Logger::DEBUG : Logger::INFO

  if options.logfile
    # File logs carry a timestamp and severity.
    $logger.formatter = proc do |severity, datetime, _progname, msg|
      "[#{datetime}] #{severity} : #{msg}\n"
    end
    $logger.info("begin")
  else
    # Console output is the bare message.
    $logger.formatter = proc do |_severity, _datetime, _progname, msg|
      "#{msg}\n"
    end
  end

  raise 'please specify path to yaml config file' unless options.config
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise "can not read config file #{options.config}" unless File.exist?(options.config)

  # NOTE(review): YAML.load on an operator-supplied file; consider
  # YAML.safe_load if the config may ever come from an untrusted source.
  config = YAML.load(File.read(options.config))
  unless config.is_a?(Hash)
    raise "config file #{options.config} must contain a YAML mapping of index patterns"
  end

  ess = EScleaner.new options
  ess.run(config)
  $logger.info("end") if options.logfile
rescue StandardError => e
  # StandardError rather than Exception: SystemExit raised by exit() inside the
  # option parser (--help, parse errors) must keep its own exit status instead
  # of being reported as an error and turned into exit(1).
  if defined?($logger) == 'global-variable'
    $logger.error e.message
  else
    $stderr.puts e.message
  end
  exit(1)
end
@@ -0,0 +1,165 @@
1
+ ###########################################################
2
+ ### ###
3
+ ### Author: Maksim Podlesnyi <mpodlesnyi@smartling.com> ###
4
+ ### ###
5
+ ###########################################################
6
+ require 'rubygems'
7
+ require 'json'
8
+ require 'faraday'
9
+ require 'date'
10
+
11
+
12
# Rotates Elasticsearch data over the HTTP API: deletes old indices, and old
# types inside indices, according to a retention config.
#
# Log output goes to the global $logger, which the caller must set before
# creating an instance.
class EScleaner

  # Opens a Faraday connection and detects the Elasticsearch version.
  #
  # @param options [#url, #timeout, #dry_run, #empty] runtime options
  def initialize(options)
    @connection = ::Faraday.new options.url, { :request => { :timeout => options.timeout } }
    @options = options
    info
  end

  # Fetches the root ES document and stores it in @es_info, with the version
  # split into @es_version ('major', 'minor', 'patch' integers).
  def info
    resp = @connection.get
    @es_info = parse_response resp.body
    major, minor, patch = @es_info['version']['number'].split('.').map { |part| part.to_i }
    @es_version = { 'major' => major, 'minor' => minor, 'patch' => patch }
    $logger.debug("detected Elasticsearch #{@es_version['major']}.#{@es_version['minor']}")
  end

  # Parses a JSON response body.
  #
  # @param response [String] raw JSON body
  # @return [Object] the parsed document
  # @raise [RuntimeError] when the document has an 'error' key; a structured
  #   error object is serialized to JSON so the message is always a string
  #   (raising the Hash itself would produce a TypeError)
  def parse_response(response)
    parsed = ::JSON.parse response
    return parsed unless parsed.is_a?(Hash) && parsed.has_key?('error')

    error = parsed['error']
    raise(error.is_a?(String) ? error : ::JSON.generate(error))
  end

  # Verifies that the response body has +answer+ set to true.
  #
  # @param response [#body] HTTP response
  # @param answer [String] key expected to be true, e.g. 'acknowledged'
  # @raise [RuntimeError] 'request failed' otherwise
  def acknowledged_test(response, answer)
    result = parse_response response.body
    raise 'request failed' unless result[answer] == true
  end

  # @return [Array<String>] index names (keys of the _aliases response)
  def list
    resp = @connection.get "_aliases"
    parse_response(resp.body).keys
  end

  # Checks with a HEAD request whether +type+ exists in +index+.
  #
  # @return [Boolean] true on 200, false on 404
  # @raise [RuntimeError] on any other status code
  def exists(index, type)
    url = if @es_version['major'] >= 5
            "#{index}/_mapping/#{type}"
          else
            "#{index}/#{type}"
          end
    resp = @connection.head url
    case resp.status
    when 200 then true
    when 404 then false
    else
      raise "could not check if type #{index}/#{type} exists. got response code #{resp.status}"
    end
  end

  # Returns the primary document count of +index+, logging a warning when the
  # index holds fewer than 10 documents.
  #
  # @raise [RuntimeError] when the count is missing from the stats response
  def docs(index)
    resp = @connection.get "#{index}/_stats/docs"
    count = (parse_response resp.body)['_all']['primaries']['docs']['count']
    raise "could not get count of docs for index #{index}" unless count

    $logger.warn("index #{index} has #{count} docs") if count < 10
    count
  end

  # Deletes an index ("name") or a type ("name/type"); only logs in dry-run.
  #
  # @raise [RuntimeError] when the request is not acknowledged
  def delete(object)
    $logger.info("deleting #{object}#{@options.dry_run ? ' (dry_run)': ''}")
    return if @options.dry_run

    resp = @connection.delete "#{object}"
    acknowledged_test resp, 'acknowledged'
  end

  # Expunges deleted documents from +index+; only logs in dry-run.
  # Uses _forcemerge on ES >= 2.1 and the legacy _optimize before that.
  def optimize(index)
    $logger.info("starting optimize for index #{index}#{@options.dry_run ? ' (dry_run)': ''}")
    return if @options.dry_run

    resp = @connection.post "#{index}/#{merge_endpoint}?only_expunge_deletes=true"
    $logger.debug(parse_response resp.body)
  end

  # Applies the retention config. The config is read, never modified.
  #
  # @param config [Hash{String=>Hash}] index pattern (Ruby date directives) =>
  #   settings with optional 'number', 'future' and 'types' keys
  def run(config)
    config.each do |pattern, settings|
      rotate(collect_dated_indices(pattern), settings)
    end
  end

  private

  # Endpoint name for expunging deletes on the detected ES version.
  def merge_endpoint
    major = @es_version['major']
    minor = @es_version['minor'].to_i
    (major > 2 || (major == 2 && minor >= 1)) ? '_forcemerge' : '_optimize'
  end

  # Lists indices, deleting empty ones when that was requested, and returns
  # [date, index_name] pairs for names matching +pattern+, oldest first.
  # The real index name is kept so that it is never rebuilt from the date.
  def collect_dated_indices(pattern)
    dated = []
    list.each do |index|
      # @options.empty is false when empty indices should be removed.
      if !@options.empty && docs(index) == 0
        delete(index)
        next
      end
      begin
        dated << [DateTime.strptime(index, pattern), index]
      rescue ArgumentError, TypeError
        # Index name does not match this pattern; not ours to rotate.
      end
    end
    dated.sort
  end

  # Walks indices from newest to oldest, keeping settings['number'] of them
  # and deleting the rest; future-dated indices are deleted only when
  # settings['future'] is explicitly false.
  def rotate(dated, settings)
    today = Date.today
    keep = settings['number']
    types = settings['types']
    # Per-type counters, kept locally so the caller's config is untouched.
    type_keep = {}
    types.each { |type, type_settings| type_keep[type] = type_settings['number'] } if types

    dated.reverse_each do |date, index|
      if date.to_date > today
        if settings['future'] == false
          $logger.debug("going to delete index #{index}. It is future")
          delete(index)
        end
        next
      end
      next unless keep

      if keep <= 0
        delete(index)
      elsif types && expire_types(index, types, type_keep)
        optimize(index)
      end
      keep -= 1
    end
  end

  # Deletes from +index+ every configured type whose counter is exhausted and
  # decrements the counter of the others that exist in the index.
  #
  # @return [Boolean] whether a deleted type asked for an optimize run
  def expire_types(index, types, type_keep)
    optimize_needed = false
    types.each do |type, type_settings|
      next unless type_keep[type]
      next unless exists(index, type)

      if type_keep[type] <= 0
        delete("#{index}/#{type}")
        optimize_needed = true if type_settings['optimize']
      else
        type_keep[type] -= 1
      end
    end
    optimize_needed
  end
end
165
+
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch-data-cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Maksim Podlesnyi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-11-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ description: Command line tool which helps remove old ES indices and types
28
+ email:
29
+ - mpodlesnyi@smartling.com
30
+ - itops@smartling.com
31
+ executables:
32
+ - elasticsearch-data-cleaner
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - bin/elasticsearch-data-cleaner
37
+ - lib/elasticsearch-data-cleaner.rb
38
+ homepage: https://github.com/Smartling/elasticsearch-data-cleaner
39
+ licenses:
40
+ - GPL-3.0
41
+ metadata: {}
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements: []
57
+ rubyforge_project:
58
+ rubygems_version: 2.5.2
59
+ signing_key:
60
+ specification_version: 4
61
+ summary: Command line tool for removing old Elasticsearch data (indices and types)
62
+ test_files: []