stash-query 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ccad0b0e0077c1bbda3390027e84b57be31235ab
4
+ data.tar.gz: eb48df98b461d986d2f8838a10562c8c7c99597d
5
+ SHA512:
6
+ metadata.gz: 11a8394d3446915a8081f70af7db9b371294e23a241d4c292e572c604347617064e70f8ea4ec924fba53f6aaac95e3aff1557b45b49d76bfda57a7632bcf4e49
7
+ data.tar.gz: 18b4f50a7848a14068437b80785f5e8d04fc3afc935294a403a76f8689cfb9086823ea27e199b395640c419f4ead7aa2a454a8bb3ed4f86220424dd335c7a55c
@@ -0,0 +1,3 @@
1
+ *.lock
2
+ *.swp
3
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
source 'https://rubygems.org'

# Specify your gem's dependencies in stash-query.gemspec
# (BUG FIX: the original comment referenced barcode.gemspec, a
# copy-paste leftover from another project's Gemfile template.)
gemspec
@@ -0,0 +1,24 @@
1
+ stash-query
2
+ ===========
3
+
4
+ A CLI Tool for Querying Logstash and Exporting the results. Uses the Lucene query syntax that Kibana utilizes, but provides the option for exporting.
5
+
6
+ Usage:
7
+ ```
8
+ -c, --connect_host [HOST] Logstash host to run query on (defaults to: localhost)
9
+ -p, --port [PORT] Logstash port (defaults to: 9200)
10
+ -i, --index-prefix [PREFIX] Index name prefix. Defaults to 'logstash-'
11
+ -w, --write [FILE] Write output file location (defaults to nil)
12
+ -d, --debug Debug mode
13
+ -s, --start [DATE] Start date. Format: YYYY-MM-DDThh:mm:ss.SSSZ. Ex: 2013-12-01T12:00:00.000Z
14
+ -e, --end [DATE] End date. Format: YYYY-MM-DDThh:mm:ss.SSSZ
15
+ -q, --query [QUERY] Query string
16
+ -t, --tags [TAGS] Tags to query. Comma delimited
17
+ -f, --write-fields [FIELDS] Comma delimited list of Logstash fields to write to output file. Defaults to "message"
18
+ -l, --delimiter [DELIMITER] Delimiter to use in output file. Defaults to ","
19
+ ```
20
+
21
+ Examples:
22
+ ```
23
+ stash-query -s 2013-12-01T00:00:00.000Z -e 2013-12-02T00:00:00.000Z -t my_tag -q 'message:hello_world' -w /tmp/my_query.txt
24
+ ```
@@ -0,0 +1,77 @@
1
#!/usr/bin/env ruby
# CLI entry point: parse command-line options, run the Logstash/Elasticsearch
# query via Stashquery::Query, and optionally export the results to a file.

require 'rubygems' if RUBY_VERSION < '1.9.0'
require 'optparse' # BUG FIX: OptionParser is used below but was only loaded transitively
require 'stash-query'

############ CONFIG ###########
$config = {}
$config[:host] = "localhost"
$config[:port] = "9200"
$config[:index_prefix] = [ "logstash-" ]
$config[:scroll_size] = 10 ## Number of hits returned per scroll request. Not sure what to use here...
$config[:scroll_time] = '30m'
$config[:report] = nil
$config[:write_fields] = []
$config[:delim] = ','
$debug = nil
$flush_buffer = 1000 ## Number of log lines to flush to file at
$new_transport = true

##################################

# Every option declares an OPTIONAL argument ([HOST] etc.), so the block value
# may be nil when the flag is given bare. BUG FIX: the original guards read
# `v.empty? or v.nil?`, which raises NoMethodError on nil before the nil check
# ever runs — nil must be tested first. The -q/-t/-f handlers had no nil guard
# at all and crashed on a bare flag.
OptionParser.new do |opts|
  opts.banner = "Usage: "
  opts.on('-c','--connect_host [HOST]', "Logstash host to run query on (defaults to: #{$config[:host]})") { |v| $config[:host] = v unless v.nil? or v.empty? }
  opts.on('-p','--port [PORT]', "Logstash port (defaults to: #{$config[:port]})") { |v| $config[:port] = v unless v.nil? or v.empty? }
  opts.on('-i','--index-prefix [PREFIX]', "Index name prefix(es). Defaults to 'logstash-'. Comma delimited") do |prefix|
    $config[:index_prefix] = prefix.split(',') unless prefix.nil? or prefix.empty?
  end
  opts.on('-w', '--write [FILE]', 'Write output file location (defaults to nil)') { |v| $config[:output] = v }
  opts.on('-d', '--debug', 'Debug mode') { |v| $debug = true }

  opts.on('-s', '--start [DATE]', 'Start date. Format: YYYY-MM-DDThh:mm:ss.SSSZ. Ex: 2013-12-01T12:00:00.000Z') do |v|
    $config[:start] = v
  end

  opts.on('-e', '--end [DATE]', 'End date. Format: YYYY-MM-DDThh:mm:ss.SSSZ') do |v|
    $config[:end] = v
  end

  opts.on('-q', '--query [QUERY]', 'Query string') { |v| $config[:query] = "#{v}" unless v.nil? or v.empty? }
  opts.on('-t', '--tags [TAGS]', 'Tags to query. Comma delimited') do |tags|
    unless tags.nil? or tags.empty?
      arr = tags.split(',')
      if arr.length > 1
        # Multiple tags must ALL match on an event.
        $config[:tags] = "tags:(" + arr.join(' AND ') + ")"
      else
        $config[:tags] = "tags:#{tags}"
      end
    end
  end
  opts.on('-f', '--write-fields [FIELDS]', 'Comma delimited list of Logstash fields to write to output file. Defaults to "message"') do |fields|
    $config[:write_fields] = fields.split(',') unless fields.nil?
  end
  opts.on('-l', '--delimiter [DELIMITER]', 'Delimiter to use in output file. Defaults to ","') do |v|
    $config[:delim] = v
  end
  opts.parse!
end

# Translate CLI option names into the Stashquery::Query configuration keys.
es_conf = {
  :query => $config[:query],
  :tags => $config[:tags],
  :start_date => $config[:start],
  :end_date => $config[:end],
  :output_file => $config[:output],
  :host => $config[:host],
  :port => $config[:port],
  :index_prefixes => $config[:index_prefix],
  :write_fields => $config[:write_fields],
  :delimiter => $config[:delim],
  :do_print => true
}
es = Stashquery::Query.new(es_conf)

exit if es.num_results < 1

# Query#initialize runs the query synchronously today; poll the finished
# flag anyway in case that ever becomes asynchronous.
until es.query_finished do
  sleep 2
end
@@ -0,0 +1,3 @@
1
# Library entry point: make this directory requirable, then load the pieces.
$: << File.expand_path(File.dirname(__FILE__))

require 'stash-query/version'
require 'stash-query/query'
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems' if RUBY_VERSION < '1.9.0'
4
+ require 'elasticsearch'
5
+ require 'json'
6
+ require 'date'
7
+ require 'optparse'
8
+ require 'curb'
9
+ require 'progress_bar'
10
+
11
module Stashquery
  # Runs a Lucene-syntax query against a Logstash-style Elasticsearch
  # cluster using the scan+scroll API, and optionally writes selected
  # fields of every matching event to a delimited output file.
  class Query

    # Field exported when the caller does not specify :write_fields.
    DEFAULT_FIELD = "message"
    $debug = nil
    $flush_buffer = 1000 ## Number of log lines to flush to file at
    $new_transport = true

    attr_reader :query_finished   # true once the scroll loop has completed
    attr_reader :num_results      # total hit count reported by Elasticsearch
    attr_reader :start_date
    attr_reader :end_date

    # conf keys: :host, :port, :index_prefixes (Array), :scroll_size,
    # :scroll_time, :output_file, :query, :tags, :start_date, :end_date,
    # :write_fields, :delimiter, :do_print.
    # Connects and runs the whole query synchronously; raises if the dates
    # are malformed or the cluster is unreachable.
    def initialize(conf = {})
      @config = {}
      # BUG FIX: the original default host was an internal hostname
      # (ls2-es-lb.int.tropo.com) left over from the author's network;
      # default to localhost, matching the CLI's documented default.
      @config[:host] = conf[:host] || "localhost"
      @config[:port] = conf[:port] || "9200"
      if conf[:index_prefixes].is_a? Array and ! conf[:index_prefixes].empty?
        @config[:index_prefixes] = conf[:index_prefixes]
      else
        @config[:index_prefixes] = [ "logstash-" ]
      end
      @config[:scroll_size] = conf[:scroll_size] || "100"
      @config[:scroll_time] = conf[:scroll_time] || "30m"
      @config[:output] = conf[:output_file] || nil
      @query = conf[:query] || nil
      @tags = conf[:tags] || nil
      @start_date = conf[:start_date]
      @end_date = conf[:end_date]
      @config[:write_fields] = []
      set_write_fields(conf[:write_fields])
      @config[:delimiter] = conf[:delimiter] || ','
      @num_results = 0
      @query_finished = false
      @scroll_ids = Array.new

      if conf[:do_print]
        @config[:print] = true
        require 'progress_bar'
      end

      # Both bounds are required and must be exact ISO8601 timestamps.
      unless Query.validate_date(@start_date) and Query.validate_date(@end_date)
        raise "Improper date format entered"
      end

      ## Truncate any pre-existing output file; deliberately best-effort —
      ## the file usually does not exist yet.
      unless @config[:output].nil?
        begin
          File.truncate(@config[:output], 0)
        rescue
        end
      end

      @es_conn = connect_to_es
      run_query
      sort_file
    end

    # Returns true when str is exactly a timestamp of the form
    # YYYY-MM-DDThh:mm:ss.SSSZ (years 2000-2099), nil otherwise.
    # BUG FIX: the pattern is now anchored with \A..\z — the original was
    # unanchored and accepted any string that merely CONTAINED a valid
    # date (e.g. with trailing garbage).
    def self.validate_date(str)
      return true if str =~ /\A20[0-9]{2}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])T[012][0-9]:[0-5][0-9]:[0-5][0-9]\.[0-9]{3}Z\z/
      return nil
    end

    private

    # Sort the finished output file lexicographically in place.
    def sort_file
      unless @config[:output].nil?
        arr = File.readlines(@config[:output]).sort
        File.open(@config[:output], 'w') do |f|
          f.puts arr
        end
      end
    end

    # Append the formatted hits (an Array of ES hit hashes) to the output
    # file. No-op when no output file was configured.
    def flush_to_file(hit_list)
      return if @config[:output].nil?
      File.open(@config[:output], 'a') do |file|
        begin
          file.puts(generate_output(hit_list))
        rescue => e
          # BUG FIX: the original had an unreachable `exit` after `raise e`.
          puts "Error writing to file."
          raise e
        end
      end
    end

    # Normalize the caller-supplied field list into @config[:write_fields]:
    # Array is used as-is (DEFAULT_FIELD when empty), a String becomes a
    # one-element Array, anything else falls back to DEFAULT_FIELD.
    def set_write_fields(fields)
      if fields.is_a? Array
        if fields.empty?
          @config[:write_fields] << DEFAULT_FIELD
        else
          @config[:write_fields] = fields
        end
      elsif fields.is_a? String
        @config[:write_fields] = [ fields ]
      else
        @config[:write_fields] = [ DEFAULT_FIELD ]
      end
    end

    # Render each hit as one delimiter-joined line of the requested
    # _source fields ('_all' means every field). Embedded newlines are
    # stripped so each event stays on a single output line.
    def generate_output(hit_list)
      output_data = []
      hit_list.each do |event|
        event_data = []
        if @config[:write_fields].include?('_all')
          event['_source'].keys.each do |field|
            event_data << "#{event['_source'][field]}".gsub("\n", '')
          end
        else
          @config[:write_fields].each do |field|
            event_data << "#{event['_source'][field] if event['_source'][field]}".gsub("\n", '')
          end
        end
        output_data << event_data.join(@config[:delimiter])
      end
      output_data
    end

    # Build the Elasticsearch client, optionally through a Typhoeus-backed
    # Faraday transport ($new_transport). Raises on connection failure.
    def connect_to_es
      if $new_transport
        require 'typhoeus'
        require 'typhoeus/adapters/faraday'

        transport_conf = lambda do |f|
          #f.response :logger
          f.adapter :typhoeus
        end
      end

      begin
        if $new_transport
          transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new hosts: [ { host: @config[:host], port: @config[:port] } ], &transport_conf
          es = Elasticsearch::Client.new transport: transport
        else
          es = Elasticsearch::Client.new(:host => @config[:host], :port => @config[:port])
        end
      rescue
        raise "Could not connect to Elasticsearch cluster: #{@config[:host]}:#{@config[:port]}"
      end

      return es
    end

    # Expand the start..end date range into one daily index name per
    # configured prefix, e.g. "logstash-2013.12.01". Returns an Array.
    def get_indices
      indexes = Array.new
      start_str = @start_date.split('T').first.split('-').join('.')
      s_year = start_str.split('.').first.to_i
      s_mo = start_str.split('.')[1].to_i
      s_day = start_str.split('.').last.to_i
      start_date = Date.new(s_year, s_mo, s_day)

      end_str = @end_date.split('T').first.split('-').join('.')
      e_year = end_str.split('.').first.to_i
      e_mo = end_str.split('.')[1].to_i
      e_day = end_str.split('.').last.to_i
      end_date = Date.new(e_year, e_mo, e_day)

      (start_date..end_date).map do |day|
        day = day.strftime('%Y.%m.%d')
        @config[:index_prefixes].each do |prefix|
          indexes << "#{prefix}#{day}"
        end
      end
      return indexes
    end

    # Assemble the query string, resolve the target indices, then scroll
    # through all hits, flushing to the output file every $flush_buffer
    # events. Sets @num_results and @query_finished.
    def run_query
      queries = Array.new
      queries << @query if @query
      queries << @tags if @tags

      if @start_date and @end_date
        time_range = "@timestamp:[#{@start_date} TO #{@end_date}]"
        queries << time_range
        indexes = get_indices
      else
        # BUG FIX: the original read `indexes [ '_all' ]` — a call on an
        # undefined local variable (NameError) — instead of an assignment.
        indexes = [ '_all' ]
      end

      query = queries.join(' AND ')

      ## Drop indices that don't exist on the cluster to avoid 404s.
      unless indexes.include?('_all')
        good_indexes = Array.new
        indexes.each do |index|
          good_indexes << index if @es_conn.indices.exists index: index
        end
        indexes = good_indexes
      else
        indexes = [ '_all' ]
      end

      puts "Using these indices: #{indexes.join(',')}" if $debug

      index_str = indexes.join(',')
      res = @es_conn.search index: index_str, q: query, search_type: 'scan', scroll: @config[:scroll_time], size: @config[:scroll_size], df: 'message'
      scroll_id = res['_scroll_id']

      @scroll_ids << res['_scroll_id']
      @num_results = res['hits']['total']
      puts "Found #{@num_results} results" if @config[:print]

      puts res.inspect if $debug

      if @config[:output]
        bar = ProgressBar.new(@num_results) if @config[:print]
        hit_list = Array.new
        total_lines = 0 if $debug
        while true
          res['hits']['hits'].each do |hit|
            bar.increment! if @config[:print]
            hit_list << hit
            if hit_list.length % $flush_buffer == 0
              # BUG FIX: flush_to_file expects an Array of hit hashes
              # (generate_output iterates it); the original joined them
              # into a single String first, corrupting the output.
              flush_to_file hit_list
              hit_list = Array.new
            end
          end
          total_lines += res['hits']['hits'].length if $debug

          # Fetch the next page of the scroll.
          begin
            res = @es_conn.scroll scroll: @config[:scroll_time], body: scroll_id
            scroll_id = res['_scroll_id']
            @scroll_ids << res['_scroll_id']
          rescue => e
            puts res.inspect
            raise e
          end

          break if res['hits']['hits'].length < 1
        end
        flush_to_file hit_list
      end

      @query_finished = true
      clean_scroll_ids
    end

    ## Delete the scroll_ids to free up resources on the ES cluster.
    ## Uses a direct API call (Curl) until elasticsearch-ruby supports this.
    def clean_scroll_ids
      @scroll_ids.uniq.each do |scroll|
        puts "DELETE SCROLL:#{scroll}" if $debug
        begin
          Curl.delete("#{@config[:host]}:#{@config[:port]}/_search/scroll/#{scroll}")
        rescue
          puts "Delete failed" if $debug
        end
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
module Stashquery
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = '0.1.2'.freeze
end
@@ -0,0 +1,25 @@
1
# Gem packaging definition for stash-query.
$:.push File.expand_path("../lib", __FILE__)
require 'stash-query/version'

Gem::Specification.new do |spec|
  spec.name        = 'stash-query'
  spec.version     = Stashquery::VERSION
  spec.date        = '2014-07-22'
  spec.homepage    = "https://github.com/robbydyer/stash-query"
  spec.summary     = "Gem for querying Logstash and exporting the results"
  spec.description = "Gem for querying Logstash and exporting the results."
  spec.authors     = ["Robby Dyer"]
  spec.email       = 'robby.dyer@gmail.com'
  spec.licenses    = [ 'GPL-3.0' ]

  # Package everything tracked by git; executables come from bin/.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  spec.require_paths = [ 'lib' ]

  spec.add_runtime_dependency 'elasticsearch', '>=1.0.1'
  spec.add_runtime_dependency 'curb', '>= 0.8.5'
  spec.add_runtime_dependency 'faraday', '= 0.8.8'
  spec.add_runtime_dependency 'progress_bar'
  spec.add_runtime_dependency 'typhoeus', '= 0.6.6'

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,150 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: stash-query
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - Robby Dyer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: elasticsearch
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.0.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.0.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: curb
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.8.5
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.5
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.8.8
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.8.8
55
+ - !ruby/object:Gem::Dependency
56
+ name: progress_bar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: typhoeus
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 0.6.6
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 0.6.6
83
+ - !ruby/object:Gem::Dependency
84
+ name: bundler
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Gem for querying Logstash and exporting the results.
112
+ email: robby.dyer@gmail.com
113
+ executables:
114
+ - stash-query
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - .gitignore
119
+ - Gemfile
120
+ - README.md
121
+ - bin/stash-query
122
+ - lib/stash-query.rb
123
+ - lib/stash-query/query.rb
124
+ - lib/stash-query/version.rb
125
+ - stash-query.gemspec
126
+ homepage: https://github.com/robbydyer/stash-query
127
+ licenses:
128
+ - GPL-3.0
129
+ metadata: {}
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubyforge_project:
146
+ rubygems_version: 2.0.14
147
+ signing_key:
148
+ specification_version: 4
149
+ summary: Gem for querying Logstash and exporting the results
150
+ test_files: []