ultragrep 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ultragrep.rb ADDED
@@ -0,0 +1,348 @@
1
+ require 'time'
2
+ require 'optparse'
3
+ require 'pp'
4
+ require 'socket'
5
+ require 'yaml'
6
+
7
+ require 'ultragrep/config'
8
+
9
+ module Ultragrep
10
# Number of seconds in one hour.
HOUR = 60 * 60

# Number of seconds in one day.
DAY = HOUR * 24

# Matches the trailing run of digits in a file name, optionally followed by a
# single ".ext" suffix (for example ".gz"). Capture group 1 holds the digits.
DATE_FROM_FILENAME = /(\d+)(\.\w+)?$/
13
+
14
# Collects requests reported by several readers and writes them to STDOUT in
# timestamp order. A request is only written once every registered reader has
# reported a timestamp at or beyond it.
class RequestPrinter
  # verbose - when truthy, dump_buffer reports search progress on $stderr.
  def initialize(verbose)
    @verbose = verbose
    @finish = false
    @children_timestamps = {}
    @all_data = []
    @mutex = Mutex.new
  end

  # Writes (sorted) every buffered request whose timestamp is at or below the
  # smallest timestamp any reader has reported; later requests stay buffered.
  def dump_buffer
    ready = []

    @mutex.synchronize do
      to_this_ts = @children_timestamps.values.min || 0 # FIXME : should not be necessary, but fails with -t -p
      # 2**50 is the marker set_done stores, so it is not a real timestamp to report.
      if @verbose && to_this_ts > 0 && to_this_ts != 2**50
        $stderr.puts("I've searched up through #{Time.at(to_this_ts)}")
      end
      ready, @all_data = @all_data.partition { |req| req[0] <= to_this_ts }
    end

    STDOUT.write(ready.sort.map(&:last).join)
    STDOUT.flush
  end

  # Starts a background thread that flushes the buffer every two seconds
  # until finish has been called and the buffer is empty.
  #
  # Returns the Thread.
  def run
    Thread.new do
      until @all_data.empty? && @finish
        sleep 2
        dump_buffer
      end
      dump_buffer
    end
  end

  # Formats a request and buffers it under its timestamp. Requests for which
  # format_request returns nil/false are dropped.
  def add_request(parsed_up_to, text)
    @mutex.synchronize do
      formatted = format_request(parsed_up_to, text)
      @all_data << [parsed_up_to, formatted] if formatted
    end
  end

  # Turns the collected pieces of a request into the text to print.
  def format_request(parsed_up_to, text)
    text.join
  end

  # Records how far (timestamp val) the reader identified by key has read.
  def set_read_up_to(key, val)
    @mutex.synchronize do
      @children_timestamps[key] = val
    end
  end

  # Marks the reader identified by key as finished by storing the 2**50 marker.
  def set_done(key)
    @mutex.synchronize do
      @children_timestamps[key] = 2**50
    end
  end

  # Signals the background thread to stop and flushes what can be flushed.
  def finish
    @finish = true
    dump_buffer
  end
end
79
+
80
# Printer that reduces each request to one tab-separated line:
# "<timestamp>\t<action>\t<milliseconds>\n".
class RequestPerformancePrinter < RequestPrinter
  # parsed_up_to - timestamp the request was reported under.
  # text         - the request, either as an Array of line fragments (the
  #                form RequestPrinter#format_request joins) or as a String.
  #
  # Returns the summary line, or nil when the request does not contain both
  # a "Processing <action>" and a "Completed in <n>ms" marker.
  def format_request(parsed_up_to, text)
    # Matching an Array against a Regexp never succeeds (and raises on
    # Rubies without Object#=~), so flatten the fragments to a String first.
    body = Array(text).join
    match = /.*Processing ([^ ]+) .*Completed in (\d+)ms/m.match(body)
    return unless match

    action = match[1]
    time = match[2]
    "#{parsed_up_to}\t#{action}\t#{time}\n"
  end
end
88
+
89
+ class << self
90
# Parses command-line arguments into the options hash used by ultragrep.
#
# argv - Array of argument strings. Recognised switches are removed in place
#        (OptionParser#parse!); whatever remains is stored as :regexps.
#
# Returns a Hash containing :files, :range_start, :range_end, :regexps,
# :printer and :config, plus :verbose, :tail, :type and :host_filter when the
# corresponding switches were given.
#
# Prints the usage text and exits with status 1 when no regexp remains;
# --help and --version print and exit with status 0.
def parse_args(argv)
  # Sample the clock once so both default range bounds describe the same instant.
  now = Time.now.to_i
  options = {
    :files => [],
    :range_start => now - (now % DAY),
    :range_end => now,
  }

  parser = OptionParser.new do |opts|
    opts.banner = <<-BANNER.gsub(/^ {6,}/, "")
      Usage: ultragrep [OPTIONS] [REGEXP ...]

      Dates: all datetimes are in UTC whatever Ruby's Time.parse() accepts.
      For example '2011-04-30 11:30:00'.

      Options are:
    BANNER
    opts.on("--help", "-h", "This text") { puts opts; exit 0 }
    opts.on("--version", "Show version") do
      # Loaded lazily: the version file is only needed for this switch.
      require 'ultragrep/version'
      puts "Ultragrep version #{Ultragrep::VERSION}"
      exit 0
    end
    opts.on("--config", "-c FILE", String, "Config file location (default: #{Config::DEFAULT_LOCATIONS.join(", ")})") { |config| options[:config] = config }
    opts.on("--progress", "-p", "show grep progress to STDERR") { options[:verbose] = true }
    opts.on("--verbose", "-v", "DEPRECATED") do
      $stderr.puts("The --verbose option is deprecated and will go away soon, please use -p or --progress instead")
      options[:verbose] = true
    end
    opts.on("--tail", "-t", "Tail requests, show matching requests as they arrive") do
      options[:tail] = true
      # Push the end of the range far into the future so new requests keep matching.
      options[:range_end] = Time.now.to_i + 100 * DAY
    end
    opts.on("--type", "-l TYPE", String, "Search type of logs, specified in config") { |type| options[:type] = type }
    opts.on("--perf", "Output just performance information") { options[:perf] = true }
    opts.on("--day", "-d DATETIME", String, "Find requests that happened on this day") do |date|
      date = parse_time(date)
      options[:range_start] = date
      options[:range_end] = date + DAY - 1
    end
    opts.on("--daysback", "-b COUNT", Integer, "Find requests from COUNT days ago to now") do |back|
      options[:range_start] = Time.now.to_i - (back * DAY)
    end
    opts.on("--hoursback", "-o COUNT", Integer, "Find requests from COUNT hours ago to now") do |back|
      options[:range_start] = Time.now.to_i - (back * HOUR)
    end
    opts.on("--start", "-s DATETIME", String, "Find requests starting at this date") do |date|
      options[:range_start] = parse_time(date)
    end
    opts.on("--end", "-e DATETIME", String, "Find requests ending at this date") do |date|
      options[:range_end] = parse_time(date)
    end
    opts.on("--around DATETIME", String, "Find a request at about this time (10 seconds buffer on either side)") do |date|
      around = parse_time(date)
      options[:range_start] = around - 10
      options[:range_end] = around + 10
    end
    opts.on("--host HOST", String, "Only find requests on this host") do |host|
      # May be given several times; each occurrence adds one host.
      options[:host_filter] ||= []
      options[:host_filter] << host
    end
  end
  parser.parse!(argv)

  if argv.empty?
    puts parser
    exit 1
  end
  options[:regexps] = argv

  # :perf is only a selector for the printer class, so it is removed from the result.
  printer_class = options.delete(:perf) ? RequestPerformancePrinter : RequestPrinter
  options[:printer] = printer_class.new(options[:verbose])

  options[:config] = load_config(options[:config])

  options
end
169
+
170
# Runs a search described by options (the hash built by parse_args).
#
# For each group of files one worker process per file is started, its output
# is read on a dedicated thread, and the group is fully drained before the
# next group starts. The printer is told to finish once all groups are done.
def ultragrep(options)
  lower_priority

  config = options.fetch(:config)
  file_type = options.fetch(:type, config.default_file_type)
  file_lists = file_list(config.log_path_glob(file_type), options)

  request_printer = options.fetch(:printer)
  request_printer.run

  quoted_regexps = quote_shell_words(options[:regexps])
  verbose = options[:verbose]
  print_regex_info(quoted_regexps, options) if verbose

  file_lists.each do |files|
    print_search_list(files) if verbose

    children_pipes = files.map { |file| [worker(file, file_type, quoted_regexps, options), file] }

    # Register every pipe at timestamp 0 before any reader starts reporting.
    children_pipes.each { |pipe, _| request_printer.set_read_up_to(pipe, 0) }

    # each thread here waits for child data and then pushes it to the printer thread.
    readers = children_pipes.map do |pipe, filename|
      worker_reader(filename, pipe, request_printer, options)
    end
    readers.each(&:join)

    Process.waitall
  end

  request_printer.finish
end
204
+
205
+ private
206
+
207
# Starts the shell pipeline that searches one file.
#
# file           - path of a log file, or a ready-made command starting with
#                  "tail" (used as-is as the producing side of the pipeline).
# file_type      - passed through as the first argument of ug_guts.
# quoted_regexps - already shell-quoted regexps, appended to the ug_guts call.
# options        - Hash; :range_start and :range_end are passed to the tools.
#
# Returns the IO object from IO.popen, readable for the pipeline's output.
def worker(file, file_type, quoted_regexps, options)
  range_start = options[:range_start]

  # Pick the command that produces the raw log text on stdout.
  reader =
    case file
    when /\.gz$/ then "gzip -dcf #{file}"
    when /\.bz2$/ then "bzip2 -dcf #{file}"
    when /^tail/ then file
    else "#{ug_cat} #{file} #{range_start}"
    end

  searcher = "#{ug_guts} #{file_type} #{range_start} #{options[:range_end]} #{quoted_regexps}"
  IO.popen("#{reader} | #{searcher}")
end
220
+
221
# Reads one worker's output on a new thread and hands complete requests to
# the printer.
#
# Lines starting with "@@<digits>" carry a timestamp and open a new request;
# lines starting with "---" close the current request; anything else is
# appended to the current request, if there is one.
#
# filename        - name shown in the "# <filename>" header of each request.
# pipe            - IO-like object responding to gets, close and closed?.
# request_printer - receives set_read_up_to / add_request / set_done calls
#                   (format_request in tail mode).
# options         - Hash; with :tail set, requests are written to STDOUT
#                   immediately instead of being buffered in the printer.
#
# Returns the reader Thread.
def worker_reader(filename, pipe, request_printer, options)
  Thread.new do
    begin
      parsed_up_to = nil
      this_request = nil
      while line = pipe.gets
        encode_utf8!(line)
        if line =~ /^@@(\d+)/
          # timestamp coming back from the child.
          parsed_up_to = $1.to_i

          request_printer.set_read_up_to(pipe, parsed_up_to)
          this_request = [parsed_up_to, ["\n# #{filename}"]]
        elsif line =~ /^---/
          # end of request
          if this_request
            this_request[1] << line
            if options[:tail]
              STDOUT.write(request_printer.format_request(*this_request))
              STDOUT.flush
            else
              request_printer.add_request(*this_request)
            end
          end
          this_request = [parsed_up_to, [line]]
        else
          this_request[1] << line if this_request
        end
      end
    ensure
      # Always mark this reader as done, even when reading failed; otherwise
      # the printer keeps waiting on this pipe's last reported timestamp.
      request_printer.set_done(pipe)
      # Release the descriptor; nothing reads from the pipe after this thread.
      pipe.close unless pipe.closed?
    end
  end
end
252
+
253
# Reports on $stderr which regexps are searched for and over which time range.
def print_regex_info(quoted_regexps, options)
  range = range_description(options)
  $stderr.puts("searching for regexps: #{quoted_regexps} from #{range}")
end
256
+
257
# Describes the search window as "<start> to <end>", rendering the epoch
# seconds in options[:range_start] and options[:range_end] via Time.at.
def range_description(options)
  from, to = options.values_at(:range_start, :range_end)
  "#{Time.at(from)} to #{Time.at(to)}"
end
260
+
261
# Aborts the program with a message naming the globs and the time range
# that produced no files.
def nothing_found!(globs, options)
  message = "Couldn't find any files matching globs: #{globs.join(',')} from #{range_description(options)}"
  abort(message)
end
264
+
265
# Prints the files about to be searched to $stderr, two per line.
def print_search_list(list)
  rows = list.each_slice(2).map { |pair| pair.join(" ") }
  $stderr.puts("searching #{rows.join("\n")}")
end
269
+
270
# Expands the globs into the groups of files (or tail commands) to search.
#
# globs   - Array of glob patterns, handed to Dir.glob.
# options - Hash; with :tail set, the result is a single group of
#           "tail -f <file>" commands for files whose name ends in
#           "-<today as YYYYMMDD>"; otherwise the files are filtered and
#           grouped by filter_and_group_files. :verbose prints the groups.
#
# Returns an Array of Arrays of Strings. Calls nothing_found! when not a
# single file is left to search.
def file_list(globs, options)
  file_list = Dir.glob(globs)

  file_lists = if options[:tail]
    # TODO fix before we open source -- this is a hard-coded file format.
    # Computed once so every file is compared against the same date.
    today = Time.now.strftime("%Y%m%d")
    todays_logs = file_list.select { |f| f =~ /-#{today}$/ }
    [todays_logs.map { |f| "tail -f #{f}" }]
  else
    filter_and_group_files(file_list, options)
  end

  # Tail mode always yields one (possibly empty) group, so look inside the
  # groups rather than at the outer array when deciding nothing was found.
  nothing_found!(globs, options) if file_lists.flatten.empty?

  $stderr.puts("Grepping #{file_lists.map { |f| f.join(" ") }.join("\n\n\n")}") if options[:verbose]
  file_lists
end
289
+
290
+ def encode_utf8!(line)
291
+ line.encode!('UTF-16', 'UTF-8', :invalid => :replace, :replace => '')
292
+ line.encode!('UTF-8', 'UTF-16')
293
+ end
294
+
295
# Wraps each word in single quotes for use on a shell command line and joins
# the results with spaces. A single quote inside a word is replaced by "."
# rather than escaped.
# maybe use shellwords but also not super important
def quote_shell_words(words)
  quoted = words.map { |word| "'#{word.tr("'", '.')}'" }
  quoted.join(' ')
end
299
+
300
# Set idle I/O and process priority, so other processes aren't starved for I/O
#
# Runs ionice and renice against the current process; their output is
# discarded. Returns what the renice `system` call returns.
def lower_priority
  pid = Process.pid
  system("ionice -c 3 -p #{pid} >/dev/null 2>&1")
  system("renice -n 19 -p #{pid} >/dev/null 2>&1")
end
305
+
306
# Narrows files down to the requested hosts and time range, then groups the
# survivors by the date digits captured from their names.
#
# Returns an Array of groups (Arrays of file names), one per distinct date.
def filter_and_group_files(files, options)
  on_hosts = filter_files_by_host(files, options[:host_filter])
  wanted_range = options.fetch(:range_start)..options.fetch(:range_end)
  in_range = filter_files_by_date(on_hosts, wanted_range)

  by_date = in_range.group_by { |name| name[DATE_FROM_FILENAME, 1] }
  by_date.values
end
311
+
312
# Keeps the files whose parent directory name (the second-to-last "/"
# separated segment of the path) is listed in host_filter.
#
# Returns files untouched when host_filter is nil/false.
def filter_files_by_host(files, host_filter)
  if host_filter
    files.select do |file|
      host = file.split("/")[-2]
      host_filter.include?(host)
    end
  else
    files
  end
end
316
+
317
# Keeps the files whose day overlaps range.
#
# files - Array of file names; the date is taken from the digits captured by
#         DATE_FROM_FILENAME and parsed with Time.parse.
# range - Range of epoch seconds to search.
#
# A file is treated as covering DAY seconds starting at its parsed date.
# Files without date digits in their name, or whose digits Time.parse
# rejects, are skipped instead of aborting the whole selection.
#
# NOTE(review): Time.parse is called without a zone here, so the file date is
# read in the local time zone, while parse_time appends "UTC" — confirm this
# difference is intended.
#
# Returns the Array of matching file names.
def filter_files_by_date(files, range)
  files.select do |file|
    stamp = file[DATE_FROM_FILENAME, 1]
    next false unless stamp # no date in the name, so it cannot be placed in a range

    begin
      logfile_date = Time.parse(stamp).to_i
    rescue ArgumentError
      next false # digits that do not form a date
    end

    range_overlap?(range, logfile_date..(logfile_date + DAY - 1))
  end
end
323
+
324
# True when ranges a and b share at least one point, comparing only their
# first and last values.
def range_overlap?(a, b)
  return false unless a.first <= b.last

  b.first <= a.last
end
327
+
328
# Converts a user-supplied time into epoch seconds.
#
# An all-digit string that does not start with "20" is taken to be epoch
# seconds already; anything else is parsed by Time.parse with " UTC"
# appended.
def parse_time(string)
  epoch_like = string =~ /^\d+$/ && string !~ /^20/
  return string.to_i if epoch_like

  Time.parse("#{string} UTC").to_i
end
335
+
336
# Builds the Ultragrep::Config for the given config file location.
#
# file - value of the --config switch; nil when the switch was not given
#        (presumably Config then falls back to its default locations —
#        verify against Ultragrep::Config).
def load_config(file)
  ::Ultragrep::Config.new(file)
end
339
+
340
# Absolute path of the ug_guts executable, located in ext/ultragrep two
# levels above this source file.
def ug_guts
  gem_root = File.expand_path("../..", __FILE__)
  File.join(gem_root, "ext/ultragrep/ug_guts")
end
343
+
344
# Absolute path of the ug_cat executable, located in ext/ultragrep two
# levels above this source file.
def ug_cat
  gem_root = File.expand_path("../..", __FILE__)
  File.join(gem_root, "ext/ultragrep/ug_cat")
end
347
+ end
348
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ultragrep
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - John Doe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-08-27 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email: john@example.com
16
+ executables:
17
+ - ultragrep
18
+ extensions:
19
+ - ext/ultragrep/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - bin/ultragrep
23
+ - ext/ultragrep/Makefile
24
+ - ext/ultragrep/extconf.rb
25
+ - ext/ultragrep/rails_req.c
26
+ - ext/ultragrep/rails_req.h
27
+ - ext/ultragrep/req_matcher.h
28
+ - ext/ultragrep/request.c
29
+ - ext/ultragrep/request.h
30
+ - ext/ultragrep/ug_build_index.c
31
+ - ext/ultragrep/ug_cat.c
32
+ - ext/ultragrep/ug_guts.c
33
+ - ext/ultragrep/ug_index.c
34
+ - ext/ultragrep/ug_index.h
35
+ - ext/ultragrep/work_req.c
36
+ - ext/ultragrep/work_req.h
37
+ - ext/ultragrep/zran.c
38
+ - lib/ultragrep.rb
39
+ - lib/ultragrep/config.rb
40
+ - lib/ultragrep/version.rb
41
+ homepage: https://github.com/zendesk/ultragrep
42
+ licenses:
43
+ - Apache License Version 2.0
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.25
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: Ultragrep
66
+ test_files: []
67
+ has_rdoc: