ultragrep 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ultragrep.rb ADDED
@@ -0,0 +1,348 @@
1
+ require 'time'
2
+ require 'optparse'
3
+ require 'pp'
4
+ require 'socket'
5
+ require 'yaml'
6
+
7
+ require 'ultragrep/config'
8
+
9
+ module Ultragrep
10
# Length of one hour, in seconds.
HOUR = 3600
# Length of one day, in seconds.
DAY = HOUR * 24
# Captures the trailing run of digits at the end of a log file name, optionally
# followed by one ".suffix" (for example ".gz"). Callers parse capture 1 as a date.
DATE_FROM_FILENAME = %r{(\d+)(\.\w+)?$}
13
+
14
# Collects formatted requests produced by several readers and writes them to
# STDOUT in timestamp order. A request is only written once every registered
# reader has reported a timestamp at or beyond the request's own timestamp.
class RequestPrinter
  # Timestamp stored for a reader that has finished; it is larger than any
  # real timestamp so a finished reader never holds back output.
  DONE_TIMESTAMP = 2**50

  # verbose - when truthy, progress messages are written to $stderr.
  def initialize(verbose)
    @mutex = Mutex.new
    @all_data = []            # buffered [timestamp, formatted_text] pairs
    @children_timestamps = {} # reader key => last timestamp it reported
    @finish = false
    @verbose = verbose
  end

  # Writes every buffered request whose timestamp is not later than the
  # smallest timestamp reported by any reader, sorted, and keeps the rest.
  def dump_buffer
    ready = []

    @mutex.synchronize do
      # FIXME : the "|| 0" should not be necessary, but fails with -t -p
      horizon = @children_timestamps.values.min || 0
      report_progress(horizon)
      ready, @all_data = @all_data.partition { |timestamp, _| timestamp <= horizon }
    end

    # Writing happens outside the lock so readers are not blocked on output.
    STDOUT.write(ready.sort.map(&:last).join)
    STDOUT.flush
  end

  # Starts and returns a background thread that flushes the buffer every two
  # seconds until #finish has been called and the buffer is empty.
  def run
    Thread.new do
      until @finish && @all_data.empty?
        sleep 2
        dump_buffer
      end
      dump_buffer
    end
  end

  # Formats one request and buffers it under the given timestamp.
  # Requests for which #format_request returns nil/false are dropped.
  def add_request(parsed_up_to, text)
    @mutex.synchronize do
      formatted = format_request(parsed_up_to, text)
      @all_data << [parsed_up_to, formatted] if formatted
    end
  end

  # Turns the collected lines of a request into the text to print.
  # Subclasses override this to change (or suppress) the output.
  def format_request(parsed_up_to, text)
    text.join
  end

  # Records the latest timestamp the reader identified by key has reached.
  def set_read_up_to(key, val)
    @mutex.synchronize { @children_timestamps[key] = val }
  end

  # Marks the reader identified by key as finished.
  def set_done(key)
    @mutex.synchronize { @children_timestamps[key] = DONE_TIMESTAMP }
  end

  # Signals the background thread to stop and flushes what can be written.
  def finish
    @finish = true
    dump_buffer
  end

  private

  # Reports search progress on $stderr when verbose; silent before any reader
  # has reported a timestamp and once every reader is done.
  def report_progress(horizon)
    return unless @verbose && horizon > 0 && horizon != DONE_TIMESTAMP
    $stderr.puts("I've searched up through #{Time.at(horizon)}")
  end
end
79
+
80
# Printer that reduces each request to one tab-separated line:
# timestamp, the word following "Processing", and the "Completed in" time in ms.
class RequestPerformancePrinter < RequestPrinter
  # text arrives as an Array of lines (the same shape RequestPrinter joins),
  # so it must be joined before matching: `Array =~ Regexp` never matches and
  # raises NoMethodError on Ruby versions without Object#=~. A plain String is
  # accepted as well.
  #
  # Returns the summary line, or nil when the request does not contain both
  # markers (the caller then drops the request).
  def format_request(parsed_up_to, text)
    body = text.respond_to?(:join) ? text.join : text.to_s
    return unless body =~ /.*Processing ([^ ]+) .*Completed in (\d+)ms/m
    action = $1
    time = $2
    "#{parsed_up_to}\t#{action}\t#{time}\n"
  end
end
88
+
89
+ class << self
90
# Parses command line arguments into the options hash used by the search.
#
# argv - Array of command line words; switches are removed from it in place
#        and the remaining words become options[:regexps].
#
# Returns a Hash with :files, :range_start, :range_end (epoch seconds),
# :regexps, :printer, :config and, when given, :verbose, :tail, :type and
# :host_filter. Prints the usage text and exits with status 1 when no
# regexp is given.
def parse_args(argv)
  # Sample the clock once so every default is derived from the same instant.
  now = Time.now.to_i
  options = {
    :files => [],
    :range_start => now - (now % DAY),
    :range_end => now,
  }

  parser = OptionParser.new do |opts|
    opts.banner = <<-BANNER.gsub(/^ {6,}/, "")
      Usage: ultragrep [OPTIONS] [REGEXP ...]

      Dates: all datetimes are in UTC whatever Ruby's Time.parse() accepts.
      For example '2011-04-30 11:30:00'.

      Options are:
    BANNER
    opts.on("--help", "-h", "This text") { puts opts; exit 0 }
    opts.on("--version", "Show version") do
      require 'ultragrep/version'
      puts "Ultragrep version #{Ultragrep::VERSION}"
      exit 0
    end
    opts.on("--config", "-c FILE", String, "Config file location (default: #{Config::DEFAULT_LOCATIONS.join(", ")})") { |config| options[:config] = config }
    opts.on("--progress", "-p", "show grep progress to STDERR") { options[:verbose] = true }
    opts.on("--verbose", "-v", "DEPRECATED") do
      $stderr.puts("The --verbose option is deprecated and will go away soon, please use -p or --progress instead")
      options[:verbose] = true
    end
    opts.on("--tail", "-t", "Tail requests, show matching requests as they arrive") do
      options[:tail] = true
      # Push the end of the range far into the future so nothing is cut off.
      options[:range_end] = now + 100 * DAY
    end
    opts.on("--type", "-l TYPE", String, "Search type of logs, specified in config") { |type| options[:type] = type }
    opts.on("--perf", "Output just performance information") { options[:perf] = true }
    opts.on("--day", "-d DATETIME", String, "Find requests that happened on this day") do |date|
      day_start = parse_time(date)
      options[:range_start] = day_start
      options[:range_end] = day_start + DAY - 1
    end
    opts.on("--daysback", "-b COUNT", Integer, "Find requests from COUNT days ago to now") do |back|
      options[:range_start] = now - (back * DAY)
    end
    opts.on("--hoursback", "-o COUNT", Integer, "Find requests from COUNT hours ago to now") do |back|
      options[:range_start] = now - (back * HOUR)
    end
    opts.on("--start", "-s DATETIME", String, "Find requests starting at this date") do |date|
      options[:range_start] = parse_time(date)
    end
    opts.on("--end", "-e DATETIME", String, "Find requests ending at this date") do |date|
      options[:range_end] = parse_time(date)
    end
    opts.on("--around DATETIME", String, "Find a request at about this time (10 seconds buffer on either side") do |date|
      center = parse_time(date)
      options[:range_start] = center - 10
      options[:range_end] = center + 10
    end
    opts.on("--host HOST", String, "Only find requests on this host") do |host|
      options[:host_filter] ||= []
      options[:host_filter] << host
    end
  end
  parser.parse!(argv)

  if argv.empty?
    puts parser
    exit 1
  end
  options[:regexps] = argv

  # :perf only selects the printer; it is not passed on in the options.
  printer_class = options.delete(:perf) ? RequestPerformancePrinter : RequestPrinter
  options[:printer] = printer_class.new(options[:verbose])

  options[:config] = load_config(options[:config])

  options
end
169
+
170
# Runs a search described by the options hash returned from parse_args.
#
# For every group of files it starts one worker pipeline per file, attaches a
# reader thread to each pipeline, waits for all readers of the group to
# finish, and only then moves on to the next group. Output is produced by
# options[:printer].
def ultragrep(options)
  lower_priority

  config = options.fetch(:config)
  file_type = options.fetch(:type, config.default_file_type)
  file_lists = file_list(config.log_path_glob(file_type), options)

  request_printer = options.fetch(:printer)
  request_printer.run

  quoted_regexps = quote_shell_words(options[:regexps])
  print_regex_info(quoted_regexps, options) if options[:verbose]

  file_lists.each do |files|
    print_search_list(files) if options[:verbose]

    children_pipes = files.map do |file|
      [worker(file, file_type, quoted_regexps, options), file]
    end

    # Register every pipe before any reader starts, so the printer holds back
    # output until each child has reported a timestamp.
    children_pipes.each do |pipe, _|
      request_printer.set_read_up_to(pipe, 0)
    end

    # each thread here waits for child data and then pushes it to the printer thread.
    readers = children_pipes.map do |pipe, filename|
      worker_reader(filename, pipe, request_printer, options)
    end
    readers.each(&:join)

    # The readers have hit end-of-file; release the descriptors now instead of
    # leaving them open until garbage collection.
    children_pipes.each { |pipe, _| pipe.close unless pipe.closed? }

    Process.waitall
  end

  request_printer.finish
end
204
+
205
+ private
206
+
207
# Starts the shell pipeline that searches one file and returns its read pipe.
#
# file           - path of the log file, or a ready-made "tail ..." command
#                  (as built by file_list in tail mode).
# file_type      - log type word passed through to the ug_guts executable.
# quoted_regexps - already shell-quoted patterns (see quote_shell_words).
# options        - Hash providing :range_start and :range_end.
#
# Paths are shell-escaped so that files or install directories containing
# spaces or shell metacharacters do not break the command line.
def worker(file, file_type, quoted_regexps, options)
  require 'shellwords' # stdlib; only this method needs it

  core = "#{Shellwords.escape(ug_guts)} #{file_type} #{options[:range_start]} #{options[:range_end]} #{quoted_regexps}"
  command = if file =~ /\.gz$/
    "gzip -dcf #{Shellwords.escape(file)}"
  elsif file =~ /\.bz2$/
    "bzip2 -dcf #{Shellwords.escape(file)}"
  elsif file =~ /^tail/
    # Already a complete command, not a path: run it as given.
    file
  else
    "#{Shellwords.escape(ug_cat)} #{Shellwords.escape(file)} #{options[:range_start]}"
  end
  IO.popen("#{command} | #{core}")
end
220
+
221
# Spawns and returns a thread that reads a worker pipe line by line and hands
# complete requests to the printer.
#
# Lines starting with "@@<digits>" carry the timestamp the child has reached
# and open a new request; lines starting with "---" close the current request;
# every other line is appended to the request being collected.
def worker_reader(filename, pipe, request_printer, options)
  Thread.new do
    latest_timestamp = nil
    current = nil

    while (line = pipe.gets)
      encode_utf8!(line)

      case line
      when /^@@(\d+)/
        # timestamp coming back from the child.
        latest_timestamp = $1.to_i
        request_printer.set_read_up_to(pipe, latest_timestamp)
        current = [latest_timestamp, ["\n# #{filename}"]]
      when /^---/
        # end of request
        if current
          current[1] << line
          if options[:tail]
            # Tailing prints straight away instead of buffering for ordering.
            STDOUT.write(request_printer.format_request(*current))
            STDOUT.flush
          else
            request_printer.add_request(*current)
          end
        end
        current = [latest_timestamp, [line]]
      else
        current[1] << line if current
      end
    end

    request_printer.set_done(pipe)
  end
end
252
+
253
# Announces on $stderr which patterns are searched and over which time range.
def print_regex_info(quoted_regexps, options)
  message = "searching for regexps: #{quoted_regexps} from #{range_description(options)}"
  $stderr.puts(message)
end
256
+
257
# Renders options[:range_start] and options[:range_end] (epoch seconds) as
# "<start time> to <end time>".
def range_description(options)
  from, to = options.values_at(:range_start, :range_end).map { |seconds| Time.at(seconds) }
  "#{from} to #{to}"
end
260
+
261
# Aborts the program with a message naming the globs and the time range for
# which no files were found.
def nothing_found!(globs, options)
  pattern_list = globs.join(',')
  abort("Couldn't find any files matching globs: #{pattern_list} from #{range_description(options)}")
end
264
+
265
# Lists the files about to be searched on $stderr, two per line.
def print_search_list(list)
  rows = []
  list.each_slice(2) { |pair| rows << pair.join(" ") }
  $stderr.puts("searching #{rows.join("\n")}")
end
269
+
270
# Expands the globs and returns the files to search as a list of groups
# (Array of Arrays); the caller processes one group at a time.
#
# In tail mode the single group holds one "tail -f <file>" command per file
# whose name ends in today's date; otherwise the files are filtered and
# grouped by filter_and_group_files.
#
# Calls nothing_found! when there is no file at all to search.
def file_list(globs, options)
  candidates = Dir.glob(globs)

  file_lists = if options[:tail]
    # TODO fix before we open source -- this is a hard-coded file format.
    today = Time.now.strftime("%Y%m%d")
    todays_logs = candidates.select { |f| f =~ /-#{today}$/ }
    [todays_logs.map { |f| "tail -f #{f}" }]
  else
    filter_and_group_files(candidates, options)
  end

  # Tail mode always yields exactly one group, even when it is empty, so the
  # check has to look inside the groups rather than at the outer list.
  nothing_found!(globs, options) if file_lists.all?(&:empty?)

  $stderr.puts("Grepping #{file_lists.map { |f| f.join(" ") }.join("\n\n\n")}") if options[:verbose]
  file_lists
end
289
+
290
# Cleans a line in place so it is valid UTF-8: the bytes are read as UTF-8,
# converted to UTF-16 with invalid sequences replaced by nothing, and then
# converted back. Returns the same (mutated) string.
def encode_utf8!(line)
  utf8, utf16 = 'UTF-8', 'UTF-16'
  line.encode!(utf16, utf8, :invalid => :replace, :replace => '')
  line.encode!(utf8, utf16)
end
294
+
295
# Quotes each word for the shell and joins them with spaces.
#
# Every word is wrapped in single quotes. An embedded single quote is written
# as '\'' (close the quote, emit an escaped quote, reopen), so the shell hands
# the word to the child unchanged instead of a silently altered pattern.
def quote_shell_words(words)
  words.map { |word| "'" + word.gsub("'") { "'\\''" } + "'" }.join(' ')
end
299
+
300
# Set idle I/O and process priority, so other processes aren't starved for I/O.
# Both commands target this process; their output is discarded and a failure
# (for example a missing ionice) is ignored.
def lower_priority
  pid = Process.pid
  system("ionice -c 3 -p #{pid} >/dev/null 2>&1")
  system("renice -n 19 -p #{pid} >/dev/null 2>&1")
end
305
+
306
# Narrows the files down to the requested hosts and time range, then groups
# them by the date stamp captured from each file name.
#
# Returns an Array of groups (Arrays of paths), one per distinct date stamp.
def filter_and_group_files(files, options)
  on_host = filter_files_by_host(files, options[:host_filter])
  wanted = options.fetch(:range_start)..options.fetch(:range_end)
  in_range = filter_files_by_date(on_host, wanted)
  in_range.group_by { |path| path[DATE_FROM_FILENAME, 1] }.values
end
311
+
312
# Keeps only the files whose parent directory name (the second-to-last path
# component) is listed in host_filter. Without a filter every file is kept.
def filter_files_by_host(files, host_filter)
  if host_filter
    files.select do |path|
      host_dir = path.split("/")[-2]
      host_filter.include?(host_dir)
    end
  else
    files
  end
end
316
+
317
# Keeps the files whose day overlaps the requested range.
#
# files - Array of paths; the date is taken from the digits captured by
#         DATE_FROM_FILENAME and parsed with Time.parse.
# range - Range of epoch seconds to search.
#
# A file is treated as covering one full day starting at its parsed date.
# Files without a date stamp in their name are skipped: passing the missing
# capture (nil) to Time.parse would raise a TypeError and abort the whole
# search because of a single stray file matched by the glob.
def filter_files_by_date(files, range)
  files.select do |file|
    stamp = file[DATE_FROM_FILENAME, 1]
    next false unless stamp

    day_start = Time.parse(stamp).to_i
    range_overlap?(range, day_start..(day_start + DAY - 1))
  end
end
323
+
324
# True when the two ranges share at least one point, i.e. neither range
# begins after the other one has ended.
def range_overlap?(a, b)
  disjoint = a.first > b.last || b.first > a.last
  !disjoint
end
327
+
328
# Converts a command line date argument to epoch seconds.
#
# An all-digit string that does not start with "20" is taken to be epoch
# seconds already; anything else is parsed by Time.parse as a UTC datetime.
def parse_time(string)
  looks_like_epoch = string =~ /^\d+$/ && string !~ /^20/
  return string.to_i if looks_like_epoch

  Time.parse("#{string} UTC").to_i
end
335
+
336
# Builds the configuration object from the given file location; file is
# whatever --config supplied (nil when the option was not given).
def load_config(file)
  config_class = Ultragrep::Config
  config_class.new(file)
end
339
+
340
# Absolute path of the ug_guts file under ext/ultragrep, resolved relative
# to this source file.
def ug_guts
  relative = "../../ext/ultragrep/ug_guts"
  File.expand_path(relative, __FILE__)
end
343
+
344
# Absolute path of the ug_cat file under ext/ultragrep, resolved relative
# to this source file.
def ug_cat
  relative = "../../ext/ultragrep/ug_cat"
  File.expand_path(relative, __FILE__)
end
347
+ end
348
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ultragrep
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - John Doe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-08-27 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email: john@example.com
16
+ executables:
17
+ - ultragrep
18
+ extensions:
19
+ - ext/ultragrep/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - bin/ultragrep
23
+ - ext/ultragrep/Makefile
24
+ - ext/ultragrep/extconf.rb
25
+ - ext/ultragrep/rails_req.c
26
+ - ext/ultragrep/rails_req.h
27
+ - ext/ultragrep/req_matcher.h
28
+ - ext/ultragrep/request.c
29
+ - ext/ultragrep/request.h
30
+ - ext/ultragrep/ug_build_index.c
31
+ - ext/ultragrep/ug_cat.c
32
+ - ext/ultragrep/ug_guts.c
33
+ - ext/ultragrep/ug_index.c
34
+ - ext/ultragrep/ug_index.h
35
+ - ext/ultragrep/work_req.c
36
+ - ext/ultragrep/work_req.h
37
+ - ext/ultragrep/zran.c
38
+ - lib/ultragrep.rb
39
+ - lib/ultragrep/config.rb
40
+ - lib/ultragrep/version.rb
41
+ homepage: https://github.com/zendesk/ultragrep
42
+ licenses:
43
+ - Apache License Version 2.0
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.25
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: Ultragrep
66
+ test_files: []
67
+ has_rdoc: