ultragrep 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ultragrep +6 -0
- data/ext/ultragrep/Makefile +39 -0
- data/ext/ultragrep/extconf.rb +1 -0
- data/ext/ultragrep/rails_req.c +102 -0
- data/ext/ultragrep/rails_req.h +6 -0
- data/ext/ultragrep/req_matcher.h +17 -0
- data/ext/ultragrep/request.c +41 -0
- data/ext/ultragrep/request.h +22 -0
- data/ext/ultragrep/ug_build_index.c +99 -0
- data/ext/ultragrep/ug_cat.c +46 -0
- data/ext/ultragrep/ug_guts.c +138 -0
- data/ext/ultragrep/ug_index.c +83 -0
- data/ext/ultragrep/ug_index.h +27 -0
- data/ext/ultragrep/work_req.c +200 -0
- data/ext/ultragrep/work_req.h +6 -0
- data/ext/ultragrep/zran.c +291 -0
- data/lib/ultragrep/config.rb +47 -0
- data/lib/ultragrep/version.rb +3 -0
- data/lib/ultragrep.rb +348 -0
- metadata +67 -0
data/lib/ultragrep.rb
ADDED
@@ -0,0 +1,348 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'optparse'
|
3
|
+
require 'pp'
|
4
|
+
require 'socket'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
require 'ultragrep/config'
|
8
|
+
|
9
|
+
module Ultragrep
|
10
|
+
HOUR = 60 * 60
|
11
|
+
DAY = 24 * HOUR
|
12
|
+
DATE_FROM_FILENAME = /(\d+)(\.\w+)?$/
|
13
|
+
|
14
|
+
# Buffers matched requests coming from several readers and writes them to
# STDOUT ordered by timestamp.
#
# Each reader reports how far it has read via `set_read_up_to`; a buffered
# request is only written once every reader has passed its timestamp. A
# reader that is finished is recorded with the sentinel value 2**50.
class RequestPrinter
  # verbose - when truthy, progress messages are written to $stderr.
  def initialize(verbose)
    @mutex = Mutex.new
    @all_data = []
    @children_timestamps = {}
    @finish = false
    @verbose = verbose
  end

  # Writes every buffered request whose timestamp is not newer than the
  # slowest reader's position, and keeps the rest buffered.
  def dump_buffer
    ready = []

    @mutex.synchronize do
      horizon = @children_timestamps.values.min || 0 # FIXME : should not be necessary, but fails with -t -p
      if @verbose && horizon > 0 && horizon != 2**50
        $stderr.puts("I've searched up through #{Time.at(horizon)}")
      end
      ready, @all_data = @all_data.partition { |req| req[0] <= horizon }
    end

    # Entries are [timestamp, text] pairs, so sorting orders them by time.
    STDOUT.write(ready.sort.map(&:last).join)
    STDOUT.flush
  end

  # Starts the background thread that flushes the buffer every two seconds
  # until `finish` was called and nothing is left. Returns the Thread.
  def run
    Thread.new do
      until @all_data.empty? && @finish
        sleep 2
        dump_buffer
      end
      dump_buffer
    end
  end

  # Formats a request and buffers it under its timestamp. Requests for which
  # `format_request` returns nil/false are dropped.
  def add_request(parsed_up_to, text)
    @mutex.synchronize do
      formatted = format_request(parsed_up_to, text)
      @all_data << [parsed_up_to, formatted] if formatted
    end
  end

  # Turns the collected lines of a request into the text to print.
  # Subclasses may override this.
  def format_request(parsed_up_to, text)
    text.join
  end

  # Records that the reader identified by `key` has read up to `val`.
  def set_read_up_to(key, val)
    @mutex.synchronize do
      @children_timestamps[key] = val
    end
  end

  # Marks the reader identified by `key` as finished.
  def set_done(key)
    @mutex.synchronize do
      @children_timestamps[key] = 2**50
    end
  end

  # Signals the background thread to stop and flushes what can be flushed.
  def finish
    @finish = true
    dump_buffer
  end
end
|
79
|
+
|
80
|
+
class RequestPerformancePrinter < RequestPrinter
|
81
|
+
# Reduces a request to a single tab-separated performance line:
# "<timestamp>\t<action>\t<milliseconds>\n".
#
# parsed_up_to - timestamp the request was read at; echoed in the output.
# text         - the request, either as an Array of lines (which is how
#                requests are collected by the readers) or as one String.
#
# Returns the formatted line, or nil when the request does not contain both
# a "Processing <action>" and a "Completed in <n>ms" marker.
def format_request(parsed_up_to, text)
  # Matching the Array itself never succeeds, so join the lines first.
  body = text.respond_to?(:join) ? text.join : text.to_s
  match = /.*Processing ([^ ]+) .*Completed in (\d+)ms/m.match(body)
  return unless match

  "#{parsed_up_to}\t#{match[1]}\t#{match[2]}\n"
end
|
87
|
+
end
|
88
|
+
|
89
|
+
class << self
|
90
|
+
# Parses command-line arguments into the options hash used by `ultragrep`.
#
# argv - Array of argument strings. Recognised switches are removed from it
#        in place; whatever remains becomes options[:regexps].
#
# Returns a Hash containing :files, :range_start and :range_end (epoch
# seconds), :regexps, :printer and :config, plus the optional keys that were
# requested on the command line (:verbose, :tail, :type, :host_filter).
# Prints the usage text and exits with status 1 when no regexp is given.
def parse_args(argv)
  # Read the clock once so every default is derived from the same instant.
  now = Time.now.to_i
  options = {
    :files => [],
    :range_start => now - (now % DAY), # most recent UTC midnight
    :range_end => now,
  }

  parser = OptionParser.new do |opts|
    opts.banner = <<-BANNER.gsub(/^ {6,}/, "")
      Usage: ultragrep [OPTIONS] [REGEXP ...]

      Dates: all datetimes are in UTC whatever Ruby's Time.parse() accepts.
      For example '2011-04-30 11:30:00'.

      Options are:
    BANNER
    opts.on("--help", "-h", "This text") { puts opts; exit 0 }
    opts.on("--version", "Show version") do
      require 'ultragrep/version'
      puts "Ultragrep version #{Ultragrep::VERSION}"
      exit 0
    end
    opts.on("--config", "-c FILE", String, "Config file location (default: #{Config::DEFAULT_LOCATIONS.join(", ")})") { |config| options[:config] = config }
    opts.on("--progress", "-p", "show grep progress to STDERR") { options[:verbose] = true }
    opts.on("--verbose", "-v", "DEPRECATED") do
      $stderr.puts("The --verbose option is deprecated and will go away soon, please use -p or --progress instead")
      options[:verbose] = true
    end
    opts.on("--tail", "-t", "Tail requests, show matching requests as they arrive") do
      options[:tail] = true
      # Push the end of the range far into the future so nothing is cut off.
      options[:range_end] = now + 100 * DAY
    end
    opts.on("--type", "-l TYPE", String, "Search type of logs, specified in config") { |type| options[:type] = type }
    opts.on("--perf", "Output just performance information") { options[:perf] = true }
    opts.on("--day", "-d DATETIME", String, "Find requests that happened on this day") do |date|
      day_start = parse_time(date)
      options[:range_start] = day_start
      options[:range_end] = day_start + DAY - 1
    end
    opts.on("--daysback", "-b COUNT", Integer, "Find requests from COUNT days ago to now") do |back|
      options[:range_start] = now - (back * DAY)
    end
    opts.on("--hoursback", "-o COUNT", Integer, "Find requests from COUNT hours ago to now") do |back|
      options[:range_start] = now - (back * HOUR)
    end
    opts.on("--start", "-s DATETIME", String, "Find requests starting at this date") do |date|
      options[:range_start] = parse_time(date)
    end
    opts.on("--end", "-e DATETIME", String, "Find requests ending at this date") do |date|
      options[:range_end] = parse_time(date)
    end
    opts.on("--around DATETIME", String, "Find a request at about this time (10 seconds buffer on either side)") do |date|
      center = parse_time(date)
      options[:range_start] = center - 10
      options[:range_end] = center + 10
    end
    opts.on("--host HOST", String, "Only find requests on this host") do |host|
      (options[:host_filter] ||= []) << host
    end
  end
  parser.parse!(argv)

  if argv.empty?
    puts parser
    exit 1
  end
  options[:regexps] = argv

  # --perf only selects the printer; it is not passed on as an option.
  printer_class = options.delete(:perf) ? RequestPerformancePrinter : RequestPrinter
  options[:printer] = printer_class.new(options[:verbose])

  # :config holds the path given with --config (or nil) up to this point.
  options[:config] = load_config(options[:config])

  options
end
|
169
|
+
|
170
|
+
# Runs a search described by the options hash produced by `parse_args`.
#
# For every group of log files one worker process per file is started; a
# reader thread per worker forwards its output to the request printer. The
# groups are processed one after another.
#
# options - Hash; :config and :printer are required, :type, :regexps and
#           :verbose are read here, the rest is passed on to the helpers.
def ultragrep(options)
  lower_priority

  config = options.fetch(:config)
  file_type = options.fetch(:type, config.default_file_type)
  file_lists = file_list(config.log_path_glob(file_type), options)

  request_printer = options.fetch(:printer)
  request_printer.run

  quoted_regexps = quote_shell_words(options[:regexps])
  print_regex_info(quoted_regexps, options) if options[:verbose]

  file_lists.each do |files|
    print_search_list(files) if options[:verbose]

    children_pipes = files.map do |file|
      [worker(file, file_type, quoted_regexps, options), file]
    end

    # Register every worker before reading, so the printer holds output back
    # until all of them have reported a position.
    children_pipes.each do |pipe, _|
      request_printer.set_read_up_to(pipe, 0)
    end

    # each thread here waits for child data and then pushes it to the printer thread.
    readers = children_pipes.map do |pipe, filename|
      worker_reader(filename, pipe, request_printer, options)
    end
    readers.each(&:join)

    # The readers have hit end-of-file, so the pipes are of no further use;
    # close them instead of leaking one descriptor per searched file.
    children_pipes.each do |pipe, _|
      pipe.close unless pipe.closed?
    end

    Process.waitall
  end

  request_printer.finish
end
|
204
|
+
|
205
|
+
private
|
206
|
+
|
207
|
+
# Starts the shell pipeline that feeds one log file through ug_guts.
#
# file           - path of the log file, or a ready-made "tail -f <path>"
#                  command as produced for tail mode.
# file_type      - log type handed to ug_guts.
# quoted_regexps - regexps, already quoted for the shell.
# options        - Hash providing :range_start and :range_end.
#
# Returns the IO object of the pipeline (see IO.popen).
def worker(file, file_type, quoted_regexps, options)
  require 'shellwords'

  # Everything that is not already shell-quoted is escaped, so paths with
  # spaces or shell metacharacters cannot break or alter the command.
  core = "#{Shellwords.escape(ug_guts)} #{Shellwords.escape(file_type.to_s)} " \
         "#{options[:range_start]} #{options[:range_end]} #{quoted_regexps}"
  path = Shellwords.escape(file)

  command =
    if file =~ /\.gz\z/
      "gzip -dcf #{path}"
    elsif file =~ /\.bz2\z/
      "bzip2 -dcf #{path}"
    elsif file =~ /\Atail -f /
      # Already a complete command; only this exact prefix is trusted, so a
      # relative path that merely starts with "tail" is not run as a command.
      file
    else
      "#{Shellwords.escape(ug_cat)} #{path} #{options[:range_start]}"
    end

  IO.popen("#{command} | #{core}")
end
|
220
|
+
|
221
|
+
# Starts a thread that reads a worker's output line by line and hands the
# requests it contains to the printer.
#
# Lines starting with "@@<digits>" carry the timestamp the worker has
# reached; lines starting with "---" end a request; all other lines belong
# to the request currently being collected.
#
# filename        - name shown in the "# <filename>" header of each request.
# pipe            - IO to read from; also the key used with the printer.
# request_printer - receives set_read_up_to / add_request / set_done calls.
# options         - Hash; with :tail set, requests are written to STDOUT
#                   immediately instead of being buffered by the printer.
#
# Returns the Thread.
def worker_reader(filename, pipe, request_printer, options)
  Thread.new do
    parsed_up_to = nil
    current = nil

    while (line = pipe.gets)
      encode_utf8!(line)

      case line
      when /^@@(\d+)/
        # timestamp coming back from the child.
        parsed_up_to = $1.to_i
        request_printer.set_read_up_to(pipe, parsed_up_to)
        current = [parsed_up_to, ["\n# #{filename}"]]
      when /^---/
        # end of request
        if current
          current[1] << line
          if options[:tail]
            STDOUT.write(request_printer.format_request(*current))
            STDOUT.flush
          else
            request_printer.add_request(*current)
          end
        end
        # The separator line opens the next request.
        current = [parsed_up_to, [line]]
      else
        current[1] << line if current
      end
    end

    request_printer.set_done(pipe)
  end
end
|
252
|
+
|
253
|
+
# Reports on $stderr which regexps are searched for and in which time range.
def print_regex_info(quoted_regexps, options)
  range = range_description(options)
  $stderr.puts("searching for regexps: #{quoted_regexps} from #{range}")
end
|
256
|
+
|
257
|
+
# Describes the searched time range as "<start> to <end>", built from the
# epoch seconds in options[:range_start] and options[:range_end].
def range_description(options)
  from = Time.at(options[:range_start])
  to = Time.at(options[:range_end])
  "#{from} to #{to}"
end
|
260
|
+
|
261
|
+
# Aborts the program with a message naming the globs and the time range for
# which no log files were found.
def nothing_found!(globs, options)
  message = "Couldn't find any files matching globs: #{globs.join(',')} from #{range_description(options)}"
  abort(message)
end
|
264
|
+
|
265
|
+
# Lists the files about to be searched on $stderr, two per line.
def print_search_list(list)
  rows = list.each_slice(2).map { |pair| pair.join(" ") }
  $stderr.puts("searching #{rows.join("\n")}")
end
|
269
|
+
|
270
|
+
# Builds the groups of files (or tail commands) to search.
#
# globs   - Array of glob patterns for the log files.
# options - Hash; :tail selects tail mode, :verbose enables a listing on
#           $stderr, the rest is passed on to the date/host filtering.
#
# Returns an Array of Arrays. In tail mode it holds a single group of
# "tail -f <path>" commands for the files carrying today's date; otherwise
# the groups returned by filter_and_group_files.
# Calls nothing_found! when there is nothing to search.
def file_list(globs, options)
  matched = Dir.glob(globs)

  file_lists =
    if options[:tail]
      require 'shellwords'

      # TODO fix before we open source -- this is a hard-coded file format.
      today = Time.now.strftime("%Y%m%d")
      current = matched.select { |f| f =~ /-#{today}$/ }
      # The path ends up in a shell command line, so it has to be escaped.
      tail_list = current.map { |f| "tail -f #{Shellwords.escape(f)}" }
      # An empty group must count as "nothing found" rather than starting a
      # search over zero files.
      tail_list.empty? ? [] : [tail_list]
    else
      filter_and_group_files(matched, options)
    end

  nothing_found!(globs, options) if file_lists.empty?

  $stderr.puts("Grepping #{file_lists.map { |f| f.join(" ") }.join("\n\n\n")}") if options[:verbose]
  file_lists
end
|
289
|
+
|
290
|
+
# Cleans up `line` in place: the bytes are read as UTF-8, invalid sequences
# are dropped, and the result is UTF-8 again. The round trip through UTF-16
# forces an actual conversion. Returns the modified string.
def encode_utf8!(line)
  line.encode!('UTF-16', 'UTF-8', :invalid => :replace, :replace => '').encode!('UTF-8', 'UTF-16')
end
|
294
|
+
|
295
|
+
# Quotes each word for use as one shell argument and joins the results with
# spaces.
#
# Words are wrapped in single quotes; a single quote inside a word is
# written as '\'' (close quote, escaped quote, reopen quote), so the shell
# passes the word on unchanged. Substituting another character for the quote
# would silently change the regexp being searched for.
#
# words - Array of Strings.
#
# Returns a String.
def quote_shell_words(words)
  # Block form: a replacement *string* would treat \' as a back-reference.
  words.map { |word| "'" + word.gsub("'") { "'\\''" } + "'" }.join(' ')
end
|
299
|
+
|
300
|
+
# Puts this process into the idle I/O class and at the lowest CPU priority,
# so other processes aren't starved for I/O. The output of both tools is
# discarded and their success is not checked.
def lower_priority
  pid = Process.pid
  system("ionice -c 3 -p #{pid} >/dev/null 2>&1")
  system("renice -n 19 -p #{pid} >/dev/null 2>&1")
end
|
305
|
+
|
306
|
+
# Narrows `files` down to the requested hosts and time range and groups the
# survivors by the date found in their names.
#
# options - Hash; :range_start and :range_end are required, :host_filter is
#           optional.
#
# Returns an Array of Arrays of file names, one inner Array per date.
def filter_and_group_files(files, options)
  on_hosts = filter_files_by_host(files, options[:host_filter])
  window = options.fetch(:range_start)..options.fetch(:range_end)
  in_window = filter_files_by_date(on_hosts, window)

  by_date = in_window.group_by { |path| path[DATE_FROM_FILENAME, 1] }
  by_date.values
end
|
311
|
+
|
312
|
+
# Keeps the files whose parent directory name is listed in `host_filter`.
# Without a filter (nil/false) all files are returned unchanged.
def filter_files_by_host(files, host_filter)
  if host_filter
    files.select do |path|
      host = path.split("/")[-2]
      host_filter.include?(host)
    end
  else
    files
  end
end
|
316
|
+
|
317
|
+
# Keeps the files whose day overlaps `range`.
#
# The day is taken from the digits at the end of the file name (see
# DATE_FROM_FILENAME) and covers DAY seconds from the parsed time.
#
# files - Array of file names.
# range - Range of epoch seconds.
#
# Files whose name carries no date, or one Time.parse rejects, cannot be
# placed in time and are left out instead of aborting the whole search.
# NOTE(review): confirm that leaving such files out (rather than always
# searching them) is the wanted behaviour.
def filter_files_by_date(files, range)
  files.select do |file|
    stamp = file[DATE_FROM_FILENAME, 1]
    next false unless stamp

    begin
      logfile_date = Time.parse(stamp).to_i
    rescue ArgumentError
      next false
    end

    range_overlap?(range, logfile_date..(logfile_date + DAY - 1))
  end
end
|
323
|
+
|
324
|
+
# True when the two ranges share at least one point, i.e. neither of them
# ends before the other one starts.
def range_overlap?(a, b)
  return false if b.last < a.first

  b.first <= a.last
end
|
327
|
+
|
328
|
+
# Converts a command-line date into epoch seconds.
#
# A string of digits that does not start with "20" is taken to be epoch
# seconds already; anything else is parsed by Time.parse as a UTC time.
def parse_time(string)
  looks_like_epoch = string =~ /^\d+$/ && string !~ /^20/
  return string.to_i if looks_like_epoch

  Time.parse("#{string} UTC").to_i
end
|
335
|
+
|
336
|
+
# Builds the configuration object from `file`, which is passed on to
# Ultragrep::Config.new as given.
def load_config(file)
  config_class = Ultragrep::Config
  config_class.new(file)
end
|
339
|
+
|
340
|
+
# Absolute path of the ug_guts executable, resolved relative to this file.
def ug_guts
  relative = "../../ext/ultragrep/ug_guts"
  File.expand_path(relative, __FILE__)
end
|
343
|
+
|
344
|
+
# Absolute path of the ug_cat executable, resolved relative to this file.
def ug_cat
  relative = "../../ext/ultragrep/ug_cat"
  File.expand_path(relative, __FILE__)
end
|
347
|
+
end
|
348
|
+
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ultragrep
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- John Doe
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-08-27 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description:
|
15
|
+
email: john@example.com
|
16
|
+
executables:
|
17
|
+
- ultragrep
|
18
|
+
extensions:
|
19
|
+
- ext/ultragrep/extconf.rb
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- bin/ultragrep
|
23
|
+
- ext/ultragrep/Makefile
|
24
|
+
- ext/ultragrep/extconf.rb
|
25
|
+
- ext/ultragrep/rails_req.c
|
26
|
+
- ext/ultragrep/rails_req.h
|
27
|
+
- ext/ultragrep/req_matcher.h
|
28
|
+
- ext/ultragrep/request.c
|
29
|
+
- ext/ultragrep/request.h
|
30
|
+
- ext/ultragrep/ug_build_index.c
|
31
|
+
- ext/ultragrep/ug_cat.c
|
32
|
+
- ext/ultragrep/ug_guts.c
|
33
|
+
- ext/ultragrep/ug_index.c
|
34
|
+
- ext/ultragrep/ug_index.h
|
35
|
+
- ext/ultragrep/work_req.c
|
36
|
+
- ext/ultragrep/work_req.h
|
37
|
+
- ext/ultragrep/zran.c
|
38
|
+
- lib/ultragrep.rb
|
39
|
+
- lib/ultragrep/config.rb
|
40
|
+
- lib/ultragrep/version.rb
|
41
|
+
homepage: https://github.com/zendesk/ultragrep
|
42
|
+
licenses:
|
43
|
+
- Apache License Version 2.0
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.8.25
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: Ultragrep
|
66
|
+
test_files: []
|
67
|
+
has_rdoc:
|