ultragrep 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ultragrep +6 -0
- data/ext/ultragrep/Makefile +39 -0
- data/ext/ultragrep/extconf.rb +1 -0
- data/ext/ultragrep/rails_req.c +102 -0
- data/ext/ultragrep/rails_req.h +6 -0
- data/ext/ultragrep/req_matcher.h +17 -0
- data/ext/ultragrep/request.c +41 -0
- data/ext/ultragrep/request.h +22 -0
- data/ext/ultragrep/ug_build_index.c +99 -0
- data/ext/ultragrep/ug_cat.c +46 -0
- data/ext/ultragrep/ug_guts.c +138 -0
- data/ext/ultragrep/ug_index.c +83 -0
- data/ext/ultragrep/ug_index.h +27 -0
- data/ext/ultragrep/work_req.c +200 -0
- data/ext/ultragrep/work_req.h +6 -0
- data/ext/ultragrep/zran.c +291 -0
- data/lib/ultragrep/config.rb +47 -0
- data/lib/ultragrep/version.rb +3 -0
- data/lib/ultragrep.rb +348 -0
- metadata +67 -0
data/lib/ultragrep.rb
ADDED
@@ -0,0 +1,348 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'optparse'
|
3
|
+
require 'pp'
|
4
|
+
require 'socket'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
require 'ultragrep/config'
|
8
|
+
|
9
|
+
module Ultragrep
|
10
|
+
# Number of seconds in one hour and in one day; used for all range arithmetic.
HOUR = 3_600
DAY = HOUR * 24
# Captures the run of digits at the end of a file name, optionally followed by
# a single ".ext" suffix (for example "app-20130827" or "app-20130827.gz").
DATE_FROM_FILENAME = /(\d+)(\.\w+)?$/
|
13
|
+
|
14
|
+
# Collects matched requests from every worker reader and writes them to STDOUT
# in timestamp order. A request is only released once every registered child
# has reported a "read up to" timestamp at or beyond the request's timestamp.
class RequestPrinter
  # Sentinel "read up to" value stored for a child that has finished reading.
  DONE_TIMESTAMP = 2**50

  # verbose - when truthy, progress messages are written to $stderr.
  def initialize(verbose)
    @mutex = Mutex.new
    @all_data = []
    @children_timestamps = {}
    @finish = false
    @verbose = verbose
  end

  # Writes every buffered request whose timestamp is <= the smallest
  # "read up to" timestamp of all children, and keeps the rest buffered.
  def dump_buffer
    ready = []

    @mutex.synchronize do
      to_this_ts = @children_timestamps.values.min || 0 # FIXME : should not be necessary, but fails with -t -p
      if @verbose && to_this_ts > 0 && to_this_ts != DONE_TIMESTAMP
        # Reported in UTC, like every other datetime the tool accepts.
        $stderr.puts("I've searched up through #{Time.at(to_this_ts).utc}")
      end
      ready, @all_data = @all_data.partition { |timestamp, _| timestamp <= to_this_ts }
    end

    # Entries are [timestamp, text] pairs, so sorting orders them by time.
    STDOUT.write(ready.sort.map(&:last).join)
    STDOUT.flush
  end

  # Starts a background thread that flushes the buffer every two seconds until
  # #finish has been called and the buffer is empty. Returns the Thread.
  def run
    Thread.new do
      while @all_data.size > 0 || !@finish
        sleep 2
        dump_buffer
      end
      dump_buffer
    end
  end

  # Formats a request and buffers it; requests whose formatted value is
  # nil/false are dropped.
  #
  # parsed_up_to - timestamp associated with the request.
  # text         - Array of lines making up the request.
  def add_request(parsed_up_to, text)
    @mutex.synchronize do
      formatted = format_request(parsed_up_to, text)
      @all_data << [parsed_up_to, formatted] if formatted
    end
  end

  # Returns the printable form of a request: its lines concatenated.
  def format_request(parsed_up_to, text)
    text.join
  end

  # Records how far (timestamp val) the child identified by key has read.
  def set_read_up_to(key, val)
    @mutex.synchronize { @children_timestamps[key] = val }
  end

  # Marks the child identified by key as finished so it no longer holds back
  # buffered output.
  def set_done(key)
    @mutex.synchronize { @children_timestamps[key] = DONE_TIMESTAMP }
  end

  # Signals the background thread to stop and flushes what can be flushed.
  def finish
    @finish = true
    dump_buffer
  end
end

# Printer that emits one "timestamp<TAB>action<TAB>milliseconds" line per
# request instead of the request text.
class RequestPerformancePrinter < RequestPrinter
  # Returns the performance line for a request, or nil when the request does
  # not contain both a "Processing <action>" and a "Completed in <n>ms" marker.
  #
  # text - Array of lines (as passed by add_request) or a single String.
  def format_request(parsed_up_to, text)
    # Requests arrive as an Array of lines; the pattern has to be matched
    # against the joined text, not against the Array itself.
    body = Array(text).join
    return unless body =~ /.*Processing ([^ ]+) .*Completed in (\d+)ms/m

    action = $1
    time = $2
    "#{parsed_up_to}\t#{action}\t#{time}\n"
  end
end
|
88
|
+
|
89
|
+
class << self
|
90
|
+
# Parses command-line arguments into the options hash consumed by .ultragrep.
#
# argv - Array of argument strings; recognised options are removed from it and
#        the remaining words become options[:regexps].
#
# Returns a Hash with :files, :range_start, :range_end (epoch seconds),
# :regexps, :printer, :config and any optional keys set by the flags.
# Prints the usage text and exits with status 1 when no regexp is given;
# --help and --version print and exit with status 0.
def parse_args(argv)
  # Sample the clock once so every default is derived from the same instant.
  now = Time.now.to_i
  options = {
    :files => [],
    :range_start => now - (now % DAY),
    :range_end => now,
  }

  parser = OptionParser.new do |opts|
    opts.banner = <<-BANNER.gsub(/^ {6,}/, "")
      Usage: ultragrep [OPTIONS] [REGEXP ...]

      Dates: all datetimes are in UTC whatever Ruby's Time.parse() accepts.
      For example '2011-04-30 11:30:00'.

      Options are:
    BANNER
    opts.on("--help", "-h", "This text") { puts opts; exit 0 }
    opts.on("--version", "Show version") do
      require 'ultragrep/version'
      puts "Ultragrep version #{Ultragrep::VERSION}"
      exit 0
    end
    opts.on("--config", "-c FILE", String, "Config file location (default: #{Config::DEFAULT_LOCATIONS.join(", ")})") { |config| options[:config] = config }
    opts.on("--progress", "-p", "show grep progress to STDERR") { options[:verbose] = true }
    opts.on("--verbose", "-v", "DEPRECATED") do
      $stderr.puts("The --verbose option is deprecated and will go away soon, please use -p or --progress instead")
      options[:verbose] = true
    end
    opts.on("--tail", "-t", "Tail requests, show matching requests as they arrive") do
      options[:tail] = true
      # Push the end of the range far into the future so nothing is cut off.
      options[:range_end] = now + 100 * DAY
    end
    opts.on("--type", "-l TYPE", String, "Search type of logs, specified in config") { |type| options[:type] = type }
    opts.on("--perf", "Output just performance information") { options[:perf] = true }
    opts.on("--day", "-d DATETIME", String, "Find requests that happened on this day") do |date|
      day_start = parse_time(date)
      options[:range_start] = day_start
      options[:range_end] = day_start + DAY - 1
    end
    opts.on("--daysback", "-b COUNT", Integer, "Find requests from COUNT days ago to now") do |back|
      options[:range_start] = now - (back * DAY)
    end
    opts.on("--hoursback", "-o COUNT", Integer, "Find requests from COUNT hours ago to now") do |back|
      options[:range_start] = now - (back * HOUR)
    end
    opts.on("--start", "-s DATETIME", String, "Find requests starting at this date") do |date|
      options[:range_start] = parse_time(date)
    end
    opts.on("--end", "-e DATETIME", String, "Find requests ending at this date") do |date|
      options[:range_end] = parse_time(date)
    end
    opts.on("--around DATETIME", String, "Find a request at about this time (10 seconds buffer on either side)") do |date|
      center = parse_time(date)
      options[:range_start] = center - 10
      options[:range_end] = center + 10
    end
    opts.on("--host HOST", String, "Only find requests on this host") do |host|
      options[:host_filter] ||= []
      options[:host_filter] << host
    end
  end
  parser.parse!(argv)

  if argv.empty?
    puts parser
    exit 1
  else
    options[:regexps] = argv
  end

  # :perf is only a selector for the printer class, so it is removed again.
  options[:printer] = if options.delete(:perf)
    RequestPerformancePrinter.new(options[:verbose])
  else
    RequestPrinter.new(options[:verbose])
  end

  options[:config] = load_config(options[:config])

  options
end
|
169
|
+
|
170
|
+
# Runs the search described by options (as produced by .parse_args).
#
# For each group of files returned by file_list, one worker process is spawned
# per file and one reader thread per worker; the reader threads feed the
# request printer. Groups are processed one after another.
def ultragrep(options)
  lower_priority

  config = options.fetch(:config)
  file_type = options.fetch(:type, config.default_file_type)
  file_lists = file_list(config.log_path_glob(file_type), options)

  request_printer = options.fetch(:printer)
  request_printer.run

  quoted_regexps = quote_shell_words(options[:regexps])
  print_regex_info(quoted_regexps, options) if options[:verbose]

  file_lists.each do |files|
    print_search_list(files) if options[:verbose]

    children_pipes = files.map do |file|
      [worker(file, file_type, quoted_regexps, options), file]
    end

    # Register every child before any reader starts, so the printer does not
    # release output while some child has not reported yet.
    children_pipes.each do |pipe, _|
      request_printer.set_read_up_to(pipe, 0)
    end

    # each thread here waits for child data and then pushes it to the printer thread.
    readers = children_pipes.map do |pipe, filename|
      worker_reader(filename, pipe, request_printer, options)
    end
    readers.each(&:join)

    # The readers have hit EOF; close the pipes explicitly. The printer keeps
    # them as hash keys, so they would otherwise stay open for the whole run.
    children_pipes.each do |pipe, _|
      pipe.close unless pipe.closed?
    end

    Process.waitall
  end

  request_printer.finish
end
|
204
|
+
|
205
|
+
private
|
206
|
+
|
207
|
+
# Spawns the shell pipeline that feeds one log source into the ug_guts matcher
# and returns the IO (from IO.popen) to read its output from.
#
# file           - path of a log file, or in tail mode a ready-made
#                  "tail -f ..." command.
# file_type      - log type passed to ug_guts as its first argument.
# quoted_regexps - already shell-quoted regexps (see quote_shell_words).
# options        - Hash; :range_start, :range_end and :tail are read.
def worker(file, file_type, quoted_regexps, options)
  require 'shellwords' # loaded here so the method does not depend on file-level requires

  core = [
    Shellwords.escape(ug_guts),
    Shellwords.escape(file_type.to_s),
    options[:range_start],
    options[:range_end],
    quoted_regexps
  ].join(" ")

  command = if options[:tail] && file.start_with?("tail ")
    # Tail mode hands us a complete command, not a path; run it as given.
    file
  elsif file =~ /\.gz$/
    "gzip -dcf #{Shellwords.escape(file)}"
  elsif file =~ /\.bz2$/
    "bzip2 -dcf #{Shellwords.escape(file)}"
  else
    "#{Shellwords.escape(ug_cat)} #{Shellwords.escape(file)} #{options[:range_start]}"
  end

  IO.popen("#{command} | #{core}")
end
|
220
|
+
|
221
|
+
# Starts a thread that reads a worker's output line by line and forwards
# complete requests to the printer. Returns the Thread.
#
# Lines starting with "@@<digits>" carry a timestamp and open a new request;
# lines starting with "---" close the current one. In tail mode a closed
# request is written to STDOUT immediately, otherwise it is handed to
# request_printer.add_request. The pipe is marked done at end of input.
def worker_reader(filename, pipe, request_printer, options)
  Thread.new do
    last_timestamp = nil
    current = nil

    while (line = pipe.gets)
      encode_utf8!(line)

      case line
      when /^@@(\d+)/
        # timestamp coming back from the child.
        last_timestamp = $1.to_i
        request_printer.set_read_up_to(pipe, last_timestamp)
        current = [last_timestamp, ["\n# #{filename}"]]
      when /^---/
        # end of request
        if current
          current[1] << line
          if options[:tail]
            STDOUT.write(request_printer.format_request(*current))
            STDOUT.flush
          else
            request_printer.add_request(*current)
          end
        end
        current = [last_timestamp, [line]]
      else
        current[1] << line if current
      end
    end

    request_printer.set_done(pipe)
  end
end
|
252
|
+
|
253
|
+
# Reports on $stderr which regexps are being searched for and over which
# time range.
def print_regex_info(quoted_regexps, options)
  period = range_description(options)
  $stderr.puts("searching for regexps: #{quoted_regexps} from #{period}")
end
|
256
|
+
|
257
|
+
# Returns a human-readable "START to END" description of the search range.
#
# options - Hash with :range_start and :range_end as epoch seconds.
#
# Both ends are rendered in UTC, because every datetime the tool accepts is
# interpreted as UTC; echoing them in local time would be misleading.
def range_description(options)
  range_start = Time.at(options[:range_start]).utc
  range_end = Time.at(options[:range_end]).utc
  "#{range_start} to #{range_end}"
end
|
260
|
+
|
261
|
+
# Aborts the program with a message naming the globs and the time range that
# produced no files.
def nothing_found!(globs, options)
  searched = globs.join(',')
  period = range_description(options)
  abort("Couldn't find any files matching globs: #{searched} from #{period}")
end
|
264
|
+
|
265
|
+
# Writes the list of sources being searched to $stderr, two per line.
def print_search_list(list)
  rows = list.each_slice(2).map { |pair| pair.join(" ") }
  $stderr.puts("searching #{rows.join("\n")}")
end
|
269
|
+
|
270
|
+
# Expands globs and returns the sources to search as an Array of groups
# (each group is an Array of Strings searched together).
#
# In tail mode the single group holds one "tail -f <file>" command per file
# whose name ends in "-<today as YYYYMMDD>"; otherwise the files are filtered
# and grouped by filter_and_group_files.
#
# Calls nothing_found! when there is nothing to search.
def file_list(globs, options)
  candidates = Dir.glob(globs)

  file_lists = if options[:tail]
    # TODO fix before we open source -- this is a hard-coded file format.
    today = Time.now.strftime("%Y%m%d")
    tail_list = candidates.select { |f| f =~ /-#{today}$/ }.map { |f| "tail -f #{f}" }
    # An empty tail list must count as "nothing found", not as one empty group.
    tail_list.empty? ? [] : [tail_list]
  else
    filter_and_group_files(candidates, options)
  end

  nothing_found!(globs, options) if file_lists.empty?

  $stderr.puts("Grepping #{file_lists.map { |f| f.join(" ") }.join("\n\n\n")}") if options[:verbose]
  file_lists
end
|
289
|
+
|
290
|
+
# Strips invalid UTF-8 byte sequences from line in place by round-tripping it
# through UTF-16 (invalid input is replaced with nothing). Returns line.
def encode_utf8!(line)
  line
    .encode!('UTF-16', 'UTF-8', :invalid => :replace, :replace => '')
    .encode!('UTF-8', 'UTF-16')
end
|
294
|
+
|
295
|
+
# Quotes each word for use as a single shell argument and joins them with
# spaces. Every word is wrapped in single quotes; an embedded single quote is
# emitted as '\'' (close quote, escaped quote, reopen quote) so the word
# reaches the child process unchanged.
def quote_shell_words(words)
  # Block form of gsub: the replacement is used literally, with no
  # backslash-sequence interpretation.
  words.map { |word| "'" + word.gsub("'") { "'\\''" } + "'" }.join(' ')
end
|
299
|
+
|
300
|
+
# Drop this process to the idle I/O class and the lowest CPU priority so that
# other processes are not starved while logs are being scanned. Output of both
# commands is discarded.
def lower_priority
  pid = Process.pid
  system("ionice -c 3 -p #{pid} >/dev/null 2>&1")
  system("renice -n 19 -p #{pid} >/dev/null 2>&1")
end
|
305
|
+
|
306
|
+
# Narrows files to the requested hosts and time range, then groups the
# survivors by the date captured from their file names.
#
# Returns an Array of groups (Arrays of paths).
def filter_and_group_files(files, options)
  date_range = options.fetch(:range_start)..options.fetch(:range_end)
  on_hosts = filter_files_by_host(files, options[:host_filter])
  in_range = filter_files_by_date(on_hosts, date_range)
  in_range.group_by { |path| path[DATE_FROM_FILENAME, 1] }.values
end
|
311
|
+
|
312
|
+
# Keeps only the files whose parent directory name (the second-to-last path
# component) is listed in host_filter. Without a filter, files is returned
# unchanged.
def filter_files_by_host(files, host_filter)
  return files if !host_filter

  files.select do |path|
    host = path.split("/")[-2]
    host_filter.include?(host)
  end
end
|
316
|
+
|
317
|
+
# Keeps the files whose day, taken from the date in the file name, overlaps
# range.
#
# files - Array of paths.
# range - Range of epoch seconds.
#
# A file name without a trailing date cannot be placed in time and is skipped
# rather than aborting the whole search.
def filter_files_by_date(files, range)
  files.select do |file|
    date = file[DATE_FROM_FILENAME, 1]
    next false unless date

    # NOTE(review): Time.parse uses the local zone here while the search range
    # is built from UTC input -- confirm which zone the log file names use.
    logfile_date = Time.parse(date).to_i
    range_overlap?(range, logfile_date..(logfile_date + DAY - 1))
  end
end
|
323
|
+
|
324
|
+
# True when the two ranges share at least one point, i.e. neither one lies
# entirely before the other.
def range_overlap?(a, b)
  a_before_b = a.last < b.first
  b_before_a = b.last < a.first
  !(a_before_b || b_before_a)
end
|
327
|
+
|
328
|
+
# Converts a user-supplied time into epoch seconds.
#
# An all-digit string that does not start with "20" is taken as epoch seconds
# already; anything else is handed to Time.parse with " UTC" appended.
def parse_time(string)
  epoch_seconds = string =~ /^\d+$/ && string !~ /^20/
  return string.to_i if epoch_seconds

  Time.parse("#{string} UTC").to_i
end
|
335
|
+
|
336
|
+
# Builds the Ultragrep::Config for the given config file location.
# file is passed straight through to Ultragrep::Config.new; parse_args calls
# this with options[:config], which is nil when no --config flag was given.
def load_config(file)
|
337
|
+
Ultragrep::Config.new(file)
|
338
|
+
end
|
339
|
+
|
340
|
+
# Absolute path of the ug_guts helper, resolved relative to this file
# (two levels up, then ext/ultragrep).
def ug_guts
  relative = File.join("..", "..", "ext", "ultragrep", "ug_guts")
  File.expand_path(relative, __FILE__)
end
|
343
|
+
|
344
|
+
# Absolute path of the ug_cat helper, resolved relative to this file
# (two levels up, then ext/ultragrep).
def ug_cat
  relative = File.join("..", "..", "ext", "ultragrep", "ug_cat")
  File.expand_path(relative, __FILE__)
end
|
347
|
+
end
|
348
|
+
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ultragrep
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- John Doe
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-08-27 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description:
|
15
|
+
email: john@example.com
|
16
|
+
executables:
|
17
|
+
- ultragrep
|
18
|
+
extensions:
|
19
|
+
- ext/ultragrep/extconf.rb
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- bin/ultragrep
|
23
|
+
- ext/ultragrep/Makefile
|
24
|
+
- ext/ultragrep/extconf.rb
|
25
|
+
- ext/ultragrep/rails_req.c
|
26
|
+
- ext/ultragrep/rails_req.h
|
27
|
+
- ext/ultragrep/req_matcher.h
|
28
|
+
- ext/ultragrep/request.c
|
29
|
+
- ext/ultragrep/request.h
|
30
|
+
- ext/ultragrep/ug_build_index.c
|
31
|
+
- ext/ultragrep/ug_cat.c
|
32
|
+
- ext/ultragrep/ug_guts.c
|
33
|
+
- ext/ultragrep/ug_index.c
|
34
|
+
- ext/ultragrep/ug_index.h
|
35
|
+
- ext/ultragrep/work_req.c
|
36
|
+
- ext/ultragrep/work_req.h
|
37
|
+
- ext/ultragrep/zran.c
|
38
|
+
- lib/ultragrep.rb
|
39
|
+
- lib/ultragrep/config.rb
|
40
|
+
- lib/ultragrep/version.rb
|
41
|
+
homepage: https://github.com/zendesk/ultragrep
|
42
|
+
licenses:
|
43
|
+
- Apache License Version 2.0
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.8.25
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: Ultragrep
|
66
|
+
test_files: []
|
67
|
+
has_rdoc:
|