ultragrep 0.1.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/ultragrep_build_indexes +45 -0
- data/lib/ultragrep.rb +47 -61
- data/lib/ultragrep/config.rb +6 -0
- data/lib/ultragrep/log_collector.rb +67 -0
- data/lib/ultragrep/version.rb +1 -1
- data/src/Makefile +24 -0
- data/{ext/ultragrep → src}/extconf.rb +0 -0
- data/src/pcre.h +668 -0
- data/src/request.h +13 -0
- data/src/ug_build_index.c +109 -0
- data/src/ug_cat.c +188 -0
- data/src/ug_guts.c +199 -0
- data/src/ug_gzip.c +242 -0
- data/src/ug_gzip.h +8 -0
- data/src/ug_index.c +62 -0
- data/src/ug_index.h +23 -0
- data/src/ug_lua.c +119 -0
- data/src/ug_lua.h +10 -0
- metadata +25 -28
- data/ext/ultragrep/Makefile +0 -39
- data/ext/ultragrep/rails_req.c +0 -102
- data/ext/ultragrep/rails_req.h +0 -6
- data/ext/ultragrep/req_matcher.h +0 -17
- data/ext/ultragrep/request.c +0 -41
- data/ext/ultragrep/request.h +0 -22
- data/ext/ultragrep/ug_build_index.c +0 -99
- data/ext/ultragrep/ug_cat.c +0 -46
- data/ext/ultragrep/ug_guts.c +0 -138
- data/ext/ultragrep/ug_index.c +0 -83
- data/ext/ultragrep/ug_index.h +0 -27
- data/ext/ultragrep/work_req.c +0 -200
- data/ext/ultragrep/work_req.h +0 -6
- data/ext/ultragrep/zran.c +0 -291
data/src/ug_build_index.c
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
// ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <getopt.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <errno.h>
|
7
|
+
#include <time.h>
|
8
|
+
#include <unistd.h>
|
9
|
+
#include "pcre.h"
|
10
|
+
#include "request.h"
|
11
|
+
#include "ug_index.h"
|
12
|
+
#include "ug_lua.h"
|
13
|
+
#include "ug_gzip.h"
|
14
|
+
|
15
|
+
#define USAGE "Usage: ug_build_index process.lua file\n"
|
16
|
+
|
17
|
+
// index file format
|
18
|
+
// [64bit,64bit] -- timestamp, file offset
|
19
|
+
// [32bit, Nbytes] -- extra data
|
20
|
+
|
21
|
+
static build_idx_context_t ctx;
|
22
|
+
|
23
|
+
|
24
|
+
/*
 * Record an index entry for a request, at most once per INDEX_EVERY-sized
 * time bucket.
 *
 * The request time is rounded down to a multiple of INDEX_EVERY.  An entry
 * (bucket time, req->offset) is written to ctx.findex only when nothing has
 * been indexed yet (ctx.last_index_time == 0) or the bucket is strictly newer
 * than the last one written.
 *
 * NOTE(review): presumably invoked by the request-parsing layer for every
 * request it assembles -- the caller is not visible in this file.
 */
void handle_request(request_t *req)
{
    const time_t bucket = req->time - (req->time % INDEX_EVERY);

    /* Already have an entry for this bucket (or a later one): nothing to do. */
    if (ctx.last_index_time && bucket <= ctx.last_index_time)
        return;

    ug_write_index(ctx.findex, bucket, req->offset);
    ctx.last_index_time = bucket;
}
|
33
|
+
|
34
|
+
void open_indexes(char *log_fname)
|
35
|
+
{
|
36
|
+
char *index_fname, *gz_index_fname;
|
37
|
+
|
38
|
+
index_fname = ug_get_index_fname(log_fname, "idx");
|
39
|
+
|
40
|
+
if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
|
41
|
+
gz_index_fname = ug_get_index_fname(log_fname, "gzidx");
|
42
|
+
/* we don't do incremental index building in gzipped files -- we just truncate and
|
43
|
+
* build over*/
|
44
|
+
ctx.findex = fopen(index_fname, "w+");
|
45
|
+
ctx.fgzindex = fopen(gz_index_fname, "w+");
|
46
|
+
|
47
|
+
if (!ctx.findex || !ctx.fgzindex) {
|
48
|
+
fprintf(stderr, "Couldn't open index files '%s','%s': %s\n", index_fname, gz_index_fname, strerror(errno));
|
49
|
+
exit(1);
|
50
|
+
}
|
51
|
+
} else {
|
52
|
+
ctx.findex = fopen(index_fname, "r+");
|
53
|
+
if (ctx.findex) {
|
54
|
+
/* seek in the log, (and the index, with get_offset_for_timestamp()) to the
|
55
|
+
* last timestamp we indexed */
|
56
|
+
fseeko(ctx.flog, ug_get_offset_for_timestamp(ctx.findex, -1), SEEK_SET);
|
57
|
+
} else {
|
58
|
+
ctx.findex = fopen(index_fname, "w+");
|
59
|
+
}
|
60
|
+
if (!ctx.findex) {
|
61
|
+
fprintf(stderr, "Couldn't open index file '%s': %s\n", index_fname, strerror(errno));
|
62
|
+
exit(1);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
int main(int argc, char **argv)
|
68
|
+
{
|
69
|
+
char *line = NULL, *lua_fname, *log_fname;
|
70
|
+
ssize_t line_size;
|
71
|
+
size_t allocated;
|
72
|
+
|
73
|
+
if (argc < 3) {
|
74
|
+
fprintf(stderr, USAGE);
|
75
|
+
exit(1);
|
76
|
+
}
|
77
|
+
|
78
|
+
lua_fname = argv[1];
|
79
|
+
log_fname = argv[2];
|
80
|
+
|
81
|
+
bzero(&ctx, sizeof(build_idx_context_t));
|
82
|
+
|
83
|
+
ctx.lua = ug_lua_init(lua_fname);
|
84
|
+
|
85
|
+
ctx.flog = fopen(log_fname, "r");
|
86
|
+
if (!ctx.flog) {
|
87
|
+
perror("Couldn't open log file");
|
88
|
+
exit(1);
|
89
|
+
}
|
90
|
+
|
91
|
+
open_indexes(log_fname);
|
92
|
+
|
93
|
+
if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
|
94
|
+
build_gz_index(&ctx);
|
95
|
+
} else {
|
96
|
+
while (1) {
|
97
|
+
off_t offset;
|
98
|
+
offset = ftello(ctx.flog);
|
99
|
+
line_size = getline(&line, &allocated, ctx.flog);
|
100
|
+
|
101
|
+
if ( line_size < 0 )
|
102
|
+
break;
|
103
|
+
|
104
|
+
ug_process_line(ctx.lua, line, line_size, offset);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
ug_lua_on_eof(ctx.lua);
|
108
|
+
exit(0);
|
109
|
+
}
|
data/src/ug_cat.c
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
// ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
|
2
|
+
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <libgen.h>
|
7
|
+
#include "ug_index.h"
|
8
|
+
#include "ug_gzip.h"
|
9
|
+
#include "zlib.h"
|
10
|
+
|
11
|
+
/* target_offset is the offset in the uncompressed stream we're looking for. */
|
12
|
+
void fill_gz_info(off_t target_offset, FILE * gz_index, unsigned char *dict_data, off_t * compressed_offset)
|
13
|
+
{
|
14
|
+
off_t uncompressed_offset = 0;
|
15
|
+
|
16
|
+
for (;;) {
|
17
|
+
if (!fread(&uncompressed_offset, sizeof(off_t), 1, gz_index))
|
18
|
+
break;
|
19
|
+
|
20
|
+
if (uncompressed_offset > target_offset) {
|
21
|
+
return;
|
22
|
+
}
|
23
|
+
|
24
|
+
if (!fread(compressed_offset, sizeof(off_t), 1, gz_index))
|
25
|
+
break;
|
26
|
+
|
27
|
+
if (!fread(dict_data, WINSIZE, 1, gz_index))
|
28
|
+
break;
|
29
|
+
}
|
30
|
+
return;
|
31
|
+
}
|
32
|
+
|
33
|
+
/* Use the index to read len bytes from offset into buf, return bytes read or
|
34
|
+
negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
|
35
|
+
the end of the uncompressed data, then extract() will return a value less
|
36
|
+
than len, indicating how much as actually read into buf. This function
|
37
|
+
should not return a data error unless the file was modified since the index
|
38
|
+
was generated. extract() may also return Z_ERRNO if there is an error on
|
39
|
+
reading or seeking the input file. */
|
40
|
+
int ug_gzip_cat(FILE * in, uint64_t time, FILE * offset_index, FILE * gz_index)
|
41
|
+
{
|
42
|
+
int ret, bits;
|
43
|
+
off_t uncompressed_offset, compressed_offset;
|
44
|
+
z_stream strm;
|
45
|
+
unsigned char input[CHUNK];
|
46
|
+
unsigned char output[WINSIZE], dict[WINSIZE];
|
47
|
+
|
48
|
+
/* initialize file and inflate state to start there */
|
49
|
+
strm.zalloc = Z_NULL;
|
50
|
+
strm.zfree = Z_NULL;
|
51
|
+
strm.opaque = Z_NULL;
|
52
|
+
strm.avail_in = 0;
|
53
|
+
strm.next_in = Z_NULL;
|
54
|
+
|
55
|
+
|
56
|
+
bzero(dict, WINSIZE);
|
57
|
+
|
58
|
+
if (gz_index && offset_index) {
|
59
|
+
uncompressed_offset = ug_get_offset_for_timestamp(offset_index, time);
|
60
|
+
fill_gz_info(uncompressed_offset, gz_index, dict, &compressed_offset);
|
61
|
+
|
62
|
+
bits = compressed_offset >> 56;
|
63
|
+
compressed_offset = (compressed_offset & 0x00FFFFFFFFFFFFFF) - (bits ? 1 : 0);
|
64
|
+
|
65
|
+
ret = inflateInit2(&strm, -15); /* raw inflate */
|
66
|
+
if (ret != Z_OK)
|
67
|
+
return ret;
|
68
|
+
|
69
|
+
ret = fseeko(in, compressed_offset, SEEK_SET);
|
70
|
+
|
71
|
+
if (ret != Z_OK)
|
72
|
+
return ret;
|
73
|
+
} else {
|
74
|
+
compressed_offset = bits = 0;
|
75
|
+
strm.avail_in = fread(input, 1, CHUNK, in);
|
76
|
+
strm.next_in = input;
|
77
|
+
|
78
|
+
ret = inflateInit2(&strm, 47);
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
if (ret == -1)
|
83
|
+
goto extract_ret;
|
84
|
+
if (bits) {
|
85
|
+
ret = getc(in);
|
86
|
+
if (ret == -1) {
|
87
|
+
ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
|
88
|
+
goto extract_ret;
|
89
|
+
}
|
90
|
+
(void) inflatePrime(&strm, bits, ret >> (8 - bits));
|
91
|
+
}
|
92
|
+
|
93
|
+
if (compressed_offset > 0)
|
94
|
+
inflateSetDictionary(&strm, dict, WINSIZE);
|
95
|
+
|
96
|
+
for (;;) {
|
97
|
+
strm.avail_out = WINSIZE;
|
98
|
+
strm.next_out = output;
|
99
|
+
|
100
|
+
if (!strm.avail_in) {
|
101
|
+
strm.avail_in = fread(input, 1, CHUNK, in);
|
102
|
+
strm.next_in = input;
|
103
|
+
}
|
104
|
+
|
105
|
+
if (ferror(in)) {
|
106
|
+
ret = Z_ERRNO;
|
107
|
+
goto extract_ret;
|
108
|
+
}
|
109
|
+
|
110
|
+
if (strm.avail_in == 0) {
|
111
|
+
ret = Z_DATA_ERROR;
|
112
|
+
goto extract_ret;
|
113
|
+
}
|
114
|
+
|
115
|
+
ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
|
116
|
+
|
117
|
+
if (ret == Z_NEED_DICT)
|
118
|
+
ret = Z_DATA_ERROR;
|
119
|
+
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
|
120
|
+
goto extract_ret;
|
121
|
+
|
122
|
+
fwrite(output, WINSIZE - strm.avail_out, 1, stdout);
|
123
|
+
|
124
|
+
/* if reach end of stream, then don't keep trying to get more */
|
125
|
+
if (ret == Z_STREAM_END)
|
126
|
+
break;
|
127
|
+
}
|
128
|
+
|
129
|
+
/* clean up and return bytes read or error */
|
130
|
+
extract_ret:
|
131
|
+
(void) inflateEnd(&strm);
|
132
|
+
return ret;
|
133
|
+
}
|
134
|
+
/*
|
135
|
+
* ug_cat -- given a log file and (possibly) a file + (timestamp -> offset) index, cat the file starting
|
136
|
+
* from about that timestamp
|
137
|
+
*/
|
138
|
+
|
139
|
+
#define USAGE "Usage: ug_cat file timestamp\n"
|
140
|
+
|
141
|
+
int main(int argc, char **argv)
|
142
|
+
{
|
143
|
+
int nread;
|
144
|
+
FILE *log;
|
145
|
+
FILE *index;
|
146
|
+
char *log_fname, *index_fname, buf[4096];
|
147
|
+
|
148
|
+
if (argc < 3) {
|
149
|
+
fprintf(stderr, USAGE);
|
150
|
+
exit(1);
|
151
|
+
}
|
152
|
+
|
153
|
+
log_fname = argv[1];
|
154
|
+
|
155
|
+
log = fopen(log_fname, "r");
|
156
|
+
if (!log) {
|
157
|
+
perror("Couldn't open log file");
|
158
|
+
exit(1);
|
159
|
+
}
|
160
|
+
|
161
|
+
index_fname = ug_get_index_fname(log_fname, "idx");
|
162
|
+
|
163
|
+
index = fopen(index_fname, "r");
|
164
|
+
if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
|
165
|
+
char *gzidx_fname;
|
166
|
+
FILE *gzidx;
|
167
|
+
|
168
|
+
if (index) {
|
169
|
+
gzidx_fname = ug_get_index_fname(log_fname, "gzidx");
|
170
|
+
gzidx = fopen(gzidx_fname, "r");
|
171
|
+
if (!gzidx) {
|
172
|
+
perror("error opening gzidx component");
|
173
|
+
exit(1);
|
174
|
+
}
|
175
|
+
ug_gzip_cat(log, atol(argv[2]), index, gzidx);
|
176
|
+
|
177
|
+
} else {
|
178
|
+
ug_gzip_cat(log, atol(argv[2]), NULL, NULL);
|
179
|
+
|
180
|
+
}
|
181
|
+
} else {
|
182
|
+
if (index)
|
183
|
+
fseeko(log, ug_get_offset_for_timestamp(index, atol(argv[2])), SEEK_SET);
|
184
|
+
|
185
|
+
while ((nread = fread(buf, 1, 4096, log)))
|
186
|
+
fwrite(buf, 1, nread, stdout);
|
187
|
+
}
|
188
|
+
}
|
data/src/ug_guts.c
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
// ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <time.h>
|
6
|
+
#include <unistd.h>
|
7
|
+
#include <lua.h>
|
8
|
+
#include "pcre.h"
|
9
|
+
#include "request.h"
|
10
|
+
#include "ug_lua.h"
|
11
|
+
|
12
|
+
struct ug_regexp {
|
13
|
+
int invert;
|
14
|
+
pcre *re;
|
15
|
+
};
|
16
|
+
|
17
|
+
/*
 * Run-time settings of ug_guts, filled in by parse_args().
 */
typedef struct {
    time_t            start_time;   /* -s: earliest request time to print */
    time_t            end_time;     /* -e: latest request time to print */
    int               num_regexps;  /* number of entries in regexps */
    struct ug_regexp *regexps;      /* patterns every printed request must satisfy */
    char             *lua_file;     /* -l: Lua script handed to ug_lua_init() */
    char             *in_file;      /* -f: input file; NULL means read stdin */
} context_t;

/* The single, file-wide settings instance. */
static context_t ctx;

/*
 * getopt() option string.  Note that "k:" is accepted by getopt() here but
 * parse_args() has no case for it, so it ends up in the default branch.
 */
static const char *commandparams = "l:s:e:k:f:";

static const char *usage = "Usage: ug_guts [-f input] -l file.lua -s start_time -e end_time regexps [... regexps]\n\n";
|
30
|
+
|
31
|
+
int parse_args(int argc, char **argv)
|
32
|
+
{
|
33
|
+
extern char *optarg;
|
34
|
+
extern int optind;
|
35
|
+
const char *error;
|
36
|
+
int erroffset, optValue=0, retValue=1, i;
|
37
|
+
ctx.start_time = -1;
|
38
|
+
ctx.end_time = -1;
|
39
|
+
ctx.lua_file = NULL;
|
40
|
+
|
41
|
+
while ((optValue = getopt(argc, argv, commandparams))!= -1) {
|
42
|
+
switch (optValue) {
|
43
|
+
case 'f':
|
44
|
+
ctx.in_file = strdup(optarg);
|
45
|
+
break;
|
46
|
+
case 'l':
|
47
|
+
ctx.lua_file = strdup(optarg);
|
48
|
+
break;
|
49
|
+
case 's':
|
50
|
+
ctx.start_time = atol(optarg);
|
51
|
+
break;
|
52
|
+
case 'e':
|
53
|
+
ctx.end_time = atol(optarg);
|
54
|
+
break;
|
55
|
+
case '?':
|
56
|
+
return(-1);
|
57
|
+
break;
|
58
|
+
case -1: //Options exhausted
|
59
|
+
break;
|
60
|
+
default:
|
61
|
+
return(-1);
|
62
|
+
}
|
63
|
+
}
|
64
|
+
if ( ctx.lua_file == NULL || ctx.start_time < 0 || ctx.end_time < 0 ) { // mandatory fields
|
65
|
+
return(-1);
|
66
|
+
}
|
67
|
+
else if ((optind + 1 ) > argc) { // Need at least one argument after options
|
68
|
+
return(-1);
|
69
|
+
}
|
70
|
+
|
71
|
+
if (optind < argc) { // regexps follow after command-line options
|
72
|
+
ctx.num_regexps = argc - optind;
|
73
|
+
ctx.regexps = malloc(sizeof(struct ug_regexp) * ctx.num_regexps);
|
74
|
+
bzero(ctx.regexps, sizeof(struct ug_regexp) * ctx.num_regexps);
|
75
|
+
|
76
|
+
for (i=0; optind < argc; ++optind, i++) {
|
77
|
+
char *p = argv[optind];
|
78
|
+
if ( p[0] == '!' || p[0] == '+' ) {
|
79
|
+
ctx.regexps[i].invert = p[0] == '!';
|
80
|
+
p++;
|
81
|
+
}
|
82
|
+
|
83
|
+
ctx.regexps[i].re = pcre_compile(p, 0, &error, &erroffset, NULL);
|
84
|
+
if (error) {
|
85
|
+
fprintf(stderr, "Error compiling regexp \"%s\": %s\n", argv[optind], error);
|
86
|
+
exit(1);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return retValue;
|
91
|
+
}
|
92
|
+
|
93
|
+
int check_request(char *request, struct ug_regexp *regexps, int num_regexps)
|
94
|
+
{
|
95
|
+
int j, matched, ovector[30];
|
96
|
+
|
97
|
+
for (j = 0; j < num_regexps; j++) {
|
98
|
+
matched = pcre_exec(regexps[j].re, NULL, request, strlen(request), 0, 0, ovector, 30);
|
99
|
+
if ( matched < 0 && !regexps[j].invert )
|
100
|
+
return 0;
|
101
|
+
else if ( matched >= 0 && regexps[j].invert )
|
102
|
+
return 0;
|
103
|
+
}
|
104
|
+
|
105
|
+
return 1;
|
106
|
+
}
|
107
|
+
|
108
|
+
/*
 * Print a request followed by a separator line of dashes.
 *
 * The separator length is derived from the request's last line (trailing
 * newlines are skipped first): one dash fewer than the number of characters
 * stepped over while walking back to the preceding newline or to the start
 * of the text, capped at 80.  stdout is flushed afterwards.
 *
 * An empty request prints just the terminating newline.
 */
void print_request(char *request)
{
    size_t len = strlen(request);
    int i, last_line_len = 0;

    fputs(request, stdout);

    /* only walk backwards when there is a last character to start from */
    if (len > 0) {
        const char *p = request + (len - 1);

        /* skip trailing newlines */
        while (p > request && *p == '\n')
            p--;

        /* measure the last line */
        while (p > request && *p != '\n') {
            p--;
            last_line_len++;
        }
    }

    for (i = 0; i < (last_line_len - 1) && i < 80; i++)
        putchar('-');

    putchar('\n');
    fflush(stdout);
}
|
131
|
+
|
132
|
+
|
133
|
+
/*
 * Print a request if it falls inside [ctx.start_time, ctx.end_time] and
 * satisfies all patterns, and emit "@@<time>" markers as time advances.
 *
 * A request without a time (req->time == 0) inherits the time of the most
 * recent marker.  A matching request is preceded by "@@<time>" unless its
 * time is still 0.
 *
 * NOTE(review): presumably invoked by the request-parsing layer for every
 * request it assembles -- the caller is not visible in this file.
 */
void handle_request(request_t * req)
{
    static time_t last_marker = 0;

    if (!req->time)
        req->time = last_marker;

    if (req->time >= ctx.start_time
        && req->time <= ctx.end_time
        && check_request(req->buf, ctx.regexps, ctx.num_regexps)) {
        if (req->time != 0) {
            /* cast so the argument matches the %lu conversion */
            printf("@@%lu\n", (unsigned long) req->time);
        }
        print_request(req->buf);
    }

    /* print a time-marker every second -- allows collections of logs with one sparse
       log to proceed */
    if (req->time > last_marker) {
        last_marker = req->time;
        printf("@@%lu\n", (unsigned long) last_marker);
    }
}
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
int main(int argc, char **argv)
|
159
|
+
{
|
160
|
+
lua_State *lua;
|
161
|
+
ssize_t line_size;
|
162
|
+
FILE *file = NULL;
|
163
|
+
char *line = NULL;
|
164
|
+
size_t allocated = 0, offset = 0;
|
165
|
+
if (argc < 5) {
|
166
|
+
fprintf(stderr, "%s", usage);
|
167
|
+
exit(1);
|
168
|
+
}
|
169
|
+
|
170
|
+
bzero(&ctx, sizeof(context_t));
|
171
|
+
if ( parse_args(argc, argv) == -1 ) {
|
172
|
+
fprintf(stderr, "%s", usage);
|
173
|
+
exit(1);
|
174
|
+
}
|
175
|
+
|
176
|
+
lua = ug_lua_init(ctx.lua_file);
|
177
|
+
if ( !lua )
|
178
|
+
exit(1);
|
179
|
+
|
180
|
+
if ( ctx.in_file ) {
|
181
|
+
file = fopen(ctx.in_file, "r");
|
182
|
+
if ( !file ) {
|
183
|
+
perror(ctx.in_file);
|
184
|
+
exit(1);
|
185
|
+
}
|
186
|
+
} else {
|
187
|
+
file = stdin;
|
188
|
+
}
|
189
|
+
|
190
|
+
while (1) {
|
191
|
+
line_size = getline(&line, &allocated, file);
|
192
|
+
if ( line_size < 0 )
|
193
|
+
break;
|
194
|
+
|
195
|
+
ug_process_line(lua, line, line_size, offset);
|
196
|
+
offset += line_size;
|
197
|
+
}
|
198
|
+
ug_lua_on_eof(lua);
|
199
|
+
}
|