ultragrep 0.1.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/ultragrep_build_indexes +45 -0
- data/lib/ultragrep.rb +47 -61
- data/lib/ultragrep/config.rb +6 -0
- data/lib/ultragrep/log_collector.rb +67 -0
- data/lib/ultragrep/version.rb +1 -1
- data/src/Makefile +24 -0
- data/{ext/ultragrep → src}/extconf.rb +0 -0
- data/src/pcre.h +668 -0
- data/src/request.h +13 -0
- data/src/ug_build_index.c +109 -0
- data/src/ug_cat.c +188 -0
- data/src/ug_guts.c +199 -0
- data/src/ug_gzip.c +242 -0
- data/src/ug_gzip.h +8 -0
- data/src/ug_index.c +62 -0
- data/src/ug_index.h +23 -0
- data/src/ug_lua.c +119 -0
- data/src/ug_lua.h +10 -0
- metadata +25 -28
- data/ext/ultragrep/Makefile +0 -39
- data/ext/ultragrep/rails_req.c +0 -102
- data/ext/ultragrep/rails_req.h +0 -6
- data/ext/ultragrep/req_matcher.h +0 -17
- data/ext/ultragrep/request.c +0 -41
- data/ext/ultragrep/request.h +0 -22
- data/ext/ultragrep/ug_build_index.c +0 -99
- data/ext/ultragrep/ug_cat.c +0 -46
- data/ext/ultragrep/ug_guts.c +0 -138
- data/ext/ultragrep/ug_index.c +0 -83
- data/ext/ultragrep/ug_index.h +0 -27
- data/ext/ultragrep/work_req.c +0 -200
- data/ext/ultragrep/work_req.h +0 -6
- data/ext/ultragrep/zran.c +0 -291
data/src/ug_build_index.c
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
// ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <getopt.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <errno.h>
|
7
|
+
#include <time.h>
|
8
|
+
#include <unistd.h>
|
9
|
+
#include "pcre.h"
|
10
|
+
#include "request.h"
|
11
|
+
#include "ug_index.h"
|
12
|
+
#include "ug_lua.h"
|
13
|
+
#include "ug_gzip.h"
|
14
|
+
|
15
|
+
/* Usage text; main() prints it to stderr when fewer than two arguments are given. */
#define USAGE "Usage: ug_build_index process.lua file\n"
|
16
|
+
|
17
|
+
// index file format
|
18
|
+
// [64bit,64bit] -- timestamp, file offset
|
19
|
+
// [32bit, Nbytes] -- extra data
|
20
|
+
|
21
|
+
/* File-wide indexing state: the log and index streams, the Lua handle and the
 * timestamp of the last index entry written. main() zeroes it before use. */
static build_idx_context_t ctx;
|
22
|
+
|
23
|
+
|
24
|
+
/*
 * Record an index entry for a parsed request.
 *
 * The request time is rounded down to a multiple of INDEX_EVERY. An entry
 * (rounded time, req->offset) is written to ctx.findex only for the first
 * request seen in each such bucket, i.e. when no entry has been written yet
 * or the bucket is later than the last one written.
 */
void handle_request(request_t *req)
{
    time_t bucket = req->time - (req->time % INDEX_EVERY);

    /* Already have an entry for this bucket (or a later one): nothing to do. */
    if (ctx.last_index_time && bucket <= ctx.last_index_time)
        return;

    ug_write_index(ctx.findex, bucket, req->offset);
    ctx.last_index_time = bucket;
}
|
33
|
+
|
34
|
+
void open_indexes(char *log_fname)
|
35
|
+
{
|
36
|
+
char *index_fname, *gz_index_fname;
|
37
|
+
|
38
|
+
index_fname = ug_get_index_fname(log_fname, "idx");
|
39
|
+
|
40
|
+
if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
|
41
|
+
gz_index_fname = ug_get_index_fname(log_fname, "gzidx");
|
42
|
+
/* we don't do incremental index building in gzipped files -- we just truncate and
|
43
|
+
* build over*/
|
44
|
+
ctx.findex = fopen(index_fname, "w+");
|
45
|
+
ctx.fgzindex = fopen(gz_index_fname, "w+");
|
46
|
+
|
47
|
+
if (!ctx.findex || !ctx.fgzindex) {
|
48
|
+
fprintf(stderr, "Couldn't open index files '%s','%s': %s\n", index_fname, gz_index_fname, strerror(errno));
|
49
|
+
exit(1);
|
50
|
+
}
|
51
|
+
} else {
|
52
|
+
ctx.findex = fopen(index_fname, "r+");
|
53
|
+
if (ctx.findex) {
|
54
|
+
/* seek in the log, (and the index, with get_offset_for_timestamp()) to the
|
55
|
+
* last timestamp we indexed */
|
56
|
+
fseeko(ctx.flog, ug_get_offset_for_timestamp(ctx.findex, -1), SEEK_SET);
|
57
|
+
} else {
|
58
|
+
ctx.findex = fopen(index_fname, "w+");
|
59
|
+
}
|
60
|
+
if (!ctx.findex) {
|
61
|
+
fprintf(stderr, "Couldn't open index file '%s': %s\n", index_fname, strerror(errno));
|
62
|
+
exit(1);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
int main(int argc, char **argv)
|
68
|
+
{
|
69
|
+
char *line = NULL, *lua_fname, *log_fname;
|
70
|
+
ssize_t line_size;
|
71
|
+
size_t allocated;
|
72
|
+
|
73
|
+
if (argc < 3) {
|
74
|
+
fprintf(stderr, USAGE);
|
75
|
+
exit(1);
|
76
|
+
}
|
77
|
+
|
78
|
+
lua_fname = argv[1];
|
79
|
+
log_fname = argv[2];
|
80
|
+
|
81
|
+
bzero(&ctx, sizeof(build_idx_context_t));
|
82
|
+
|
83
|
+
ctx.lua = ug_lua_init(lua_fname);
|
84
|
+
|
85
|
+
ctx.flog = fopen(log_fname, "r");
|
86
|
+
if (!ctx.flog) {
|
87
|
+
perror("Couldn't open log file");
|
88
|
+
exit(1);
|
89
|
+
}
|
90
|
+
|
91
|
+
open_indexes(log_fname);
|
92
|
+
|
93
|
+
if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
|
94
|
+
build_gz_index(&ctx);
|
95
|
+
} else {
|
96
|
+
while (1) {
|
97
|
+
off_t offset;
|
98
|
+
offset = ftello(ctx.flog);
|
99
|
+
line_size = getline(&line, &allocated, ctx.flog);
|
100
|
+
|
101
|
+
if ( line_size < 0 )
|
102
|
+
break;
|
103
|
+
|
104
|
+
ug_process_line(ctx.lua, line, line_size, offset);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
ug_lua_on_eof(ctx.lua);
|
108
|
+
exit(0);
|
109
|
+
}
|
data/src/ug_cat.c
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
// ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
|
2
|
+
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <libgen.h>
|
7
|
+
#include "ug_index.h"
|
8
|
+
#include "ug_gzip.h"
|
9
|
+
#include "zlib.h"
|
10
|
+
|
11
|
+
/* target_offset is the offset in the uncompressed stream we're looking for. */
|
12
|
+
void fill_gz_info(off_t target_offset, FILE * gz_index, unsigned char *dict_data, off_t * compressed_offset)
|
13
|
+
{
|
14
|
+
off_t uncompressed_offset = 0;
|
15
|
+
|
16
|
+
for (;;) {
|
17
|
+
if (!fread(&uncompressed_offset, sizeof(off_t), 1, gz_index))
|
18
|
+
break;
|
19
|
+
|
20
|
+
if (uncompressed_offset > target_offset) {
|
21
|
+
return;
|
22
|
+
}
|
23
|
+
|
24
|
+
if (!fread(compressed_offset, sizeof(off_t), 1, gz_index))
|
25
|
+
break;
|
26
|
+
|
27
|
+
if (!fread(dict_data, WINSIZE, 1, gz_index))
|
28
|
+
break;
|
29
|
+
}
|
30
|
+
return;
|
31
|
+
}
|
32
|
+
|
33
|
+
/* Use the index to read len bytes from offset into buf, return bytes read or
|
34
|
+
negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
|
35
|
+
the end of the uncompressed data, then extract() will return a value less
|
36
|
+
than len, indicating how much as actually read into buf. This function
|
37
|
+
should not return a data error unless the file was modified since the index
|
38
|
+
was generated. extract() may also return Z_ERRNO if there is an error on
|
39
|
+
reading or seeking the input file. */
|
40
|
+
int ug_gzip_cat(FILE * in, uint64_t time, FILE * offset_index, FILE * gz_index)
|
41
|
+
{
|
42
|
+
int ret, bits;
|
43
|
+
off_t uncompressed_offset, compressed_offset;
|
44
|
+
z_stream strm;
|
45
|
+
unsigned char input[CHUNK];
|
46
|
+
unsigned char output[WINSIZE], dict[WINSIZE];
|
47
|
+
|
48
|
+
/* initialize file and inflate state to start there */
|
49
|
+
strm.zalloc = Z_NULL;
|
50
|
+
strm.zfree = Z_NULL;
|
51
|
+
strm.opaque = Z_NULL;
|
52
|
+
strm.avail_in = 0;
|
53
|
+
strm.next_in = Z_NULL;
|
54
|
+
|
55
|
+
|
56
|
+
bzero(dict, WINSIZE);
|
57
|
+
|
58
|
+
if (gz_index && offset_index) {
|
59
|
+
uncompressed_offset = ug_get_offset_for_timestamp(offset_index, time);
|
60
|
+
fill_gz_info(uncompressed_offset, gz_index, dict, &compressed_offset);
|
61
|
+
|
62
|
+
bits = compressed_offset >> 56;
|
63
|
+
compressed_offset = (compressed_offset & 0x00FFFFFFFFFFFFFF) - (bits ? 1 : 0);
|
64
|
+
|
65
|
+
ret = inflateInit2(&strm, -15); /* raw inflate */
|
66
|
+
if (ret != Z_OK)
|
67
|
+
return ret;
|
68
|
+
|
69
|
+
ret = fseeko(in, compressed_offset, SEEK_SET);
|
70
|
+
|
71
|
+
if (ret != Z_OK)
|
72
|
+
return ret;
|
73
|
+
} else {
|
74
|
+
compressed_offset = bits = 0;
|
75
|
+
strm.avail_in = fread(input, 1, CHUNK, in);
|
76
|
+
strm.next_in = input;
|
77
|
+
|
78
|
+
ret = inflateInit2(&strm, 47);
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
if (ret == -1)
|
83
|
+
goto extract_ret;
|
84
|
+
if (bits) {
|
85
|
+
ret = getc(in);
|
86
|
+
if (ret == -1) {
|
87
|
+
ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
|
88
|
+
goto extract_ret;
|
89
|
+
}
|
90
|
+
(void) inflatePrime(&strm, bits, ret >> (8 - bits));
|
91
|
+
}
|
92
|
+
|
93
|
+
if (compressed_offset > 0)
|
94
|
+
inflateSetDictionary(&strm, dict, WINSIZE);
|
95
|
+
|
96
|
+
for (;;) {
|
97
|
+
strm.avail_out = WINSIZE;
|
98
|
+
strm.next_out = output;
|
99
|
+
|
100
|
+
if (!strm.avail_in) {
|
101
|
+
strm.avail_in = fread(input, 1, CHUNK, in);
|
102
|
+
strm.next_in = input;
|
103
|
+
}
|
104
|
+
|
105
|
+
if (ferror(in)) {
|
106
|
+
ret = Z_ERRNO;
|
107
|
+
goto extract_ret;
|
108
|
+
}
|
109
|
+
|
110
|
+
if (strm.avail_in == 0) {
|
111
|
+
ret = Z_DATA_ERROR;
|
112
|
+
goto extract_ret;
|
113
|
+
}
|
114
|
+
|
115
|
+
ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
|
116
|
+
|
117
|
+
if (ret == Z_NEED_DICT)
|
118
|
+
ret = Z_DATA_ERROR;
|
119
|
+
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
|
120
|
+
goto extract_ret;
|
121
|
+
|
122
|
+
fwrite(output, WINSIZE - strm.avail_out, 1, stdout);
|
123
|
+
|
124
|
+
/* if reach end of stream, then don't keep trying to get more */
|
125
|
+
if (ret == Z_STREAM_END)
|
126
|
+
break;
|
127
|
+
}
|
128
|
+
|
129
|
+
/* clean up and return bytes read or error */
|
130
|
+
extract_ret:
|
131
|
+
(void) inflateEnd(&strm);
|
132
|
+
return ret;
|
133
|
+
}
|
134
|
+
/*
|
135
|
+
* ug_cat -- given a log file and (possibly) a file + (timestamp -> offset) index, cat the file starting
|
136
|
+
* from about that timestamp
|
137
|
+
*/
|
138
|
+
|
139
|
+
/* Usage text; main() prints it to stderr when fewer than two arguments are given. */
#define USAGE "Usage: ug_cat file timestamp\n"
|
140
|
+
|
141
|
+
int main(int argc, char **argv)
|
142
|
+
{
|
143
|
+
int nread;
|
144
|
+
FILE *log;
|
145
|
+
FILE *index;
|
146
|
+
char *log_fname, *index_fname, buf[4096];
|
147
|
+
|
148
|
+
if (argc < 3) {
|
149
|
+
fprintf(stderr, USAGE);
|
150
|
+
exit(1);
|
151
|
+
}
|
152
|
+
|
153
|
+
log_fname = argv[1];
|
154
|
+
|
155
|
+
log = fopen(log_fname, "r");
|
156
|
+
if (!log) {
|
157
|
+
perror("Couldn't open log file");
|
158
|
+
exit(1);
|
159
|
+
}
|
160
|
+
|
161
|
+
index_fname = ug_get_index_fname(log_fname, "idx");
|
162
|
+
|
163
|
+
index = fopen(index_fname, "r");
|
164
|
+
if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
|
165
|
+
char *gzidx_fname;
|
166
|
+
FILE *gzidx;
|
167
|
+
|
168
|
+
if (index) {
|
169
|
+
gzidx_fname = ug_get_index_fname(log_fname, "gzidx");
|
170
|
+
gzidx = fopen(gzidx_fname, "r");
|
171
|
+
if (!gzidx) {
|
172
|
+
perror("error opening gzidx component");
|
173
|
+
exit(1);
|
174
|
+
}
|
175
|
+
ug_gzip_cat(log, atol(argv[2]), index, gzidx);
|
176
|
+
|
177
|
+
} else {
|
178
|
+
ug_gzip_cat(log, atol(argv[2]), NULL, NULL);
|
179
|
+
|
180
|
+
}
|
181
|
+
} else {
|
182
|
+
if (index)
|
183
|
+
fseeko(log, ug_get_offset_for_timestamp(index, atol(argv[2])), SEEK_SET);
|
184
|
+
|
185
|
+
while ((nread = fread(buf, 1, 4096, log)))
|
186
|
+
fwrite(buf, 1, nread, stdout);
|
187
|
+
}
|
188
|
+
}
|
data/src/ug_guts.c
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
// ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <time.h>
|
6
|
+
#include <unistd.h>
|
7
|
+
#include <lua.h>
|
8
|
+
#include "pcre.h"
|
9
|
+
#include "request.h"
|
10
|
+
#include "ug_lua.h"
|
11
|
+
|
12
|
+
struct ug_regexp {
|
13
|
+
int invert;
|
14
|
+
pcre *re;
|
15
|
+
};
|
16
|
+
|
17
|
+
/* Search settings gathered from the command line by parse_args(). */
typedef struct {
    time_t start_time, end_time;    /* inclusive time window; -1 until set */
    int num_regexps;                /* number of entries in regexps */
    struct ug_regexp *regexps;      /* patterns every request is checked against */
    char *lua_file, *in_file;       /* -l script; -f input (NULL means stdin) */
} context_t;
|
25
|
+
|
26
|
+
/* File-wide search settings; zeroed in main() and filled in by parse_args(). */
static context_t ctx;
|
27
|
+
|
28
|
+
/* getopt() option string: -l, -s, -e, -k and -f each take an argument.
 * NOTE(review): parse_args() has no case for 'k', so -k lands in its default
 * branch and is rejected -- confirm whether "k:" is a leftover. */
static const char* commandparams="l:s:e:k:f:";
|
29
|
+
/* Usage text; main() prints it to stderr when there are too few arguments or
 * parse_args() fails. */
static const char* usage ="Usage: ug_guts [-f input] -l file.lua -s start_time -e end_time regexps [... regexps]\n\n";
|
30
|
+
|
31
|
+
int parse_args(int argc, char **argv)
|
32
|
+
{
|
33
|
+
extern char *optarg;
|
34
|
+
extern int optind;
|
35
|
+
const char *error;
|
36
|
+
int erroffset, optValue=0, retValue=1, i;
|
37
|
+
ctx.start_time = -1;
|
38
|
+
ctx.end_time = -1;
|
39
|
+
ctx.lua_file = NULL;
|
40
|
+
|
41
|
+
while ((optValue = getopt(argc, argv, commandparams))!= -1) {
|
42
|
+
switch (optValue) {
|
43
|
+
case 'f':
|
44
|
+
ctx.in_file = strdup(optarg);
|
45
|
+
break;
|
46
|
+
case 'l':
|
47
|
+
ctx.lua_file = strdup(optarg);
|
48
|
+
break;
|
49
|
+
case 's':
|
50
|
+
ctx.start_time = atol(optarg);
|
51
|
+
break;
|
52
|
+
case 'e':
|
53
|
+
ctx.end_time = atol(optarg);
|
54
|
+
break;
|
55
|
+
case '?':
|
56
|
+
return(-1);
|
57
|
+
break;
|
58
|
+
case -1: //Options exhausted
|
59
|
+
break;
|
60
|
+
default:
|
61
|
+
return(-1);
|
62
|
+
}
|
63
|
+
}
|
64
|
+
if ( ctx.lua_file == NULL || ctx.start_time < 0 || ctx.end_time < 0 ) { // mandatory fields
|
65
|
+
return(-1);
|
66
|
+
}
|
67
|
+
else if ((optind + 1 ) > argc) { // Need at least one argument after options
|
68
|
+
return(-1);
|
69
|
+
}
|
70
|
+
|
71
|
+
if (optind < argc) { // regexps follow after command-line options
|
72
|
+
ctx.num_regexps = argc - optind;
|
73
|
+
ctx.regexps = malloc(sizeof(struct ug_regexp) * ctx.num_regexps);
|
74
|
+
bzero(ctx.regexps, sizeof(struct ug_regexp) * ctx.num_regexps);
|
75
|
+
|
76
|
+
for (i=0; optind < argc; ++optind, i++) {
|
77
|
+
char *p = argv[optind];
|
78
|
+
if ( p[0] == '!' || p[0] == '+' ) {
|
79
|
+
ctx.regexps[i].invert = p[0] == '!';
|
80
|
+
p++;
|
81
|
+
}
|
82
|
+
|
83
|
+
ctx.regexps[i].re = pcre_compile(p, 0, &error, &erroffset, NULL);
|
84
|
+
if (error) {
|
85
|
+
fprintf(stderr, "Error compiling regexp \"%s\": %s\n", argv[optind], error);
|
86
|
+
exit(1);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return retValue;
|
91
|
+
}
|
92
|
+
|
93
|
+
int check_request(char *request, struct ug_regexp *regexps, int num_regexps)
|
94
|
+
{
|
95
|
+
int j, matched, ovector[30];
|
96
|
+
|
97
|
+
for (j = 0; j < num_regexps; j++) {
|
98
|
+
matched = pcre_exec(regexps[j].re, NULL, request, strlen(request), 0, 0, ovector, 30);
|
99
|
+
if ( matched < 0 && !regexps[j].invert )
|
100
|
+
return 0;
|
101
|
+
else if ( matched >= 0 && regexps[j].invert )
|
102
|
+
return 0;
|
103
|
+
}
|
104
|
+
|
105
|
+
return 1;
|
106
|
+
}
|
107
|
+
|
108
|
+
/*
 * Print a request to stdout followed by a line of dashes and a newline.
 *
 * The number of dashes is derived from the length of the request's last line
 * (ignoring trailing newlines), capped at 80. stdout is flushed afterwards.
 * An empty request prints just the terminating newline.
 */
void print_request(char *request)
{
    size_t len = strlen(request);
    int i, last_line_len = 0;
    char *p;

    fputs(request, stdout);

    /* Only walk backwards when there is at least one character; otherwise
     * the starting pointer would lie before the buffer. */
    if (len > 0) {
        p = request + (len - 1);

        /* skip trailing newlines */
        while (p > request && (*p == '\n'))
            p--;

        /* count back to the previous newline or the start of the buffer */
        while (p > request && (*p != '\n')) {
            p--;
            last_line_len++;
        }
    }

    for (i = 0; i < (last_line_len - 1) && i < 80; i++)
        putchar('-');

    putchar('\n');
    fflush(stdout);
}
|
131
|
+
|
132
|
+
|
133
|
+
/*
 * Handle one parsed request.
 *
 * A request without a time (req->time == 0) inherits the most recent time
 * seen so far. If the request falls inside [ctx.start_time, ctx.end_time] and
 * passes check_request(), it is printed, preceded by an "@@<time>" line when
 * its time is non-zero. Independently, whenever the request time advances
 * past the last one seen, an "@@<time>" marker line is printed.
 */
void handle_request(request_t * req)
{
    /* Latest request time seen so far across calls. */
    static time_t last_seen = 0;

    if (!req->time)
        req->time = last_seen;

    if ((req->time >= ctx.start_time
         && req->time <= ctx.end_time
         && check_request(req->buf, ctx.regexps, ctx.num_regexps))) {
        if (req->time != 0) {
            /* cast so the argument matches the %lu conversion */
            printf("@@%lu\n", (unsigned long) req->time);
        }
        print_request(req->buf);
    }

    /* print a time-marker every second -- allows collections of logs with one
       sparse log to proceed */
    if (req->time > last_seen) {
        last_seen = req->time;
        printf("@@%lu\n", (unsigned long) last_seen);
    }
}
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
int main(int argc, char **argv)
|
159
|
+
{
|
160
|
+
lua_State *lua;
|
161
|
+
ssize_t line_size;
|
162
|
+
FILE *file = NULL;
|
163
|
+
char *line = NULL;
|
164
|
+
size_t allocated = 0, offset = 0;
|
165
|
+
if (argc < 5) {
|
166
|
+
fprintf(stderr, "%s", usage);
|
167
|
+
exit(1);
|
168
|
+
}
|
169
|
+
|
170
|
+
bzero(&ctx, sizeof(context_t));
|
171
|
+
if ( parse_args(argc, argv) == -1 ) {
|
172
|
+
fprintf(stderr, "%s", usage);
|
173
|
+
exit(1);
|
174
|
+
}
|
175
|
+
|
176
|
+
lua = ug_lua_init(ctx.lua_file);
|
177
|
+
if ( !lua )
|
178
|
+
exit(1);
|
179
|
+
|
180
|
+
if ( ctx.in_file ) {
|
181
|
+
file = fopen(ctx.in_file, "r");
|
182
|
+
if ( !file ) {
|
183
|
+
perror(ctx.in_file);
|
184
|
+
exit(1);
|
185
|
+
}
|
186
|
+
} else {
|
187
|
+
file = stdin;
|
188
|
+
}
|
189
|
+
|
190
|
+
while (1) {
|
191
|
+
line_size = getline(&line, &allocated, file);
|
192
|
+
if ( line_size < 0 )
|
193
|
+
break;
|
194
|
+
|
195
|
+
ug_process_line(lua, line, line_size, offset);
|
196
|
+
offset += line_size;
|
197
|
+
}
|
198
|
+
ug_lua_on_eof(lua);
|
199
|
+
}
|