ultragrep 0.1.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
/*
 * request.h -- the record handed to handle_request().
 *
 * Each tool that links against this header supplies its own
 * handle_request() definition.
 */
#ifndef __REQUEST_H__
#define __REQUEST_H__

#include <sys/types.h>          /* off_t -- needed so this header compiles on its own */
#include <time.h>               /* time_t */

typedef struct request_t {
    char *buf;                  /* NUL-terminated request text (printed and regexp-matched by consumers) */
    off_t offset;               /* offset recorded in the index alongside the timestamp */
    time_t time;                /* request timestamp; 0 is treated as "unknown" by consumers */
} request_t;

/* Consume one request; implemented separately by each program. */
void handle_request(request_t * req);
#endif //__REQUEST_H__
@@ -0,0 +1,109 @@
1
+ // ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <getopt.h>
5
+ #include <string.h>
6
+ #include <errno.h>
7
+ #include <time.h>
8
+ #include <unistd.h>
9
+ #include "pcre.h"
10
+ #include "request.h"
11
+ #include "ug_index.h"
12
+ #include "ug_lua.h"
13
+ #include "ug_gzip.h"
14
+
15
+ #define USAGE "Usage: ug_build_index process.lua file\n"
16
+
17
+ // index file format
18
+ // [64bit,64bit] -- timestamp, file offset
19
+ // [32bit, Nbytes] -- extra data
20
+
21
+ static build_idx_context_t ctx;
22
+
23
+
24
+ void handle_request(request_t *req)
25
+ {
26
+ time_t floored_time;
27
+ floored_time = req->time - (req->time % INDEX_EVERY);
28
+ if (!ctx.last_index_time || floored_time > ctx.last_index_time) {
29
+ ug_write_index(ctx.findex, floored_time, req->offset);
30
+ ctx.last_index_time = floored_time;
31
+ }
32
+ }
33
+
34
+ void open_indexes(char *log_fname)
35
+ {
36
+ char *index_fname, *gz_index_fname;
37
+
38
+ index_fname = ug_get_index_fname(log_fname, "idx");
39
+
40
+ if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
41
+ gz_index_fname = ug_get_index_fname(log_fname, "gzidx");
42
+ /* we don't do incremental index building in gzipped files -- we just truncate and
43
+ * build over*/
44
+ ctx.findex = fopen(index_fname, "w+");
45
+ ctx.fgzindex = fopen(gz_index_fname, "w+");
46
+
47
+ if (!ctx.findex || !ctx.fgzindex) {
48
+ fprintf(stderr, "Couldn't open index files '%s','%s': %s\n", index_fname, gz_index_fname, strerror(errno));
49
+ exit(1);
50
+ }
51
+ } else {
52
+ ctx.findex = fopen(index_fname, "r+");
53
+ if (ctx.findex) {
54
+ /* seek in the log, (and the index, with get_offset_for_timestamp()) to the
55
+ * last timestamp we indexed */
56
+ fseeko(ctx.flog, ug_get_offset_for_timestamp(ctx.findex, -1), SEEK_SET);
57
+ } else {
58
+ ctx.findex = fopen(index_fname, "w+");
59
+ }
60
+ if (!ctx.findex) {
61
+ fprintf(stderr, "Couldn't open index file '%s': %s\n", index_fname, strerror(errno));
62
+ exit(1);
63
+ }
64
+ }
65
+ }
66
+
67
+ int main(int argc, char **argv)
68
+ {
69
+ char *line = NULL, *lua_fname, *log_fname;
70
+ ssize_t line_size;
71
+ size_t allocated;
72
+
73
+ if (argc < 3) {
74
+ fprintf(stderr, USAGE);
75
+ exit(1);
76
+ }
77
+
78
+ lua_fname = argv[1];
79
+ log_fname = argv[2];
80
+
81
+ bzero(&ctx, sizeof(build_idx_context_t));
82
+
83
+ ctx.lua = ug_lua_init(lua_fname);
84
+
85
+ ctx.flog = fopen(log_fname, "r");
86
+ if (!ctx.flog) {
87
+ perror("Couldn't open log file");
88
+ exit(1);
89
+ }
90
+
91
+ open_indexes(log_fname);
92
+
93
+ if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
94
+ build_gz_index(&ctx);
95
+ } else {
96
+ while (1) {
97
+ off_t offset;
98
+ offset = ftello(ctx.flog);
99
+ line_size = getline(&line, &allocated, ctx.flog);
100
+
101
+ if ( line_size < 0 )
102
+ break;
103
+
104
+ ug_process_line(ctx.lua, line, line_size, offset);
105
+ }
106
+ }
107
+ ug_lua_on_eof(ctx.lua);
108
+ exit(0);
109
+ }
@@ -0,0 +1,188 @@
1
+ // ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
2
+
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <libgen.h>
7
+ #include "ug_index.h"
8
+ #include "ug_gzip.h"
9
+ #include "zlib.h"
10
+
11
+ /* target_offset is the offset in the uncompressed stream we're looking for. */
12
+ void fill_gz_info(off_t target_offset, FILE * gz_index, unsigned char *dict_data, off_t * compressed_offset)
13
+ {
14
+ off_t uncompressed_offset = 0;
15
+
16
+ for (;;) {
17
+ if (!fread(&uncompressed_offset, sizeof(off_t), 1, gz_index))
18
+ break;
19
+
20
+ if (uncompressed_offset > target_offset) {
21
+ return;
22
+ }
23
+
24
+ if (!fread(compressed_offset, sizeof(off_t), 1, gz_index))
25
+ break;
26
+
27
+ if (!fread(dict_data, WINSIZE, 1, gz_index))
28
+ break;
29
+ }
30
+ return;
31
+ }
32
+
33
+ /* Use the index to read len bytes from offset into buf, return bytes read or
34
+ negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
35
+ the end of the uncompressed data, then extract() will return a value less
36
+ than len, indicating how much as actually read into buf. This function
37
+ should not return a data error unless the file was modified since the index
38
+ was generated. extract() may also return Z_ERRNO if there is an error on
39
+ reading or seeking the input file. */
40
+ int ug_gzip_cat(FILE * in, uint64_t time, FILE * offset_index, FILE * gz_index)
41
+ {
42
+ int ret, bits;
43
+ off_t uncompressed_offset, compressed_offset;
44
+ z_stream strm;
45
+ unsigned char input[CHUNK];
46
+ unsigned char output[WINSIZE], dict[WINSIZE];
47
+
48
+ /* initialize file and inflate state to start there */
49
+ strm.zalloc = Z_NULL;
50
+ strm.zfree = Z_NULL;
51
+ strm.opaque = Z_NULL;
52
+ strm.avail_in = 0;
53
+ strm.next_in = Z_NULL;
54
+
55
+
56
+ bzero(dict, WINSIZE);
57
+
58
+ if (gz_index && offset_index) {
59
+ uncompressed_offset = ug_get_offset_for_timestamp(offset_index, time);
60
+ fill_gz_info(uncompressed_offset, gz_index, dict, &compressed_offset);
61
+
62
+ bits = compressed_offset >> 56;
63
+ compressed_offset = (compressed_offset & 0x00FFFFFFFFFFFFFF) - (bits ? 1 : 0);
64
+
65
+ ret = inflateInit2(&strm, -15); /* raw inflate */
66
+ if (ret != Z_OK)
67
+ return ret;
68
+
69
+ ret = fseeko(in, compressed_offset, SEEK_SET);
70
+
71
+ if (ret != Z_OK)
72
+ return ret;
73
+ } else {
74
+ compressed_offset = bits = 0;
75
+ strm.avail_in = fread(input, 1, CHUNK, in);
76
+ strm.next_in = input;
77
+
78
+ ret = inflateInit2(&strm, 47);
79
+ }
80
+
81
+
82
+ if (ret == -1)
83
+ goto extract_ret;
84
+ if (bits) {
85
+ ret = getc(in);
86
+ if (ret == -1) {
87
+ ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
88
+ goto extract_ret;
89
+ }
90
+ (void) inflatePrime(&strm, bits, ret >> (8 - bits));
91
+ }
92
+
93
+ if (compressed_offset > 0)
94
+ inflateSetDictionary(&strm, dict, WINSIZE);
95
+
96
+ for (;;) {
97
+ strm.avail_out = WINSIZE;
98
+ strm.next_out = output;
99
+
100
+ if (!strm.avail_in) {
101
+ strm.avail_in = fread(input, 1, CHUNK, in);
102
+ strm.next_in = input;
103
+ }
104
+
105
+ if (ferror(in)) {
106
+ ret = Z_ERRNO;
107
+ goto extract_ret;
108
+ }
109
+
110
+ if (strm.avail_in == 0) {
111
+ ret = Z_DATA_ERROR;
112
+ goto extract_ret;
113
+ }
114
+
115
+ ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
116
+
117
+ if (ret == Z_NEED_DICT)
118
+ ret = Z_DATA_ERROR;
119
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
120
+ goto extract_ret;
121
+
122
+ fwrite(output, WINSIZE - strm.avail_out, 1, stdout);
123
+
124
+ /* if reach end of stream, then don't keep trying to get more */
125
+ if (ret == Z_STREAM_END)
126
+ break;
127
+ }
128
+
129
+ /* clean up and return bytes read or error */
130
+ extract_ret:
131
+ (void) inflateEnd(&strm);
132
+ return ret;
133
+ }
134
+ /*
135
+ * ug_cat -- given a log file and (possibly) a file + (timestamp -> offset) index, cat the file starting
136
+ * from about that timestamp
137
+ */
138
+
139
+ #define USAGE "Usage: ug_cat file timestamp\n"
140
+
141
+ int main(int argc, char **argv)
142
+ {
143
+ int nread;
144
+ FILE *log;
145
+ FILE *index;
146
+ char *log_fname, *index_fname, buf[4096];
147
+
148
+ if (argc < 3) {
149
+ fprintf(stderr, USAGE);
150
+ exit(1);
151
+ }
152
+
153
+ log_fname = argv[1];
154
+
155
+ log = fopen(log_fname, "r");
156
+ if (!log) {
157
+ perror("Couldn't open log file");
158
+ exit(1);
159
+ }
160
+
161
+ index_fname = ug_get_index_fname(log_fname, "idx");
162
+
163
+ index = fopen(index_fname, "r");
164
+ if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
165
+ char *gzidx_fname;
166
+ FILE *gzidx;
167
+
168
+ if (index) {
169
+ gzidx_fname = ug_get_index_fname(log_fname, "gzidx");
170
+ gzidx = fopen(gzidx_fname, "r");
171
+ if (!gzidx) {
172
+ perror("error opening gzidx component");
173
+ exit(1);
174
+ }
175
+ ug_gzip_cat(log, atol(argv[2]), index, gzidx);
176
+
177
+ } else {
178
+ ug_gzip_cat(log, atol(argv[2]), NULL, NULL);
179
+
180
+ }
181
+ } else {
182
+ if (index)
183
+ fseeko(log, ug_get_offset_for_timestamp(index, atol(argv[2])), SEEK_SET);
184
+
185
+ while ((nread = fread(buf, 1, 4096, log)))
186
+ fwrite(buf, 1, nread, stdout);
187
+ }
188
+ }
@@ -0,0 +1,199 @@
1
+ // ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <time.h>
6
+ #include <unistd.h>
7
+ #include <lua.h>
8
+ #include "pcre.h"
9
+ #include "request.h"
10
+ #include "ug_lua.h"
11
+
12
+ struct ug_regexp {
13
+ int invert;
14
+ pcre *re;
15
+ };
16
+
17
/* Settings for one run, filled in from the command line by parse_args(). */
typedef struct {
    time_t start_time;          /* -s: earliest request time to print (inclusive) */
    time_t end_time;            /* -e: latest request time to print (inclusive) */
    int num_regexps;            /* number of entries in regexps */
    struct ug_regexp *regexps;  /* filters; a request must satisfy every one */
    char *lua_file;             /* -l: Lua script handed to ug_lua_init() */
    char *in_file;              /* -f: input path; NULL means read stdin */
} context_t;
25
+
26
+ static context_t ctx;
27
+
28
+ static const char* commandparams="l:s:e:k:f:";
29
+ static const char* usage ="Usage: ug_guts [-f input] -l file.lua -s start_time -e end_time regexps [... regexps]\n\n";
30
+
31
+ int parse_args(int argc, char **argv)
32
+ {
33
+ extern char *optarg;
34
+ extern int optind;
35
+ const char *error;
36
+ int erroffset, optValue=0, retValue=1, i;
37
+ ctx.start_time = -1;
38
+ ctx.end_time = -1;
39
+ ctx.lua_file = NULL;
40
+
41
+ while ((optValue = getopt(argc, argv, commandparams))!= -1) {
42
+ switch (optValue) {
43
+ case 'f':
44
+ ctx.in_file = strdup(optarg);
45
+ break;
46
+ case 'l':
47
+ ctx.lua_file = strdup(optarg);
48
+ break;
49
+ case 's':
50
+ ctx.start_time = atol(optarg);
51
+ break;
52
+ case 'e':
53
+ ctx.end_time = atol(optarg);
54
+ break;
55
+ case '?':
56
+ return(-1);
57
+ break;
58
+ case -1: //Options exhausted
59
+ break;
60
+ default:
61
+ return(-1);
62
+ }
63
+ }
64
+ if ( ctx.lua_file == NULL || ctx.start_time < 0 || ctx.end_time < 0 ) { // mandatory fields
65
+ return(-1);
66
+ }
67
+ else if ((optind + 1 ) > argc) { // Need at least one argument after options
68
+ return(-1);
69
+ }
70
+
71
+ if (optind < argc) { // regexps follow after command-line options
72
+ ctx.num_regexps = argc - optind;
73
+ ctx.regexps = malloc(sizeof(struct ug_regexp) * ctx.num_regexps);
74
+ bzero(ctx.regexps, sizeof(struct ug_regexp) * ctx.num_regexps);
75
+
76
+ for (i=0; optind < argc; ++optind, i++) {
77
+ char *p = argv[optind];
78
+ if ( p[0] == '!' || p[0] == '+' ) {
79
+ ctx.regexps[i].invert = p[0] == '!';
80
+ p++;
81
+ }
82
+
83
+ ctx.regexps[i].re = pcre_compile(p, 0, &error, &erroffset, NULL);
84
+ if (error) {
85
+ fprintf(stderr, "Error compiling regexp \"%s\": %s\n", argv[optind], error);
86
+ exit(1);
87
+ }
88
+ }
89
+ }
90
+ return retValue;
91
+ }
92
+
93
+ int check_request(char *request, struct ug_regexp *regexps, int num_regexps)
94
+ {
95
+ int j, matched, ovector[30];
96
+
97
+ for (j = 0; j < num_regexps; j++) {
98
+ matched = pcre_exec(regexps[j].re, NULL, request, strlen(request), 0, 0, ovector, 30);
99
+ if ( matched < 0 && !regexps[j].invert )
100
+ return 0;
101
+ else if ( matched >= 0 && regexps[j].invert )
102
+ return 0;
103
+ }
104
+
105
+ return 1;
106
+ }
107
+
108
/*
 * Print `request` to stdout followed by a separator line of '-' characters
 * and a newline, then flush stdout.
 *
 * The separator length is derived from the last line of the request
 * (trailing newlines ignored): walking backwards from the final character
 * to the preceding newline or the start of the string counts N characters,
 * and min(N - 1, 80) dashes are printed.  An empty request prints only the
 * newline.
 */
void print_request(char *request)
{
    size_t len = strlen(request);
    int i, last_line_len = 0;

    printf("%s", request);

    /* Only walk backwards when there is a last character to start from;
     * for an empty string `request + len - 1` would point before the buffer. */
    if (len > 0) {
        char *p = request + (len - 1);

        /* skip trailing newlines */
        while (p > request && *p == '\n')
            p--;

        /* measure back to the previous newline (or the start of the text) */
        while (p > request && *p != '\n') {
            p--;
            last_line_len++;
        }
    }

    for (i = 0; i < (last_line_len - 1) && i < 80; i++)
        putchar('-');

    putchar('\n');
    fflush(stdout);
}
131
+
132
+
133
+ void handle_request(request_t * req)
134
+ {
135
+ static time_t time = 0;
136
+
137
+ if (!req->time)
138
+ req->time = time;
139
+
140
+ if ((req->time >= ctx.start_time
141
+ && req->time <= ctx.end_time
142
+ && check_request(req->buf, ctx.regexps, ctx.num_regexps))) {
143
+ if (req->time != 0) {
144
+ printf("@@%lu\n", req->time);
145
+ }
146
+ print_request(req->buf);
147
+ }
148
+ /* print a time-marker every second -- allows collections of logs with one sparse
149
+ log to proceed */
150
+ if (req->time > time) {
151
+ time = req->time;
152
+ printf("@@%lu\n", time);
153
+ }
154
+ }
155
+
156
+
157
+
158
+ int main(int argc, char **argv)
159
+ {
160
+ lua_State *lua;
161
+ ssize_t line_size;
162
+ FILE *file = NULL;
163
+ char *line = NULL;
164
+ size_t allocated = 0, offset = 0;
165
+ if (argc < 5) {
166
+ fprintf(stderr, "%s", usage);
167
+ exit(1);
168
+ }
169
+
170
+ bzero(&ctx, sizeof(context_t));
171
+ if ( parse_args(argc, argv) == -1 ) {
172
+ fprintf(stderr, "%s", usage);
173
+ exit(1);
174
+ }
175
+
176
+ lua = ug_lua_init(ctx.lua_file);
177
+ if ( !lua )
178
+ exit(1);
179
+
180
+ if ( ctx.in_file ) {
181
+ file = fopen(ctx.in_file, "r");
182
+ if ( !file ) {
183
+ perror(ctx.in_file);
184
+ exit(1);
185
+ }
186
+ } else {
187
+ file = stdin;
188
+ }
189
+
190
+ while (1) {
191
+ line_size = getline(&line, &allocated, file);
192
+ if ( line_size < 0 )
193
+ break;
194
+
195
+ ug_process_line(lua, line, line_size, offset);
196
+ offset += line_size;
197
+ }
198
+ ug_lua_on_eof(lua);
199
+ }