ultragrep 0.1.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
#ifndef REQUEST_H
#define REQUEST_H

#include <sys/types.h>  /* off_t */
#include <time.h>       /* time_t */

/*
 * One parsed log entry ("request") as delivered to handle_request().
 */
typedef struct request_t {
    char *buf;     /* text of the request */
    off_t offset;  /* offset recorded for the request (the index builder stores it in the index) */
    time_t time;   /* timestamp of the request; the search tool treats 0 as "no timestamp" */
} request_t;

/*
 * Callback invoked once per request. Each program that uses this header
 * supplies its own definition.
 */
void handle_request(request_t * req);
#endif /* REQUEST_H */
@@ -0,0 +1,109 @@
1
+ // ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <getopt.h>
5
+ #include <string.h>
6
+ #include <errno.h>
7
+ #include <time.h>
8
+ #include <unistd.h>
9
+ #include "pcre.h"
10
+ #include "request.h"
11
+ #include "ug_index.h"
12
+ #include "ug_lua.h"
13
+ #include "ug_gzip.h"
14
+
15
+ #define USAGE "Usage: ug_build_index process.lua file\n"
16
+
17
+ // index file format
18
+ // [64bit,64bit] -- timestamp, file offset
19
+ // [32bit, Nbytes] -- extra data
20
+
21
+ static build_idx_context_t ctx;
22
+
23
+
24
+ void handle_request(request_t *req)
25
+ {
26
+ time_t floored_time;
27
+ floored_time = req->time - (req->time % INDEX_EVERY);
28
+ if (!ctx.last_index_time || floored_time > ctx.last_index_time) {
29
+ ug_write_index(ctx.findex, floored_time, req->offset);
30
+ ctx.last_index_time = floored_time;
31
+ }
32
+ }
33
+
34
+ void open_indexes(char *log_fname)
35
+ {
36
+ char *index_fname, *gz_index_fname;
37
+
38
+ index_fname = ug_get_index_fname(log_fname, "idx");
39
+
40
+ if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
41
+ gz_index_fname = ug_get_index_fname(log_fname, "gzidx");
42
+ /* we don't do incremental index building in gzipped files -- we just truncate and
43
+ * build over*/
44
+ ctx.findex = fopen(index_fname, "w+");
45
+ ctx.fgzindex = fopen(gz_index_fname, "w+");
46
+
47
+ if (!ctx.findex || !ctx.fgzindex) {
48
+ fprintf(stderr, "Couldn't open index files '%s','%s': %s\n", index_fname, gz_index_fname, strerror(errno));
49
+ exit(1);
50
+ }
51
+ } else {
52
+ ctx.findex = fopen(index_fname, "r+");
53
+ if (ctx.findex) {
54
+ /* seek in the log, (and the index, with get_offset_for_timestamp()) to the
55
+ * last timestamp we indexed */
56
+ fseeko(ctx.flog, ug_get_offset_for_timestamp(ctx.findex, -1), SEEK_SET);
57
+ } else {
58
+ ctx.findex = fopen(index_fname, "w+");
59
+ }
60
+ if (!ctx.findex) {
61
+ fprintf(stderr, "Couldn't open index file '%s': %s\n", index_fname, strerror(errno));
62
+ exit(1);
63
+ }
64
+ }
65
+ }
66
+
67
+ int main(int argc, char **argv)
68
+ {
69
+ char *line = NULL, *lua_fname, *log_fname;
70
+ ssize_t line_size;
71
+ size_t allocated;
72
+
73
+ if (argc < 3) {
74
+ fprintf(stderr, USAGE);
75
+ exit(1);
76
+ }
77
+
78
+ lua_fname = argv[1];
79
+ log_fname = argv[2];
80
+
81
+ bzero(&ctx, sizeof(build_idx_context_t));
82
+
83
+ ctx.lua = ug_lua_init(lua_fname);
84
+
85
+ ctx.flog = fopen(log_fname, "r");
86
+ if (!ctx.flog) {
87
+ perror("Couldn't open log file");
88
+ exit(1);
89
+ }
90
+
91
+ open_indexes(log_fname);
92
+
93
+ if (strcmp(log_fname + (strlen(log_fname) - 3), ".gz") == 0) {
94
+ build_gz_index(&ctx);
95
+ } else {
96
+ while (1) {
97
+ off_t offset;
98
+ offset = ftello(ctx.flog);
99
+ line_size = getline(&line, &allocated, ctx.flog);
100
+
101
+ if ( line_size < 0 )
102
+ break;
103
+
104
+ ug_process_line(ctx.lua, line, line_size, offset);
105
+ }
106
+ }
107
+ ug_lua_on_eof(ctx.lua);
108
+ exit(0);
109
+ }
@@ -0,0 +1,188 @@
1
+ // ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
2
+
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <libgen.h>
7
+ #include "ug_index.h"
8
+ #include "ug_gzip.h"
9
+ #include "zlib.h"
10
+
11
+ /* target_offset is the offset in the uncompressed stream we're looking for. */
12
+ void fill_gz_info(off_t target_offset, FILE * gz_index, unsigned char *dict_data, off_t * compressed_offset)
13
+ {
14
+ off_t uncompressed_offset = 0;
15
+
16
+ for (;;) {
17
+ if (!fread(&uncompressed_offset, sizeof(off_t), 1, gz_index))
18
+ break;
19
+
20
+ if (uncompressed_offset > target_offset) {
21
+ return;
22
+ }
23
+
24
+ if (!fread(compressed_offset, sizeof(off_t), 1, gz_index))
25
+ break;
26
+
27
+ if (!fread(dict_data, WINSIZE, 1, gz_index))
28
+ break;
29
+ }
30
+ return;
31
+ }
32
+
33
+ /* Use the index to read len bytes from offset into buf, return bytes read or
34
+ negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
35
+ the end of the uncompressed data, then extract() will return a value less
36
+ than len, indicating how much as actually read into buf. This function
37
+ should not return a data error unless the file was modified since the index
38
+ was generated. extract() may also return Z_ERRNO if there is an error on
39
+ reading or seeking the input file. */
40
+ int ug_gzip_cat(FILE * in, uint64_t time, FILE * offset_index, FILE * gz_index)
41
+ {
42
+ int ret, bits;
43
+ off_t uncompressed_offset, compressed_offset;
44
+ z_stream strm;
45
+ unsigned char input[CHUNK];
46
+ unsigned char output[WINSIZE], dict[WINSIZE];
47
+
48
+ /* initialize file and inflate state to start there */
49
+ strm.zalloc = Z_NULL;
50
+ strm.zfree = Z_NULL;
51
+ strm.opaque = Z_NULL;
52
+ strm.avail_in = 0;
53
+ strm.next_in = Z_NULL;
54
+
55
+
56
+ bzero(dict, WINSIZE);
57
+
58
+ if (gz_index && offset_index) {
59
+ uncompressed_offset = ug_get_offset_for_timestamp(offset_index, time);
60
+ fill_gz_info(uncompressed_offset, gz_index, dict, &compressed_offset);
61
+
62
+ bits = compressed_offset >> 56;
63
+ compressed_offset = (compressed_offset & 0x00FFFFFFFFFFFFFF) - (bits ? 1 : 0);
64
+
65
+ ret = inflateInit2(&strm, -15); /* raw inflate */
66
+ if (ret != Z_OK)
67
+ return ret;
68
+
69
+ ret = fseeko(in, compressed_offset, SEEK_SET);
70
+
71
+ if (ret != Z_OK)
72
+ return ret;
73
+ } else {
74
+ compressed_offset = bits = 0;
75
+ strm.avail_in = fread(input, 1, CHUNK, in);
76
+ strm.next_in = input;
77
+
78
+ ret = inflateInit2(&strm, 47);
79
+ }
80
+
81
+
82
+ if (ret == -1)
83
+ goto extract_ret;
84
+ if (bits) {
85
+ ret = getc(in);
86
+ if (ret == -1) {
87
+ ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
88
+ goto extract_ret;
89
+ }
90
+ (void) inflatePrime(&strm, bits, ret >> (8 - bits));
91
+ }
92
+
93
+ if (compressed_offset > 0)
94
+ inflateSetDictionary(&strm, dict, WINSIZE);
95
+
96
+ for (;;) {
97
+ strm.avail_out = WINSIZE;
98
+ strm.next_out = output;
99
+
100
+ if (!strm.avail_in) {
101
+ strm.avail_in = fread(input, 1, CHUNK, in);
102
+ strm.next_in = input;
103
+ }
104
+
105
+ if (ferror(in)) {
106
+ ret = Z_ERRNO;
107
+ goto extract_ret;
108
+ }
109
+
110
+ if (strm.avail_in == 0) {
111
+ ret = Z_DATA_ERROR;
112
+ goto extract_ret;
113
+ }
114
+
115
+ ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
116
+
117
+ if (ret == Z_NEED_DICT)
118
+ ret = Z_DATA_ERROR;
119
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
120
+ goto extract_ret;
121
+
122
+ fwrite(output, WINSIZE - strm.avail_out, 1, stdout);
123
+
124
+ /* if reach end of stream, then don't keep trying to get more */
125
+ if (ret == Z_STREAM_END)
126
+ break;
127
+ }
128
+
129
+ /* clean up and return bytes read or error */
130
+ extract_ret:
131
+ (void) inflateEnd(&strm);
132
+ return ret;
133
+ }
134
+ /*
135
+ * ug_cat -- given a log file and (possibly) a file + (timestamp -> offset) index, cat the file starting
136
+ * from about that timestamp
137
+ */
138
+
139
#define USAGE "Usage: ug_cat file timestamp\n"

/*
 * ug_cat entry point: write the log file argv[1] to stdout, starting at the
 * position its index associates with the timestamp argv[2].
 *
 * - Gzipped logs (name ending in ".gz") are handed to ug_gzip_cat(), with the
 *   "idx" and "gzidx" indexes when the "idx" file can be opened, or without
 *   any index otherwise.
 * - Other logs are seeked to ug_get_offset_for_timestamp(index, timestamp)
 *   when an index exists and then copied verbatim.
 *
 * Exits with status 1 on bad usage, an unopenable log, a missing "gzidx"
 * file, or a timestamp argument that does not start with a number.
 */
int main(int argc, char **argv)
{
    size_t nread, name_len;
    long timestamp;
    char *end;
    FILE *log;
    FILE *index;
    char *log_fname, *index_fname, buf[4096];

    if (argc < 3) {
        fputs(USAGE, stderr);
        exit(1);
    }

    log_fname = argv[1];

    /* Parse the timestamp once; unlike atol(), this notices a non-number. */
    timestamp = strtol(argv[2], &end, 10);
    if (end == argv[2]) {
        fputs(USAGE, stderr);
        exit(1);
    }

    log = fopen(log_fname, "r");
    if (!log) {
        perror("Couldn't open log file");
        exit(1);
    }

    index_fname = ug_get_index_fname(log_fname, "idx");

    /* A missing index is not an error: the file is then read from the start. */
    index = fopen(index_fname, "r");

    /* Length guard keeps the suffix comparison inside the string. */
    name_len = strlen(log_fname);
    if (name_len >= 3 && strcmp(log_fname + name_len - 3, ".gz") == 0) {
        if (index) {
            char *gzidx_fname = ug_get_index_fname(log_fname, "gzidx");
            FILE *gzidx = fopen(gzidx_fname, "r");

            if (!gzidx) {
                perror("error opening gzidx component");
                exit(1);
            }
            ug_gzip_cat(log, timestamp, index, gzidx);
        } else {
            ug_gzip_cat(log, timestamp, NULL, NULL);
        }
    } else {
        if (index)
            fseeko(log, ug_get_offset_for_timestamp(index, timestamp), SEEK_SET);

        while ((nread = fread(buf, 1, sizeof buf, log)) > 0) {
            /* Stop copying once stdout no longer accepts data. */
            if (fwrite(buf, 1, nread, stdout) != nread)
                break;
        }
    }
    return 0;
}
@@ -0,0 +1,199 @@
1
+ // ex: set softtabstop=4 shiftwidth=4 tabstop=4 expandtab:
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <time.h>
6
+ #include <unistd.h>
7
+ #include <lua.h>
8
+ #include "pcre.h"
9
+ #include "request.h"
10
+ #include "ug_lua.h"
11
+
12
+ struct ug_regexp { /* One pattern from the command line, as filled in by parse_args(). */
13
+ int invert; /* Set to 1 when the argument started with '!': check_request() then rejects requests that DO match. */
14
+ pcre *re; /* Compiled form of the pattern, as returned by pcre_compile(). */
15
+ };
16
+
17
+ typedef struct { /* Settings of one ug_guts run, filled in from the command line by parse_args(). */
18
+ time_t start_time; /* -s option: handle_request() prints only requests with time >= this value. */
19
+ time_t end_time; /* -e option: handle_request() prints only requests with time <= this value. */
20
+ int num_regexps; /* Number of entries in regexps. */
21
+ struct ug_regexp *regexps; /* Array of compiled patterns, one per non-option argument. */
22
+ char *lua_file; /* -l option: script path passed to ug_lua_init(). */
23
+ char *in_file; /* -f option: input file; when unset, main() reads stdin. */
24
+ } context_t;
25
+
26
+ static context_t ctx; /* The single instance; main() zeroes it before parse_args() runs. */
27
+
28
+ static const char* commandparams="l:s:e:k:f:";
29
+ static const char* usage ="Usage: ug_guts [-f input] -l file.lua -s start_time -e end_time regexps [... regexps]\n\n";
30
+
31
+ int parse_args(int argc, char **argv)
32
+ {
33
+ extern char *optarg;
34
+ extern int optind;
35
+ const char *error;
36
+ int erroffset, optValue=0, retValue=1, i;
37
+ ctx.start_time = -1;
38
+ ctx.end_time = -1;
39
+ ctx.lua_file = NULL;
40
+
41
+ while ((optValue = getopt(argc, argv, commandparams))!= -1) {
42
+ switch (optValue) {
43
+ case 'f':
44
+ ctx.in_file = strdup(optarg);
45
+ break;
46
+ case 'l':
47
+ ctx.lua_file = strdup(optarg);
48
+ break;
49
+ case 's':
50
+ ctx.start_time = atol(optarg);
51
+ break;
52
+ case 'e':
53
+ ctx.end_time = atol(optarg);
54
+ break;
55
+ case '?':
56
+ return(-1);
57
+ break;
58
+ case -1: //Options exhausted
59
+ break;
60
+ default:
61
+ return(-1);
62
+ }
63
+ }
64
+ if ( ctx.lua_file == NULL || ctx.start_time < 0 || ctx.end_time < 0 ) { // mandatory fields
65
+ return(-1);
66
+ }
67
+ else if ((optind + 1 ) > argc) { // Need at least one argument after options
68
+ return(-1);
69
+ }
70
+
71
+ if (optind < argc) { // regexps follow after command-line options
72
+ ctx.num_regexps = argc - optind;
73
+ ctx.regexps = malloc(sizeof(struct ug_regexp) * ctx.num_regexps);
74
+ bzero(ctx.regexps, sizeof(struct ug_regexp) * ctx.num_regexps);
75
+
76
+ for (i=0; optind < argc; ++optind, i++) {
77
+ char *p = argv[optind];
78
+ if ( p[0] == '!' || p[0] == '+' ) {
79
+ ctx.regexps[i].invert = p[0] == '!';
80
+ p++;
81
+ }
82
+
83
+ ctx.regexps[i].re = pcre_compile(p, 0, &error, &erroffset, NULL);
84
+ if (error) {
85
+ fprintf(stderr, "Error compiling regexp \"%s\": %s\n", argv[optind], error);
86
+ exit(1);
87
+ }
88
+ }
89
+ }
90
+ return retValue;
91
+ }
92
+
93
+ int check_request(char *request, struct ug_regexp *regexps, int num_regexps)
94
+ {
95
+ int j, matched, ovector[30];
96
+
97
+ for (j = 0; j < num_regexps; j++) {
98
+ matched = pcre_exec(regexps[j].re, NULL, request, strlen(request), 0, 0, ovector, 30);
99
+ if ( matched < 0 && !regexps[j].invert )
100
+ return 0;
101
+ else if ( matched >= 0 && regexps[j].invert )
102
+ return 0;
103
+ }
104
+
105
+ return 1;
106
+ }
107
+
108
/*
 * Print a request followed by a separator line of dashes, then flush stdout.
 *
 * The separator length is derived from the last non-empty line of the
 * request: the characters counted while walking back from its end to the
 * preceding newline (or the start of the text), minus one, capped at 80.
 * An empty request produces just the newline that ends the separator.
 */
void print_request(char *request)
{
    int i, last_line_len = 0;
    size_t len = strlen(request);

    fputs(request, stdout);

    /* Only walk backwards when there is a last character to start from. */
    if (len > 0) {
        char *p = request + (len - 1);

        /* skip trailing newlines */
        while (p > request && *p == '\n')
            p--;

        /* measure back to the previous newline or the start of the text */
        while (p > request && *p != '\n') {
            p--;
            last_line_len++;
        }
    }

    for (i = 0; i < (last_line_len - 1) && i < 80; i++)
        putchar('-');

    putchar('\n');
    fflush(stdout);
}
131
+
132
+
133
+ void handle_request(request_t * req)
134
+ {
135
+ static time_t time = 0;
136
+
137
+ if (!req->time)
138
+ req->time = time;
139
+
140
+ if ((req->time >= ctx.start_time
141
+ && req->time <= ctx.end_time
142
+ && check_request(req->buf, ctx.regexps, ctx.num_regexps))) {
143
+ if (req->time != 0) {
144
+ printf("@@%lu\n", req->time);
145
+ }
146
+ print_request(req->buf);
147
+ }
148
+ /* print a time-marker every second -- allows collections of logs with one sparse
149
+ log to proceed */
150
+ if (req->time > time) {
151
+ time = req->time;
152
+ printf("@@%lu\n", time);
153
+ }
154
+ }
155
+
156
+
157
+
158
+ int main(int argc, char **argv)
159
+ {
160
+ lua_State *lua;
161
+ ssize_t line_size;
162
+ FILE *file = NULL;
163
+ char *line = NULL;
164
+ size_t allocated = 0, offset = 0;
165
+ if (argc < 5) {
166
+ fprintf(stderr, "%s", usage);
167
+ exit(1);
168
+ }
169
+
170
+ bzero(&ctx, sizeof(context_t));
171
+ if ( parse_args(argc, argv) == -1 ) {
172
+ fprintf(stderr, "%s", usage);
173
+ exit(1);
174
+ }
175
+
176
+ lua = ug_lua_init(ctx.lua_file);
177
+ if ( !lua )
178
+ exit(1);
179
+
180
+ if ( ctx.in_file ) {
181
+ file = fopen(ctx.in_file, "r");
182
+ if ( !file ) {
183
+ perror(ctx.in_file);
184
+ exit(1);
185
+ }
186
+ } else {
187
+ file = stdin;
188
+ }
189
+
190
+ while (1) {
191
+ line_size = getline(&line, &allocated, file);
192
+ if ( line_size < 0 )
193
+ break;
194
+
195
+ ug_process_line(lua, line, line_size, offset);
196
+ offset += line_size;
197
+ }
198
+ ug_lua_on_eof(lua);
199
+ }