ultragrep 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/ultragrep ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# Command-line launcher: puts the gem's lib directory on the load path,
# then hands the parsed command-line arguments to Ultragrep.
lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
$LOAD_PATH.push(lib_dir)
require "ultragrep"

# An unhandled exception in any thread aborts the whole process.
Thread.abort_on_exception = true

options = Ultragrep.parse_args(ARGV)
Ultragrep.ultragrep(options)
@@ -0,0 +1,39 @@
1
# Builds the three native helpers: ug_guts, ug_build_index and ug_cat.
CFLAGS=-I/opt/local/include -g

# These targets name actions, not files.
.PHONY: all install clean

all: ug_guts ug_build_index ug_cat
install: all

request.o: request.c request.h
	gcc ${CFLAGS} -c request.c

rails_req.o: rails_req.c rails_req.h req_matcher.h
	gcc ${CFLAGS} -c rails_req.c

work_req.o: work_req.c work_req.h req_matcher.h
	gcc ${CFLAGS} -c work_req.c

ug_guts.o: ug_guts.c req_matcher.h
	gcc ${CFLAGS} -c ug_guts.c

ug_guts: ug_guts.o rails_req.o work_req.o request.o Makefile
	gcc ${CFLAGS} -o ug_guts request.o rails_req.o work_req.o ug_guts.o -lpcre

ug_index.o: ug_index.h ug_index.c
	gcc ${CFLAGS} -c ug_index.c

ug_build_index.o: ug_build_index.c req_matcher.h ug_index.h
	gcc ${CFLAGS} -c ug_build_index.c

ug_build_index: ug_build_index.o ug_index.o rails_req.o work_req.o request.o Makefile zran.o
	gcc ${CFLAGS} -o ug_build_index request.o rails_req.o work_req.o ug_index.o ug_build_index.o zran.o -lpcre -lz

ug_cat.o: ug_cat.c ug_index.h
	gcc ${CFLAGS} -c ug_cat.c

ug_cat: ug_cat.o ug_index.o Makefile
	gcc ${CFLAGS} -o ug_cat ug_cat.o ug_index.o

zran.o: zran.c
	gcc -g -c zran.c

# Remove every build product, not just ug_guts.
clean:
	rm -rf *.o ug_guts ug_build_index ug_cat
@@ -0,0 +1 @@
1
+ # Do nothing, we already have a complete Makefile!
@@ -0,0 +1,102 @@
1
/* Ask for the extended libc declarations (strptime, timegm) used below. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "pcre.h"
#include "request.h"
#include "req_matcher.h"
6
+
7
+ typedef struct {
8
+ req_matcher_t base;
9
+ on_req on_request;
10
+ on_err on_error;
11
+ void* arg;
12
+ int stop_requested;
13
+ int blank_lines;
14
+
15
+ }rails_req_matcher_t;
16
+ static request_t request;
17
+
18
+
19
+ static void rails_on_request(rails_req_matcher_t* m, request_t* r) {
20
+ if(r && m->on_request) {
21
+ if(r->lines > 0) {
22
+ m->on_request(r, m->arg);
23
+ }
24
+ clear_request(r);
25
+ }
26
+ }
27
+
28
+ void rails_stop(req_matcher_t* base) {
29
+ rails_req_matcher_t* m = (rails_req_matcher_t*)base;
30
+ m->stop_requested = 1;
31
+ }
32
+
33
+ static int parse_req_time(char* line, ssize_t line_size, time_t* time) {
34
+ int matched = 0;
35
+ int ovector[30];
36
+ char *date_buf;
37
+ struct tm request_tm;
38
+ time_t tv;
39
+ const char* error;
40
+ int erroffset;
41
+ static pcre* regex = NULL;
42
+
43
+ *time = 0;
44
+
45
+ if(regex == NULL) {
46
+ regex = pcre_compile("^(?:Processing|Started).*(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})", 0, &error, &erroffset, NULL);
47
+ }
48
+ matched = pcre_exec(regex, NULL, line, line_size,0,0,ovector, 30);
49
+ if(matched > 0) {
50
+ pcre_get_substring(line, ovector, matched, 1, (const char **)&date_buf);
51
+ strptime(date_buf, "%Y-%m-%d %H:%M:%S", &request_tm);
52
+ free(date_buf);
53
+
54
+ *time = timegm(&request_tm);
55
+ return(1);
56
+ }
57
+ return(-1);
58
+ }
59
+
60
+ static int rails_process_line(req_matcher_t* base, char *line, ssize_t line_size, off_t offset) {
61
+ rails_req_matcher_t* m = (rails_req_matcher_t*)base;
62
+
63
+ if((m->stop_requested) || (line_size == -1)) {
64
+ rails_on_request(m, &request);
65
+ return((m->stop_requested)?STOP_SIGNAL:EOF_REACHED);
66
+ }
67
+
68
+ if(line_size == 1) { //blank line
69
+ m->blank_lines += 1;
70
+ return(0);
71
+ }
72
+
73
+ if(m->blank_lines >= 2) {
74
+ m->blank_lines = 0;
75
+ rails_on_request(m, &request);
76
+ }
77
+
78
+ add_to_request(&request, line, offset);
79
+
80
+ if(request.time == 0) {
81
+ parse_req_time(line, line_size, &(request.time));
82
+ }
83
+
84
+ return(0);
85
+ }
86
+
87
+ req_matcher_t* rails_req_matcher(on_req fn1, on_err fn2, void* arg) {
88
+ rails_req_matcher_t* m = (rails_req_matcher_t*)malloc(sizeof(rails_req_matcher_t));
89
+ req_matcher_t* base = (req_matcher_t*)m;
90
+
91
+ m->on_request = fn1;
92
+ m->on_error = fn2;
93
+ m->arg = arg;
94
+
95
+ m->stop_requested = 0;
96
+ m->blank_lines = 0;
97
+
98
+ base->process_line = &rails_process_line;
99
+ base->stop = &rails_stop;
100
+ clear_request(&request);
101
+ return base;
102
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef __RAILS_REQ_H__
2
+ #define __RAILS_REQ_H__
3
+ #include "req_matcher.h"
4
+
5
+ req_matcher_t* rails_req_matcher(on_req fn1, on_err fn2, void* arg);
6
+ #endif
@@ -0,0 +1,17 @@
1
+ #ifndef __REQ_MATCHER_H__
2
+ #define __REQ_MATCHER_H__
3
+ #include "request.h"
4
+ #include <sys/types.h>
5
+
6
+ typedef void (*on_req)(request_t*, void* arg);
7
+ typedef void (*on_err)(char*, ssize_t, void* arg);
8
+
9
+ typedef struct req_matcher_t{
10
+ int (*process_line)(struct req_matcher_t* base, char* line, ssize_t line_sz, off_t offset);
11
+ void (*stop)(struct req_matcher_t* base);
12
+ }req_matcher_t;
13
+
14
+ #define EOF_REACHED 1
15
+ #define STOP_SIGNAL 2
16
+
17
+ #endif //__REQ_MATCHER_H__
@@ -0,0 +1,41 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "request.h"
5
+
6
+ request_t* alloc_request() {
7
+ request_t* r = (request_t*)calloc(1, sizeof(request_t));
8
+ return(r);
9
+ }
10
+
11
+ void init_request(request_t* r) {
12
+ memset(r, 0, sizeof(request_t));
13
+ }
14
+ void clear_request(request_t* r) {
15
+ int i=0;
16
+
17
+ for(i = 0; i < r->lines; i++) {
18
+ free(r->buf[i]);
19
+ }
20
+ if(r->buf) {
21
+ free(r->buf);
22
+ }
23
+ if(r->session) {
24
+ free(r->session);
25
+ }
26
+ init_request(r);
27
+ }
28
+
29
+ void free_request(request_t* r) {
30
+ clear_request(r);
31
+ free(r);
32
+ }
33
+
34
+ void add_to_request(request_t* req, char* line, off_t offset) {
35
+ if ( !req->offset )
36
+ req->offset = offset;
37
+
38
+ req->buf = realloc(req->buf, sizeof(char*) * (req->lines + 1));
39
+ req->buf[req->lines] = line;
40
+ req->lines++;
41
+ }
@@ -0,0 +1,22 @@
1
#ifndef __REQUEST_H__
#define __REQUEST_H__

#include <sys/types.h>  /* off_t */
#include <time.h>

/* One logical request: the log lines that belong together plus metadata. */
typedef struct request_t{
    char **buf;     /* array of `lines` line pointers, each owned by the request */
    int lines;      /* number of entries in buf */
    time_t time;    /* request timestamp, 0 when none has been parsed */
    char* session;  /* freed by clear_request() */
    off_t offset;   /* offset recorded by the first add_to_request() call */

    struct request_t* next; //for linking
    struct request_t* prev;
}request_t;

/* Allocate a zeroed request (NULL on allocation failure). */
request_t* alloc_request(void);
/* Zero every field of r without freeing anything. */
void init_request(request_t* r);
/* Free the lines, line array and session of r, then zero it. */
void clear_request(request_t* r);
/* clear_request(r) followed by free(r). */
void free_request(request_t* r);
/* Append line (not copied) to req; offset is kept if req has none yet. */
void add_to_request(request_t* req, char* line, off_t offset);
#endif //__REQUEST_H__
@@ -0,0 +1,99 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <getopt.h>
4
+ #include <string.h>
5
+ #include <time.h>
6
+ #include <unistd.h>
7
+ #include "pcre.h"
8
+ #include "rails_req.h"
9
+ #include "work_req.h"
10
+ #include "ug_index.h"
11
+
12
+ #define USAGE "Usage: ug_build_index (work|app) file\n"
13
+
14
+
15
+ // index file format
16
+ // [64bit,64bit] -- timestamp, file offset
17
+ // [32bit, Nbytes] -- extra data
18
+
19
+ void handle_request(request_t* req, build_idx_context_t* cxt) {
20
+ time_t floored_time;
21
+ floored_time = req->time - (req->time % INDEX_EVERY);
22
+ if ( !cxt->last_index_time || floored_time > cxt->last_index_time ) {
23
+ ug_write_index(cxt->index, floored_time, req->offset, NULL, 0);
24
+ cxt->last_index_time = floored_time;
25
+ }
26
+ }
27
+
28
+ int main(int argc, char **argv)
29
+ {
30
+ build_idx_context_t *cxt;
31
+ char *line = NULL, *index_fname = NULL, *dir;
32
+ ssize_t line_size, allocated;
33
+
34
+
35
+ if ( argc < 3 ) {
36
+ fprintf(stderr, USAGE);
37
+ exit(1);
38
+ }
39
+
40
+ cxt = malloc(sizeof(build_idx_context_t));
41
+ memset(cxt, 0, sizeof(build_idx_context_t));
42
+
43
+ if(strcmp(argv[1],"work") == 0)
44
+ {
45
+ cxt->m = work_req_matcher(&handle_request, NULL, cxt);
46
+ }
47
+ else if(strcmp(argv[1], "app") == 0)
48
+ {
49
+ cxt->m = rails_req_matcher(&handle_request, NULL, cxt);
50
+ }
51
+ else
52
+ {
53
+ fprintf(stderr, USAGE);
54
+ exit(1);
55
+ }
56
+
57
+ cxt->log = fopen(argv[2], "r");
58
+ if ( !cxt->log ) {
59
+ perror("Couldn't open log file");
60
+ exit(1);
61
+ }
62
+
63
+ index_fname = ug_get_index_fname(argv[2]);
64
+
65
+ cxt->index = fopen(index_fname, "r+");
66
+ if ( cxt->index ) {
67
+ struct ug_index idx;
68
+ ug_get_last_index_entry(cxt->index, &idx);
69
+ fseeko(cxt->log, idx.offset, SEEK_SET);
70
+ } else {
71
+ cxt->index = fopen(index_fname, "w+");
72
+ }
73
+
74
+ if ( !cxt->index ) {
75
+ perror("Couldn't open index file");
76
+ exit(1);
77
+ }
78
+
79
+ if ( strcmp(argv[2] + (strlen(argv[2]) - 3), ".gz") == 0 ) {
80
+ build_gz_index(cxt);
81
+ } else {
82
+ cxt->data_size = 0;
83
+
84
+ while(1) {
85
+ int ret;
86
+ line_size = getline(&line, &allocated, cxt->log);
87
+ ret = cxt->m->process_line(cxt->m, line, line_size, ftello(cxt->log) - line_size);
88
+ if(ret == EOF_REACHED || ret == STOP_SIGNAL) {
89
+ break;
90
+ }
91
+ line = NULL;
92
+ }
93
+ }
94
+
95
+ fclose(cxt->index);
96
+ fclose(cxt->log);
97
+ }
98
+
99
+
@@ -0,0 +1,46 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include <libgen.h>
5
+ #include "ug_index.h"
6
+
7
+ /*
8
+ * ug_cat -- given a log file and (possibly) a file + (timestamp -> offset) index, cat the file starting
9
+ * from about that timestamp
10
+ */
11
+
12
+ #define USAGE "Usage: ug_cat file timestamp\n"
13
+
14
+ int main(int argc, char **argv)
15
+ {
16
+ int nread;
17
+ FILE *log;
18
+ FILE *index;
19
+ char *index_fname, buf[4096];
20
+
21
+ if ( argc < 3 ) {
22
+ fprintf(stderr, USAGE);
23
+ exit(1);
24
+ }
25
+
26
+ log = fopen(argv[1], "r");
27
+ if ( !log ) {
28
+ perror("Couldn't open log file");
29
+ exit(1);
30
+ }
31
+
32
+ index_fname = ug_get_index_fname(argv[1]);
33
+
34
+ index = fopen(index_fname, "r");
35
+ if ( index ) {
36
+ ug_seek_to_timestamp(log, index, atol(argv[2]), NULL);
37
+ }
38
+
39
+ while ( nread = fread(buf, 1, 4096, log) ) {
40
+ fwrite(buf, 1, nread, stdout);
41
+ }
42
+
43
+ fclose(log);
44
+ }
45
+
46
+
@@ -0,0 +1,138 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <getopt.h>
4
+ #include <string.h>
5
+ #include <time.h>
6
+ #include "pcre.h"
7
+ #include "req_matcher.h"
8
+ #include "rails_req.h"
9
+ #include "work_req.h"
10
+
11
+
12
+ typedef struct {
13
+ time_t start_time;
14
+ time_t end_time;
15
+ int num_regexps;
16
+ pcre **regexps;
17
+ req_matcher_t* m;
18
+ }context_t;
19
+
20
+
21
+ int check_request(int lines, char **request, time_t request_time, pcre **regexps, int num_regexps)
22
+ {
23
+ int *matches, i, j, matched;
24
+
25
+ matches = malloc(sizeof(int) * num_regexps);
26
+ memset(matches, 0, (sizeof(int) * num_regexps));
27
+
28
+ for(i=0; i < lines; i++) {
29
+ for(j=0; j < num_regexps; j++) {
30
+ int ovector[30];
31
+ if ( matches[j] ) continue;
32
+
33
+ matched = pcre_exec(regexps[j], NULL, request[i], strlen(request[i]), 0, 0, ovector, 30);
34
+ if ( matched > 0 )
35
+ matches[j] = 1;
36
+ }
37
+ }
38
+
39
+ matched = 1;
40
+ for (j=0; j < num_regexps; j++) {
41
+ matched &= matches[j];
42
+ }
43
+
44
+ free(matches);
45
+ return(matched);
46
+ }
47
+
48
/*
 * Print a request to stdout: a blank line, every line of the request, then a
 * rule of '-' as long as the last line (capped at 80), and a newline.
 * Prints nothing when the request has no lines.
 */
void print_request(int request_lines, char **request)
{
    int i;
    size_t rule_len, k;

    /* Without this guard request[request_lines - 1] reads before the array. */
    if ( request_lines <= 0 )
        return;

    putchar('\n');

    for(i=0; i < request_lines; i++)
        fputs(request[i], stdout);

    /* Measure the last line once instead of on every loop iteration. */
    rule_len = strlen(request[request_lines - 1]);
    if ( rule_len > 80 )
        rule_len = 80;

    for(k=0; k < rule_len; k++)
        putchar('-');

    putchar('\n');
    fflush(stdout);
}
62
+
63
+ void handle_request(request_t* req, void* cxt_arg) {
64
+ static int time = 0;
65
+ context_t* cxt = (context_t*)cxt_arg;
66
+ if( (req->time > cxt->start_time &&
67
+ check_request(req->lines, req->buf, req->time, cxt->regexps, cxt->num_regexps))) {
68
+ if(req->time != 0) {
69
+ printf("@@%lu\n", req->time);
70
+ }
71
+
72
+ print_request(req->lines, req->buf);
73
+ }
74
+ if(req->time > time) {
75
+ time = req->time;
76
+ printf("@@%lu\n", time);
77
+ }
78
+ if(req->time > cxt->end_time) {
79
+ cxt->m->stop(cxt->m);
80
+ }
81
+ }
82
+ int main(int argc, char **argv)
83
+ {
84
+ int i;
85
+ context_t *cxt;
86
+ const char *error;
87
+ int erroffset;
88
+ char *line = NULL;
89
+ ssize_t line_size, allocated;
90
+
91
+
92
+ if ( argc < 5 ) {
93
+ fprintf(stderr, "Usage: ug_guts (work|app) start_time end_time regexps [... regexps]\n");
94
+ exit(1);
95
+ }
96
+
97
+ cxt = malloc(sizeof(context_t));
98
+
99
+ if(strcmp(argv[1],"work") == 0)
100
+ {
101
+ cxt->m = work_req_matcher(&handle_request, NULL, cxt);
102
+ }
103
+ else if(strcmp(argv[1], "app") == 0)
104
+ {
105
+ cxt->m = rails_req_matcher(&handle_request, NULL, cxt);
106
+ }
107
+ else
108
+ {
109
+ fprintf(stderr, "Usage: ug_guts (work|app) start_time end_time regexps [... regexps]\n");
110
+ exit(1);
111
+ }
112
+
113
+ cxt->start_time = atol(argv[2]);
114
+ cxt->end_time = atol(argv[3]);
115
+
116
+ cxt->num_regexps = argc - 4;
117
+ cxt->regexps = malloc(sizeof(pcre *) * cxt->num_regexps);
118
+
119
+ for ( i = 4; i < argc; i++) {
120
+ cxt->regexps[i-4] = pcre_compile(argv[i], 0, &error, &erroffset, NULL);
121
+ if ( error ) {
122
+ fprintf(stderr, "Error compiling regexp \"%s\": %s\n", argv[i], error);
123
+ exit;
124
+ }
125
+ }
126
+
127
+
128
+ while(1) {
129
+ int ret;
130
+ line_size = getline(&line, &allocated, stdin);
131
+ ret = cxt->m->process_line(cxt->m, line, line_size, 0);
132
+ if(ret == EOF_REACHED || ret == STOP_SIGNAL) {
133
+ break;
134
+ }
135
+ line = NULL;
136
+ }
137
+ }
138
+
@@ -0,0 +1,83 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <libgen.h>
4
+ #include "ug_index.h"
5
+
6
/*
 * Append one index entry to file at its current position.
 *
 * Layout, in host byte order: 8-byte time, 8-byte offset, 4-byte data_size,
 * then data_size bytes of data.
 *
 * Returns 0 on success and -1 when any write comes up short.
 */
int ug_write_index(FILE *file, uint64_t time, uint64_t offset, char *data, uint32_t data_size)
{
    if ( fwrite(&time, sizeof time, 1, file) != 1 )
        return -1;
    if ( fwrite(&offset, sizeof offset, 1, file) != 1 )
        return -1;
    if ( fwrite(&data_size, sizeof data_size, 1, file) != 1 )
        return -1;

    /* data may be NULL when data_size is 0, so only touch it if needed. */
    if ( data_size && fwrite(data, 1, data_size, file) != data_size )
        return -1;

    return 0;
}
15
+
16
+ int ug_read_index_entry(FILE *file, struct ug_index *idx, int read_data)
17
+ {
18
+ int nread;
19
+ nread = fread(&(idx->time), 8, 1, file);
20
+ if ( !nread )
21
+ return 0;
22
+
23
+ nread = fread(&(idx->offset), 8, 1, file);
24
+ nread = fread(&(idx->data_size), 4, 1, file);
25
+ if ( idx->data_size ) {
26
+ if ( read_data ) {
27
+ idx->data = malloc(idx->data_size);
28
+ nread = fread(idx->data, 1, idx->data_size, file);
29
+ } else {
30
+ fseek(file, idx->data_size, SEEK_CUR);
31
+ }
32
+ }
33
+
34
+ return 1;
35
+ }
36
+
37
/*
 * Read index entries from file's current position until no more can be read,
 * passing idx to every ug_read_index_entry() call.
 *
 * Returns 1 when at least one entry was read, 0 when none was.
 */
int ug_get_last_index_entry(FILE *file, struct ug_index *idx) {
    int found = 0;

    while (ug_read_index_entry(file, idx, 0))
        found = 1;

    return found;
}
40
+
41
+ void ug_seek_to_timestamp(FILE *flog, FILE *findex, uint64_t time, struct ug_index *param_idx)
42
+ {
43
+ struct ug_index idx, prev;
44
+ off_t last_offset = 0;
45
+
46
+ memset(&prev, 0, sizeof(struct ug_index));
47
+
48
+ for(;;) {
49
+ if ( !ug_read_index_entry(findex, &idx, 0) ) {
50
+ memcpy(&prev, &idx, sizeof(struct ug_index));
51
+ break;
52
+ }
53
+
54
+ if ( idx.time > time )
55
+ break;
56
+
57
+ memcpy(&prev, &idx, sizeof(struct ug_index));
58
+ }
59
+
60
+ if ( prev.offset ) {
61
+ fseek(flog, prev.offset, SEEK_SET);
62
+ if ( param_idx ) {
63
+ memcpy(param_idx, &prev, sizeof(struct ug_index));
64
+ }
65
+ }
66
+ }
67
+
68
/*
 * Build the index file name for a log file: "<dir>/.<basename>.idx".
 *
 * Returns malloc'ed memory that the caller frees, or NULL when memory cannot
 * be allocated.  log_fname itself is never modified.
 */
char *ug_get_index_fname(char *log_fname)
{
    size_t len = strlen(log_fname);
    char *dir_copy = malloc(len + 1);
    char *base_copy = malloc(len + 1);
    char *index_fname = NULL;

    if ( dir_copy && base_copy ) {
        const char *dir, *base;
        size_t needed;

        /* dirname() and basename() may modify their argument and may return
         * a pointer that was not malloc'ed (e.g. "."), so each gets its own
         * scratch copy and only the copies are freed. */
        memcpy(dir_copy, log_fname, len + 1);
        memcpy(base_copy, log_fname, len + 1);
        dir = dirname(dir_copy);
        base = basename(base_copy);

        /* sizeof counts the literal's terminating NUL as well. */
        needed = strlen(dir) + strlen(base) + sizeof("/..idx");
        index_fname = malloc(needed);
        if ( index_fname )
            snprintf(index_fname, needed, "%s/.%s.idx", dir, base);
    }

    free(base_copy);
    free(dir_copy);
    return index_fname;
}
82
+
83
+
@@ -0,0 +1,27 @@
1
+ #include <stdint.h>
2
+ #include <stdio.h>
3
+ #include "req_matcher.h"
4
+
5
+ #define INDEX_EVERY 10
6
+
7
+ struct ug_index {
8
+ uint64_t time;
9
+ uint64_t offset;
10
+ uint32_t data_size;
11
+ char *data;
12
+ };
13
+
14
+ typedef struct {
15
+ time_t last_index_time;
16
+ FILE *log;
17
+ FILE *index;
18
+ uint32_t data_size;
19
+ req_matcher_t* m;
20
+ unsigned char data[32768];
21
+ } build_idx_context_t;
22
+
23
+ int ug_write_index(FILE *file, uint64_t time, uint64_t offset, char *data, uint32_t data_size);
24
+ int ug_get_last_index_entry(FILE *file, struct ug_index *idx);
25
+ void ug_seek_to_timestamp(FILE *log, FILE *idx, uint64_t time, struct ug_index *param_idx);
26
+ char *ug_get_index_fname(char *log_fname);
27
+