ultragrep 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/ultragrep ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') # append the lib/ directory that sits next to bin/ to the load path
3
+ require "ultragrep"
4
+
5
+ Thread.abort_on_exception = true # an unhandled exception in any thread aborts the whole process
6
+ Ultragrep::ultragrep(Ultragrep::parse_args(ARGV)) # parse the command line, then pass the result to Ultragrep.ultragrep
@@ -0,0 +1,39 @@
1
# Builds the three native helpers: ug_guts, ug_build_index and ug_cat.
CFLAGS=-I/opt/local/include -g

# These targets name actions, not files; without .PHONY a stray file called
# "all", "install" or "clean" would silently disable them.
.PHONY: all install clean

all: ug_guts ug_build_index ug_cat
install: all

request.o: request.c request.h
	gcc ${CFLAGS} -c request.c

rails_req.o: rails_req.c rails_req.h req_matcher.h
	gcc ${CFLAGS} -c rails_req.c

work_req.o: work_req.c work_req.h req_matcher.h
	gcc ${CFLAGS} -c work_req.c

ug_guts.o: ug_guts.c req_matcher.h
	gcc ${CFLAGS} -c ug_guts.c

ug_guts: ug_guts.o rails_req.o work_req.o request.o Makefile
	gcc ${CFLAGS} -o ug_guts request.o rails_req.o work_req.o ug_guts.o -lpcre

ug_index.o: ug_index.h ug_index.c
	gcc ${CFLAGS} -c ug_index.c

ug_build_index.o: ug_build_index.c req_matcher.h ug_index.h
	gcc ${CFLAGS} -c ug_build_index.c

ug_build_index: ug_build_index.o ug_index.o rails_req.o work_req.o request.o Makefile zran.o
	gcc ${CFLAGS} -o ug_build_index request.o rails_req.o work_req.o ug_index.o ug_build_index.o zran.o -lpcre -lz

ug_cat.o: ug_cat.c ug_index.h
	gcc ${CFLAGS} -c ug_cat.c

ug_cat: ug_cat.o ug_index.o Makefile
	gcc ${CFLAGS} -o ug_cat ug_cat.o ug_index.o

zran.o: zran.c
	gcc -g -c zran.c

# Remove every build product, including the ug_build_index and ug_cat binaries.
clean:
	rm -rf *.o ug_guts ug_build_index ug_cat
@@ -0,0 +1 @@
1
+ # Do nothing, we already have a complete Makefile!
@@ -0,0 +1,102 @@
1
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "pcre.h"
#include "request.h"
#include "req_matcher.h"
6
+
7
+ typedef struct { /* Rails-log matcher: groups log lines into requests and reports each one through on_request */
8
+ req_matcher_t base; /* first member: the functions below cast between req_matcher_t* and rails_req_matcher_t* */
9
+ on_req on_request; /* callback invoked for each accumulated request that has at least one line */
10
+ on_err on_error; /* stored by rails_req_matcher(); not invoked anywhere in this file */
11
+ void* arg; /* caller-supplied pointer passed back to on_request */
12
+ int stop_requested; /* set to 1 by rails_stop(); checked at the top of rails_process_line() */
13
+ int blank_lines; /* blank lines counted so far; reset to 0 once it reaches 2 and a request is flushed */
14
+
15
+ }rails_req_matcher_t;
16
+ static request_t request; /* the request currently being accumulated; a single file-level instance shared by all matchers */
17
+
18
+
19
+ static void rails_on_request(rails_req_matcher_t* m, request_t* r) {
20
+ if(r && m->on_request) {
21
+ if(r->lines > 0) {
22
+ m->on_request(r, m->arg);
23
+ }
24
+ clear_request(r);
25
+ }
26
+ }
27
+
28
+ void rails_stop(req_matcher_t* base) {
29
+ rails_req_matcher_t* m = (rails_req_matcher_t*)base;
30
+ m->stop_requested = 1;
31
+ }
32
+
33
+ static int parse_req_time(char* line, ssize_t line_size, time_t* time) {
34
+ int matched = 0;
35
+ int ovector[30];
36
+ char *date_buf;
37
+ struct tm request_tm;
38
+ time_t tv;
39
+ const char* error;
40
+ int erroffset;
41
+ static pcre* regex = NULL;
42
+
43
+ *time = 0;
44
+
45
+ if(regex == NULL) {
46
+ regex = pcre_compile("^(?:Processing|Started).*(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})", 0, &error, &erroffset, NULL);
47
+ }
48
+ matched = pcre_exec(regex, NULL, line, line_size,0,0,ovector, 30);
49
+ if(matched > 0) {
50
+ pcre_get_substring(line, ovector, matched, 1, (const char **)&date_buf);
51
+ strptime(date_buf, "%Y-%m-%d %H:%M:%S", &request_tm);
52
+ free(date_buf);
53
+
54
+ *time = timegm(&request_tm);
55
+ return(1);
56
+ }
57
+ return(-1);
58
+ }
59
+
60
+ static int rails_process_line(req_matcher_t* base, char *line, ssize_t line_size, off_t offset) {
61
+ rails_req_matcher_t* m = (rails_req_matcher_t*)base;
62
+
63
+ if((m->stop_requested) || (line_size == -1)) {
64
+ rails_on_request(m, &request);
65
+ return((m->stop_requested)?STOP_SIGNAL:EOF_REACHED);
66
+ }
67
+
68
+ if(line_size == 1) { //blank line
69
+ m->blank_lines += 1;
70
+ return(0);
71
+ }
72
+
73
+ if(m->blank_lines >= 2) {
74
+ m->blank_lines = 0;
75
+ rails_on_request(m, &request);
76
+ }
77
+
78
+ add_to_request(&request, line, offset);
79
+
80
+ if(request.time == 0) {
81
+ parse_req_time(line, line_size, &(request.time));
82
+ }
83
+
84
+ return(0);
85
+ }
86
+
87
+ req_matcher_t* rails_req_matcher(on_req fn1, on_err fn2, void* arg) {
88
+ rails_req_matcher_t* m = (rails_req_matcher_t*)malloc(sizeof(rails_req_matcher_t));
89
+ req_matcher_t* base = (req_matcher_t*)m;
90
+
91
+ m->on_request = fn1;
92
+ m->on_error = fn2;
93
+ m->arg = arg;
94
+
95
+ m->stop_requested = 0;
96
+ m->blank_lines = 0;
97
+
98
+ base->process_line = &rails_process_line;
99
+ base->stop = &rails_stop;
100
+ clear_request(&request);
101
+ return base;
102
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef __RAILS_REQ_H__
2
+ #define __RAILS_REQ_H__
3
+ #include "req_matcher.h"
4
+
5
+ req_matcher_t* rails_req_matcher(on_req fn1, on_err fn2, void* arg); /* creates a Rails-log matcher; fn1 receives each completed request together with arg, fn2 is stored as the error callback */
6
+ #endif
@@ -0,0 +1,17 @@
1
+ #ifndef __REQ_MATCHER_H__
2
+ #define __REQ_MATCHER_H__
3
+ #include "request.h"
4
+ #include <sys/types.h>
5
+
6
+ typedef void (*on_req)(request_t*, void* arg); /* request callback: receives a request and a caller-supplied pointer */
7
+ typedef void (*on_err)(char*, ssize_t, void* arg); /* error callback: receives a char buffer, an ssize_t and a caller-supplied pointer */
8
+
9
+ typedef struct req_matcher_t{ /* function-pointer interface through which callers drive a request matcher */
10
+ int (*process_line)(struct req_matcher_t* base, char* line, ssize_t line_sz, off_t offset); /* feed one line; callers compare the result with EOF_REACHED and STOP_SIGNAL */
11
+ void (*stop)(struct req_matcher_t* base); /* ask the matcher to stop */
12
+ }req_matcher_t;
13
+
14
+ #define EOF_REACHED 1 /* process_line result: input is exhausted */
15
+ #define STOP_SIGNAL 2 /* process_line result: a stop was requested */
16
+
17
+ #endif //__REQ_MATCHER_H__
@@ -0,0 +1,41 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "request.h"
5
+
6
+ request_t* alloc_request() {
7
+ request_t* r = (request_t*)calloc(1, sizeof(request_t));
8
+ return(r);
9
+ }
10
+
11
+ void init_request(request_t* r) {
12
+ memset(r, 0, sizeof(request_t));
13
+ }
14
+ void clear_request(request_t* r) {
15
+ int i=0;
16
+
17
+ for(i = 0; i < r->lines; i++) {
18
+ free(r->buf[i]);
19
+ }
20
+ if(r->buf) {
21
+ free(r->buf);
22
+ }
23
+ if(r->session) {
24
+ free(r->session);
25
+ }
26
+ init_request(r);
27
+ }
28
+
29
+ void free_request(request_t* r) {
30
+ clear_request(r);
31
+ free(r);
32
+ }
33
+
34
+ void add_to_request(request_t* req, char* line, off_t offset) {
35
+ if ( !req->offset )
36
+ req->offset = offset;
37
+
38
+ req->buf = realloc(req->buf, sizeof(char*) * (req->lines + 1));
39
+ req->buf[req->lines] = line;
40
+ req->lines++;
41
+ }
@@ -0,0 +1,22 @@
1
#ifndef __REQUEST_H__
#define __REQUEST_H__

#include <sys/types.h> /* off_t, so this header compiles on its own */
#include <time.h>

/* One logical request: the log lines that belong together. */
typedef struct request_t{
    char **buf;     /* array of line pointers; each line is freed by clear_request() */
    int lines;      /* number of entries in buf */
    time_t time;    /* request timestamp; 0 while none has been parsed */
    char* session;  /* freed by clear_request() */
    off_t offset;   /* file offset recorded by add_to_request() */

    struct request_t* next; //for linking
    struct request_t* prev;
}request_t;

/* Returns a zero-filled, heap-allocated request. */
request_t* alloc_request(void);
/* Zeroes *r without freeing anything. */
void init_request(request_t* r);
/* Frees everything *r owns and zeroes it for reuse. */
void clear_request(request_t* r);
/* clear_request() followed by freeing r itself. */
void free_request(request_t* r);
/* Appends a line (ownership passes to the request) found at the given offset. */
void add_to_request(request_t*, char*, off_t);
#endif //__REQUEST_H__
@@ -0,0 +1,99 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <getopt.h>
4
+ #include <string.h>
5
+ #include <time.h>
6
+ #include <unistd.h>
7
+ #include "pcre.h"
8
+ #include "rails_req.h"
9
+ #include "work_req.h"
10
+ #include "ug_index.h"
11
+
12
+ #define USAGE "Usage: ug_build_index (work|app) file\n"
13
+
14
+
15
+ // index file format
16
+ // [64bit,64bit] -- timestamp, file offset
17
+ // [32bit, Nbytes] -- extra data
18
+
19
+ void handle_request(request_t* req, build_idx_context_t* cxt) {
20
+ time_t floored_time;
21
+ floored_time = req->time - (req->time % INDEX_EVERY);
22
+ if ( !cxt->last_index_time || floored_time > cxt->last_index_time ) {
23
+ ug_write_index(cxt->index, floored_time, req->offset, NULL, 0);
24
+ cxt->last_index_time = floored_time;
25
+ }
26
+ }
27
+
28
+ int main(int argc, char **argv)
29
+ {
30
+ build_idx_context_t *cxt;
31
+ char *line = NULL, *index_fname = NULL, *dir;
32
+ ssize_t line_size, allocated;
33
+
34
+
35
+ if ( argc < 3 ) {
36
+ fprintf(stderr, USAGE);
37
+ exit(1);
38
+ }
39
+
40
+ cxt = malloc(sizeof(build_idx_context_t));
41
+ memset(cxt, 0, sizeof(build_idx_context_t));
42
+
43
+ if(strcmp(argv[1],"work") == 0)
44
+ {
45
+ cxt->m = work_req_matcher(&handle_request, NULL, cxt);
46
+ }
47
+ else if(strcmp(argv[1], "app") == 0)
48
+ {
49
+ cxt->m = rails_req_matcher(&handle_request, NULL, cxt);
50
+ }
51
+ else
52
+ {
53
+ fprintf(stderr, USAGE);
54
+ exit(1);
55
+ }
56
+
57
+ cxt->log = fopen(argv[2], "r");
58
+ if ( !cxt->log ) {
59
+ perror("Couldn't open log file");
60
+ exit(1);
61
+ }
62
+
63
+ index_fname = ug_get_index_fname(argv[2]);
64
+
65
+ cxt->index = fopen(index_fname, "r+");
66
+ if ( cxt->index ) {
67
+ struct ug_index idx;
68
+ ug_get_last_index_entry(cxt->index, &idx);
69
+ fseeko(cxt->log, idx.offset, SEEK_SET);
70
+ } else {
71
+ cxt->index = fopen(index_fname, "w+");
72
+ }
73
+
74
+ if ( !cxt->index ) {
75
+ perror("Couldn't open index file");
76
+ exit(1);
77
+ }
78
+
79
+ if ( strcmp(argv[2] + (strlen(argv[2]) - 3), ".gz") == 0 ) {
80
+ build_gz_index(cxt);
81
+ } else {
82
+ cxt->data_size = 0;
83
+
84
+ while(1) {
85
+ int ret;
86
+ line_size = getline(&line, &allocated, cxt->log);
87
+ ret = cxt->m->process_line(cxt->m, line, line_size, ftello(cxt->log) - line_size);
88
+ if(ret == EOF_REACHED || ret == STOP_SIGNAL) {
89
+ break;
90
+ }
91
+ line = NULL;
92
+ }
93
+ }
94
+
95
+ fclose(cxt->index);
96
+ fclose(cxt->log);
97
+ }
98
+
99
+
@@ -0,0 +1,46 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include <libgen.h>
5
+ #include "ug_index.h"
6
+
7
+ /*
8
+ * ug_cat -- given a log file and (possibly) a file + (timestamp -> offset) index, cat the file starting
9
+ * from about that timestamp
10
+ */
11
+
12
+ #define USAGE "Usage: ug_cat file timestamp\n"
13
+
14
+ int main(int argc, char **argv)
15
+ {
16
+ int nread;
17
+ FILE *log;
18
+ FILE *index;
19
+ char *index_fname, buf[4096];
20
+
21
+ if ( argc < 3 ) {
22
+ fprintf(stderr, USAGE);
23
+ exit(1);
24
+ }
25
+
26
+ log = fopen(argv[1], "r");
27
+ if ( !log ) {
28
+ perror("Couldn't open log file");
29
+ exit(1);
30
+ }
31
+
32
+ index_fname = ug_get_index_fname(argv[1]);
33
+
34
+ index = fopen(index_fname, "r");
35
+ if ( index ) {
36
+ ug_seek_to_timestamp(log, index, atol(argv[2]), NULL);
37
+ }
38
+
39
+ while ( nread = fread(buf, 1, 4096, log) ) {
40
+ fwrite(buf, 1, nread, stdout);
41
+ }
42
+
43
+ fclose(log);
44
+ }
45
+
46
+
@@ -0,0 +1,138 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <getopt.h>
4
+ #include <string.h>
5
+ #include <time.h>
6
+ #include "pcre.h"
7
+ #include "req_matcher.h"
8
+ #include "rails_req.h"
9
+ #include "work_req.h"
10
+
11
+
12
+ typedef struct { /* state shared between main() and the handle_request() callback */
13
+ time_t start_time; /* from argv[2]; only requests with a later timestamp are printed */
14
+ time_t end_time; /* from argv[3]; the matcher is stopped once a request is later than this */
15
+ int num_regexps; /* number of entries in regexps */
16
+ pcre **regexps; /* compiled patterns; a request must match every one of them to be printed */
17
+ req_matcher_t* m; /* matcher fed from stdin */
18
+ }context_t;
19
+
20
+
21
+ int check_request(int lines, char **request, time_t request_time, pcre **regexps, int num_regexps)
22
+ {
23
+ int *matches, i, j, matched;
24
+
25
+ matches = malloc(sizeof(int) * num_regexps);
26
+ memset(matches, 0, (sizeof(int) * num_regexps));
27
+
28
+ for(i=0; i < lines; i++) {
29
+ for(j=0; j < num_regexps; j++) {
30
+ int ovector[30];
31
+ if ( matches[j] ) continue;
32
+
33
+ matched = pcre_exec(regexps[j], NULL, request[i], strlen(request[i]), 0, 0, ovector, 30);
34
+ if ( matched > 0 )
35
+ matches[j] = 1;
36
+ }
37
+ }
38
+
39
+ matched = 1;
40
+ for (j=0; j < num_regexps; j++) {
41
+ matched &= matches[j];
42
+ }
43
+
44
+ free(matches);
45
+ return(matched);
46
+ }
47
+
48
/*
 * Print a request to stdout: a blank line, the request's lines verbatim, and
 * a rule of '-' characters as long as the last line (capped at 80), followed
 * by a newline. Output is flushed before returning.
 *
 * An empty request (request_lines <= 0) prints only the framing newlines; the
 * rule is skipped because there is no last line to measure.
 */
void print_request(int request_lines, char **request)
{
    int i;
    size_t rule_len = 0, k;

    putchar('\n');

    for (i = 0; i < request_lines; i++)
        printf("%s", request[i]);

    if ( request_lines > 0 ) {
        rule_len = strlen(request[request_lines - 1]);
        if ( rule_len > 80 )
            rule_len = 80;
    }

    for (k = 0; k < rule_len; k++)
        putchar('-');

    putchar('\n');
    fflush(stdout);
}
62
+
63
+ void handle_request(request_t* req, void* cxt_arg) {
64
+ static int time = 0;
65
+ context_t* cxt = (context_t*)cxt_arg;
66
+ if( (req->time > cxt->start_time &&
67
+ check_request(req->lines, req->buf, req->time, cxt->regexps, cxt->num_regexps))) {
68
+ if(req->time != 0) {
69
+ printf("@@%lu\n", req->time);
70
+ }
71
+
72
+ print_request(req->lines, req->buf);
73
+ }
74
+ if(req->time > time) {
75
+ time = req->time;
76
+ printf("@@%lu\n", time);
77
+ }
78
+ if(req->time > cxt->end_time) {
79
+ cxt->m->stop(cxt->m);
80
+ }
81
+ }
82
+ int main(int argc, char **argv)
83
+ {
84
+ int i;
85
+ context_t *cxt;
86
+ const char *error;
87
+ int erroffset;
88
+ char *line = NULL;
89
+ ssize_t line_size, allocated;
90
+
91
+
92
+ if ( argc < 5 ) {
93
+ fprintf(stderr, "Usage: ug_guts (work|app) start_time end_time regexps [... regexps]\n");
94
+ exit(1);
95
+ }
96
+
97
+ cxt = malloc(sizeof(context_t));
98
+
99
+ if(strcmp(argv[1],"work") == 0)
100
+ {
101
+ cxt->m = work_req_matcher(&handle_request, NULL, cxt);
102
+ }
103
+ else if(strcmp(argv[1], "app") == 0)
104
+ {
105
+ cxt->m = rails_req_matcher(&handle_request, NULL, cxt);
106
+ }
107
+ else
108
+ {
109
+ fprintf(stderr, "Usage: ug_guts (work|app) start_time end_time regexps [... regexps]\n");
110
+ exit(1);
111
+ }
112
+
113
+ cxt->start_time = atol(argv[2]);
114
+ cxt->end_time = atol(argv[3]);
115
+
116
+ cxt->num_regexps = argc - 4;
117
+ cxt->regexps = malloc(sizeof(pcre *) * cxt->num_regexps);
118
+
119
+ for ( i = 4; i < argc; i++) {
120
+ cxt->regexps[i-4] = pcre_compile(argv[i], 0, &error, &erroffset, NULL);
121
+ if ( error ) {
122
+ fprintf(stderr, "Error compiling regexp \"%s\": %s\n", argv[i], error);
123
+ exit;
124
+ }
125
+ }
126
+
127
+
128
+ while(1) {
129
+ int ret;
130
+ line_size = getline(&line, &allocated, stdin);
131
+ ret = cxt->m->process_line(cxt->m, line, line_size, 0);
132
+ if(ret == EOF_REACHED || ret == STOP_SIGNAL) {
133
+ break;
134
+ }
135
+ line = NULL;
136
+ }
137
+ }
138
+
@@ -0,0 +1,83 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <libgen.h>
4
+ #include "ug_index.h"
5
+
6
/*
 * Append one index entry to file, in host byte order:
 *   8 bytes time, 8 bytes offset, 4 bytes data_size, then data_size bytes.
 *
 * data may be NULL when data_size is 0.
 *
 * Returns 1 when the whole entry was written and 0 when any write fell short.
 */
int ug_write_index(FILE *file, uint64_t time, uint64_t offset, char *data, uint32_t data_size)
{
    if ( fwrite(&time, sizeof time, 1, file) != 1 )
        return 0;
    if ( fwrite(&offset, sizeof offset, 1, file) != 1 )
        return 0;
    if ( fwrite(&data_size, sizeof data_size, 1, file) != 1 )
        return 0;

    if ( data_size && fwrite(data, 1, data_size, file) != data_size )
        return 0;

    return 1;
}
15
+
16
+ int ug_read_index_entry(FILE *file, struct ug_index *idx, int read_data)
17
+ {
18
+ int nread;
19
+ nread = fread(&(idx->time), 8, 1, file);
20
+ if ( !nread )
21
+ return 0;
22
+
23
+ nread = fread(&(idx->offset), 8, 1, file);
24
+ nread = fread(&(idx->data_size), 4, 1, file);
25
+ if ( idx->data_size ) {
26
+ if ( read_data ) {
27
+ idx->data = malloc(idx->data_size);
28
+ nread = fread(idx->data, 1, idx->data_size, file);
29
+ } else {
30
+ fseek(file, idx->data_size, SEEK_CUR);
31
+ }
32
+ }
33
+
34
+ return 1;
35
+ }
36
+
37
/*
 * Read entries from file's current position to its end, leaving the last one
 * read in *idx.
 *
 * Returns 1 when at least one entry was read and 0 otherwise. In the latter
 * case *idx has not been filled in, so callers should initialise it before
 * calling.
 */
int ug_get_last_index_entry(FILE *file, struct ug_index *idx) {
    int found = 0;

    while ( ug_read_index_entry(file, idx, 0) )
        found = 1;

    return found;
}
40
+
41
+ void ug_seek_to_timestamp(FILE *flog, FILE *findex, uint64_t time, struct ug_index *param_idx)
42
+ {
43
+ struct ug_index idx, prev;
44
+ off_t last_offset = 0;
45
+
46
+ memset(&prev, 0, sizeof(struct ug_index));
47
+
48
+ for(;;) {
49
+ if ( !ug_read_index_entry(findex, &idx, 0) ) {
50
+ memcpy(&prev, &idx, sizeof(struct ug_index));
51
+ break;
52
+ }
53
+
54
+ if ( idx.time > time )
55
+ break;
56
+
57
+ memcpy(&prev, &idx, sizeof(struct ug_index));
58
+ }
59
+
60
+ if ( prev.offset ) {
61
+ fseek(flog, prev.offset, SEEK_SET);
62
+ if ( param_idx ) {
63
+ memcpy(param_idx, &prev, sizeof(struct ug_index));
64
+ }
65
+ }
66
+ }
67
+
68
/*
 * Build the index file name for a log file: "<dir>/.<basename>.idx".
 *
 * log_fname is not modified. Returns malloc'ed memory that the caller must
 * free, or NULL when memory cannot be allocated.
 */
char *ug_get_index_fname(char *log_fname)
{
    size_t path_len = strlen(log_fname);
    /* dirname() and basename() each return at most max(path_len, 1) bytes. */
    size_t cap = 2 * (path_len + 1) + strlen("/..idx") + 1;
    char *scratch = malloc(path_len + 1);
    char *index_fname = malloc(cap);
    int used;

    if ( !scratch || !index_fname ) {
        free(scratch);
        free(index_fname);
        return NULL;
    }

    /* dirname() and basename() may modify their argument and may return
     * pointers into it or into static storage. Each therefore works on a
     * private copy, and its result is copied out before the next call and
     * is never passed to free(). */
    memcpy(scratch, log_fname, path_len + 1);
    used = snprintf(index_fname, cap, "%s/.", dirname(scratch));
    if ( used < 0 || (size_t)used >= cap ) {
        free(scratch);
        free(index_fname);
        return NULL;
    }

    memcpy(scratch, log_fname, path_len + 1);
    snprintf(index_fname + used, cap - (size_t)used, "%s.idx", basename(scratch));

    free(scratch);
    return index_fname;
}
82
+
83
+
@@ -0,0 +1,27 @@
1
+ #include <stdint.h>
2
+ #include <stdio.h>
3
+ #include "req_matcher.h"
4
+
5
+ #define INDEX_EVERY 10
6
+
7
+ struct ug_index {
8
+ uint64_t time;
9
+ uint64_t offset;
10
+ uint32_t data_size;
11
+ char *data;
12
+ };
13
+
14
+ typedef struct {
15
+ time_t last_index_time;
16
+ FILE *log;
17
+ FILE *index;
18
+ uint32_t data_size;
19
+ req_matcher_t* m;
20
+ unsigned char data[32768];
21
+ } build_idx_context_t;
22
+
23
+ int ug_write_index(FILE *file, uint64_t time, uint64_t offset, char *data, uint32_t data_size);
24
+ int ug_get_last_index_entry(FILE *file, struct ug_index *idx);
25
+ void ug_seek_to_timestamp(FILE *log, FILE *idx, uint64_t time, struct ug_index *param_idx);
26
+ char *ug_get_index_fname(char *log_fname);
27
+