ultragrep 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,200 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <time.h>
4
+ #include "pcre.h"
5
+ #include "request.h"
6
+ #include "req_matcher.h"
7
+
8
+
9
+ typedef struct {
10
+ req_matcher_t base;
11
+
12
+ on_req on_request;
13
+ on_err on_error;
14
+ void* arg;
15
+
16
+ request_t* curr_req;
17
+ request_t* top;
18
+
19
+ int depth; //debug
20
+
21
+ int stop_requested;
22
+ }work_req_matcher_t;
23
+
24
+
25
+ static void on_request(work_req_matcher_t* m, request_t* r) {
26
+ if(r) {
27
+ if(r->lines > 0 && m->on_request) {
28
+ m->on_request(r, m->arg);
29
+ }
30
+
31
+ //disconnect
32
+ if(r->next) {
33
+ r->next->prev = r->prev;
34
+ }
35
+ if(r->prev) {
36
+ r->prev->next = r->next;
37
+ } else {
38
+ m->top = r->next;
39
+ }
40
+
41
+ free_request(r);
42
+ m->depth--;
43
+ }
44
+ }
45
+
46
+ static void on_all_requests(work_req_matcher_t* m) {
47
+ request_t* r = m->top;
48
+ while(r) {
49
+ on_request(m, r);
50
+ r = m->top;
51
+ }
52
+ }
53
+
54
+ static void work_stop(req_matcher_t* base) {
55
+ work_req_matcher_t* m = (work_req_matcher_t*)base;
56
+ m->stop_requested = 1;
57
+ }
58
+
59
+
60
+ static char* extract_session(char* line, ssize_t line_size) {
61
+ int matched = 0;
62
+ int ovector[30];
63
+ char *session_buf;
64
+ const char* error;
65
+ int erroffset;
66
+ static pcre* regex = NULL;
67
+ if(regex == NULL) {
68
+ regex = pcre_compile("\"(\\w{6}:\\w{6})\"", 0, &error, &erroffset, NULL);
69
+ }
70
+ matched = pcre_exec(regex, NULL, line, line_size, 0, 0, ovector, 30);
71
+ if(matched > 0) {
72
+ pcre_get_substring(line, ovector, matched, 1, (const char **)&session_buf);
73
+ return(session_buf);
74
+ }
75
+ return NULL;
76
+ }
77
+
78
+ static int parse_req_time(char* line, ssize_t line_size, time_t* time) {
79
+ int matched = 0;
80
+ int ovector[30];
81
+ char *date_buf;
82
+ struct tm request_tm;
83
+ time_t tv;
84
+ const char* error;
85
+ int erroffset;
86
+ static pcre* regex = NULL;
87
+
88
+ if(regex == NULL) {
89
+ regex = pcre_compile("\"(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})\"", 0, &error, &erroffset, NULL);
90
+ }
91
+ matched = pcre_exec(regex, NULL, line, line_size,0,0,ovector, 30);
92
+ if(matched > 0) {
93
+ pcre_get_substring(line, ovector, matched, 1, (const char **)&date_buf);
94
+ strptime(date_buf, "%Y-%m-%d %H:%M:%S", &request_tm);
95
+ free(date_buf);
96
+
97
+ *time = mktime(&request_tm);
98
+ return(1);
99
+ }
100
+ return(-1);
101
+ }
102
+
103
+ static int detect_end(char* line, ssize_t line_size) {
104
+ int matched = 0;
105
+ int ovector[30];
106
+ char *session_buf;
107
+ const char* error;
108
+ int erroffset;
109
+ static pcre* regex = NULL;
110
+ if(regex == NULL) {
111
+ regex = pcre_compile("\"Finished this session\"", 0, &error, &erroffset, NULL);
112
+ }
113
+ matched = pcre_exec(regex, NULL, line, line_size,0,0,ovector, 30);
114
+ return matched;
115
+ }
116
+
117
+ static int session_match(request_t* r, char* s) {
118
+ if(strcmp(r->session, s) == 0) {
119
+ return 1;
120
+ }
121
+ return 0;
122
+ }
123
+
124
+ static int work_process_line(req_matcher_t* base, char *line, ssize_t line_size, off_t offset)
125
+ {
126
+ work_req_matcher_t* m = (work_req_matcher_t*)base;
127
+ char* session_str;
128
+ int matched=0;
129
+ request_t* r;
130
+
131
+ if((m->stop_requested) || (line_size == -1)) {
132
+ on_all_requests(m);
133
+ return((m->stop_requested)?STOP_SIGNAL:EOF_REACHED);
134
+ }
135
+
136
+ session_str = extract_session(line, line_size);
137
+
138
+ r = m->top;
139
+ if(session_str != NULL) {
140
+ if(r && r->next == NULL && r->session == NULL) {
141
+ //The only req we have is sessionless
142
+ on_request(m, r);
143
+ r = NULL;
144
+ //Finish and start afresh
145
+ }
146
+
147
+ //Find the correct req
148
+ while(r && !session_match(r, session_str)){
149
+ r = r->next;
150
+ }
151
+ }//else it goes on to the top
152
+
153
+ if(!r){
154
+ r = alloc_request();
155
+ //This is now new top request
156
+ if(m->top) {
157
+ r->next = m->top;
158
+ m->top->prev = r;
159
+ }
160
+ m->top = r;
161
+ r->session = session_str;
162
+
163
+ m->depth++;
164
+ }else {
165
+ free(session_str);
166
+ }
167
+
168
+ add_to_request(r, line, offset);
169
+
170
+ if(r->time == 0) {
171
+ parse_req_time(line, line_size, &(r->time));
172
+ }
173
+
174
+ if(r->session != NULL) {
175
+ matched = detect_end(line, line_size);
176
+ if(matched >0) {
177
+ on_request(m, r);
178
+ }
179
+ }
180
+
181
+ return(0);
182
+ }
183
+
184
+ req_matcher_t* work_req_matcher(on_req fn1, on_err fn2, void* arg)
185
+ {
186
+ work_req_matcher_t* m = (work_req_matcher_t*)malloc(sizeof(work_req_matcher_t));
187
+ req_matcher_t* base = (req_matcher_t*)m;
188
+
189
+ m->on_request = fn1;
190
+ m->on_error = fn2;
191
+ m->arg = arg;
192
+
193
+ m->stop_requested = 0;
194
+ m->curr_req = NULL;
195
+
196
+ base->process_line = &work_process_line;
197
+ base->stop = &work_stop;
198
+
199
+ return base;
200
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef __WORK_REQ_H__
2
+ #define __WORK_REQ_H__
3
+ #include "req_matcher.h"
4
+
5
+ req_matcher_t* work_req_matcher(on_req fn1, on_err fn2, void* arg);
6
+ #endif
@@ -0,0 +1,291 @@
1
+ /* zran.c -- example of zlib/gzip stream indexing and random access
2
+ * Copyright (C) 2005 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in zlib.h
4
+ Version 1.0 29 May 2005 Mark Adler */
5
+
6
+ /* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
7
+ for random access of a compressed file. A file containing a zlib or gzip
8
+ stream is provided on the command line. The compressed stream is decoded in
9
+ its entirety, and an index built with access points about every SPAN bytes
10
+ in the uncompressed output. The compressed file is left open, and can then
11
+ be read randomly, having to decompress on the average SPAN/2 uncompressed
12
+ bytes before getting to the desired block of data.
13
+
14
+ An access point can be created at the start of any deflate block, by saving
15
+ the starting file offset and bit of that block, and the 32K bytes of
16
+ uncompressed data that precede that block. Also the uncompressed offset of
17
+ that block is saved to provide a reference for locating a desired starting
18
+ point in the uncompressed stream. build_index() works by decompressing the
19
+ input zlib or gzip stream a block at a time, and at the end of each block
20
+ deciding if enough uncompressed data has gone by to justify the creation of
21
+ a new access point. If so, that point is saved in a data structure that
22
+ grows as needed to accommodate the points.
23
+
24
+ To use the index, an offset in the uncompressed data is provided, for which
25
+ the latest access point at or preceding that offset is located in the index.
26
+ The input file is positioned to the specified location in the index, and if
27
+ necessary the first few bits of the compressed data are read from the file.
28
+ inflate is initialized with those bits and the 32K of uncompressed data, and
29
+ the decompression then proceeds until the desired offset in the file is
30
+ reached. Then the decompression continues to read the desired uncompressed
31
+ data from the file.
32
+ */
33
+
34
+ #include <stdio.h>
35
+ #include <stdlib.h>
36
+ #include <string.h>
37
+ #include "zlib.h"
38
+ #include "ug_index.h"
39
+
40
+ #define WINSIZE 32768U /* sliding window size */
41
+ #define CHUNK 16384 /* file input buffer size */
42
+
43
+
44
+ /* Make one entire pass through the compressed stream and build an index, with
45
+ access points about every span bytes of uncompressed output -- span is
46
+ chosen to balance the speed of random access against the memory requirements
47
+ of the list, about 32K bytes per access point. Note that data after the end
48
+ of the first zlib or gzip stream in the file is ignored. build_index()
49
+ returns the number of access points on success (>= 1), Z_MEM_ERROR for out
50
+ of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
51
+ file read error. On success, *built points to the resulting index. */
52
+
53
+ int build_gz_index(build_idx_context_t *cxt)
54
+ {
55
+ int ret, last_line_size;
56
+ off_t totin;
57
+ uint64_t idx_offset;
58
+ z_stream strm;
59
+ unsigned char input[CHUNK];
60
+ unsigned char window[WINSIZE];
61
+ unsigned char *start, *p, *output, *output_ptr;
62
+
63
+ start = p = window;
64
+ output = output_ptr = NULL;
65
+
66
+ bzero(&strm, sizeof(z_stream));
67
+ ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */
68
+ if (ret != Z_OK)
69
+ return ret;
70
+
71
+ /* inflate the input, maintain a sliding window, and build an index -- this
72
+ also validates the integrity of the compressed data using the check
73
+ information at the end of the gzip or zlib stream */
74
+ totin = 0;
75
+ strm.avail_out = 0;
76
+ do {
77
+ /* get some compressed data from input file */
78
+ strm.avail_in = fread(input, 1, CHUNK, cxt->log);
79
+ if (ferror(cxt->log)) {
80
+ ret = Z_ERRNO;
81
+ goto build_index_error;
82
+ }
83
+ if (strm.avail_in == 0) {
84
+ ret = Z_DATA_ERROR;
85
+ goto build_index_error;
86
+ }
87
+ strm.next_in = input;
88
+
89
+ /* process all of that, or until end of stream */
90
+ do {
91
+ /* reset sliding window if necessary */
92
+ if (strm.avail_out == 0) {
93
+ strm.avail_out = WINSIZE;
94
+ strm.next_out = window;
95
+ }
96
+
97
+ /* inflate until out of input, output, or at end of block --
98
+ update the total input and output counters */
99
+ totin += strm.avail_in;
100
+ ret = inflate(&strm, Z_BLOCK); /* return at end of block */
101
+ totin -= strm.avail_in;
102
+ if (ret == Z_NEED_DICT)
103
+ ret = Z_DATA_ERROR;
104
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
105
+ goto build_index_error;
106
+ if (ret == Z_STREAM_END)
107
+ break;
108
+
109
+ /*
110
+ * at the end of a gzip block we reset our context information, so if handle_request
111
+ * decides to add an index somewhere inside this block we can have an index to the gzip block.
112
+ *
113
+ * note that we store the bit offset in the high byte of the offset field in the index.
114
+ *
115
+ * a data_type of 64 means done with the "last block" -- we might index here. not sure.
116
+ */
117
+
118
+ if ((strm.data_type & 128) && !(strm.data_type & 64) && strm.total_out > 0 ) {
119
+ idx_offset = (((uint64_t) strm.data_type & 7) << 56);
120
+ idx_offset |= (totin & 0x00FFFFFFFFFFFFFF);
121
+
122
+
123
+ /* if there's room left in the buffer copy from middle -> end of buffer */
124
+ if (strm.avail_out)
125
+ memcpy(cxt->data, window + WINSIZE - strm.avail_out, strm.avail_out);
126
+
127
+ /* copy from beginning -> middle of buffer if needed */
128
+ if (strm.avail_out < WINSIZE)
129
+ memcpy(cxt->data + strm.avail_out, window, WINSIZE - strm.avail_out);
130
+
131
+ cxt->data_size = WINSIZE;
132
+ }
133
+
134
+ for(;;) {
135
+ int output_len;
136
+ p = start;
137
+
138
+ while ( (*p != '\n') && ((p - window) < (WINSIZE - strm.avail_out)) )
139
+ p++;
140
+
141
+ output_len = output_ptr - output;
142
+ output = realloc(output, (p - start) + output_len + 1);
143
+ output_ptr = output + output_len;
144
+
145
+ strncpy(output_ptr, start, p - start);
146
+ output_ptr += p - start;
147
+
148
+ if ( p == (window + (WINSIZE - strm.avail_out)) ) {
149
+ /* end of buffer or available data, don't pass along to request matching, save for later */
150
+ if ( strm.avail_out == 0 ) /* wrap to start of buffer */
151
+ start = window;
152
+ else
153
+ start = window + (WINSIZE - strm.avail_out);
154
+
155
+ break;
156
+ } else {
157
+ /* p should be a newline */
158
+ *output_ptr = '\0';
159
+ puts(output);
160
+
161
+ // funcall
162
+ free(output);
163
+ output = output_ptr = NULL;
164
+ start = p + 1;
165
+ }
166
+ }
167
+ } while (strm.avail_in != 0);
168
+ } while (ret != Z_STREAM_END);
169
+
170
+ /* clean up and return index (release unused entries in list) */
171
+ (void)inflateEnd(&strm);
172
+ return 0;
173
+
174
+ /* return error */
175
+ build_index_error:
176
+ (void)inflateEnd(&strm);
177
+ return ret;
178
+ }
179
+
180
+ #if 0
181
+ /* Use the index to read len bytes from offset into buf, return bytes read or
182
+ negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
183
+ the end of the uncompressed data, then extract() will return a value less
184
+ than len, indicating how much as actually read into buf. This function
185
+ should not return a data error unless the file was modified since the index
186
+ was generated. extract() may also return Z_ERRNO if there is an error on
187
+ reading or seeking the input file. */
188
+ int extract(FILE *in, struct access *index, off_t offset,
189
+ unsigned char *buf, int len)
190
+ {
191
+ int ret, skip;
192
+ z_stream strm;
193
+ struct point *here;
194
+ unsigned char input[CHUNK];
195
+ unsigned char discard[WINSIZE];
196
+
197
+ /* proceed only if something reasonable to do */
198
+ if (len < 0)
199
+ return 0;
200
+
201
+ /* find where in stream to start */
202
+ here = index->list;
203
+ ret = index->have;
204
+ while (--ret && here[1].out <= offset)
205
+ here++;
206
+
207
+ /* initialize file and inflate state to start there */
208
+ strm.zalloc = Z_NULL;
209
+ strm.zfree = Z_NULL;
210
+ strm.opaque = Z_NULL;
211
+ strm.avail_in = 0;
212
+ strm.next_in = Z_NULL;
213
+ ret = inflateInit2(&strm, -15); /* raw inflate */
214
+ if (ret != Z_OK)
215
+ return ret;
216
+ ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
217
+ if (ret == -1)
218
+ goto extract_ret;
219
+ if (here->bits) {
220
+ ret = getc(in);
221
+ if (ret == -1) {
222
+ ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
223
+ goto extract_ret;
224
+ }
225
+ (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
226
+ }
227
+ (void)inflateSetDictionary(&strm, here->window, WINSIZE);
228
+
229
+ /* skip uncompressed bytes until offset reached, then satisfy request */
230
+ offset -= here->out;
231
+ strm.avail_in = 0;
232
+ skip = 1; /* while skipping to offset */
233
+ do {
234
+ /* define where to put uncompressed data, and how much */
235
+ if (offset == 0 && skip) { /* at offset now */
236
+ strm.avail_out = len;
237
+ strm.next_out = buf;
238
+ skip = 0; /* only do this once */
239
+ }
240
+ if (offset > WINSIZE) { /* skip WINSIZE bytes */
241
+ strm.avail_out = WINSIZE;
242
+ strm.next_out = discard;
243
+ offset -= WINSIZE;
244
+ }
245
+ else if (offset != 0) { /* last skip */
246
+ strm.avail_out = (unsigned)offset;
247
+ strm.next_out = discard;
248
+ offset = 0;
249
+ }
250
+
251
+ /* uncompress until avail_out filled, or end of stream */
252
+ do {
253
+ if (strm.avail_in == 0) {
254
+ strm.avail_in = fread(input, 1, CHUNK, in);
255
+ if (ferror(in)) {
256
+ ret = Z_ERRNO;
257
+ goto extract_ret;
258
+ }
259
+ if (strm.avail_in == 0) {
260
+ ret = Z_DATA_ERROR;
261
+ goto extract_ret;
262
+ }
263
+ strm.next_in = input;
264
+ }
265
+ ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
266
+ if (ret == Z_NEED_DICT)
267
+ ret = Z_DATA_ERROR;
268
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
269
+ goto extract_ret;
270
+ if (ret == Z_STREAM_END)
271
+ break;
272
+ } while (strm.avail_out != 0);
273
+
274
+ /* if reach end of stream, then don't keep trying to get more */
275
+ if (ret == Z_STREAM_END)
276
+ break;
277
+
278
+ /* do until offset reached and requested data read, or stream ends */
279
+ } while (skip);
280
+
281
+ /* compute number of uncompressed bytes read after offset */
282
+ ret = skip ? 0 : len - strm.avail_out;
283
+
284
+ /* clean up and return bytes read or error */
285
+ extract_ret:
286
+ (void)inflateEnd(&strm);
287
+ return ret;
288
+ }
289
+ #endif
290
+
291
+
@@ -0,0 +1,47 @@
1
require 'yaml'

module Ultragrep
  # Loads the ultragrep YAML configuration and provides accessors over it.
  #
  # The file is taken from the location passed to the constructor or, when
  # that is nil, from the first existing entry of DEFAULT_LOCATIONS.
  class Config
    DEFAULT_LOCATIONS = [".ultragrep.yml", "#{ENV['HOME']}/.ultragrep.yml", "/etc/ultragrep.yml"]

    # config_location - explicit path to a config file, or nil to search
    #                   DEFAULT_LOCATIONS.
    #
    # Aborts the process when no config file can be found.
    def initialize(config_location)
      @config_location = config_location
      parse!
    end

    # Returns the path of the config file to use. Aborts when an explicitly
    # given file does not exist or when no candidate exists at all.
    def find_file!
      if @config_location && !File.exist?(@config_location)
        abort("#{@config_location} not found")
      end
      file = ([@config_location] + DEFAULT_LOCATIONS).compact.detect { |fname| File.exist?(fname) }
      abort("Please configure ultragrep.yml (#{DEFAULT_LOCATIONS.join(", ")})") unless file
      file
    end

    # Reads the config file into @data.
    def parse!
      # An empty YAML document loads as false/nil; fall back to an empty Hash
      # so the accessors below fail with their own, meaningful errors instead
      # of NoMethodError.
      @data = YAML.load_file(find_file!) || {}
    end

    # Returns the raw value stored under the top-level key +val+, or nil.
    def [](val)
      @data[val]
    end

    # Same contract as Hash#fetch on the top-level config Hash.
    def fetch(*args)
      @data.fetch(*args)
    end

    # Returns the 'default_type' setting; raises KeyError when it is missing.
    def default_file_type
      @data.fetch('default_type')
    end

    # Returns the 'glob' entry of the given type, always as an Array.
    # Raises KeyError for an unknown type or a type without 'glob'.
    def log_path_glob(type)
      Array(types.fetch(type).fetch('glob'))
    end

    # Returns the 'types' section; raises when it is not configured.
    def types
      raise "Please configure the 'types' section of ultragrep.yml" unless @data["types"]
      @data["types"]
    end

    # Returns the names of all configured types.
    def available_types
      types.keys
    end
  end
end
@@ -0,0 +1,3 @@
1
# Version string of the Ultragrep library.
module Ultragrep
  VERSION = '0.1.0'
end