ultragrep 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,200 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <time.h>
4
+ #include "pcre.h"
5
+ #include "request.h"
6
+ #include "req_matcher.h"
7
+
8
+
9
+ typedef struct {
10
+ req_matcher_t base;
11
+
12
+ on_req on_request;
13
+ on_err on_error;
14
+ void* arg;
15
+
16
+ request_t* curr_req;
17
+ request_t* top;
18
+
19
+ int depth; //debug
20
+
21
+ int stop_requested;
22
+ }work_req_matcher_t;
23
+
24
+
25
+ static void on_request(work_req_matcher_t* m, request_t* r) {
26
+ if(r) {
27
+ if(r->lines > 0 && m->on_request) {
28
+ m->on_request(r, m->arg);
29
+ }
30
+
31
+ //disconnect
32
+ if(r->next) {
33
+ r->next->prev = r->prev;
34
+ }
35
+ if(r->prev) {
36
+ r->prev->next = r->next;
37
+ } else {
38
+ m->top = r->next;
39
+ }
40
+
41
+ free_request(r);
42
+ m->depth--;
43
+ }
44
+ }
45
+
46
+ static void on_all_requests(work_req_matcher_t* m) {
47
+ request_t* r = m->top;
48
+ while(r) {
49
+ on_request(m, r);
50
+ r = m->top;
51
+ }
52
+ }
53
+
54
+ static void work_stop(req_matcher_t* base) {
55
+ work_req_matcher_t* m = (work_req_matcher_t*)base;
56
+ m->stop_requested = 1;
57
+ }
58
+
59
+
60
+ static char* extract_session(char* line, ssize_t line_size) {
61
+ int matched = 0;
62
+ int ovector[30];
63
+ char *session_buf;
64
+ const char* error;
65
+ int erroffset;
66
+ static pcre* regex = NULL;
67
+ if(regex == NULL) {
68
+ regex = pcre_compile("\"(\\w{6}:\\w{6})\"", 0, &error, &erroffset, NULL);
69
+ }
70
+ matched = pcre_exec(regex, NULL, line, line_size, 0, 0, ovector, 30);
71
+ if(matched > 0) {
72
+ pcre_get_substring(line, ovector, matched, 1, (const char **)&session_buf);
73
+ return(session_buf);
74
+ }
75
+ return NULL;
76
+ }
77
+
78
+ static int parse_req_time(char* line, ssize_t line_size, time_t* time) {
79
+ int matched = 0;
80
+ int ovector[30];
81
+ char *date_buf;
82
+ struct tm request_tm;
83
+ time_t tv;
84
+ const char* error;
85
+ int erroffset;
86
+ static pcre* regex = NULL;
87
+
88
+ if(regex == NULL) {
89
+ regex = pcre_compile("\"(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})\"", 0, &error, &erroffset, NULL);
90
+ }
91
+ matched = pcre_exec(regex, NULL, line, line_size,0,0,ovector, 30);
92
+ if(matched > 0) {
93
+ pcre_get_substring(line, ovector, matched, 1, (const char **)&date_buf);
94
+ strptime(date_buf, "%Y-%m-%d %H:%M:%S", &request_tm);
95
+ free(date_buf);
96
+
97
+ *time = mktime(&request_tm);
98
+ return(1);
99
+ }
100
+ return(-1);
101
+ }
102
+
103
+ static int detect_end(char* line, ssize_t line_size) {
104
+ int matched = 0;
105
+ int ovector[30];
106
+ char *session_buf;
107
+ const char* error;
108
+ int erroffset;
109
+ static pcre* regex = NULL;
110
+ if(regex == NULL) {
111
+ regex = pcre_compile("\"Finished this session\"", 0, &error, &erroffset, NULL);
112
+ }
113
+ matched = pcre_exec(regex, NULL, line, line_size,0,0,ovector, 30);
114
+ return matched;
115
+ }
116
+
117
+ static int session_match(request_t* r, char* s) {
118
+ if(strcmp(r->session, s) == 0) {
119
+ return 1;
120
+ }
121
+ return 0;
122
+ }
123
+
124
+ static int work_process_line(req_matcher_t* base, char *line, ssize_t line_size, off_t offset)
125
+ {
126
+ work_req_matcher_t* m = (work_req_matcher_t*)base;
127
+ char* session_str;
128
+ int matched=0;
129
+ request_t* r;
130
+
131
+ if((m->stop_requested) || (line_size == -1)) {
132
+ on_all_requests(m);
133
+ return((m->stop_requested)?STOP_SIGNAL:EOF_REACHED);
134
+ }
135
+
136
+ session_str = extract_session(line, line_size);
137
+
138
+ r = m->top;
139
+ if(session_str != NULL) {
140
+ if(r && r->next == NULL && r->session == NULL) {
141
+ //The only req we have is sessionless
142
+ on_request(m, r);
143
+ r = NULL;
144
+ //Finish and start afresh
145
+ }
146
+
147
+ //Find the correct req
148
+ while(r && !session_match(r, session_str)){
149
+ r = r->next;
150
+ }
151
+ }//else it goes on to the top
152
+
153
+ if(!r){
154
+ r = alloc_request();
155
+ //This is now new top request
156
+ if(m->top) {
157
+ r->next = m->top;
158
+ m->top->prev = r;
159
+ }
160
+ m->top = r;
161
+ r->session = session_str;
162
+
163
+ m->depth++;
164
+ }else {
165
+ free(session_str);
166
+ }
167
+
168
+ add_to_request(r, line, offset);
169
+
170
+ if(r->time == 0) {
171
+ parse_req_time(line, line_size, &(r->time));
172
+ }
173
+
174
+ if(r->session != NULL) {
175
+ matched = detect_end(line, line_size);
176
+ if(matched >0) {
177
+ on_request(m, r);
178
+ }
179
+ }
180
+
181
+ return(0);
182
+ }
183
+
184
+ req_matcher_t* work_req_matcher(on_req fn1, on_err fn2, void* arg)
185
+ {
186
+ work_req_matcher_t* m = (work_req_matcher_t*)malloc(sizeof(work_req_matcher_t));
187
+ req_matcher_t* base = (req_matcher_t*)m;
188
+
189
+ m->on_request = fn1;
190
+ m->on_error = fn2;
191
+ m->arg = arg;
192
+
193
+ m->stop_requested = 0;
194
+ m->curr_req = NULL;
195
+
196
+ base->process_line = &work_process_line;
197
+ base->stop = &work_stop;
198
+
199
+ return base;
200
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef __WORK_REQ_H__
2
+ #define __WORK_REQ_H__
3
+ #include "req_matcher.h"
4
+
5
+ req_matcher_t* work_req_matcher(on_req fn1, on_err fn2, void* arg);
6
+ #endif
@@ -0,0 +1,291 @@
1
+ /* zran.c -- example of zlib/gzip stream indexing and random access
2
+ * Copyright (C) 2005 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in zlib.h
4
+ Version 1.0 29 May 2005 Mark Adler */
5
+
6
+ /* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
7
+ for random access of a compressed file. A file containing a zlib or gzip
8
+ stream is provided on the command line. The compressed stream is decoded in
9
+ its entirety, and an index built with access points about every SPAN bytes
10
+ in the uncompressed output. The compressed file is left open, and can then
11
+ be read randomly, having to decompress on the average SPAN/2 uncompressed
12
+ bytes before getting to the desired block of data.
13
+
14
+ An access point can be created at the start of any deflate block, by saving
15
+ the starting file offset and bit of that block, and the 32K bytes of
16
+ uncompressed data that precede that block. Also the uncompressed offset of
17
+ that block is saved to provide a reference for locating a desired starting
18
+ point in the uncompressed stream. build_index() works by decompressing the
19
+ input zlib or gzip stream a block at a time, and at the end of each block
20
+ deciding if enough uncompressed data has gone by to justify the creation of
21
+ a new access point. If so, that point is saved in a data structure that
22
+ grows as needed to accommodate the points.
23
+
24
+ To use the index, an offset in the uncompressed data is provided, for which
25
+ the latest access point at or preceding that offset is located in the index.
26
+ The input file is positioned to the specified location in the index, and if
27
+ necessary the first few bits of the compressed data is read from the file.
28
+ inflate is initialized with those bits and the 32K of uncompressed data, and
29
+ the decompression then proceeds until the desired offset in the file is
30
+ reached. Then the decompression continues to read the desired uncompressed
31
+ data from the file.
32
+ */
33
+
34
+ #include <stdio.h>
35
+ #include <stdlib.h>
36
+ #include <string.h>
37
+ #include "zlib.h"
38
+ #include "ug_index.h"
39
+
40
+ #define WINSIZE 32768U /* sliding window size */
41
+ #define CHUNK 16384 /* file input buffer size */
42
+
43
+
44
+ /* Make one entire pass through the compressed stream and build an index, with
45
+ access points about every span bytes of uncompressed output -- span is
46
+ chosen to balance the speed of random access against the memory requirements
47
+ of the list, about 32K bytes per access point. Note that data after the end
48
+ of the first zlib or gzip stream in the file is ignored. build_index()
49
+ returns the number of access points on success (>= 1), Z_MEM_ERROR for out
50
+ of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
51
+ file read error. On success, *built points to the resulting index. */
52
+
53
+ int build_gz_index(build_idx_context_t *cxt)
54
+ {
55
+ int ret, last_line_size;
56
+ off_t totin;
57
+ uint64_t idx_offset;
58
+ z_stream strm;
59
+ unsigned char input[CHUNK];
60
+ unsigned char window[WINSIZE];
61
+ unsigned char *start, *p, *output, *output_ptr;
62
+
63
+ start = p = window;
64
+ output = output_ptr = NULL;
65
+
66
+ bzero(&strm, sizeof(z_stream));
67
+ ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */
68
+ if (ret != Z_OK)
69
+ return ret;
70
+
71
+ /* inflate the input, maintain a sliding window, and build an index -- this
72
+ also validates the integrity of the compressed data using the check
73
+ information at the end of the gzip or zlib stream */
74
+ totin = 0;
75
+ strm.avail_out = 0;
76
+ do {
77
+ /* get some compressed data from input file */
78
+ strm.avail_in = fread(input, 1, CHUNK, cxt->log);
79
+ if (ferror(cxt->log)) {
80
+ ret = Z_ERRNO;
81
+ goto build_index_error;
82
+ }
83
+ if (strm.avail_in == 0) {
84
+ ret = Z_DATA_ERROR;
85
+ goto build_index_error;
86
+ }
87
+ strm.next_in = input;
88
+
89
+ /* process all of that, or until end of stream */
90
+ do {
91
+ /* reset sliding window if necessary */
92
+ if (strm.avail_out == 0) {
93
+ strm.avail_out = WINSIZE;
94
+ strm.next_out = window;
95
+ }
96
+
97
+ /* inflate until out of input, output, or at end of block --
98
+ update the total input and output counters */
99
+ totin += strm.avail_in;
100
+ ret = inflate(&strm, Z_BLOCK); /* return at end of block */
101
+ totin -= strm.avail_in;
102
+ if (ret == Z_NEED_DICT)
103
+ ret = Z_DATA_ERROR;
104
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
105
+ goto build_index_error;
106
+ if (ret == Z_STREAM_END)
107
+ break;
108
+
109
+ /*
110
+ * at the end of a gzip block we reset our context information, so if handle_request
111
+ * decides to add an index somewhere inside this block we can have an index to the gzip block.
112
+ *
113
+ * note that we store the bit offset in the high byte of the offset field in the index.
114
+ *
115
+ * a data_type of 64 means done with the "last block" -- we might index here. not sure.
116
+ */
117
+
118
+ if ((strm.data_type & 128) && !(strm.data_type & 64) && strm.total_out > 0 ) {
119
+ idx_offset = (((uint64_t) strm.data_type & 7) << 56);
120
+ idx_offset |= (totin & 0x00FFFFFFFFFFFFFF);
121
+
122
+
123
+ /* if there's room left in the buffer copy from middle -> end of buffer */
124
+ if (strm.avail_out)
125
+ memcpy(cxt->data, window + WINSIZE - strm.avail_out, strm.avail_out);
126
+
127
+ /* copy from beginning -> middle of buffer if needed */
128
+ if (strm.avail_out < WINSIZE)
129
+ memcpy(cxt->data + strm.avail_out, window, WINSIZE - strm.avail_out);
130
+
131
+ cxt->data_size = WINSIZE;
132
+ }
133
+
134
+ for(;;) {
135
+ int output_len;
136
+ p = start;
137
+
138
+ while ( (*p != '\n') && ((p - window) < (WINSIZE - strm.avail_out)) )
139
+ p++;
140
+
141
+ output_len = output_ptr - output;
142
+ output = realloc(output, (p - start) + output_len + 1);
143
+ output_ptr = output + output_len;
144
+
145
+ strncpy(output_ptr, start, p - start);
146
+ output_ptr += p - start;
147
+
148
+ if ( p == (window + (WINSIZE - strm.avail_out)) ) {
149
+ /* end of buffer or available data, don't pass along to request matching, save for later */
150
+ if ( strm.avail_out == 0 ) /* wrap to start of buffer */
151
+ start = window;
152
+ else
153
+ start = window + (WINSIZE - strm.avail_out);
154
+
155
+ break;
156
+ } else {
157
+ /* p should be a newline */
158
+ *output_ptr = '\0';
159
+ puts(output);
160
+
161
+ // funcall
162
+ free(output);
163
+ output = output_ptr = NULL;
164
+ start = p + 1;
165
+ }
166
+ }
167
+ } while (strm.avail_in != 0);
168
+ } while (ret != Z_STREAM_END);
169
+
170
+ /* clean up and return index (release unused entries in list) */
171
+ (void)inflateEnd(&strm);
172
+ return 0;
173
+
174
+ /* return error */
175
+ build_index_error:
176
+ (void)inflateEnd(&strm);
177
+ return ret;
178
+ }
179
+
180
+ #if 0
181
+ /* Use the index to read len bytes from offset into buf, return bytes read or
182
+ negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
183
+ the end of the uncompressed data, then extract() will return a value less
184
+ than len, indicating how much as actually read into buf. This function
185
+ should not return a data error unless the file was modified since the index
186
+ was generated. extract() may also return Z_ERRNO if there is an error on
187
+ reading or seeking the input file. */
188
+ int extract(FILE *in, struct access *index, off_t offset,
189
+ unsigned char *buf, int len)
190
+ {
191
+ int ret, skip;
192
+ z_stream strm;
193
+ struct point *here;
194
+ unsigned char input[CHUNK];
195
+ unsigned char discard[WINSIZE];
196
+
197
+ /* proceed only if something reasonable to do */
198
+ if (len < 0)
199
+ return 0;
200
+
201
+ /* find where in stream to start */
202
+ here = index->list;
203
+ ret = index->have;
204
+ while (--ret && here[1].out <= offset)
205
+ here++;
206
+
207
+ /* initialize file and inflate state to start there */
208
+ strm.zalloc = Z_NULL;
209
+ strm.zfree = Z_NULL;
210
+ strm.opaque = Z_NULL;
211
+ strm.avail_in = 0;
212
+ strm.next_in = Z_NULL;
213
+ ret = inflateInit2(&strm, -15); /* raw inflate */
214
+ if (ret != Z_OK)
215
+ return ret;
216
+ ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
217
+ if (ret == -1)
218
+ goto extract_ret;
219
+ if (here->bits) {
220
+ ret = getc(in);
221
+ if (ret == -1) {
222
+ ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
223
+ goto extract_ret;
224
+ }
225
+ (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
226
+ }
227
+ (void)inflateSetDictionary(&strm, here->window, WINSIZE);
228
+
229
+ /* skip uncompressed bytes until offset reached, then satisfy request */
230
+ offset -= here->out;
231
+ strm.avail_in = 0;
232
+ skip = 1; /* while skipping to offset */
233
+ do {
234
+ /* define where to put uncompressed data, and how much */
235
+ if (offset == 0 && skip) { /* at offset now */
236
+ strm.avail_out = len;
237
+ strm.next_out = buf;
238
+ skip = 0; /* only do this once */
239
+ }
240
+ if (offset > WINSIZE) { /* skip WINSIZE bytes */
241
+ strm.avail_out = WINSIZE;
242
+ strm.next_out = discard;
243
+ offset -= WINSIZE;
244
+ }
245
+ else if (offset != 0) { /* last skip */
246
+ strm.avail_out = (unsigned)offset;
247
+ strm.next_out = discard;
248
+ offset = 0;
249
+ }
250
+
251
+ /* uncompress until avail_out filled, or end of stream */
252
+ do {
253
+ if (strm.avail_in == 0) {
254
+ strm.avail_in = fread(input, 1, CHUNK, in);
255
+ if (ferror(in)) {
256
+ ret = Z_ERRNO;
257
+ goto extract_ret;
258
+ }
259
+ if (strm.avail_in == 0) {
260
+ ret = Z_DATA_ERROR;
261
+ goto extract_ret;
262
+ }
263
+ strm.next_in = input;
264
+ }
265
+ ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
266
+ if (ret == Z_NEED_DICT)
267
+ ret = Z_DATA_ERROR;
268
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
269
+ goto extract_ret;
270
+ if (ret == Z_STREAM_END)
271
+ break;
272
+ } while (strm.avail_out != 0);
273
+
274
+ /* if reach end of stream, then don't keep trying to get more */
275
+ if (ret == Z_STREAM_END)
276
+ break;
277
+
278
+ /* do until offset reached and requested data read, or stream ends */
279
+ } while (skip);
280
+
281
+ /* compute number of uncompressed bytes read after offset */
282
+ ret = skip ? 0 : len - strm.avail_out;
283
+
284
+ /* clean up and return bytes read or error */
285
+ extract_ret:
286
+ (void)inflateEnd(&strm);
287
+ return ret;
288
+ }
289
+ #endif
290
+
291
+
@@ -0,0 +1,47 @@
1
require 'yaml'

module Ultragrep
  # Locates and loads the ultragrep YAML configuration and exposes its
  # contents through a few convenience accessors.
  class Config
    # Searched in order when no explicit location is given.
    DEFAULT_LOCATIONS = [".ultragrep.yml", "#{ENV['HOME']}/.ultragrep.yml", "/etc/ultragrep.yml"]

    # config_location - path of a config file, or nil to search
    #                   DEFAULT_LOCATIONS.
    #
    # Aborts the process when the given file, or every default file, is
    # missing.
    def initialize(config_location)
      @config_location = config_location
      parse!
    end

    # Returns the path of the config file to use. An explicitly configured
    # location must exist; otherwise the first existing default wins.
    def find_file!
      if @config_location && !File.exist?(@config_location)
        abort("#{@config_location} not found")
      end
      file = ([@config_location] + DEFAULT_LOCATIONS).compact.detect { |fname| File.exist?(fname) }
      abort("Please configure ultragrep.yml (#{DEFAULT_LOCATIONS.join(", ")})") unless file
      file
    end

    # (Re)loads the configuration data from disk.
    #
    # An empty YAML document loads as nil/false; fall back to an empty hash
    # so the accessors below report a missing key or section instead of
    # failing with NoMethodError.
    def parse!
      @data = YAML.load_file(find_file!) || {}
    end

    # Returns the raw value stored under +val+, or nil.
    def [](val)
      @data[val]
    end

    # Same contract as Hash#fetch on the loaded data.
    def fetch(*args)
      @data.fetch(*args)
    end

    # Returns the 'default_type' entry; raises KeyError when it is missing.
    def default_file_type
      @data.fetch('default_type')
    end

    # Returns the glob pattern(s) configured for +type+, always as an Array.
    # Raises KeyError for an unknown type or a type without a 'glob' entry.
    def log_path_glob(type)
      Array(types.fetch(type).fetch('glob'))
    end

    # Returns the 'types' section; raises when it is not configured.
    def types
      raise "Please configure the 'types' section of ultragrep.yml" unless @data["types"]
      @data["types"]
    end

    # Returns the names of all configured types.
    def available_types
      types.keys
    end
  end
end
@@ -0,0 +1,3 @@
1
module Ultragrep
  # Version of the ultragrep package.
  VERSION = '0.1.0'
end