RubyGems - ultragrep - Versions diffs - 0.1.0 - Mend

ultragrep 0.1.0

Files changed (20) hide show

data/bin/ultragrep +6 -0
data/ext/ultragrep/Makefile +39 -0
data/ext/ultragrep/extconf.rb +1 -0
data/ext/ultragrep/rails_req.c +102 -0
data/ext/ultragrep/rails_req.h +6 -0
data/ext/ultragrep/req_matcher.h +17 -0
data/ext/ultragrep/request.c +41 -0
data/ext/ultragrep/request.h +22 -0
data/ext/ultragrep/ug_build_index.c +99 -0
data/ext/ultragrep/ug_cat.c +46 -0
data/ext/ultragrep/ug_guts.c +138 -0
data/ext/ultragrep/ug_index.c +83 -0
data/ext/ultragrep/ug_index.h +27 -0
data/ext/ultragrep/work_req.c +200 -0
data/ext/ultragrep/work_req.h +6 -0
data/ext/ultragrep/zran.c +291 -0
data/lib/ultragrep/config.rb +47 -0
data/lib/ultragrep/version.rb +3 -0
data/lib/ultragrep.rb +348 -0
metadata +67 -0

data/ext/ultragrep/work_req.c ADDED Viewed

@@ -0,0 +1,200 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "pcre.h"
+#include "request.h"
+#include "req_matcher.h"
+/* Matcher state for "work" logs: log lines are grouped into requests by session id. */
+typedef struct {
+    req_matcher_t base;     /* kept as first member: the matcher is cast to and from req_matcher_t* */
+    on_req on_request;      /* user callback invoked for each finished request */
+    on_err on_error;        /* error callback; stored at construction, not invoked in this file */
+    void *arg;              /* opaque pointer handed back to on_request */
+    request_t *curr_req;    /* set to NULL at construction; not otherwise used in this file */
+    request_t *top;         /* head of the doubly linked list of requests still being collected */
+    int depth;              /* count of requests on the list (debug aid) */
+    int stop_requested;     /* set by work_stop(); checked before each line is processed */
+} work_req_matcher_t;
+/*
+ * Finish one request: hand it to the user callback when it holds at least one
+ * line, unlink it from the matcher's list, and free it.  A NULL request is
+ * ignored.
+ */
+static void on_request(work_req_matcher_t* m, request_t* r) {
+    request_t *after;
+    request_t *before;
+    if(r == NULL) {
+        return;
+    }
+    if(r->lines > 0 && m->on_request) {
+        m->on_request(r, m->arg);
+    }
+    /* read the neighbours only after the callback has run, then splice r out */
+    after = r->next;
+    if(after != NULL) {
+        after->prev = r->prev;
+    }
+    before = r->prev;
+    if(before != NULL) {
+        before->next = r->next;
+    } else {
+        /* r was the head of the list */
+        m->top = r->next;
+    }
+    free_request(r);
+    m->depth--;
+}
+/* Flush every pending request; on_request() removes the head each time, so the list drains. */
+static void on_all_requests(work_req_matcher_t* m) {
+    while(m->top != NULL) {
+        on_request(m, m->top);
+    }
+}
+/* Ask the matcher to stop; work_process_line() sees the flag on its next call. */
+static void work_stop(req_matcher_t* base) {
+    ((work_req_matcher_t*)base)->stop_requested = 1;
+}
+/*
+ * Pull the session id out of a log line.
+ *
+ * Searches for a double-quoted token made of two groups of six word
+ * characters separated by a colon ("xxxxxx:xxxxxx") and returns a copy of the
+ * text between the quotes.  Returns NULL when the line holds no such token,
+ * when the pattern cannot be compiled, or when the copy cannot be made.
+ * The caller takes ownership of the returned string.
+ */
+static char* extract_session(char* line, ssize_t line_size) {
+    int matched;
+    int ovector[30];
+    const char* session_buf = NULL;
+    const char* error;
+    int erroffset;
+    static pcre* regex = NULL; /* compiled on first use, then reused */
+    if(regex == NULL) {
+        regex = pcre_compile("\"(\\w{6}:\\w{6})\"", 0, &error, &erroffset, NULL);
+        if(regex == NULL) {
+            return NULL;
+        }
+    }
+    matched = pcre_exec(regex, NULL, line, (int)line_size, 0, 0, ovector, 30);
+    if(matched <= 0) {
+        return NULL;
+    }
+    /* a negative result means no copy was made, so session_buf must not be returned */
+    if(pcre_get_substring(line, ovector, matched, 1, &session_buf) < 0) {
+        return NULL;
+    }
+    return (char*)session_buf;
+}
+/*
+ * Find a double-quoted "YYYY-MM-DD HH:MM:SS" timestamp in a log line and
+ * convert it with mktime().
+ *
+ * On success stores the result in *time and returns 1.  Returns -1, leaving
+ * *time untouched, when the line has no timestamp or it cannot be parsed.
+ */
+static int parse_req_time(char* line, ssize_t line_size, time_t* time) {
+    int matched;
+    int ovector[30];
+    const char* date_buf = NULL;
+    const char* parse_end;
+    struct tm request_tm;
+    const char* error;
+    int erroffset;
+    static pcre* regex = NULL; /* compiled on first use, then reused */
+    if(regex == NULL) {
+        regex = pcre_compile("\"(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})\"", 0, &error, &erroffset, NULL);
+        if(regex == NULL) {
+            return(-1);
+        }
+    }
+    matched = pcre_exec(regex, NULL, line, (int)line_size, 0, 0, ovector, 30);
+    if(matched <= 0) {
+        return(-1);
+    }
+    if(pcre_get_substring(line, ovector, matched, 1, &date_buf) < 0) {
+        return(-1);
+    }
+    /* strptime only fills the fields named in the format; clear the rest and let mktime decide about DST */
+    memset(&request_tm, 0, sizeof(request_tm));
+    request_tm.tm_isdst = -1;
+    parse_end = strptime(date_buf, "%Y-%m-%d %H:%M:%S", &request_tm);
+    pcre_free_substring(date_buf);
+    if(parse_end == NULL) {
+        return(-1);
+    }
+    *time = mktime(&request_tm);
+    return(1);
+}
+/*
+ * Report whether a log line contains the quoted marker "Finished this session".
+ * Returns a value greater than zero on a match; zero or a negative value
+ * otherwise (including when the pattern cannot be compiled).
+ */
+static int detect_end(char* line, ssize_t line_size) {
+    int ovector[30];
+    const char* error;
+    int erroffset;
+    static pcre* regex = NULL; /* compiled on first use, then reused */
+    if(regex == NULL) {
+        regex = pcre_compile("\"Finished this session\"", 0, &error, &erroffset, NULL);
+        if(regex == NULL) {
+            return -1;
+        }
+    }
+    return pcre_exec(regex, NULL, line, (int)line_size, 0, 0, ovector, 30);
+}
+/*
+ * Return 1 when request r belongs to session s, 0 otherwise.
+ * A request without a session id (r->session == NULL) or a NULL s never matches.
+ */
+static int session_match(request_t* r, char* s) {
+    if(r->session == NULL || s == NULL) {
+        return 0;
+    }
+    return strcmp(r->session, s) == 0;
+}
+/*
+ * Feed one log line to the matcher.
+ *
+ * A line_size of -1 or a pending stop request flushes every collected request
+ * and returns EOF_REACHED or STOP_SIGNAL.  Otherwise the line is appended to
+ * the request that owns its session id (or to the head request when the line
+ * carries no session id), a new request being created when none fits, and 0 is
+ * returned.  A request with a session id is delivered as soon as a line of it
+ * matches detect_end().
+ */
+static int work_process_line(req_matcher_t* base, char *line, ssize_t line_size, off_t offset)
+{
+    work_req_matcher_t* matcher = (work_req_matcher_t*)base;
+    request_t* req;
+    char* sid;
+    if(matcher->stop_requested || line_size == -1) {
+        on_all_requests(matcher);
+        /* the flag is read again after the flush, as callbacks have run in between */
+        if(matcher->stop_requested) {
+            return(STOP_SIGNAL);
+        }
+        return(EOF_REACHED);
+    }
+    sid = extract_session(line, line_size);
+    req = matcher->top;
+    if(sid != NULL) {
+        /* a lone sessionless request is closed before session-tagged lines start */
+        if(req != NULL && req->next == NULL && req->session == NULL) {
+            on_request(matcher, req);
+            req = NULL;
+        }
+        /* look for the request already collecting this session */
+        for(; req != NULL; req = req->next) {
+            if(session_match(req, sid)) {
+                break;
+            }
+        }
+    }
+    if(req != NULL) {
+        /* the existing request keeps its own copy of the id */
+        free(sid);
+    } else {
+        /* no owner found: push a fresh request on the head of the list */
+        req = alloc_request();
+        if(matcher->top != NULL) {
+            req->next = matcher->top;
+            matcher->top->prev = req;
+        }
+        matcher->top = req;
+        req->session = sid;
+        matcher->depth++;
+    }
+    add_to_request(req, line, offset);
+    if(req->time == 0) {
+        parse_req_time(line, line_size, &(req->time));
+    }
+    if(req->session != NULL && detect_end(line, line_size) > 0) {
+        on_request(matcher, req);
+    }
+    return(0);
+}
+/*
+ * Allocate a matcher for "work" logs.
+ *
+ * fn1 is called with each finished request and arg; fn2 is stored as the
+ * error callback.  Returns the new matcher through its req_matcher_t base, or
+ * NULL when memory cannot be allocated.
+ */
+req_matcher_t* work_req_matcher(on_req fn1, on_err fn2, void* arg)
+{
+    work_req_matcher_t* m = calloc(1, sizeof(*m));
+    if(m == NULL) {
+        return NULL;
+    }
+    m->on_request = fn1;
+    m->on_error = fn2;
+    m->arg = arg;
+    m->stop_requested = 0;
+    m->curr_req = NULL;
+    /* the request list starts empty; work_process_line() reads top on its first call */
+    m->top = NULL;
+    m->depth = 0;
+    m->base.process_line = &work_process_line;
+    m->base.stop = &work_stop;
+    return &m->base;
+}

data/ext/ultragrep/work_req.h ADDED Viewed

@@ -0,0 +1,6 @@
+#ifndef WORK_REQ_H
+#define WORK_REQ_H
+#include "req_matcher.h"
+/*
+ * Create a heap-allocated request matcher for "work" logs.  fn1 is called
+ * with each finished request and arg; fn2 is stored as the error callback.
+ */
+req_matcher_t* work_req_matcher(on_req fn1, on_err fn2, void* arg);
+#endif /* WORK_REQ_H */

data/ext/ultragrep/zran.c ADDED Viewed

@@ -0,0 +1,291 @@
+/* zran.c -- example of zlib/gzip stream indexing and random access
+ * Copyright (C) 2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+   Version 1.0  29 May 2005  Mark Adler */
+/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
+   for random access of a compressed file.  A file containing a zlib or gzip
+   stream is provided on the command line.  The compressed stream is decoded in
+   its entirety, and an index built with access points about every SPAN bytes
+   in the uncompressed output.  The compressed file is left open, and can then
+   be read randomly, having to decompress on the average SPAN/2 uncompressed
+   bytes before getting to the desired block of data.
+   An access point can be created at the start of any deflate block, by saving
+   the starting file offset and bit of that block, and the 32K bytes of
+   uncompressed data that precede that block.  Also the uncompressed offset of
+   that block is saved to provide a reference for locating a desired starting
+   point in the uncompressed stream.  build_index() works by decompressing the
+   input zlib or gzip stream a block at a time, and at the end of each block
+   deciding if enough uncompressed data has gone by to justify the creation of
+   a new access point.  If so, that point is saved in a data structure that
+   grows as needed to accommodate the points.
+   To use the index, an offset in the uncompressed data is provided, for which
+   the latest access point at or preceding that offset is located in the index.
+   The input file is positioned to the specified location in the index, and if
+   necessary the first few bits of the compressed data is read from the file.
+   inflate is initialized with those bits and the 32K of uncompressed data, and
+   the decompression then proceeds until the desired offset in the file is
+   reached.  Then the decompression continues to read the desired uncompressed
+   data from the file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zlib.h"
+#include "ug_index.h"
+#define WINSIZE 32768U      /* sliding window size */
+#define CHUNK 16384         /* file input buffer size */
+/* Make one entire pass through the compressed stream read from cxt->log,
+   inflating it a deflate block at a time through a 32K sliding window.
+   At each block boundary (other than the last block) the current window
+   contents are copied, oldest byte first, into cxt->data and cxt->data_size is
+   set to WINSIZE.  The inflated bytes are split on '\n' and every complete
+   line is written to stdout with puts(); a line that spans two inflate calls
+   is carried over in a heap buffer.  Note that data after the end of the first
+   zlib or gzip stream in the file is ignored.  build_gz_index() returns 0 on
+   success, Z_MEM_ERROR for out of memory, Z_DATA_ERROR for an error in the
+   input file, or Z_ERRNO for a file read error. */
+int build_gz_index(build_idx_context_t *cxt)
+{
+    int ret;
+    off_t totin;
+    uint64_t idx_offset;
+    z_stream strm;
+    unsigned char input[CHUNK];
+    unsigned char window[WINSIZE];
+    unsigned char *start, *p, *end, *output, *output_ptr;
+    start = p = window;
+    output = output_ptr = NULL;
+    memset(&strm, 0, sizeof(strm));
+    ret = inflateInit2(&strm, 47);      /* automatic zlib or gzip decoding */
+    if (ret != Z_OK)
+        return ret;
+    /* inflate the input, maintain a sliding window, and build an index -- this
+       also validates the integrity of the compressed data using the check
+       information at the end of the gzip or zlib stream */
+    totin = 0;
+    strm.avail_out = 0;
+    do {
+        /* get some compressed data from input file */
+        strm.avail_in = (uInt)fread(input, 1, CHUNK, cxt->log);
+        if (ferror(cxt->log)) {
+            ret = Z_ERRNO;
+            goto build_index_error;
+        }
+        if (strm.avail_in == 0) {
+            ret = Z_DATA_ERROR;
+            goto build_index_error;
+        }
+        strm.next_in = input;
+        /* process all of that, or until end of stream */
+        do {
+            /* reset sliding window if necessary */
+            if (strm.avail_out == 0) {
+                strm.avail_out = WINSIZE;
+                strm.next_out = window;
+            }
+            /* inflate until out of input, output, or at end of block --
+               update the total input counter */
+            totin += strm.avail_in;
+            ret = inflate(&strm, Z_BLOCK);      /* return at end of block */
+            totin -= strm.avail_in;
+            if (ret == Z_NEED_DICT)
+                ret = Z_DATA_ERROR;
+            if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
+                goto build_index_error;
+            /* NOTE(review): bytes produced by this final inflate call are never
+               scanned for lines below -- confirm whether that is intended */
+            if (ret == Z_STREAM_END)
+                break;
+            /*
+             * at the end of a gzip block we reset our context information, so if handle_request
+             * decides to add an index somewhere inside this block we can have an index to the gzip block.
+             *
+             * the bit offset goes in the high byte of idx_offset, the compressed offset in the rest.
+             * NOTE(review): idx_offset is computed here but not stored anywhere in this function.
+             *
+             * a data_type of 64 means done with the "last block" -- we might index here.  not sure.
+             */
+            if ((strm.data_type & 128) && !(strm.data_type & 64) && strm.total_out > 0 ) {
+                idx_offset = (((uint64_t) strm.data_type & 7) << 56);
+                idx_offset |= (totin & 0x00FFFFFFFFFFFFFF);
+                /* if there's room left in the buffer copy from middle -> end of buffer */
+                if (strm.avail_out)
+                    memcpy(cxt->data, window + WINSIZE - strm.avail_out, strm.avail_out);
+                /* copy from beginning -> middle of buffer if needed */
+                if (strm.avail_out < WINSIZE)
+                    memcpy(cxt->data + strm.avail_out, window, WINSIZE - strm.avail_out);
+                cxt->data_size = WINSIZE;
+            }
+            for(;;) {
+              size_t output_len, piece_len;
+              unsigned char *grown;
+              /* one past the last inflated byte currently in the window */
+              end = window + (WINSIZE - strm.avail_out);
+              p = start;
+              /* bounds test comes first so *p is never read past the window */
+              while ( (p < end) && (*p != '\n') )
+                  p++;
+              piece_len = (size_t)(p - start);
+              output_len = output ? (size_t)(output_ptr - output) : 0;
+              grown = realloc(output, output_len + piece_len + 1);
+              if ( grown == NULL ) {
+                ret = Z_MEM_ERROR;
+                goto build_index_error;
+              }
+              output = grown;
+              output_ptr = output + output_len;
+              memcpy(output_ptr, start, piece_len);
+              output_ptr += piece_len;
+              if ( p == end ) {
+                /* end of buffer or available data, don't pass along to request matching, save for later */
+                if ( strm.avail_out == 0 ) /* wrap to start of buffer */
+                  start = window;
+                else
+                  start = end;
+                break;
+              } else {
+                /* p is at a newline: the line is complete */
+                *output_ptr = '\0';
+                puts((char *)output);
+                // funcall
+                free(output);
+                output = output_ptr = NULL;
+                start = p + 1;
+              }
+           }
+        } while (strm.avail_in != 0);
+    } while (ret != Z_STREAM_END);
+    /* clean up: drop any unfinished trailing line and release inflate state */
+    free(output);
+    (void)inflateEnd(&strm);
+    return 0;
+    /* return error */
+  build_index_error:
+    free(output);
+    (void)inflateEnd(&strm);
+    return ret;
+}
+#if 0
+/* Use the index to read len bytes from offset into buf, return bytes read or
+   negative for error (Z_DATA_ERROR or Z_MEM_ERROR).  If data is requested past
+   the end of the uncompressed data, then extract() will return a value less
+   than len, indicating how much was actually read into buf.  This function
+   should not return a data error unless the file was modified since the index
+   was generated.  extract() may also return Z_ERRNO if there is an error on
+   reading or seeking the input file. */
+int extract(FILE *in, struct access *index, off_t offset,
+                  unsigned char *buf, int len)
+{
+    int ret, skip;
+    z_stream strm;
+    struct point *here;
+    unsigned char input[CHUNK];
+    unsigned char discard[WINSIZE];
+    /* proceed only if something reasonable to do */
+    if (len < 0)
+        return 0;
+    /* find where in stream to start */
+    here = index->list;
+    ret = index->have;
+    while (--ret && here[1].out <= offset) /* stop at the last entry, or when the next point starts past offset */
+        here++;
+    /* initialize file and inflate state to start there */
+    strm.zalloc = Z_NULL;
+    strm.zfree = Z_NULL;
+    strm.opaque = Z_NULL;
+    strm.avail_in = 0;
+    strm.next_in = Z_NULL;
+    ret = inflateInit2(&strm, -15);         /* raw inflate */
+    if (ret != Z_OK)
+        return ret;
+    ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); /* back up one byte when the block starts mid-byte */
+    if (ret == -1)
+        goto extract_ret;
+    if (here->bits) {
+        ret = getc(in);
+        if (ret == -1) {
+            ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
+            goto extract_ret;
+        }
+        (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); /* hand the top here->bits bits of that byte to inflate */
+    }
+    (void)inflateSetDictionary(&strm, here->window, WINSIZE);
+    /* skip uncompressed bytes until offset reached, then satisfy request */
+    offset -= here->out; /* offset is now relative to the access point */
+    strm.avail_in = 0;
+    skip = 1;                               /* while skipping to offset */
+    do {
+        /* define where to put uncompressed data, and how much */
+        if (offset == 0 && skip) {          /* at offset now */
+            strm.avail_out = len;
+            strm.next_out = buf;
+            skip = 0;                       /* only do this once */
+        }
+        if (offset > WINSIZE) {             /* skip WINSIZE bytes */
+            strm.avail_out = WINSIZE;
+            strm.next_out = discard;
+            offset -= WINSIZE;
+        }
+        else if (offset != 0) {             /* last skip */
+            strm.avail_out = (unsigned)offset;
+            strm.next_out = discard;
+            offset = 0;
+        }
+        /* uncompress until avail_out filled, or end of stream */
+        do {
+            if (strm.avail_in == 0) {
+                strm.avail_in = fread(input, 1, CHUNK, in);
+                if (ferror(in)) {
+                    ret = Z_ERRNO;
+                    goto extract_ret;
+                }
+                if (strm.avail_in == 0) {
+                    ret = Z_DATA_ERROR;
+                    goto extract_ret;
+                }
+                strm.next_in = input;
+            }
+            ret = inflate(&strm, Z_NO_FLUSH);       /* normal inflate */
+            if (ret == Z_NEED_DICT)
+                ret = Z_DATA_ERROR;
+            if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
+                goto extract_ret;
+            if (ret == Z_STREAM_END)
+                break;
+        } while (strm.avail_out != 0);
+        /* if reach end of stream, then don't keep trying to get more */
+        if (ret == Z_STREAM_END)
+            break;
+        /* do until offset reached and requested data read, or stream ends */
+    } while (skip);
+    /* compute number of uncompressed bytes read after offset */
+    ret = skip ? 0 : len - strm.avail_out;
+    /* clean up and return bytes read or error */
+  extract_ret:
+    (void)inflateEnd(&strm);
+    return ret;
+}
+#endif

data/lib/ultragrep/config.rb ADDED Viewed

@@ -0,0 +1,47 @@
+module Ultragrep
+  class Config
+    DEFAULT_LOCATIONS = [".ultragrep.yml", "#{ENV['HOME']}/.ultragrep.yml", "/etc/ultragrep.yml"]
+    def initialize(config_location)
+      @config_location = config_location
+      parse!
+    end
+    def find_file!
+      if @config_location && !File.exist?(@config_location)
+        abort("#{@config_location} not found")
+      end
+      file = ([@config_location] + DEFAULT_LOCATIONS).compact.detect { |fname| File.exist?(fname) }
+      abort("Please configure ultragrep.yml (#{DEFAULT_LOCATIONS.join(", ")})") unless file
+      file
+    end
+    def parse!
+      @data = YAML.load_file(find_file!)
+    end
+    def [](val)
+      @data[val]
+    end
+    def fetch(*args)
+      @data.fetch(*args)
+    end
+    def default_file_type
+      @data.fetch('default_type')
+    end
+    def log_path_glob(type)
+      Array(types.fetch(type).fetch('glob'))
+    end
+    def types
+      raise "Please configure the 'types' section of ultragrep.yml" unless @data["types"]
+      @data["types"]
+    end
+    def available_types
+      types.keys
+    end
+  end
+end

data/lib/ultragrep/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Ultragrep
+  VERSION = "0.1.0"
+end