whistlepig 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +86 -0
- data/ext/whistlepig/defaults.h +28 -0
- data/ext/whistlepig/entry.c +181 -0
- data/ext/whistlepig/entry.h +66 -0
- data/ext/whistlepig/error.c +24 -0
- data/ext/whistlepig/error.h +94 -0
- data/ext/whistlepig/extconf.rb +6 -0
- data/ext/whistlepig/index.c +294 -0
- data/ext/whistlepig/index.h +88 -0
- data/ext/whistlepig/khash.h +316 -0
- data/ext/whistlepig/mmap-obj.c +76 -0
- data/ext/whistlepig/mmap-obj.h +52 -0
- data/ext/whistlepig/query-parser.c +37 -0
- data/ext/whistlepig/query-parser.h +25 -0
- data/ext/whistlepig/query-parser.lex.c +2249 -0
- data/ext/whistlepig/query-parser.lex.h +359 -0
- data/ext/whistlepig/query-parser.tab.c +1757 -0
- data/ext/whistlepig/query-parser.tab.h +85 -0
- data/ext/whistlepig/query.c +194 -0
- data/ext/whistlepig/query.h +78 -0
- data/ext/whistlepig/search.c +746 -0
- data/ext/whistlepig/search.h +76 -0
- data/ext/whistlepig/segment.c +615 -0
- data/ext/whistlepig/segment.h +137 -0
- data/ext/whistlepig/stringmap.c +278 -0
- data/ext/whistlepig/stringmap.h +82 -0
- data/ext/whistlepig/stringpool.c +44 -0
- data/ext/whistlepig/stringpool.h +58 -0
- data/ext/whistlepig/termhash.c +294 -0
- data/ext/whistlepig/termhash.h +79 -0
- data/ext/whistlepig/tokenizer.lex.c +2263 -0
- data/ext/whistlepig/tokenizer.lex.h +360 -0
- data/ext/whistlepig/whistlepig.h +15 -0
- data/ext/whistlepig/whistlepigc.c +537 -0
- data/lib/whistlepig.rb +119 -0
- metadata +103 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
#include <fcntl.h>
|
2
|
+
#include <sys/mman.h>
|
3
|
+
#include <unistd.h>
|
4
|
+
#include "whistlepig.h"
|
5
|
+
|
6
|
+
// private: sanity-check a mapped header against the expected magic string.
//
// h:     header to check; only h->magic and h->size are read.
// magic: expected magic; at most MMAP_OBJ_MAGIC_SIZE bytes are compared.
//
// raises an error if the magic differs, or if the recorded payload size is so
// large that adding the header size to it no longer fits in a uint32_t (the
// loader computes the mapping length in a uint32_t, so such a size would wrap
// around and produce a mapping much smaller than the header claims).
RAISING_STATIC(validate(mmap_obj_header* h, const char* magic)) {
  if(strncmp(magic, h->magic, MMAP_OBJ_MAGIC_SIZE) != 0) RAISE_ERROR("invalid magic (expecting %s)", magic);

  // (uint32_t)-1 is the largest representable size; anything within one
  // header's length of it would overflow once the header is added on.
  if(h->size > (uint32_t)-1 - sizeof(mmap_obj_header)) RAISE_ERROR("invalid size %u", h->size);

  return NO_ERROR;
}
|
11
|
+
|
12
|
+
// public: create a new file at pathname and map a fresh object into it.
//
// o:            receives the open descriptor in o->fd and the mapping in o->header.
// magic:        magic string stored in the header (up to MMAP_OBJ_MAGIC_SIZE bytes).
// pathname:     file to create; creation fails if it already exists (O_EXCL).
// initial_size: payload size in bytes, not counting the header.
//
// raises an error if the size is unrepresentable, or if the file cannot be
// created, extended, or mapped.
//
// NOTE(review): the error paths after a successful open() return with o->fd
// still open and the new file left on disk -- confirm whether callers are
// expected to clean up.
wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname, uint32_t initial_size) {
  // refuse sizes whose total (header + payload) would wrap around in a
  // uint32_t. checked before open() so no file is created for such a request.
  if(initial_size > (uint32_t)-1 - sizeof(mmap_obj_header)) RAISE_ERROR("invalid size %u", initial_size);

  o->fd = open(pathname, O_EXCL | O_CREAT | O_RDWR, 0640);
  if(o->fd == -1) RAISE_SYSERROR("cannot create %s", pathname);

  uint32_t size = initial_size + sizeof(mmap_obj_header);
  // sizeof yields a size_t; convert it so the argument matches the %u conversion
  DEBUG("creating %s with %u + %u = %u bytes for %s object", pathname, initial_size, (unsigned int)sizeof(mmap_obj_header), size, magic);

  // extend the file to its full length by writing a single byte at the last
  // offset. if the seek failed and went unnoticed, that byte would be written
  // at the current file position instead.
  if(lseek(o->fd, size - 1, SEEK_SET) == (off_t)-1) RAISE_SYSERROR("lseek");
  ssize_t num_bytes = write(o->fd, "", 1);
  if(num_bytes == -1) RAISE_SYSERROR("write");

  o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("mmap");

  // the magic is a fixed-width field: strncpy zero-pads a short magic and
  // leaves a full-width one unterminated, which matches the strncmp over the
  // same width used when validating a loaded header.
  strncpy(o->header->magic, magic, MMAP_OBJ_MAGIC_SIZE);
  o->header->size = initial_size;
  DEBUG("created new %s object with %u bytes", magic, size);

  return NO_ERROR;
}
|
29
|
+
|
30
|
+
// public: open an existing file and map the object stored in it.
//
// o:        receives the open descriptor in o->fd and the mapping in o->header.
// magic:    expected magic string; the file's header is validated against it.
// pathname: file to open read/write.
//
// the header is mapped on its own first so the payload size can be read and
// validated; that mapping is then dropped and the whole object is mapped.
// raises an error if the file cannot be opened or mapped, or if validation
// of the header fails.
//
// NOTE(review): the error paths after a successful open() return with o->fd
// still open (and, presumably, with the header still mapped when validation
// fails) -- confirm whether callers are expected to clean up.
wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
  DEBUG("trying to load %s object from %s", magic, pathname);
  o->fd = open(pathname, O_RDWR, 0640);
  if(o->fd == -1) RAISE_SYSERROR("cannot open %s", pathname);

  // load header
  o->header = mmap(NULL, sizeof(mmap_obj_header), PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("header mmap");
  // sizeof yields a size_t; convert it so the argument matches the %u conversion
  DEBUG("loaded header of %u bytes for %s object", (unsigned int)sizeof(mmap_obj_header), magic);

  RELAY_ERROR(validate(o->header, magic));

  // the size must be read before the header mapping is dropped below
  uint32_t size = o->header->size + sizeof(mmap_obj_header);
  DEBUG("full size is %u bytes (including %u-byte header)", size, (unsigned int)sizeof(mmap_obj_header));
  if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");

  // map header and payload together
  o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("full mmap");
  DEBUG("loaded full %s object of %u bytes", magic, size);

  return NO_ERROR;
}
|
52
|
+
|
53
|
+
// public: remap the object so that its payload is data_size bytes long.
//
// o:         a loaded/created object; o->header is replaced by the new mapping.
// data_size: new payload size in bytes, not counting the header.
//
// the old mapping is dropped, the file is grown if it is too short, and the
// whole object is mapped again, so o->header may point somewhere else on
// return. nothing here truncates the file, so a smaller data_size only
// shrinks the mapping and the recorded size.
//
// raises an error if the size is unrepresentable or if any of the system
// calls fails. NOTE(review): once the munmap has succeeded, a later failure
// returns with o->header no longer pointing at a valid mapping.
wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
  DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, (void*)o->header);

  // reject sizes whose total (header + payload) would wrap in a uint32_t.
  // checked before the munmap so a refused request leaves the mapping intact.
  if(data_size > (uint32_t)-1 - sizeof(mmap_obj_header)) RAISE_ERROR("invalid size %u", data_size);

  if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
  uint32_t size = data_size + sizeof(mmap_obj_header);

  // grow the file by writing one byte at the new last offset, but only when
  // the file is actually shorter than that: writing unconditionally would
  // zero a live payload byte whenever the file is already long enough.
  off_t file_len = lseek(o->fd, 0, SEEK_END);
  if(file_len == (off_t)-1) RAISE_SYSERROR("lseek");
  if(file_len < (off_t)size) {
    if(lseek(o->fd, size - 1, SEEK_SET) == (off_t)-1) RAISE_SYSERROR("lseek");
    ssize_t num_bytes = write(o->fd, "", 1);
    if(num_bytes == -1) RAISE_SYSERROR("write");
  }

  // no need to seek back to the start: mmap takes an explicit file offset
  o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("mmap");
  o->header->size = data_size;
  DEBUG("loaded %u bytes after resize. header is at %p", o->header->size, (void*)o->header);

  return NO_ERROR;
}
|
70
|
+
|
71
|
+
// public: unmap the object and clear o->header.
//
// raises an error if munmap fails; o->header is left untouched in that case.
//
// NOTE(review): o->fd is not closed here -- confirm whether the caller owns
// the descriptor.
wp_error* mmap_obj_unload(mmap_obj* o) {
  // work out the mapping length up front: the payload size lives inside the
  // very mapping that is about to go away.
  size_t total = sizeof(mmap_obj_header) + o->header->size;

  // total is a size_t; convert it so the argument matches the %u conversion
  DEBUG("unloading %u bytes", (unsigned int)total);
  if(munmap(o->header, total) == -1) RAISE_SYSERROR("munmap");
  o->header = NULL;
  return NO_ERROR;
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#ifndef WP_MMAP_OBJ_H_
|
2
|
+
#define WP_MMAP_OBJ_H_
|
3
|
+
|
4
|
+
// whistlepig mmap objects
|
5
|
+
// (c) 2011 William Morgan. See COPYING for license terms.
|
6
|
+
//
|
7
|
+
// wrappers around the logic of loading, unloading, and resizing
|
8
|
+
// arbitrary-sized objects using mmap.
|
9
|
+
//
|
10
|
+
// note that any of the mmap_obj_* functions may change the object pointer, so
|
11
|
+
// use MMAP_OBJ or MMAP_OBJ_PTR to dereference (again) after calling them.
|
12
|
+
|
13
|
+
#define MMAP_OBJ_MAGIC_SIZE 15
|
14
|
+
|
15
|
+
#include <stdint.h>
|
16
|
+
#include "error.h"
|
17
|
+
|
18
|
+
// the header, with a magic string
|
19
|
+
typedef struct mmap_obj_header {
|
20
|
+
char magic[MMAP_OBJ_MAGIC_SIZE];
|
21
|
+
uint32_t size;
|
22
|
+
char obj[];
|
23
|
+
} mmap_obj_header;
|
24
|
+
|
25
|
+
// what we pass around at runtime
|
26
|
+
typedef struct mmap_obj {
|
27
|
+
int fd;
|
28
|
+
mmap_obj_header* header;
|
29
|
+
} mmap_obj;
|
30
|
+
|
31
|
+
// public API
|
32
|
+
|
33
|
+
// public: get the actual object from an mmap_obj.
//
// v is an mmap_obj lvalue expression and type is the payload type; the result
// is a type* pointing at the payload. the argument and the whole expansion
// are parenthesized so that expressions such as MMAP_OBJ(*p, t) and uses such
// as MMAP_OBJ(o, t)->field or MMAP_OBJ(o, t)[i] parse as intended.
#define MMAP_OBJ(v, type) ((type*)&(v).header->obj)

// public: get the object from an mmap_obj*.
//
// same as MMAP_OBJ, but v is a pointer expression (e.g. MMAP_OBJ_PTR(&o, t)).
#define MMAP_OBJ_PTR(v, type) ((type*)(v)->header->obj)
|
38
|
+
|
39
|
+
// public: create an object with an initial size
|
40
|
+
wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname, uint32_t initial_size) RAISES_ERROR;
|
41
|
+
|
42
|
+
// public: load an object, raising an error if it doesn't exist (or if the
|
43
|
+
// magic doesn't match)
|
44
|
+
wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) RAISES_ERROR;
|
45
|
+
|
46
|
+
// public: resize an object. note that the obj pointer might change after this call.
|
47
|
+
wp_error* mmap_obj_resize(mmap_obj* o, uint32_t new_size) RAISES_ERROR;
|
48
|
+
|
49
|
+
// public: unload an object
|
50
|
+
wp_error* mmap_obj_unload(mmap_obj* o) RAISES_ERROR;
|
51
|
+
|
52
|
+
#endif
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "whistlepig.h"
|
3
|
+
#include "query-parser.h"
|
4
|
+
#include "query-parser.tab.h"
|
5
|
+
|
6
|
+
int query_parser_parse(query_parse_context* c);
|
7
|
+
int query_parser_lex_init(void* scanner);
|
8
|
+
int query_parser_lex_destroy(void* scanner);
|
9
|
+
int query_parser_set_extra(void* extra, void* scanner);
|
10
|
+
|
11
|
+
// error hook for the generated query parser (presumably invoked by
// query_parser_parse when it hits a syntax error -- confirm against the .y file).
//
// locp:    location of the error; only first_line is used.
// context: parse context; context->error receives a newly malloc'ed message
//          of the form "line N: <err>". the buffer is owned by whoever owns
//          the context and must be free()d.
// err:     the parser's description of the problem.
//
// if the allocation fails, context->error is left NULL instead of being
// written through.
void query_parser_error(YYLTYPE* locp, query_parse_context* context, const char* err) {
  enum { ERROR_BUF_SIZE = 1024 };

  context->error = malloc(ERROR_BUF_SIZE);
  if(context->error == NULL) return; // out of memory: nothing to format into

  snprintf(context->error, ERROR_BUF_SIZE, "line %d: %s", locp->first_line, err);
}
|
15
|
+
|
16
|
+
extern int query_parser_debug;
|
17
|
+
|
18
|
+
// public: parse the query string s into a wp_query.
//
// s:             the query text.
// default_field: handed to the parser through the context as the default field.
// query:         on success, receives the parsed query, or a new empty query
//                if the parser produced no result.
//
// raises an error if the scanner cannot be set up or if the parser reports
// failure; *query is not written in that case.
wp_error* wp_query_parse(const char* s, const char* default_field, wp_query** query) {
  query_parse_context c;
  c.input = s;
  c.default_field = default_field;
  c.error = NULL;
  c.scanner = NULL;
  // start from a known value so the empty-query test below never reads an
  // indeterminate pointer if the parser doesn't assign a result
  c.result = NULL;

  if(query_parser_lex_init(&c.scanner) != 0) RAISE_ERROR("parse error: %s", "cannot initialize scanner");
  query_parser_set_extra(&c, c.scanner);
  int ret = query_parser_parse(&c);
  query_parser_lex_destroy(c.scanner);

  if(ret != 0) {
    // c.error is heap-allocated by query_parser_error (and may be NULL if
    // that hook never ran or couldn't allocate). copy the text to the stack
    // so the heap buffer can be released before RAISE_ERROR leaves the function.
    char msg[1024];
    snprintf(msg, sizeof msg, "%s", c.error != NULL ? c.error : "unknown error");
    free(c.error);
    c.error = NULL;
    RAISE_ERROR("parse error: %s", msg);
  }

  // a message recorded during a parse that still succeeded is not reported;
  // release it so it doesn't leak
  free(c.error);
  c.error = NULL;

  if(c.result == NULL) { // empty query
    *query = wp_query_new_empty();
  }
  else {
    *query = c.result;
  }

  return NO_ERROR;
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#ifndef WP_QUERY_PARSER_H_
|
2
|
+
#define WP_QUERY_PARSER_H_
|
3
|
+
|
4
|
+
// whistlepig query parser
|
5
|
+
// (c) 2011 William Morgan. See COPYING for license terms.
|
6
|
+
//
|
7
|
+
// most of the code, of course, is in the .lex and .y files
|
8
|
+
|
9
|
+
#include "query.h"
|
10
|
+
#include "error.h"
|
11
|
+
|
12
|
+
typedef struct {
|
13
|
+
const char* input;
|
14
|
+
const char* default_field;
|
15
|
+
char* error;
|
16
|
+
void* scanner;
|
17
|
+
wp_query* result;
|
18
|
+
} query_parse_context;
|
19
|
+
|
20
|
+
// API methods
|
21
|
+
|
22
|
+
// public: parse a query from a string, attaching terms without fields to default_field
|
23
|
+
wp_error* wp_query_parse(const char* s, const char* default_field, wp_query** query) RAISES_ERROR;
|
24
|
+
|
25
|
+
#endif
|