whistlepig 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +86 -0
- data/ext/whistlepig/defaults.h +28 -0
- data/ext/whistlepig/entry.c +181 -0
- data/ext/whistlepig/entry.h +66 -0
- data/ext/whistlepig/error.c +24 -0
- data/ext/whistlepig/error.h +94 -0
- data/ext/whistlepig/extconf.rb +6 -0
- data/ext/whistlepig/index.c +294 -0
- data/ext/whistlepig/index.h +88 -0
- data/ext/whistlepig/khash.h +316 -0
- data/ext/whistlepig/mmap-obj.c +76 -0
- data/ext/whistlepig/mmap-obj.h +52 -0
- data/ext/whistlepig/query-parser.c +37 -0
- data/ext/whistlepig/query-parser.h +25 -0
- data/ext/whistlepig/query-parser.lex.c +2249 -0
- data/ext/whistlepig/query-parser.lex.h +359 -0
- data/ext/whistlepig/query-parser.tab.c +1757 -0
- data/ext/whistlepig/query-parser.tab.h +85 -0
- data/ext/whistlepig/query.c +194 -0
- data/ext/whistlepig/query.h +78 -0
- data/ext/whistlepig/search.c +746 -0
- data/ext/whistlepig/search.h +76 -0
- data/ext/whistlepig/segment.c +615 -0
- data/ext/whistlepig/segment.h +137 -0
- data/ext/whistlepig/stringmap.c +278 -0
- data/ext/whistlepig/stringmap.h +82 -0
- data/ext/whistlepig/stringpool.c +44 -0
- data/ext/whistlepig/stringpool.h +58 -0
- data/ext/whistlepig/termhash.c +294 -0
- data/ext/whistlepig/termhash.h +79 -0
- data/ext/whistlepig/tokenizer.lex.c +2263 -0
- data/ext/whistlepig/tokenizer.lex.h +360 -0
- data/ext/whistlepig/whistlepig.h +15 -0
- data/ext/whistlepig/whistlepigc.c +537 -0
- data/lib/whistlepig.rb +119 -0
- metadata +103 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
#include <fcntl.h>
|
2
|
+
#include <sys/mman.h>
|
3
|
+
#include <unistd.h>
|
4
|
+
#include "whistlepig.h"
|
5
|
+
|
6
|
+
// Sanity-check a freshly mapped header before trusting its size field.
//
// h:     header to inspect
// magic: tag the caller expects to find in h->magic
//
// Raises an error if the tag differs or if the stored size is the reserved
// all-ones value; returns NO_ERROR otherwise.
RAISING_STATIC(validate(mmap_obj_header* h, const char* magic)) {
  // only the first MMAP_OBJ_MAGIC_SIZE bytes take part in the comparison
  if(strncmp(magic, h->magic, MMAP_OBJ_MAGIC_SIZE) != 0) RAISE_ERROR("invalid magic (expecting %s)", magic);

  // the size is unsigned, so it is reported with %u rather than %d
  if(h->size == (uint32_t)-1) RAISE_ERROR("invalid size %u", (unsigned)h->size);

  return NO_ERROR;
}
|
11
|
+
|
12
|
+
// Create a new file at `pathname`, size it for a header plus `initial_size`
// payload bytes, map it read/write and shared, and stamp the header.
//
// o:            receives the file descriptor and the mapped header pointer
// magic:        tag copied into the header (at most MMAP_OBJ_MAGIC_SIZE bytes)
// pathname:     file to create; creation fails if it already exists (O_EXCL)
// initial_size: payload size in bytes, not counting the header
//
// Raises an error if the size would overflow, or if any of open/lseek/write/
// mmap fails.
// NOTE(review): the error paths after open() return with o->fd still open.
wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname, uint32_t initial_size) {
  // the total size is kept in 32 bits, so refuse payloads that would wrap it
  if(initial_size > UINT32_MAX - sizeof(mmap_obj_header)) RAISE_ERROR("invalid size %u", (unsigned)initial_size);

  o->fd = open(pathname, O_EXCL | O_CREAT | O_RDWR, 0640);
  if(o->fd == -1) RAISE_SYSERROR("cannot create %s", pathname);

  uint32_t size = initial_size + sizeof(mmap_obj_header);
  // sizeof yields a size_t; cast it so it matches the %u conversion
  DEBUG("creating %s with %u + %u = %u bytes for %s object", pathname, initial_size, (unsigned)sizeof(mmap_obj_header), size, magic);

  // extend the file to its full length by writing one byte at the last offset
  if(lseek(o->fd, size - 1, SEEK_SET) == (off_t)-1) RAISE_SYSERROR("lseek");
  ssize_t num_bytes = write(o->fd, "", 1);
  if(num_bytes == -1) RAISE_SYSERROR("write");

  o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("mmap");

  // magic is a fixed-width field: strncpy pads short tags with NUL bytes and
  // leaves a full-width tag unterminated
  strncpy(o->header->magic, magic, MMAP_OBJ_MAGIC_SIZE);
  o->header->size = initial_size;
  DEBUG("created new %s object with %u bytes", magic, size);

  return NO_ERROR;
}
|
29
|
+
|
30
|
+
// Open an existing object file and map it read/write and shared.
//
// The header alone is mapped first so that its magic and size can be checked;
// it is then unmapped and the whole file (header + payload) is mapped using
// the size recorded in the header.
//
// o:        receives the file descriptor and the mapped header pointer
// magic:    tag the file's header must carry
// pathname: file to open
//
// Raises an error if open/mmap/munmap fails, if validation fails, or if the
// recorded size would overflow once the header is added.
// NOTE(review): the error paths after open() return with o->fd still open,
// and a validation failure returns with the header still mapped.
wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
  DEBUG("trying to load %s object from %s", magic, pathname);
  // no mode argument: it is only consulted when O_CREAT is given
  o->fd = open(pathname, O_RDWR);
  if(o->fd == -1) RAISE_SYSERROR("cannot open %s", pathname);

  // load header
  o->header = mmap(NULL, sizeof(mmap_obj_header), PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("header mmap");
  // sizeof yields a size_t; cast it so it matches the %u conversion
  DEBUG("loaded header of %u bytes for %s object", (unsigned)sizeof(mmap_obj_header), magic);

  RELAY_ERROR(validate(o->header, magic));

  // the size comes from the file; reject values that would wrap the 32-bit total
  if(o->header->size > UINT32_MAX - sizeof(mmap_obj_header)) RAISE_ERROR("invalid size %u", (unsigned)o->header->size);

  uint32_t size = o->header->size + sizeof(mmap_obj_header);
  DEBUG("full size is %u bytes (including %u-byte header)", size, (unsigned)sizeof(mmap_obj_header));
  if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");

  // map again, this time covering the payload as well
  o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(o->header == MAP_FAILED) RAISE_SYSERROR("full mmap");
  DEBUG("loaded full %s object of %u bytes", magic, size);

  return NO_ERROR;
}
|
52
|
+
|
53
|
+
// Change the payload size of a mapped object to `data_size` bytes and remap it.
//
// The file is extended first, while the old mapping is still in place, so a
// failing lseek/write leaves the object mapped and usable. Only then is the
// old mapping dropped and a new one of the requested size created. The header
// address may differ afterwards, so callers must re-derive object pointers.
//
// o:         object to resize; o->header is replaced on success
// data_size: new payload size in bytes, not counting the header
//
// Raises an error if the size would overflow or if lseek/write/munmap/mmap
// fails. If the final mmap fails, o->header is left NULL.
wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
  DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, (void*)o->header);

  // the total size is kept in 32 bits, so refuse payloads that would wrap it
  if(data_size > UINT32_MAX - sizeof(mmap_obj_header)) RAISE_ERROR("invalid size %u", (unsigned)data_size);

  // remember the current mapping length before the header goes away
  const size_t old_len = sizeof(mmap_obj_header) + o->header->size;
  const uint32_t size = data_size + sizeof(mmap_obj_header);

  // make sure the file reaches the new length by writing one byte at its last offset
  if(lseek(o->fd, size - 1, SEEK_SET) == (off_t)-1) RAISE_SYSERROR("lseek");
  ssize_t num_bytes = write(o->fd, "", 1);
  if(num_bytes == -1) RAISE_SYSERROR("write");

  if(munmap(o->header, old_len) == -1) RAISE_SYSERROR("munmap");
  o->header = NULL; // never leave a pointer to unmapped memory behind

  // mmap always maps from offset 0, so the file position does not need resetting
  void* mapped = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
  if(mapped == MAP_FAILED) RAISE_SYSERROR("mmap");

  o->header = mapped;
  o->header->size = data_size;
  DEBUG("loaded %u bytes after resize. header is at %p", o->header->size, (void*)o->header);

  return NO_ERROR;
}
|
70
|
+
|
71
|
+
// Unmap an object (header plus payload) and clear o->header.
//
// Raises an error if munmap fails, in which case o->header is left untouched.
// NOTE(review): o->fd is not closed here; confirm whether callers are
// expected to close it.
wp_error* mmap_obj_unload(mmap_obj* o) {
  // read the length before unmapping, since it lives inside the mapping
  const size_t mapped_len = sizeof(mmap_obj_header) + o->header->size;

  // mapped_len is a size_t; cast it so it matches the %u conversion
  DEBUG("unloading %u bytes", (unsigned)mapped_len);
  if(munmap(o->header, mapped_len) == -1) RAISE_SYSERROR("munmap");
  o->header = NULL;

  return NO_ERROR;
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#ifndef WP_MMAP_OBJ_H_
|
2
|
+
#define WP_MMAP_OBJ_H_
|
3
|
+
|
4
|
+
// whistlepig mmap objects
|
5
|
+
// (c) 2011 William Morgan. See COPYING for license terms.
|
6
|
+
//
|
7
|
+
// wrappers around the logic of loading, unloading, and resizing
|
8
|
+
// arbitrary-sized objects using mmap.
|
9
|
+
//
|
10
|
+
// note that any of the mmap_obj_* functions may change the object pointer, so
|
11
|
+
// use MMAP_OBJ or MMAP_OBJ_PTR to dereference (again) after calling them.
|
12
|
+
|
13
|
+
#define MMAP_OBJ_MAGIC_SIZE 15
|
14
|
+
|
15
|
+
#include <stdint.h>
|
16
|
+
#include "error.h"
|
17
|
+
|
18
|
+
// the header, with a magic string
|
19
|
+
typedef struct mmap_obj_header {
|
20
|
+
char magic[MMAP_OBJ_MAGIC_SIZE];
|
21
|
+
uint32_t size;
|
22
|
+
char obj[];
|
23
|
+
} mmap_obj_header;
|
24
|
+
|
25
|
+
// what we pass around at runtime
|
26
|
+
typedef struct mmap_obj {
|
27
|
+
int fd;
|
28
|
+
mmap_obj_header* header;
|
29
|
+
} mmap_obj;
|
30
|
+
|
31
|
+
// public API
|
32
|
+
|
33
|
+
// public: get the actual object from an mmap_obj
|
34
|
+
// `v` is an mmap_obj value (not a pointer). it is parenthesized so that
// arguments like `*ptr` expand correctly; the result is the payload address
// cast to `type*`.
#define MMAP_OBJ(v, type) ((type*)(v).header->obj)
|
35
|
+
|
36
|
+
// public: get the object from an mmap_obj*
|
37
|
+
// `v` is an mmap_obj*. the whole expansion is parenthesized so that the cast
// applies before any trailing `->field` or `[i]`, and `v` is parenthesized so
// that arguments like `&obj` expand correctly.
#define MMAP_OBJ_PTR(v, type) ((type*)(v)->header->obj)
|
38
|
+
|
39
|
+
// public: create an object with an initial size
|
40
|
+
// the file at `pathname` is created exclusively, so this fails if it already
// exists. the file is sized to hold the header plus `initial_size` bytes, then
// mapped, and `magic` and `initial_size` are recorded in the header.
wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname, uint32_t initial_size) RAISES_ERROR;
|
41
|
+
|
42
|
+
// public: load an object, raising an error if it doesn't exist (or if the
|
43
|
+
// magic doesn't match)
|
44
|
+
// the header is mapped and validated first (magic and size); the mapping is
// then redone to cover the header plus the size recorded in it.
wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) RAISES_ERROR;
|
45
|
+
|
46
|
+
// public: resize an object. note that the obj pointer might change after this call.
|
47
|
+
// `new_size` is the payload size in bytes, not counting the header. the
// object is unmapped and mapped again at the new size, and the new size is
// stored in the header.
wp_error* mmap_obj_resize(mmap_obj* o, uint32_t new_size) RAISES_ERROR;
|
48
|
+
|
49
|
+
// public: unload an object
|
50
|
+
// unmaps the header and payload and sets o->header to NULL.
// NOTE(review): the implementation does not appear to close o->fd — confirm
// who owns the descriptor.
wp_error* mmap_obj_unload(mmap_obj* o) RAISES_ERROR;
|
51
|
+
|
52
|
+
#endif
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "whistlepig.h"
|
3
|
+
#include "query-parser.h"
|
4
|
+
#include "query-parser.tab.h"
|
5
|
+
|
6
|
+
int query_parser_parse(query_parse_context* c);
|
7
|
+
int query_parser_lex_init(void* scanner);
|
8
|
+
int query_parser_lex_destroy(void* scanner);
|
9
|
+
int query_parser_set_extra(void* extra, void* scanner);
|
10
|
+
|
11
|
+
// Record a parse error on the context as a heap-allocated message of the
// form "line N: <err>".
//
// locp:    location of the error; only first_line is used
// context: parse context whose `error` field receives the message
// err:     error text to embed in the message
//
// If the buffer cannot be allocated, context->error is set to NULL instead of
// formatting into a null pointer.
// NOTE(review): a message already stored in context->error is overwritten
// without being freed — confirm whether this can be called more than once
// per parse.
void query_parser_error(YYLTYPE* locp, query_parse_context* context, const char* err) {
  enum { ERROR_BUF_SIZE = 1024 };

  char* msg = malloc(ERROR_BUF_SIZE);
  if(msg != NULL) snprintf(msg, ERROR_BUF_SIZE, "line %d: %s", locp->first_line, err);

  context->error = msg;
}
|
15
|
+
|
16
|
+
extern int query_parser_debug;
|
17
|
+
|
18
|
+
// Parse the query string `s` into a wp_query.
//
// s:             query text
// default_field: stored on the parse context for use by the parser
// query:         receives the parsed query, or the result of
//                wp_query_new_empty() when the parser produced no result
//
// Raises an error if the scanner cannot be initialized or if the parser
// reports failure; *query is not written in either case.
// NOTE(review): c.error is not freed after being reported — confirm whether
// RAISE_ERROR copies the text so it can be released here.
wp_error* wp_query_parse(const char* s, const char* default_field, wp_query** query) {
  // every field starts out defined, so `result` reads as NULL when the parser
  // never assigns it
  query_parse_context c = {
    .input = s,
    .default_field = default_field,
    .error = NULL,
    .scanner = NULL,
    .result = NULL,
  };

  int init_status = query_parser_lex_init(&c.scanner);
  if(init_status != 0) RAISE_ERROR("cannot initialize query scanner (code %d)", init_status);

  query_parser_set_extra(&c, c.scanner);
  int ret = query_parser_parse(&c);
  query_parser_lex_destroy(c.scanner);

  // the parser can fail without a message having been recorded
  if(ret != 0) RAISE_ERROR("parse error: %s", c.error != NULL ? c.error : "unknown error");

  // no result means an empty query
  *query = (c.result != NULL) ? c.result : wp_query_new_empty();

  return NO_ERROR;
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#ifndef WP_QUERY_PARSER_H_
|
2
|
+
#define WP_QUERY_PARSER_H_
|
3
|
+
|
4
|
+
// whistlepig query parser
|
5
|
+
// (c) 2011 William Morgan. See COPYING for license terms.
|
6
|
+
//
|
7
|
+
// most of the code, of course, is in the .lex and .y files
|
8
|
+
|
9
|
+
#include "query.h"
|
10
|
+
#include "error.h"
|
11
|
+
|
12
|
+
typedef struct {
|
13
|
+
const char* input;
|
14
|
+
const char* default_field;
|
15
|
+
char* error;
|
16
|
+
void* scanner;
|
17
|
+
wp_query* result;
|
18
|
+
} query_parse_context;
|
19
|
+
|
20
|
+
// API methods
|
21
|
+
|
22
|
+
// public: parse a query from a string, attaching terms without fields to default_field
|
23
|
+
// on success *query holds the parsed query, or a new empty query when the
// input produced no result. a parser failure is raised as "parse error: ...".
wp_error* wp_query_parse(const char* s, const char* default_field, wp_query** query) RAISES_ERROR;
|
24
|
+
|
25
|
+
#endif
|