whistlepig 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+
2
+ /* A Bison parser, made by GNU Bison 2.4.1. */
3
+
4
+ /* Skeleton interface for Bison's Yacc-like parsers in C
5
+
6
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
7
+ Free Software Foundation, Inc.
8
+
9
+ This program is free software: you can redistribute it and/or modify
10
+ it under the terms of the GNU General Public License as published by
11
+ the Free Software Foundation, either version 3 of the License, or
12
+ (at your option) any later version.
13
+
14
+ This program is distributed in the hope that it will be useful,
15
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ GNU General Public License for more details.
18
+
19
+ You should have received a copy of the GNU General Public License
20
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
+
22
+ /* As a special exception, you may create a larger work that contains
23
+ part or all of the Bison parser skeleton and distribute that work
24
+ under terms of your choice, so long as that work isn't itself a
25
+ parser generator using the skeleton or a modified version thereof
26
+ as a parser skeleton. Alternatively, if you modify or redistribute
27
+ the parser skeleton itself, you may (at your option) remove this
28
+ special exception, which will cause the skeleton and the resulting
29
+ Bison output files to be licensed under the GNU General Public
30
+ License without this special exception.
31
+
32
+ This special exception was added by the Free Software Foundation in
33
+ version 2.2 of Bison. */
34
+
35
+
36
+ /* Tokens. */
37
+ #ifndef YYTOKENTYPE /* skip the enum if the token codes were already declared */
38
+ # define YYTOKENTYPE
39
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
40
+ know about them. */
41
+ enum yytokentype {
42
+ WORD = 258, /* presumably a bare query word from the lexer -- confirm in query-parser.y */
43
+ OR = 259 /* presumably the OR keyword -- confirm in query-parser.y */
44
+ };
45
+ #endif
46
+
47
+
48
+
49
+ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED /* only declare the semantic value type once */
50
+ typedef union YYSTYPE
51
+ {
52
+
53
+ /* Line 1676 of yacc.c */
54
+ #line 30 "query-parser.y"
55
+
56
+ wp_query* query; /* semantic value holding a query tree node (wp_query comes from query.h) */
57
+ char* string; /* semantic value holding a C string */
58
+
59
+
60
+
61
+ /* Line 1676 of yacc.c */
62
+ #line 63 "query-parser.tab.h"
63
+ } YYSTYPE;
64
+ # define YYSTYPE_IS_TRIVIAL 1
65
+ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
66
+ # define YYSTYPE_IS_DECLARED 1
67
+ #endif
68
+
69
+
70
+
71
+ #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED /* only declare the location type once */
72
+ typedef struct YYLTYPE
73
+ {
74
+ int first_line; /* presumably the line on which the symbol's text starts -- see the Bison manual */
75
+ int first_column; /* presumably the column at which the symbol's text starts */
76
+ int last_line; /* presumably the line on which the symbol's text ends */
77
+ int last_column; /* presumably the column at which the symbol's text ends */
78
+ } YYLTYPE;
79
+ # define yyltype YYLTYPE /* obsolescent; will be withdrawn */
80
+ # define YYLTYPE_IS_DECLARED 1
81
+ # define YYLTYPE_IS_TRIVIAL 1
82
+ #endif
83
+
84
+
85
+
@@ -0,0 +1,194 @@
1
+ #include "query.h"
2
+
3
+ static wp_query* wp_query_new() {
4
+ wp_query* ret = malloc(sizeof(wp_query));
5
+ ret->type = 0; // error
6
+ ret->field = ret->word = NULL;
7
+ ret->num_children = 0;
8
+ ret->children = ret->next = ret->last = NULL;
9
+ ret->search_data = NULL;
10
+
11
+ return ret;
12
+ }
13
+
14
// private: copy a NUL-terminated string into freshly malloc'd memory.
//
// the caller owns the result and must free() it. returns NULL if the
// allocation fails. old must not be NULL.
static char* strdup(const char* old) { // sigh... not in c99
  size_t len = strlen(old) + 1; // include the terminating NUL
  char* copy = malloc(len);
  if(copy == NULL) return NULL; // memcpy into NULL would be undefined behavior
  return memcpy(copy, old, len);
}
19
+
20
+ wp_query* wp_query_clone(wp_query* other) {
21
+ wp_query* ret = malloc(sizeof(wp_query));
22
+ ret->type = other->type;
23
+ ret->num_children = other->num_children;
24
+ ret->search_data = NULL;
25
+
26
+ if(other->field) ret->field = strdup(other->field);
27
+ else ret->field = NULL;
28
+
29
+ if(other->word) ret->word = strdup(other->word);
30
+ else ret->word = NULL;
31
+
32
+ ret->children = ret->next = ret->last = NULL; // set below
33
+ for(wp_query* child = other->children; child != NULL; child = child->next) {
34
+ wp_query* clone = wp_query_clone(child);
35
+ if(ret->last == NULL) ret->children = ret->last = clone;
36
+ else {
37
+ ret->last->next = clone;
38
+ ret->last = clone;
39
+ }
40
+ }
41
+
42
+ return ret;
43
+ }
44
+
45
+ wp_query* wp_query_new_term(const char* field, const char* word) {
46
+ wp_query* ret = wp_query_new();
47
+ ret->type = WP_QUERY_TERM;
48
+ ret->field = field;
49
+ ret->word = word;
50
+ return ret;
51
+ }
52
+
53
+ wp_query* wp_query_new_label(const char* label) {
54
+ wp_query* ret = wp_query_new();
55
+ ret->type = WP_QUERY_LABEL;
56
+ ret->word = label;
57
+ ret->field = NULL;
58
+ return ret;
59
+ }
60
+
61
+ wp_query* wp_query_new_conjunction() {
62
+ wp_query* ret = wp_query_new();
63
+ ret->type = WP_QUERY_CONJ;
64
+ return ret;
65
+ }
66
+
67
+ wp_query* wp_query_new_disjunction() {
68
+ wp_query* ret = wp_query_new();
69
+ ret->type = WP_QUERY_DISJ;
70
+ return ret;
71
+ }
72
+
73
+ wp_query* wp_query_new_phrase() {
74
+ wp_query* ret = wp_query_new();
75
+ ret->type = WP_QUERY_PHRASE;
76
+ return ret;
77
+ }
78
+
79
+ wp_query* wp_query_new_negation() {
80
+ wp_query* ret = wp_query_new();
81
+ ret->type = WP_QUERY_NEG;
82
+ return ret;
83
+ }
84
+
85
+ wp_query* wp_query_new_empty() {
86
+ wp_query* ret = wp_query_new();
87
+ ret->type = WP_QUERY_EMPTY;
88
+ return ret;
89
+ }
90
+
91
+ wp_query* wp_query_add(wp_query* a, wp_query* b) {
92
+ if(a->type == WP_QUERY_EMPTY) {
93
+ wp_query_free(a);
94
+ return b;
95
+ }
96
+ else if(b->type == WP_QUERY_EMPTY) {
97
+ wp_query_free(b);
98
+ return a;
99
+ }
100
+ else {
101
+ a->num_children++;
102
+ if(a->last == NULL) a->children = a->last = b;
103
+ else {
104
+ a->last->next = b;
105
+ a->last = b;
106
+ }
107
+ return a;
108
+ }
109
+ }
110
+
111
+ void wp_query_free(wp_query* q) {
112
+ if(q->field) free((void*)q->field);
113
+ if(q->word) free((void*)q->word);
114
+ while(q->children) {
115
+ wp_query* b = q->children;
116
+ q->children = q->children->next;
117
+ wp_query_free(b);
118
+ }
119
+ free(q);
120
+ }
121
+
122
+ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
123
+ char* orig_buf = buf;
124
+
125
+ for(wp_query* child = q->children; child != NULL; child = child->next) {
126
+ if((n - (buf - orig_buf)) < 1) break; // can we add a space?
127
+ buf += sprintf(buf, " ");
128
+ buf += wp_query_to_s(child, n - (buf - orig_buf), buf);
129
+ }
130
+
131
+ return buf - orig_buf;
132
+ }
133
+
134
+ #define min(a, b) (a < b ? a : b)
135
+
136
+ int wp_query_to_s(wp_query* q, size_t n, char* buf) {
137
+ int ret;
138
+ char* orig_buf = buf;
139
+
140
+ if(q->type == WP_QUERY_EMPTY) {
141
+ buf[0] = '\0';
142
+ ret = n;
143
+ }
144
+ else if(q->type == WP_QUERY_TERM) {
145
+ size_t term_n = (size_t)snprintf(buf, n, "%s:\"%s\"", q->field, q->word);
146
+ ret = min(term_n, n);
147
+ }
148
+ else if(q->type == WP_QUERY_LABEL) {
149
+ size_t term_n = (size_t)snprintf(buf, n, "~%s", q->word);
150
+ ret = min(term_n, n);
151
+ }
152
+ else {
153
+ switch(q->type) {
154
+ case WP_QUERY_CONJ:
155
+ if(n >= 4) { // "(AND"
156
+ buf += snprintf(buf, n, "(AND");
157
+ n -= 4;
158
+ }
159
+ break;
160
+ case WP_QUERY_DISJ:
161
+ if(n >= 3) { // "(OR"
162
+ buf += snprintf(buf, n, "(OR");
163
+ n -= 3;
164
+ }
165
+ break;
166
+ case WP_QUERY_PHRASE:
167
+ if(n >= 7) { // "(PHRASE"
168
+ buf += snprintf(buf, n, "(PHRASE");
169
+ n -= 7;
170
+ }
171
+ break;
172
+ case WP_QUERY_NEG:
173
+ if(n >= 4) {
174
+ buf += snprintf(buf, n, "(NOT");
175
+ n -= 4;
176
+ }
177
+ break;
178
+ }
179
+
180
+ int subq_size = subquery_to_s(q, n, buf);
181
+ n -= subq_size;
182
+ buf += subq_size;
183
+ if(n >= 1) buf += sprintf(buf, ")");
184
+ ret = buf - orig_buf;
185
+ }
186
+
187
+ return ret;
188
+ }
189
+
190
+ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field) {
191
+ if(q->type == WP_QUERY_TERM) q->field = field;
192
+ else for(wp_query* child = q->children; child != NULL; child = child->next) wp_query_set_all_child_fields(child, strdup(field));
193
+ return q;
194
+ }
@@ -0,0 +1,78 @@
1
+ #ifndef WP_QUERY_H_
2
+ #define WP_QUERY_H_
3
+
4
+ // whistlepig query
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // a query. typically built up by the parser, but you can also build it
8
+ // programmatically yourself if you like.
9
+ //
10
+ // note that queries contain segment-specific search state in them. see
11
+ // search.c for details.
12
+
13
+ #include <stdint.h>
14
+ #include <stdlib.h>
15
+ #include "segment.h"
16
+
17
+ #define WP_QUERY_TERM 1 // leaf with a field and a word (see wp_query_new_term)
18
+ #define WP_QUERY_CONJ 2 // printed as "(AND ...)" by wp_query_to_s
19
+ #define WP_QUERY_DISJ 3 // printed as "(OR ...)" by wp_query_to_s
20
+ #define WP_QUERY_PHRASE 4 // printed as "(PHRASE ...)" by wp_query_to_s
21
+ #define WP_QUERY_NEG 5 // printed as "(NOT ...)" by wp_query_to_s
22
+ #define WP_QUERY_LABEL 6 // leaf with a word only, printed as "~word"
23
+ #define WP_QUERY_EMPTY 7 // placeholder node that wp_query_add discards
24
+
25
+ // a node in the query tree
26
+ typedef struct wp_query {
27
+ uint8_t type; // one of the WP_QUERY_* values; a fresh node starts at 0, meaning error
28
+ const char* field; // field name of a term node, else NULL; free()d by wp_query_free
29
+ const char* word; // term text or label name; free()d by wp_query_free
30
+
31
+ uint16_t num_children; // incremented by wp_query_add for every child it appends
32
+ struct wp_query* children; // first child, or NULL when there are none
33
+ struct wp_query* next; // next sibling in the parent's child list, or NULL
34
+ struct wp_query* last; // last child, so wp_query_add can append without walking the list
35
+
36
+ uint16_t segment_idx; // used to continue queries across segments (see index.c)
37
+ void* search_data; // whatever state we need for actually doing searches
38
+ } wp_query;
39
+
40
+ // API methods
41
+
42
+ // public: make a query node with a term
43
+ wp_query* wp_query_new_term(const char* field, const char* word);
44
+
45
+ // public: make a query node with a label
46
+ wp_query* wp_query_new_label(const char* label);
47
+
48
+ // public: make a query conjuction node
49
+ wp_query* wp_query_new_conjunction();
50
+
51
+ // public: make a query disjunction node
52
+ wp_query* wp_query_new_disjunction();
53
+
54
+ // public: make a query phrase node
55
+ wp_query* wp_query_new_phrase();
56
+
57
+ // public: make a query negation node
58
+ wp_query* wp_query_new_negation();
59
+
60
+ // public: make an empty query node.
61
+ wp_query* wp_query_new_empty();
62
+
63
+ // public: deep clone of a query, but dropping all search state.
64
+ wp_query* wp_query_clone(wp_query* other);
65
+
66
+ // public: add a query node as a child of another
67
+ wp_query* wp_query_add(wp_query* a, wp_query* b);
68
+
69
+ // private: set all children fields to a particular value
70
+ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field);
71
+
72
+ // public: free a query
73
+ void wp_query_free(wp_query* q);
74
+
75
+ // public: build a string representation of a query by writing at most n chars to buf
76
+ int wp_query_to_s(wp_query* q, size_t n, char* buf);
77
+
78
+ #endif