whistlepig 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,85 @@
1
+
2
+ /* A Bison parser, made by GNU Bison 2.4.1. */
3
+
4
+ /* Skeleton interface for Bison's Yacc-like parsers in C
5
+
6
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
7
+ Free Software Foundation, Inc.
8
+
9
+ This program is free software: you can redistribute it and/or modify
10
+ it under the terms of the GNU General Public License as published by
11
+ the Free Software Foundation, either version 3 of the License, or
12
+ (at your option) any later version.
13
+
14
+ This program is distributed in the hope that it will be useful,
15
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ GNU General Public License for more details.
18
+
19
+ You should have received a copy of the GNU General Public License
20
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
+
22
+ /* As a special exception, you may create a larger work that contains
23
+ part or all of the Bison parser skeleton and distribute that work
24
+ under terms of your choice, so long as that work isn't itself a
25
+ parser generator using the skeleton or a modified version thereof
26
+ as a parser skeleton. Alternatively, if you modify or redistribute
27
+ the parser skeleton itself, you may (at your option) remove this
28
+ special exception, which will cause the skeleton and the resulting
29
+ Bison output files to be licensed under the GNU General Public
30
+ License without this special exception.
31
+
32
+ This special exception was added by the Free Software Foundation in
33
+ version 2.2 of Bison. */
34
+
35
+
36
+ /* Tokens. */
37
#if !defined(YYTOKENTYPE)
# define YYTOKENTYPE
   /* The token codes are declared as an enum (rather than as macros) so that
      they end up in the symbol table, where GDB and other debuggers can see
      them.  The guard skips the declaration if YYTOKENTYPE is already
      defined.  */
   enum yytokentype
   {
     WORD = 258,
     OR = 259
   };
#endif
46
+
47
+
48
+
49
+ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
50
+ typedef union YYSTYPE
51
+ {
52
+
53
+ /* Line 1676 of yacc.c */
54
+ #line 30 "query-parser.y"
55
+
56
+ wp_query* query;
57
+ char* string;
58
+
59
+
60
+
61
+ /* Line 1676 of yacc.c */
62
+ #line 63 "query-parser.tab.h"
63
+ } YYSTYPE;
64
+ # define YYSTYPE_IS_TRIVIAL 1
65
+ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
66
+ # define YYSTYPE_IS_DECLARED 1
67
+ #endif
68
+
69
+
70
+
71
#if !defined(YYLTYPE) && !defined(YYLTYPE_IS_DECLARED)
/* Location record: the first and last line/column of a span.  Skipped when
   YYLTYPE has already been defined or declared elsewhere.  */
typedef struct YYLTYPE
{
  int first_line, first_column;
  int last_line, last_column;
} YYLTYPE;
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
# define yyltype YYLTYPE /* obsolescent; will be withdrawn */
#endif
83
+
84
+
85
+
@@ -0,0 +1,194 @@
1
+ #include "query.h"
2
+
3
+ static wp_query* wp_query_new() {
4
+ wp_query* ret = malloc(sizeof(wp_query));
5
+ ret->type = 0; // error
6
+ ret->field = ret->word = NULL;
7
+ ret->num_children = 0;
8
+ ret->children = ret->next = ret->last = NULL;
9
+ ret->search_data = NULL;
10
+
11
+ return ret;
12
+ }
13
+
14
// private: heap-allocated copy of a NUL-terminated string (sigh... strdup is
// not in c99). the caller owns the result and must free() it. returns NULL if
// the allocation fails.
static char* strdup(const char* old) {
  size_t len = strlen(old) + 1; // + 1 so the terminator is copied too
  char* copy = malloc(len);
  if(copy == NULL) return NULL; // memcpy into NULL would be undefined behaviour
  return memcpy(copy, old, len);
}
19
+
20
+ wp_query* wp_query_clone(wp_query* other) {
21
+ wp_query* ret = malloc(sizeof(wp_query));
22
+ ret->type = other->type;
23
+ ret->num_children = other->num_children;
24
+ ret->search_data = NULL;
25
+
26
+ if(other->field) ret->field = strdup(other->field);
27
+ else ret->field = NULL;
28
+
29
+ if(other->word) ret->word = strdup(other->word);
30
+ else ret->word = NULL;
31
+
32
+ ret->children = ret->next = ret->last = NULL; // set below
33
+ for(wp_query* child = other->children; child != NULL; child = child->next) {
34
+ wp_query* clone = wp_query_clone(child);
35
+ if(ret->last == NULL) ret->children = ret->last = clone;
36
+ else {
37
+ ret->last->next = clone;
38
+ ret->last = clone;
39
+ }
40
+ }
41
+
42
+ return ret;
43
+ }
44
+
45
+ wp_query* wp_query_new_term(const char* field, const char* word) {
46
+ wp_query* ret = wp_query_new();
47
+ ret->type = WP_QUERY_TERM;
48
+ ret->field = field;
49
+ ret->word = word;
50
+ return ret;
51
+ }
52
+
53
+ wp_query* wp_query_new_label(const char* label) {
54
+ wp_query* ret = wp_query_new();
55
+ ret->type = WP_QUERY_LABEL;
56
+ ret->word = label;
57
+ ret->field = NULL;
58
+ return ret;
59
+ }
60
+
61
+ wp_query* wp_query_new_conjunction() {
62
+ wp_query* ret = wp_query_new();
63
+ ret->type = WP_QUERY_CONJ;
64
+ return ret;
65
+ }
66
+
67
+ wp_query* wp_query_new_disjunction() {
68
+ wp_query* ret = wp_query_new();
69
+ ret->type = WP_QUERY_DISJ;
70
+ return ret;
71
+ }
72
+
73
+ wp_query* wp_query_new_phrase() {
74
+ wp_query* ret = wp_query_new();
75
+ ret->type = WP_QUERY_PHRASE;
76
+ return ret;
77
+ }
78
+
79
+ wp_query* wp_query_new_negation() {
80
+ wp_query* ret = wp_query_new();
81
+ ret->type = WP_QUERY_NEG;
82
+ return ret;
83
+ }
84
+
85
+ wp_query* wp_query_new_empty() {
86
+ wp_query* ret = wp_query_new();
87
+ ret->type = WP_QUERY_EMPTY;
88
+ return ret;
89
+ }
90
+
91
+ wp_query* wp_query_add(wp_query* a, wp_query* b) {
92
+ if(a->type == WP_QUERY_EMPTY) {
93
+ wp_query_free(a);
94
+ return b;
95
+ }
96
+ else if(b->type == WP_QUERY_EMPTY) {
97
+ wp_query_free(b);
98
+ return a;
99
+ }
100
+ else {
101
+ a->num_children++;
102
+ if(a->last == NULL) a->children = a->last = b;
103
+ else {
104
+ a->last->next = b;
105
+ a->last = b;
106
+ }
107
+ return a;
108
+ }
109
+ }
110
+
111
+ void wp_query_free(wp_query* q) {
112
+ if(q->field) free((void*)q->field);
113
+ if(q->word) free((void*)q->word);
114
+ while(q->children) {
115
+ wp_query* b = q->children;
116
+ q->children = q->children->next;
117
+ wp_query_free(b);
118
+ }
119
+ free(q);
120
+ }
121
+
122
+ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
123
+ char* orig_buf = buf;
124
+
125
+ for(wp_query* child = q->children; child != NULL; child = child->next) {
126
+ if((n - (buf - orig_buf)) < 1) break; // can we add a space?
127
+ buf += sprintf(buf, " ");
128
+ buf += wp_query_to_s(child, n - (buf - orig_buf), buf);
129
+ }
130
+
131
+ return buf - orig_buf;
132
+ }
133
+
134
+ #define min(a, b) (a < b ? a : b)
135
+
136
+ int wp_query_to_s(wp_query* q, size_t n, char* buf) {
137
+ int ret;
138
+ char* orig_buf = buf;
139
+
140
+ if(q->type == WP_QUERY_EMPTY) {
141
+ buf[0] = '\0';
142
+ ret = n;
143
+ }
144
+ else if(q->type == WP_QUERY_TERM) {
145
+ size_t term_n = (size_t)snprintf(buf, n, "%s:\"%s\"", q->field, q->word);
146
+ ret = min(term_n, n);
147
+ }
148
+ else if(q->type == WP_QUERY_LABEL) {
149
+ size_t term_n = (size_t)snprintf(buf, n, "~%s", q->word);
150
+ ret = min(term_n, n);
151
+ }
152
+ else {
153
+ switch(q->type) {
154
+ case WP_QUERY_CONJ:
155
+ if(n >= 4) { // "(AND"
156
+ buf += snprintf(buf, n, "(AND");
157
+ n -= 4;
158
+ }
159
+ break;
160
+ case WP_QUERY_DISJ:
161
+ if(n >= 3) { // "(OR"
162
+ buf += snprintf(buf, n, "(OR");
163
+ n -= 3;
164
+ }
165
+ break;
166
+ case WP_QUERY_PHRASE:
167
+ if(n >= 7) { // "(PHRASE"
168
+ buf += snprintf(buf, n, "(PHRASE");
169
+ n -= 7;
170
+ }
171
+ break;
172
+ case WP_QUERY_NEG:
173
+ if(n >= 4) {
174
+ buf += snprintf(buf, n, "(NOT");
175
+ n -= 4;
176
+ }
177
+ break;
178
+ }
179
+
180
+ int subq_size = subquery_to_s(q, n, buf);
181
+ n -= subq_size;
182
+ buf += subq_size;
183
+ if(n >= 1) buf += sprintf(buf, ")");
184
+ ret = buf - orig_buf;
185
+ }
186
+
187
+ return ret;
188
+ }
189
+
190
+ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field) {
191
+ if(q->type == WP_QUERY_TERM) q->field = field;
192
+ else for(wp_query* child = q->children; child != NULL; child = child->next) wp_query_set_all_child_fields(child, strdup(field));
193
+ return q;
194
+ }
@@ -0,0 +1,78 @@
1
+ #ifndef WP_QUERY_H_
2
+ #define WP_QUERY_H_
3
+
4
+ // whistlepig query
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // a query. typically built up by the parser, but you can also build it
8
+ // programmatically yourself if you like.
9
+ //
10
+ // note that queries contain segment-specific search state in them. see
11
+ // search.c for details.
12
+
13
+ #include <stdint.h>
14
+ #include <stdlib.h>
15
+ #include "segment.h"
16
+
17
// node types, stored in wp_query.type. 0 is not a valid type.
#define WP_QUERY_TERM    1 // field:"word"
#define WP_QUERY_CONJ    2 // (AND ...)
#define WP_QUERY_DISJ    3 // (OR ...)
#define WP_QUERY_PHRASE  4 // (PHRASE ...)
#define WP_QUERY_NEG     5 // (NOT ...)
#define WP_QUERY_LABEL   6 // ~word
#define WP_QUERY_EMPTY   7 // nothing at all
24
+
25
// a node in the query tree. children hang off a node as a singly linked list:
// `children` is the first child, `last` the most recently added one, and each
// child's `next` leads to its following sibling.
typedef struct wp_query wp_query;

struct wp_query {
  uint8_t type;               // one of the WP_QUERY_* values
  const char* field;          // freed by wp_query_free(); may be NULL
  const char* word;           // freed by wp_query_free(); may be NULL

  uint16_t num_children;      // bumped by wp_query_add() for each child linked in
  struct wp_query* children;  // head of the child list, or NULL
  struct wp_query* next;      // next sibling in the parent's child list, or NULL
  struct wp_query* last;      // tail of the child list, or NULL

  uint16_t segment_idx;       // used to continue queries across segments (see index.c)
  void* search_data;          // whatever state we need for actually doing searches
};
39
+
40
+ // API methods
41
+
42
+ // public: make a query node with a term
43
+ wp_query* wp_query_new_term(const char* field, const char* word);
44
+
45
+ // public: make a query node with a label
46
+ wp_query* wp_query_new_label(const char* label);
47
+
48
+ // public: make a query conjuction node
49
+ wp_query* wp_query_new_conjunction();
50
+
51
+ // public: make a query disjunction node
52
+ wp_query* wp_query_new_disjunction();
53
+
54
+ // public: make a query phrase node
55
+ wp_query* wp_query_new_phrase();
56
+
57
+ // public: make a query negation node
58
+ wp_query* wp_query_new_negation();
59
+
60
+ // public: make an empty query node.
61
+ wp_query* wp_query_new_empty();
62
+
63
+ // public: deep clone of a query, but dropping all search state.
64
+ wp_query* wp_query_clone(wp_query* other);
65
+
66
+ // public: add a query node as a child of another
67
+ wp_query* wp_query_add(wp_query* a, wp_query* b);
68
+
69
+ // private: set all children fields to a particular value
70
+ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field);
71
+
72
+ // public: free a query
73
+ void wp_query_free(wp_query* q);
74
+
75
+ // public: build a string representation of a query by writing at most n chars to buf
76
+ int wp_query_to_s(wp_query* q, size_t n, char* buf);
77
+
78
+ #endif