pg_query 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +13 -10
- data/ext/pg_query/extconf.rb +16 -14
- data/ext/pg_query/pg_query.c +3 -466
- data/ext/pg_query/pg_query.h +18 -0
- data/ext/pg_query/pg_query_normalize.c +363 -0
- data/ext/pg_query/pg_query_parse.c +102 -0
- data/lib/pg_query.rb +4 -0
- data/lib/pg_query/filter_columns.rb +52 -39
- data/lib/pg_query/fingerprint.rb +25 -50
- data/lib/pg_query/param_refs.rb +37 -32
- data/lib/pg_query/parse.rb +65 -45
- data/lib/pg_query/parse_error.rb +1 -1
- data/lib/pg_query/version.rb +1 -1
- metadata +35 -4
@@ -0,0 +1,18 @@
|
|
1
|
+
/*
 * pg_query.h
 *
 * Declarations shared by the C sources of the pg_query extension: the
 * PostgreSQL and Ruby headers they build on, the stderr capture buffer size,
 * and the functions implemented across the individual translation units.
 */
#ifndef PG_QUERY_H
#define PG_QUERY_H

#include "postgres.h"
#include "utils/memutils.h"

#include <ruby.h>

/* Number of bytes of captured stderr output kept by pg_query_raw_parse
 * (its buffer is one byte larger, for the terminator). */
#define STDERR_BUFFER_LEN 4096
/* When defined, pg_query_raw_parse leaves stderr alone instead of
 * redirecting it into a pipe. */
//#define DEBUG

/* Extension entry point. */
void Init_pg_query(void);

/* Builds a PgQuery::ParseError instance from a PostgreSQL error record. */
VALUE new_parse_error(ErrorData* error);

/* Ruby-callable functions; both take the query text as a Ruby String. */
VALUE pg_query_normalize(VALUE self, VALUE input);
VALUE pg_query_raw_parse(VALUE self, VALUE input);

#endif
|
@@ -0,0 +1,363 @@
|
|
1
|
+
#include "pg_query.h"
|
2
|
+
|
3
|
+
#include "parser/parser.h"
|
4
|
+
#include "parser/scanner.h"
|
5
|
+
#include "parser/scansup.h"
|
6
|
+
#include "mb/pg_wchar.h"
|
7
|
+
#include "nodes/nodeFuncs.h"
|
8
|
+
|
9
|
+
/*
 * Position and extent of a single constant inside the query text, recorded
 * while the query is being normalized.
 */
typedef struct pgssLocationLen
{
    int location;   /* byte offset of the constant from the start of the query */
    int length;     /* size of the constant in bytes; -1 marks an entry to skip */
} pgssLocationLen;
|
17
|
+
|
18
|
+
/*
|
19
|
+
* Working state for constant tree walker
|
20
|
+
*/
|
21
|
+
typedef struct pgssConstLocations
|
22
|
+
{
|
23
|
+
/* Array of locations of constants that should be removed */
|
24
|
+
pgssLocationLen *clocations;
|
25
|
+
|
26
|
+
/* Allocated length of clocations array */
|
27
|
+
int clocations_buf_size;
|
28
|
+
|
29
|
+
/* Current number of valid entries in clocations array */
|
30
|
+
int clocations_count;
|
31
|
+
} pgssConstLocations;
|
32
|
+
|
33
|
+
/*
|
34
|
+
* comp_location: comparator for qsorting pgssLocationLen structs by location
|
35
|
+
*/
|
36
|
+
static int
|
37
|
+
comp_location(const void *a, const void *b)
|
38
|
+
{
|
39
|
+
int l = ((const pgssLocationLen *) a)->location;
|
40
|
+
int r = ((const pgssLocationLen *) b)->location;
|
41
|
+
|
42
|
+
if (l < r)
|
43
|
+
return -1;
|
44
|
+
else if (l > r)
|
45
|
+
return +1;
|
46
|
+
else
|
47
|
+
return 0;
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
* Given a valid SQL string and an array of constant-location records,
|
52
|
+
* fill in the textual lengths of those constants.
|
53
|
+
*
|
54
|
+
* The constants may use any allowed constant syntax, such as float literals,
|
55
|
+
* bit-strings, single-quoted strings and dollar-quoted strings. This is
|
56
|
+
* accomplished by using the public API for the core scanner.
|
57
|
+
*
|
58
|
+
* It is the caller's job to ensure that the string is a valid SQL statement
|
59
|
+
* with constants at the indicated locations. Since in practice the string
|
60
|
+
* has already been parsed, and the locations that the caller provides will
|
61
|
+
* have originated from within the authoritative parser, this should not be
|
62
|
+
* a problem.
|
63
|
+
*
|
64
|
+
* Duplicate constant pointers are possible, and will have their lengths
|
65
|
+
* marked as '-1', so that they are later ignored. (Actually, we assume the
|
66
|
+
* lengths were initialized as -1 to start with, and don't change them here.)
|
67
|
+
*
|
68
|
+
* N.B. There is an assumption that a '-' character at a Const location begins
|
69
|
+
* a negative numeric constant. This precludes there ever being another
|
70
|
+
* reason for a constant to start with a '-'.
|
71
|
+
*/
|
72
|
+
static void
|
73
|
+
fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
|
74
|
+
{
|
75
|
+
pgssLocationLen *locs;
|
76
|
+
core_yyscan_t yyscanner;
|
77
|
+
core_yy_extra_type yyextra;
|
78
|
+
core_YYSTYPE yylval;
|
79
|
+
YYLTYPE yylloc;
|
80
|
+
int last_loc = -1;
|
81
|
+
int i;
|
82
|
+
|
83
|
+
/*
|
84
|
+
* Sort the records by location so that we can process them in order while
|
85
|
+
* scanning the query text.
|
86
|
+
*/
|
87
|
+
if (jstate->clocations_count > 1)
|
88
|
+
qsort(jstate->clocations, jstate->clocations_count,
|
89
|
+
sizeof(pgssLocationLen), comp_location);
|
90
|
+
locs = jstate->clocations;
|
91
|
+
|
92
|
+
/* initialize the flex scanner --- should match raw_parser() */
|
93
|
+
yyscanner = scanner_init(query,
|
94
|
+
&yyextra,
|
95
|
+
ScanKeywords,
|
96
|
+
NumScanKeywords);
|
97
|
+
|
98
|
+
/* Search for each constant, in sequence */
|
99
|
+
for (i = 0; i < jstate->clocations_count; i++)
|
100
|
+
{
|
101
|
+
int loc = locs[i].location;
|
102
|
+
int tok;
|
103
|
+
|
104
|
+
Assert(loc >= 0);
|
105
|
+
|
106
|
+
if (loc <= last_loc)
|
107
|
+
continue; /* Duplicate constant, ignore */
|
108
|
+
|
109
|
+
/* Lex tokens until we find the desired constant */
|
110
|
+
for (;;)
|
111
|
+
{
|
112
|
+
tok = core_yylex(&yylval, &yylloc, yyscanner);
|
113
|
+
|
114
|
+
/* We should not hit end-of-string, but if we do, behave sanely */
|
115
|
+
if (tok == 0)
|
116
|
+
break; /* out of inner for-loop */
|
117
|
+
|
118
|
+
/*
|
119
|
+
* We should find the token position exactly, but if we somehow
|
120
|
+
* run past it, work with that.
|
121
|
+
*/
|
122
|
+
if (yylloc >= loc)
|
123
|
+
{
|
124
|
+
if (query[loc] == '-')
|
125
|
+
{
|
126
|
+
/*
|
127
|
+
* It's a negative value - this is the one and only case
|
128
|
+
* where we replace more than a single token.
|
129
|
+
*
|
130
|
+
* Do not compensate for the core system's special-case
|
131
|
+
* adjustment of location to that of the leading '-'
|
132
|
+
* operator in the event of a negative constant. It is
|
133
|
+
* also useful for our purposes to start from the minus
|
134
|
+
* symbol. In this way, queries like "select * from foo
|
135
|
+
* where bar = 1" and "select * from foo where bar = -2"
|
136
|
+
* will have identical normalized query strings.
|
137
|
+
*/
|
138
|
+
tok = core_yylex(&yylval, &yylloc, yyscanner);
|
139
|
+
if (tok == 0)
|
140
|
+
break; /* out of inner for-loop */
|
141
|
+
}
|
142
|
+
|
143
|
+
/*
|
144
|
+
* We now rely on the assumption that flex has placed a zero
|
145
|
+
* byte after the text of the current token in scanbuf.
|
146
|
+
*/
|
147
|
+
locs[i].length = (int) strlen(yyextra.scanbuf + loc);
|
148
|
+
|
149
|
+
/* Quoted string with Unicode escapes
|
150
|
+
*
|
151
|
+
* The lexer consumes trailing whitespace in order to find UESCAPE, but if there
|
152
|
+
* is no UESCAPE it has still consumed it - don't include it in constant length.
|
153
|
+
*/
|
154
|
+
if (locs[i].length > 4 && /* U&'' */
|
155
|
+
(yyextra.scanbuf[loc] == 'u' || yyextra.scanbuf[loc] == 'U') &&
|
156
|
+
yyextra.scanbuf[loc + 1] == '&' && yyextra.scanbuf[loc + 2] == '\'')
|
157
|
+
{
|
158
|
+
int j = locs[i].length - 1; /* Skip the \0 */
|
159
|
+
for (; j >= 0 && scanner_isspace(yyextra.scanbuf[loc + j]); j--) {}
|
160
|
+
locs[i].length = j + 1; /* Count the \0 */
|
161
|
+
}
|
162
|
+
|
163
|
+
break; /* out of inner for-loop */
|
164
|
+
}
|
165
|
+
}
|
166
|
+
|
167
|
+
/* If we hit end-of-string, give up, leaving remaining lengths -1 */
|
168
|
+
if (tok == 0)
|
169
|
+
break;
|
170
|
+
|
171
|
+
last_loc = loc;
|
172
|
+
}
|
173
|
+
|
174
|
+
scanner_finish(yyscanner);
|
175
|
+
}
|
176
|
+
|
177
|
+
/*
|
178
|
+
* Generate a normalized version of the query string that will be used to
|
179
|
+
* represent all similar queries.
|
180
|
+
*
|
181
|
+
* Note that the normalized representation may well vary depending on
|
182
|
+
* just which "equivalent" query is used to create the hashtable entry.
|
183
|
+
* We assume this is OK.
|
184
|
+
*
|
185
|
+
* *query_len_p contains the input string length, and is updated with
|
186
|
+
* the result string length (which cannot be longer) on exit.
|
187
|
+
*
|
188
|
+
* Returns a palloc'd string.
|
189
|
+
*/
|
190
|
+
static char *
|
191
|
+
generate_normalized_query(pgssConstLocations *jstate, const char *query,
|
192
|
+
int *query_len_p, int encoding)
|
193
|
+
{
|
194
|
+
char *norm_query;
|
195
|
+
int query_len = *query_len_p;
|
196
|
+
int i,
|
197
|
+
len_to_wrt, /* Length (in bytes) to write */
|
198
|
+
quer_loc = 0, /* Source query byte location */
|
199
|
+
n_quer_loc = 0, /* Normalized query byte location */
|
200
|
+
last_off = 0, /* Offset from start for previous tok */
|
201
|
+
last_tok_len = 0; /* Length (in bytes) of that tok */
|
202
|
+
|
203
|
+
/*
|
204
|
+
* Get constants' lengths (core system only gives us locations). Note
|
205
|
+
* this also ensures the items are sorted by location.
|
206
|
+
*/
|
207
|
+
fill_in_constant_lengths(jstate, query);
|
208
|
+
|
209
|
+
/* Allocate result buffer */
|
210
|
+
norm_query = palloc(query_len + 1);
|
211
|
+
|
212
|
+
for (i = 0; i < jstate->clocations_count; i++)
|
213
|
+
{
|
214
|
+
int off, /* Offset from start for cur tok */
|
215
|
+
tok_len; /* Length (in bytes) of that tok */
|
216
|
+
|
217
|
+
off = jstate->clocations[i].location;
|
218
|
+
tok_len = jstate->clocations[i].length;
|
219
|
+
|
220
|
+
if (tok_len < 0)
|
221
|
+
continue; /* ignore any duplicates */
|
222
|
+
|
223
|
+
/* Copy next chunk (what precedes the next constant) */
|
224
|
+
len_to_wrt = off - last_off;
|
225
|
+
len_to_wrt -= last_tok_len;
|
226
|
+
|
227
|
+
Assert(len_to_wrt >= 0);
|
228
|
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
|
229
|
+
n_quer_loc += len_to_wrt;
|
230
|
+
|
231
|
+
/* And insert a '?' in place of the constant token */
|
232
|
+
norm_query[n_quer_loc++] = '?';
|
233
|
+
|
234
|
+
quer_loc = off + tok_len;
|
235
|
+
last_off = off;
|
236
|
+
last_tok_len = tok_len;
|
237
|
+
}
|
238
|
+
|
239
|
+
/*
|
240
|
+
* We've copied up until the last ignorable constant. Copy over the
|
241
|
+
* remaining bytes of the original query string.
|
242
|
+
*/
|
243
|
+
len_to_wrt = query_len - quer_loc;
|
244
|
+
|
245
|
+
Assert(len_to_wrt >= 0);
|
246
|
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
|
247
|
+
n_quer_loc += len_to_wrt;
|
248
|
+
|
249
|
+
Assert(n_quer_loc <= query_len);
|
250
|
+
norm_query[n_quer_loc] = '\0';
|
251
|
+
|
252
|
+
*query_len_p = n_quer_loc;
|
253
|
+
return norm_query;
|
254
|
+
}
|
255
|
+
|
256
|
+
/*
 * const_record_walker: tree walker that appends the location of every A_Const
 * node with a known (>= 0) location to jstate->clocations, doubling the array
 * whenever it is full. Lengths are recorded as -1 and filled in later.
 *
 * VariableSetStmt, CopyStmt and ExplainStmt are descended into explicitly
 * (through their args / query fields); every other node is handed to
 * raw_expression_tree_walker. An error raised by that walker is discarded and
 * treated as "nothing found" (false).
 */
static bool const_record_walker(Node *node, pgssConstLocations *jstate)
{
    bool walked;

    if (node == NULL)
        return false;

    switch (nodeTag(node))
    {
        case T_A_Const:
            if (((A_Const *) node)->location >= 0)
            {
                pgssLocationLen *slot;

                /* Out of room: double the capacity of the array. */
                if (jstate->clocations_count >= jstate->clocations_buf_size)
                {
                    jstate->clocations_buf_size *= 2;
                    jstate->clocations = (pgssLocationLen *)
                        repalloc(jstate->clocations,
                                 jstate->clocations_buf_size *
                                 sizeof(pgssLocationLen));
                }

                slot = &jstate->clocations[jstate->clocations_count];
                slot->location = ((A_Const *) node)->location;
                /* -1 is what fill_in_constant_lengths expects to start from */
                slot->length = -1;
                jstate->clocations_count++;
            }
            break;

        case T_VariableSetStmt:
            return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);

        case T_CopyStmt:
            return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);

        case T_ExplainStmt:
            return const_record_walker((Node *) ((ExplainStmt *) node)->query, jstate);

        default:
            break;
    }

    PG_TRY();
    {
        walked = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
    }
    PG_CATCH();
    {
        /* Swallow the error and carry on as if the subtree held no constants. */
        FlushErrorState();
        walked = false;
    }
    PG_END_TRY();

    return walked;
}
|
304
|
+
|
305
|
+
/*
 * pg_query_normalize: Ruby-callable function that parses `input` and returns
 * a copy of the query text in which every constant found in the parse tree is
 * replaced by '?'.
 *
 * self   receiver (unused)
 * input  query text; must be a Ruby String without embedded NUL bytes
 *
 * Returns a new Ruby String. Raises TypeError / ArgumentError for unusable
 * input, and the exception built by new_parse_error() when PostgreSQL reports
 * an error while parsing or normalizing.
 *
 * All PostgreSQL allocations happen in a private memory context that is
 * deleted before returning.
 */
VALUE pg_query_normalize(VALUE self, VALUE input)
{
    MemoryContext ctx = NULL;
    VALUE result = Qnil;
    VALUE error = Qnil;
    char *query;

    Check_Type(input, T_STRING);

    /*
     * StringValueCStr raises a Ruby exception for strings with embedded NUL
     * bytes. That must happen before the memory context is created and before
     * PG_TRY: a Ruby exception jumping out of the PG_TRY block would skip
     * PG_END_TRY, leaving PostgreSQL's exception stack pointing into this
     * (dead) frame and the memory context leaked and still current.
     */
    query = StringValueCStr(input);

    ctx = AllocSetContextCreate(TopMemoryContext,
                                "pg_query_normalize",
                                ALLOCSET_DEFAULT_MINSIZE,
                                ALLOCSET_DEFAULT_INITSIZE,
                                ALLOCSET_DEFAULT_MAXSIZE);
    MemoryContextSwitchTo(ctx);

    PG_TRY();
    {
        List *tree;
        char *normalized;
        pgssConstLocations jstate;
        int query_len;

        /* Parse query */
        tree = raw_parser(query);

        /* Set up workspace for constant recording */
        jstate.clocations_buf_size = 32;
        jstate.clocations = (pgssLocationLen *)
            palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
        jstate.clocations_count = 0;

        /* Walk tree and record const locations */
        const_record_walker((Node *) tree, &jstate);

        /* Normalize query */
        query_len = (int) strlen(query);
        normalized = generate_normalized_query(&jstate, query, &query_len, PG_UTF8);

        /* rb_str_new2 copies the bytes, so the palloc'd text can go right away */
        result = rb_str_new2(normalized);

        pfree(normalized);
    }
    PG_CATCH();
    {
        /* Turn the PostgreSQL error into a Ruby exception object; it is
         * raised only after the cleanup below has run. */
        ErrorData* error_data = CopyErrorData();
        error = new_parse_error(error_data);
        FlushErrorState();
    }
    PG_END_TRY();

    /* Leave and release the private memory context */
    MemoryContextSwitchTo(TopMemoryContext);
    MemoryContextDelete(ctx);

    // If we got an error, throw it
    if (!NIL_P(error)) rb_exc_raise(error);

    return result;
}
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#include "pg_query.h"
|
2
|
+
|
3
|
+
#include "parser/parser.h"
|
4
|
+
#include "parser/scanner.h"
|
5
|
+
#include "parser/scansup.h"
|
6
|
+
|
7
|
+
#include <unistd.h>
|
8
|
+
#include <fcntl.h>
|
9
|
+
|
10
|
+
/*
 * new_parse_error: build (but do not raise) a PgQuery::ParseError for a
 * PostgreSQL error record.
 *
 * The exception class is looked up by name at call time, and is instantiated
 * with two arguments: the error message as a String and the error's cursor
 * position as an Integer.
 */
VALUE new_parse_error(ErrorData* error)
{
    VALUE pg_query_ns = rb_const_get(rb_cObject, rb_intern("PgQuery"));
    VALUE parse_error_class = rb_const_get_at(pg_query_ns, rb_intern("ParseError"));
    VALUE ctor_args[2];

    ctor_args[0] = rb_str_new2(error->message);
    ctor_args[1] = INT2NUM(error->cursorpos);

    return rb_class_new_instance(2, ctor_args, parse_error_class);
}
|
23
|
+
|
24
|
+
/*
 * pg_query_raw_parse: Ruby-callable function that parses `input` with the
 * PostgreSQL raw parser.
 *
 * self   receiver (unused)
 * input  query text; must be a Ruby String without embedded NUL bytes
 *
 * Returns a two-element Ruby Array: the parse tree serialized by
 * nodeToJSONString(), and whatever was written to stderr while parsing (at
 * most STDERR_BUFFER_LEN bytes; always empty when DEBUG is defined).
 *
 * Raises TypeError / ArgumentError for unusable input, IOError when stderr
 * cannot be redirected, and the exception built by new_parse_error() when
 * PostgreSQL reports a parse error.
 *
 * Unless DEBUG is defined, stderr is redirected into a pipe for the duration
 * of the parse and restored afterwards. All PostgreSQL allocations happen in
 * a private memory context that is deleted before returning.
 */
VALUE pg_query_raw_parse(VALUE self, VALUE input)
{
    MemoryContext ctx = NULL;
    VALUE result = Qnil;
    VALUE error = Qnil;
    char *query;
    char stderr_buffer[STDERR_BUFFER_LEN + 1] = {0};
#ifndef DEBUG
    int stderr_global;
    int stderr_pipe[2];
#endif

    Check_Type(input, T_STRING);

    /*
     * StringValueCStr raises a Ruby exception for strings with embedded NUL
     * bytes. Do it before touching stderr, the memory context or PG_TRY: a
     * Ruby exception jumping out from there would leave stderr pointing at
     * the pipe, leak descriptors and the context, and leave PostgreSQL's
     * exception stack pointing into this (dead) frame.
     */
    query = StringValueCStr(input);

#ifndef DEBUG
    // Setup pipe for stderr redirection
    if (pipe(stderr_pipe) != 0)
        rb_raise(rb_eIOError, "Failed to open pipe, too many open file descriptors");

    /* Reading must not block when nothing was written to stderr */
    fcntl(stderr_pipe[0], F_SETFL, fcntl(stderr_pipe[0], F_GETFL) | O_NONBLOCK);

    // Redirect stderr to the pipe
    stderr_global = dup(STDERR_FILENO);
    if (stderr_global < 0 || dup2(stderr_pipe[1], STDERR_FILENO) < 0)
    {
        /*
         * Without a saved copy of stderr it could never be restored, so give
         * up here. If dup2 failed, stderr itself is still untouched.
         */
        if (stderr_global >= 0)
            close(stderr_global);
        close(stderr_pipe[0]);
        close(stderr_pipe[1]);
        rb_raise(rb_eIOError, "Failed to redirect stderr");
    }
    close(stderr_pipe[1]);
#endif

    /* Created only after everything that may raise a Ruby exception, so that
     * those early exits have no PostgreSQL state to clean up. */
    ctx = AllocSetContextCreate(TopMemoryContext,
                                "pg_query_raw_parse",
                                ALLOCSET_DEFAULT_MINSIZE,
                                ALLOCSET_DEFAULT_INITSIZE,
                                ALLOCSET_DEFAULT_MAXSIZE);
    MemoryContextSwitchTo(ctx);

    // Parse it!
    PG_TRY();
    {
        List *tree;
        char *json;

        tree = raw_parser(query);

        json = nodeToJSONString(tree);

#ifndef DEBUG
        // Save stderr for result
        /* A failed read (e.g. nothing was written) means "no output"; the
         * buffer is one byte larger than the read size, so it always stays
         * NUL-terminated. */
        if (read(stderr_pipe[0], stderr_buffer, STDERR_BUFFER_LEN) < 0)
            stderr_buffer[0] = '\0';
#endif

        result = rb_ary_new();
        rb_ary_push(result, rb_str_new2(json));
        rb_ary_push(result, rb_str_new2(stderr_buffer));

        pfree(json);
    }
    PG_CATCH();
    {
        /* Turn the PostgreSQL error into a Ruby exception object; it is
         * raised only after the cleanup below has run. */
        ErrorData* error_data = CopyErrorData();
        error = new_parse_error(error_data);
        FlushErrorState();
    }
    PG_END_TRY();

#ifndef DEBUG
    // Restore stderr, close pipe
    dup2(stderr_global, STDERR_FILENO);
    close(stderr_pipe[0]);
    close(stderr_global);
#endif

    // Return to previous PostgreSQL memory context
    MemoryContextSwitchTo(TopMemoryContext);
    MemoryContextDelete(ctx);

    // If we got an error, throw it
    if (!NIL_P(error)) rb_exc_raise(error);

    return result;
}
|