pg_query 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ #ifndef PG_QUERY_H
2
+ #define PG_QUERY_H
3
+
4
+ #include "postgres.h"
5
+ #include "utils/memutils.h"
6
+
7
+ #include <ruby.h>
8
+
9
+ #define STDERR_BUFFER_LEN 4096
10
+ //#define DEBUG
11
+
12
+ VALUE new_parse_error(ErrorData* error);
13
+
14
+ void Init_pg_query(void);
15
+ VALUE pg_query_normalize(VALUE self, VALUE input);
16
+ VALUE pg_query_raw_parse(VALUE self, VALUE input);
17
+
18
+ #endif
@@ -0,0 +1,363 @@
1
+ #include "pg_query.h"
2
+
3
+ #include "parser/parser.h"
4
+ #include "parser/scanner.h"
5
+ #include "parser/scansup.h"
6
+ #include "mb/pg_wchar.h"
7
+ #include "nodes/nodeFuncs.h"
8
+
9
/*
 * Position of one constant inside the query text, recorded while
 * normalizing.
 */
typedef struct pgssLocationLen
{
	int			location;		/* byte offset where the constant starts */
	int			length;			/* byte length; -1 means "skip this entry" */
} pgssLocationLen;
17
+
18
+ /*
19
+ * Working state for constant tree walker
20
+ */
21
+ typedef struct pgssConstLocations
22
+ {
23
+ /* Array of locations of constants that should be removed */
24
+ pgssLocationLen *clocations;
25
+
26
+ /* Allocated length of clocations array */
27
+ int clocations_buf_size;
28
+
29
+ /* Current number of valid entries in clocations array */
30
+ int clocations_count;
31
+ } pgssConstLocations;
32
+
33
+ /*
34
+ * comp_location: comparator for qsorting pgssLocationLen structs by location
35
+ */
36
+ static int
37
+ comp_location(const void *a, const void *b)
38
+ {
39
+ int l = ((const pgssLocationLen *) a)->location;
40
+ int r = ((const pgssLocationLen *) b)->location;
41
+
42
+ if (l < r)
43
+ return -1;
44
+ else if (l > r)
45
+ return +1;
46
+ else
47
+ return 0;
48
+ }
49
+
50
+ /*
51
+ * Given a valid SQL string and an array of constant-location records,
52
+ * fill in the textual lengths of those constants.
53
+ *
54
+ * The constants may use any allowed constant syntax, such as float literals,
55
+ * bit-strings, single-quoted strings and dollar-quoted strings. This is
56
+ * accomplished by using the public API for the core scanner.
57
+ *
58
+ * It is the caller's job to ensure that the string is a valid SQL statement
59
+ * with constants at the indicated locations. Since in practice the string
60
+ * has already been parsed, and the locations that the caller provides will
61
+ * have originated from within the authoritative parser, this should not be
62
+ * a problem.
63
+ *
64
+ * Duplicate constant pointers are possible, and will have their lengths
65
+ * marked as '-1', so that they are later ignored. (Actually, we assume the
66
+ * lengths were initialized as -1 to start with, and don't change them here.)
67
+ *
68
+ * N.B. There is an assumption that a '-' character at a Const location begins
69
+ * a negative numeric constant. This precludes there ever being another
70
+ * reason for a constant to start with a '-'.
71
+ */
72
+ static void
73
+ fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
74
+ {
75
+ pgssLocationLen *locs;
76
+ core_yyscan_t yyscanner;
77
+ core_yy_extra_type yyextra;
78
+ core_YYSTYPE yylval;
79
+ YYLTYPE yylloc;
80
+ int last_loc = -1;
81
+ int i;
82
+
83
+ /*
84
+ * Sort the records by location so that we can process them in order while
85
+ * scanning the query text.
86
+ */
87
+ if (jstate->clocations_count > 1)
88
+ qsort(jstate->clocations, jstate->clocations_count,
89
+ sizeof(pgssLocationLen), comp_location);
90
+ locs = jstate->clocations;
91
+
92
+ /* initialize the flex scanner --- should match raw_parser() */
93
+ yyscanner = scanner_init(query,
94
+ &yyextra,
95
+ ScanKeywords,
96
+ NumScanKeywords);
97
+
98
+ /* Search for each constant, in sequence */
99
+ for (i = 0; i < jstate->clocations_count; i++)
100
+ {
101
+ int loc = locs[i].location;
102
+ int tok;
103
+
104
+ Assert(loc >= 0);
105
+
106
+ if (loc <= last_loc)
107
+ continue; /* Duplicate constant, ignore */
108
+
109
+ /* Lex tokens until we find the desired constant */
110
+ for (;;)
111
+ {
112
+ tok = core_yylex(&yylval, &yylloc, yyscanner);
113
+
114
+ /* We should not hit end-of-string, but if we do, behave sanely */
115
+ if (tok == 0)
116
+ break; /* out of inner for-loop */
117
+
118
+ /*
119
+ * We should find the token position exactly, but if we somehow
120
+ * run past it, work with that.
121
+ */
122
+ if (yylloc >= loc)
123
+ {
124
+ if (query[loc] == '-')
125
+ {
126
+ /*
127
+ * It's a negative value - this is the one and only case
128
+ * where we replace more than a single token.
129
+ *
130
+ * Do not compensate for the core system's special-case
131
+ * adjustment of location to that of the leading '-'
132
+ * operator in the event of a negative constant. It is
133
+ * also useful for our purposes to start from the minus
134
+ * symbol. In this way, queries like "select * from foo
135
+ * where bar = 1" and "select * from foo where bar = -2"
136
+ * will have identical normalized query strings.
137
+ */
138
+ tok = core_yylex(&yylval, &yylloc, yyscanner);
139
+ if (tok == 0)
140
+ break; /* out of inner for-loop */
141
+ }
142
+
143
+ /*
144
+ * We now rely on the assumption that flex has placed a zero
145
+ * byte after the text of the current token in scanbuf.
146
+ */
147
+ locs[i].length = (int) strlen(yyextra.scanbuf + loc);
148
+
149
+ /* Quoted string with Unicode escapes
150
+ *
151
+ * The lexer consumes trailing whitespace in order to find UESCAPE, but if there
152
+ * is no UESCAPE it has still consumed it - don't include it in constant length.
153
+ */
154
+ if (locs[i].length > 4 && /* U&'' */
155
+ (yyextra.scanbuf[loc] == 'u' || yyextra.scanbuf[loc] == 'U') &&
156
+ yyextra.scanbuf[loc + 1] == '&' && yyextra.scanbuf[loc + 2] == '\'')
157
+ {
158
+ int j = locs[i].length - 1; /* Skip the \0 */
159
+ for (; j >= 0 && scanner_isspace(yyextra.scanbuf[loc + j]); j--) {}
160
+ locs[i].length = j + 1; /* Count the \0 */
161
+ }
162
+
163
+ break; /* out of inner for-loop */
164
+ }
165
+ }
166
+
167
+ /* If we hit end-of-string, give up, leaving remaining lengths -1 */
168
+ if (tok == 0)
169
+ break;
170
+
171
+ last_loc = loc;
172
+ }
173
+
174
+ scanner_finish(yyscanner);
175
+ }
176
+
177
+ /*
178
+ * Generate a normalized version of the query string that will be used to
179
+ * represent all similar queries.
180
+ *
181
+ * Note that the normalized representation may well vary depending on
182
+ * just which "equivalent" query is used to create the hashtable entry.
183
+ * We assume this is OK.
184
+ *
185
+ * *query_len_p contains the input string length, and is updated with
186
+ * the result string length (which cannot be longer) on exit.
187
+ *
188
+ * Returns a palloc'd string.
189
+ */
190
+ static char *
191
+ generate_normalized_query(pgssConstLocations *jstate, const char *query,
192
+ int *query_len_p, int encoding)
193
+ {
194
+ char *norm_query;
195
+ int query_len = *query_len_p;
196
+ int i,
197
+ len_to_wrt, /* Length (in bytes) to write */
198
+ quer_loc = 0, /* Source query byte location */
199
+ n_quer_loc = 0, /* Normalized query byte location */
200
+ last_off = 0, /* Offset from start for previous tok */
201
+ last_tok_len = 0; /* Length (in bytes) of that tok */
202
+
203
+ /*
204
+ * Get constants' lengths (core system only gives us locations). Note
205
+ * this also ensures the items are sorted by location.
206
+ */
207
+ fill_in_constant_lengths(jstate, query);
208
+
209
+ /* Allocate result buffer */
210
+ norm_query = palloc(query_len + 1);
211
+
212
+ for (i = 0; i < jstate->clocations_count; i++)
213
+ {
214
+ int off, /* Offset from start for cur tok */
215
+ tok_len; /* Length (in bytes) of that tok */
216
+
217
+ off = jstate->clocations[i].location;
218
+ tok_len = jstate->clocations[i].length;
219
+
220
+ if (tok_len < 0)
221
+ continue; /* ignore any duplicates */
222
+
223
+ /* Copy next chunk (what precedes the next constant) */
224
+ len_to_wrt = off - last_off;
225
+ len_to_wrt -= last_tok_len;
226
+
227
+ Assert(len_to_wrt >= 0);
228
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
229
+ n_quer_loc += len_to_wrt;
230
+
231
+ /* And insert a '?' in place of the constant token */
232
+ norm_query[n_quer_loc++] = '?';
233
+
234
+ quer_loc = off + tok_len;
235
+ last_off = off;
236
+ last_tok_len = tok_len;
237
+ }
238
+
239
+ /*
240
+ * We've copied up until the last ignorable constant. Copy over the
241
+ * remaining bytes of the original query string.
242
+ */
243
+ len_to_wrt = query_len - quer_loc;
244
+
245
+ Assert(len_to_wrt >= 0);
246
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
247
+ n_quer_loc += len_to_wrt;
248
+
249
+ Assert(n_quer_loc <= query_len);
250
+ norm_query[n_quer_loc] = '\0';
251
+
252
+ *query_len_p = n_quer_loc;
253
+ return norm_query;
254
+ }
255
+
256
+ static bool const_record_walker(Node *node, pgssConstLocations *jstate)
257
+ {
258
+ bool result;
259
+
260
+ if (node == NULL) return false;
261
+
262
+ if (IsA(node, A_Const) && ((A_Const *) node)->location >= 0)
263
+ {
264
+ /* enlarge array if needed */
265
+ if (jstate->clocations_count >= jstate->clocations_buf_size)
266
+ {
267
+ jstate->clocations_buf_size *= 2;
268
+ jstate->clocations = (pgssLocationLen *)
269
+ repalloc(jstate->clocations,
270
+ jstate->clocations_buf_size *
271
+ sizeof(pgssLocationLen));
272
+ }
273
+ jstate->clocations[jstate->clocations_count].location = ((A_Const *) node)->location;
274
+ /* initialize lengths to -1 to simplify fill_in_constant_lengths */
275
+ jstate->clocations[jstate->clocations_count].length = -1;
276
+ jstate->clocations_count++;
277
+ }
278
+ else if (IsA(node, VariableSetStmt))
279
+ {
280
+ return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
281
+ }
282
+ else if (IsA(node, CopyStmt))
283
+ {
284
+ return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
285
+ }
286
+ else if (IsA(node, ExplainStmt))
287
+ {
288
+ return const_record_walker((Node *) ((ExplainStmt *) node)->query, jstate);
289
+ }
290
+
291
+ PG_TRY();
292
+ {
293
+ result = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
294
+ }
295
+ PG_CATCH();
296
+ {
297
+ FlushErrorState();
298
+ result = false;
299
+ }
300
+ PG_END_TRY();
301
+
302
+ return result;
303
+ }
304
+
305
+ VALUE pg_query_normalize(VALUE self, VALUE input)
306
+ {
307
+ Check_Type(input, T_STRING);
308
+
309
+ MemoryContext ctx = NULL;
310
+ VALUE result = Qnil;
311
+ VALUE error = Qnil;
312
+
313
+ ctx = AllocSetContextCreate(TopMemoryContext,
314
+ "pg_query_normalize",
315
+ ALLOCSET_DEFAULT_MINSIZE,
316
+ ALLOCSET_DEFAULT_INITSIZE,
317
+ ALLOCSET_DEFAULT_MAXSIZE);
318
+ MemoryContextSwitchTo(ctx);
319
+
320
+ PG_TRY();
321
+ {
322
+ List *tree;
323
+ char *str;
324
+ pgssConstLocations jstate;
325
+ int query_len;
326
+
327
+ /* Parse query */
328
+ str = StringValueCStr(input);
329
+ tree = raw_parser(str);
330
+
331
+ /* Set up workspace for constant recording */
332
+ jstate.clocations_buf_size = 32;
333
+ jstate.clocations = (pgssLocationLen *)
334
+ palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
335
+ jstate.clocations_count = 0;
336
+
337
+ /* Walk tree and record const locations */
338
+ const_record_walker((Node *) tree, &jstate);
339
+
340
+ /* Normalize query */
341
+ query_len = (int) strlen(str);
342
+ str = generate_normalized_query(&jstate, str, &query_len, PG_UTF8);
343
+
344
+ result = rb_str_new2(str);
345
+
346
+ pfree(str);
347
+ }
348
+ PG_CATCH();
349
+ {
350
+ ErrorData* error_data = CopyErrorData();
351
+ error = new_parse_error(error_data);
352
+ FlushErrorState();
353
+ }
354
+ PG_END_TRY();
355
+
356
+ MemoryContextSwitchTo(TopMemoryContext);
357
+ MemoryContextDelete(ctx);
358
+
359
+ // If we got an error, throw it
360
+ if (!NIL_P(error)) rb_exc_raise(error);
361
+
362
+ return result;
363
+ }
@@ -0,0 +1,102 @@
1
+ #include "pg_query.h"
2
+
3
+ #include "parser/parser.h"
4
+ #include "parser/scanner.h"
5
+ #include "parser/scansup.h"
6
+
7
+ #include <unistd.h>
8
+ #include <fcntl.h>
9
+
10
+ VALUE new_parse_error(ErrorData* error)
11
+ {
12
+ VALUE cPgQuery, cParseError;
13
+ VALUE args[2];
14
+
15
+ cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
16
+ cParseError = rb_const_get_at(cPgQuery, rb_intern("ParseError"));
17
+
18
+ args[0] = rb_str_new2(error->message);
19
+ args[1] = INT2NUM(error->cursorpos);
20
+
21
+ return rb_class_new_instance(2, args, cParseError);
22
+ }
23
+
24
+ VALUE pg_query_raw_parse(VALUE self, VALUE input)
25
+ {
26
+ Check_Type(input, T_STRING);
27
+
28
+ MemoryContext ctx = NULL;
29
+ VALUE result = Qnil;
30
+ VALUE error = Qnil;
31
+ char stderr_buffer[STDERR_BUFFER_LEN + 1] = {0};
32
+ #ifndef DEBUG
33
+ int stderr_global;
34
+ int stderr_pipe[2];
35
+ #endif
36
+
37
+ ctx = AllocSetContextCreate(TopMemoryContext,
38
+ "pg_query_raw_parse",
39
+ ALLOCSET_DEFAULT_MINSIZE,
40
+ ALLOCSET_DEFAULT_INITSIZE,
41
+ ALLOCSET_DEFAULT_MAXSIZE);
42
+ MemoryContextSwitchTo(ctx);
43
+
44
+ #ifndef DEBUG
45
+ // Setup pipe for stderr redirection
46
+ if (pipe(stderr_pipe) != 0)
47
+ rb_raise(rb_eIOError, "Failed to open pipe, too many open file descriptors");
48
+
49
+ fcntl(stderr_pipe[0], F_SETFL, fcntl(stderr_pipe[0], F_GETFL) | O_NONBLOCK);
50
+
51
+ // Redirect stderr to the pipe
52
+ stderr_global = dup(STDERR_FILENO);
53
+ dup2(stderr_pipe[1], STDERR_FILENO);
54
+ close(stderr_pipe[1]);
55
+ #endif
56
+
57
+ // Parse it!
58
+ PG_TRY();
59
+ {
60
+ List *tree;
61
+ char *str;
62
+
63
+ str = StringValueCStr(input);
64
+ tree = raw_parser(str);
65
+
66
+ str = nodeToJSONString(tree);
67
+
68
+ #ifndef DEBUG
69
+ // Save stderr for result
70
+ read(stderr_pipe[0], stderr_buffer, STDERR_BUFFER_LEN);
71
+ #endif
72
+
73
+ result = rb_ary_new();
74
+ rb_ary_push(result, rb_str_new2(str));
75
+ rb_ary_push(result, rb_str_new2(stderr_buffer));
76
+
77
+ pfree(str);
78
+ }
79
+ PG_CATCH();
80
+ {
81
+ ErrorData* error_data = CopyErrorData();
82
+ error = new_parse_error(error_data);
83
+ FlushErrorState();
84
+ }
85
+ PG_END_TRY();
86
+
87
+ #ifndef DEBUG
88
+ // Restore stderr, close pipe
89
+ dup2(stderr_global, STDERR_FILENO);
90
+ close(stderr_pipe[0]);
91
+ close(stderr_global);
92
+ #endif
93
+
94
+ // Return to previous PostgreSQL memory context
95
+ MemoryContextSwitchTo(TopMemoryContext);
96
+ MemoryContextDelete(ctx);
97
+
98
+ // If we got an error, throw it
99
+ if (!NIL_P(error)) rb_exc_raise(error);
100
+
101
+ return result;
102
+ }