pg_query 0.1.2 → 0.2.0

checksums.yaml CHANGED
@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    ZWQ2ODM2MzRjNWVkODM4ZWJhMjliN2Y1Yjg4NzE4MzljYmUzM2ZkYw==
+    Zjc4NWU4NWY5MzMzNjYxMTA4NzU0ZDg2OWQ2OGQ4ZTQwZDBlMTRhYw==
   data.tar.gz: !binary |-
-    MDMwMjdmODc1MjM1YzAwMTZhNDg4ZDk2Y2YxMjVhZWMzOGNlODNmNw==
+    YzEzNzc3YjlkNzcxNTUxOGEwOGYyZjdjMWM5YzE4Y2EwY2QwODc0NA==
 SHA512:
   metadata.gz: !binary |-
-    MTAzODFhNjEyNWJmNmVhNzIxNGYwYWMzZWY0ZGE3YTAzOWYzOTMzZmVjZTA5
-    YWUyMGViNTgxMGIzOGU3NjRmZDMyZmUwMDBlNDUxMmZiYmYwZTMzNGY1MzE1
-    M2M3YTU3NGJkY2Y2N2I0NDI1NzIzZTEzY2ZlYTAyZWFiNTFmNGQ=
+    ZmFkNDYwYjk1OGU1MGYzNjdkZDJjNzRmOGFjNWVkYzMxNTBlNjk1ZTI2YjQ3
+    OTQxNTYxZTZkNzZhYjEyNzczMDYwMGFlMjdlNmRmNGNlZmU1MGFjMjg5MjU5
+    ZGI5OGFlYWUzYjZhNGQ3MDFmMjQ5NzdjMGM1OGJlNWNiYTI5OGQ=
   data.tar.gz: !binary |-
-    ZWI2YzI0Mjc5NzIzMDU1MGJiNzIyNzIzMGI1Y2UwMmI1MGE4NTk5ZThjYjE4
-    YTliZTkxMDM3MjM2OGQxODZjY2JhZGZkYTNkNWQ1MjMwNGVlMGM4OWEyMzc0
-    YTk4NmRmOTRmOTc0ZTJjOTQ3NDZiMGQxNGYxYjVkMWYzNzIyZWE=
+    MDY3N2U2NGUzNzg2ZjRjMTdmMzhkMDBmYjY4ODBjM2MxMGFhZGYwM2UzMzAx
+    OTFkMzIxMDExODM0MjhhZGY5MzhiNjMxZDg2OTNiMTU4NjE2MzQzMDEzNjhl
+    YTE5ODc2ODRjNDM1ZTI1NTM2YjdkZTQxNDE4ZjljOGRiNThkYjA=
data/Rakefile CHANGED
@@ -11,4 +11,10 @@ RSpec::Core::RakeTask.new
 task spec: :compile
 
 task default: :spec
-task test: :spec
+task test: :spec
+
+task :clean do
+  FileUtils.rm_rf File.join(File.dirname(__FILE__), "tmp/")
+  FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), "ext/pg_query/*.o"))
+  FileUtils.rm_f File.join(File.dirname(__FILE__), "lib/pg_query/pg_query.bundle")
+end
@@ -8,10 +8,10 @@ pgdir = File.join(workdir, "postgres")
 # Note: We intentionally use a patched version that fixes bugs in outfuncs.c
 if !Dir.exists?(pgdir)
   unless File.exists?("#{workdir}/postgres.zip")
-    system("curl https://codeload.github.com/pganalyze/postgres/zip/more-outfuncs -o #{workdir}/postgres.zip") || raise("ERROR")
+    system("curl https://codeload.github.com/pganalyze/postgres/zip/pg_query -o #{workdir}/postgres.zip") || raise("ERROR")
   end
   system("unzip -q #{workdir}/postgres.zip -d #{workdir}") || raise("ERROR")
-  system("mv #{workdir}/postgres-more-outfuncs #{pgdir}") || raise("ERROR")
+  system("mv #{workdir}/postgres-pg_query #{pgdir}") || raise("ERROR")
   system("cd #{pgdir}; CFLAGS=-fPIC ./configure") || raise("ERROR")
   system("cd #{pgdir}; make") || raise("ERROR")
 end
@@ -1,7 +1,10 @@
 #include "postgres.h"
 #include "utils/memutils.h"
 #include "parser/parser.h"
+#include "parser/scanner.h"
 #include "nodes/print.h"
+#include "nodes/nodeFuncs.h"
+#include "mb/pg_wchar.h"
 
 #include <unistd.h>
 #include <fcntl.h>
@@ -40,7 +43,7 @@ static VALUE pg_query_raw_parse(VALUE self, VALUE input)
     int stderr_pipe[2];
 
     ctx = AllocSetContextCreate(TopMemoryContext,
-                                "RootContext",
+                                "pg_query_raw_parse",
                                 ALLOCSET_DEFAULT_MINSIZE,
                                 ALLOCSET_DEFAULT_INITSIZE,
                                 ALLOCSET_DEFAULT_MAXSIZE);
@@ -96,6 +99,333 @@ static VALUE pg_query_raw_parse(VALUE self, VALUE input)
     return result;
 }
 
+/*
+ * Struct for tracking locations/lengths of constants during normalization
+ */
+typedef struct pgssLocationLen
+{
+    int location;    /* start offset in query text */
+    int length;      /* length in bytes, or -1 to ignore */
+} pgssLocationLen;
+
+/*
+ * Working state for constant tree walker
+ */
+typedef struct pgssConstLocations
+{
+    /* Array of locations of constants that should be removed */
+    pgssLocationLen *clocations;
+
+    /* Allocated length of clocations array */
+    int clocations_buf_size;
+
+    /* Current number of valid entries in clocations array */
+    int clocations_count;
+} pgssConstLocations;
+
+/*
+ * comp_location: comparator for qsorting pgssLocationLen structs by location
+ */
+static int
+comp_location(const void *a, const void *b)
+{
+    int l = ((const pgssLocationLen *) a)->location;
+    int r = ((const pgssLocationLen *) b)->location;
+
+    if (l < r)
+        return -1;
+    else if (l > r)
+        return +1;
+    else
+        return 0;
+}
+
+/*
+ * Given a valid SQL string and an array of constant-location records,
+ * fill in the textual lengths of those constants.
+ *
+ * The constants may use any allowed constant syntax, such as float literals,
+ * bit-strings, single-quoted strings and dollar-quoted strings. This is
+ * accomplished by using the public API for the core scanner.
+ *
+ * It is the caller's job to ensure that the string is a valid SQL statement
+ * with constants at the indicated locations. Since in practice the string
+ * has already been parsed, and the locations that the caller provides will
+ * have originated from within the authoritative parser, this should not be
+ * a problem.
+ *
+ * Duplicate constant pointers are possible, and will have their lengths
+ * marked as '-1', so that they are later ignored. (Actually, we assume the
+ * lengths were initialized as -1 to start with, and don't change them here.)
+ *
+ * N.B. There is an assumption that a '-' character at a Const location begins
+ * a negative numeric constant. This precludes there ever being another
+ * reason for a constant to start with a '-'.
+ */
+static void
+fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
+{
+    pgssLocationLen *locs;
+    core_yyscan_t yyscanner;
+    core_yy_extra_type yyextra;
+    core_YYSTYPE yylval;
+    YYLTYPE yylloc;
+    int last_loc = -1;
+    int i;
+
+    /*
+     * Sort the records by location so that we can process them in order while
+     * scanning the query text.
+     */
+    if (jstate->clocations_count > 1)
+        qsort(jstate->clocations, jstate->clocations_count,
+              sizeof(pgssLocationLen), comp_location);
+    locs = jstate->clocations;
+
+    /* initialize the flex scanner --- should match raw_parser() */
+    yyscanner = scanner_init(query,
+                             &yyextra,
+                             ScanKeywords,
+                             NumScanKeywords);
+
+    /* Search for each constant, in sequence */
+    for (i = 0; i < jstate->clocations_count; i++)
+    {
+        int loc = locs[i].location;
+        int tok;
+
+        Assert(loc >= 0);
+
+        if (loc <= last_loc)
+            continue;    /* Duplicate constant, ignore */
+
+        /* Lex tokens until we find the desired constant */
+        for (;;)
+        {
+            tok = core_yylex(&yylval, &yylloc, yyscanner);
+
+            /* We should not hit end-of-string, but if we do, behave sanely */
+            if (tok == 0)
+                break;    /* out of inner for-loop */
+
+            /*
+             * We should find the token position exactly, but if we somehow
+             * run past it, work with that.
+             */
+            if (yylloc >= loc)
+            {
+                if (query[loc] == '-')
+                {
+                    /*
+                     * It's a negative value - this is the one and only case
+                     * where we replace more than a single token.
+                     *
+                     * Do not compensate for the core system's special-case
+                     * adjustment of location to that of the leading '-'
+                     * operator in the event of a negative constant. It is
+                     * also useful for our purposes to start from the minus
+                     * symbol. In this way, queries like "select * from foo
+                     * where bar = 1" and "select * from foo where bar = -2"
+                     * will have identical normalized query strings.
+                     */
+                    tok = core_yylex(&yylval, &yylloc, yyscanner);
+                    if (tok == 0)
+                        break;    /* out of inner for-loop */
+                }
+
+                /*
+                 * We now rely on the assumption that flex has placed a zero
+                 * byte after the text of the current token in scanbuf.
+                 */
+                locs[i].length = (int) strlen(yyextra.scanbuf + loc);
+                break;    /* out of inner for-loop */
+            }
+        }
+
+        /* If we hit end-of-string, give up, leaving remaining lengths -1 */
+        if (tok == 0)
+            break;
+
+        last_loc = loc;
+    }
+
+    scanner_finish(yyscanner);
+}
+
+/*
+ * Generate a normalized version of the query string that will be used to
+ * represent all similar queries.
+ *
+ * Note that the normalized representation may well vary depending on
+ * just which "equivalent" query is used to create the hashtable entry.
+ * We assume this is OK.
+ *
+ * *query_len_p contains the input string length, and is updated with
+ * the result string length (which cannot be longer) on exit.
+ *
+ * Returns a palloc'd string.
+ */
+static char *
+generate_normalized_query(pgssConstLocations *jstate, const char *query,
+                          int *query_len_p, int encoding)
+{
+    char *norm_query;
+    int query_len = *query_len_p;
+    int i,
+        len_to_wrt,        /* Length (in bytes) to write */
+        quer_loc = 0,      /* Source query byte location */
+        n_quer_loc = 0,    /* Normalized query byte location */
+        last_off = 0,      /* Offset from start for previous tok */
+        last_tok_len = 0;  /* Length (in bytes) of that tok */
+
+    /*
+     * Get constants' lengths (core system only gives us locations). Note
+     * this also ensures the items are sorted by location.
+     */
+    fill_in_constant_lengths(jstate, query);
+
+    /* Allocate result buffer */
+    norm_query = palloc(query_len + 1);
+
+    for (i = 0; i < jstate->clocations_count; i++)
+    {
+        int off,      /* Offset from start for cur tok */
+            tok_len;  /* Length (in bytes) of that tok */
+
+        off = jstate->clocations[i].location;
+        tok_len = jstate->clocations[i].length;
+
+        if (tok_len < 0)
+            continue;    /* ignore any duplicates */
+
+        /* Copy next chunk (what precedes the next constant) */
+        len_to_wrt = off - last_off;
+        len_to_wrt -= last_tok_len;
+
+        Assert(len_to_wrt >= 0);
+        memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+        n_quer_loc += len_to_wrt;
+
+        /* And insert a '?' in place of the constant token */
+        norm_query[n_quer_loc++] = '?';
+
+        quer_loc = off + tok_len;
+        last_off = off;
+        last_tok_len = tok_len;
+    }
+
+    /*
+     * We've copied up until the last ignorable constant. Copy over the
+     * remaining bytes of the original query string.
+     */
+    len_to_wrt = query_len - quer_loc;
+
+    Assert(len_to_wrt >= 0);
+    memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+    n_quer_loc += len_to_wrt;
+
+    Assert(n_quer_loc <= query_len);
+    norm_query[n_quer_loc] = '\0';
+
+    *query_len_p = n_quer_loc;
+    return norm_query;
+}
+
+bool const_record_walker(Node *node, pgssConstLocations *jstate)
+{
+    if (node == NULL) return false;
+
+    if (IsA(node, A_Const) && ((A_Const *) node)->location >= 0)
+    {
+        /* enlarge array if needed */
+        if (jstate->clocations_count >= jstate->clocations_buf_size)
+        {
+            jstate->clocations_buf_size *= 2;
+            jstate->clocations = (pgssLocationLen *)
+                repalloc(jstate->clocations,
+                         jstate->clocations_buf_size *
+                         sizeof(pgssLocationLen));
+        }
+        jstate->clocations[jstate->clocations_count].location = ((A_Const *) node)->location;
+        /* initialize lengths to -1 to simplify fill_in_constant_lengths */
+        jstate->clocations[jstate->clocations_count].length = -1;
+        jstate->clocations_count++;
+    }
+    //else if (isA(node, Query))
+    //{
+    //    return query_tree_walker(node, const_record_walker, jstate, 0);
+    //}
+
+    PG_TRY();
+    {
+        return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
+    }
+    PG_CATCH();
+    {
+        return false;
+    }
+    PG_END_TRY();
+}
+
+static VALUE pg_query_normalize(VALUE self, VALUE input)
+{
+    Check_Type(input, T_STRING);
+
+    MemoryContext ctx = NULL;
+    VALUE result;
+    ErrorData* error = NULL;
+
+    ctx = AllocSetContextCreate(TopMemoryContext,
+                                "pg_query_normalize",
+                                ALLOCSET_DEFAULT_MINSIZE,
+                                ALLOCSET_DEFAULT_INITSIZE,
+                                ALLOCSET_DEFAULT_MAXSIZE);
+    MemoryContextSwitchTo(ctx);
+
+    PG_TRY();
+    {
+        List *tree;
+        char *str;
+        pgssConstLocations jstate;
+        int query_len;
+
+        /* Parse query */
+        str = StringValueCStr(input);
+        tree = raw_parser(str);
+
+        /* Set up workspace for constant recording */
+        jstate.clocations_buf_size = 32;
+        jstate.clocations = (pgssLocationLen *)
+            palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
+        jstate.clocations_count = 0;
+
+        /* Walk tree and record const locations */
+        const_record_walker((Node *) tree, &jstate);
+
+        /* Normalize query */
+        query_len = (int) strlen(str);
+        str = generate_normalized_query(&jstate, str, &query_len, PG_UTF8);
+
+        result = rb_tainted_str_new_cstr(str);
+
+        pfree(str);
+    }
+    PG_CATCH();
+    {
+        error = CopyErrorData();
+        FlushErrorState();
+    }
+    PG_END_TRY();
+
+    MemoryContextSwitchTo(TopMemoryContext);
+    MemoryContextDelete(ctx);
+
+    // If we got an error, throw a ParseError exception
+    if (error) raise_parse_error(error);
+
+    return result;
+}
+
 void Init_pg_query(void)
 {
     VALUE cPgQuery;
@@ -105,4 +435,5 @@ void Init_pg_query(void)
     cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
 
     rb_define_singleton_method(cPgQuery, "_raw_parse", pg_query_raw_parse, 1);
+    rb_define_singleton_method(cPgQuery, "normalize", pg_query_normalize, 1);
 }
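
With this extension in place, the new entry point can be called straight from Ruby. A minimal usage sketch (the return value shown is an assumption based on generate_normalized_query above, which replaces each constant with '?'):

    require 'pg_query'

    # PgQuery.normalize is the singleton method registered in Init_pg_query above.
    # It parses the query, records every A_Const location, and rewrites each
    # constant as '?', returning the normalized string.
    normalized = PgQuery.normalize("SELECT * FROM users WHERE id = 42 AND name = 'Ann'")
    # expected (assumption): "SELECT * FROM users WHERE id = ? AND name = ?"
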
data/lib/pg_query.rb CHANGED
@@ -2,5 +2,4 @@ require 'pg_query/version'
 require 'pg_query/parse_error'
 
 require 'pg_query/pg_query'
-require 'pg_query/parse'
-require 'pg_query/parse_normalized'
+require 'pg_query/parse'
data/lib/pg_query/version.rb CHANGED
@@ -1,3 +1,3 @@
 class PgQuery
-  VERSION = '0.1.2'
+  VERSION = '0.2.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pg_query
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.2.0
 platform: ruby
 authors:
 - Lukas Fittl
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-13 00:00:00.000000000 Z
+date: 2014-05-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake-compiler
@@ -67,7 +67,6 @@ files:
 - lib/pg_query.rb
 - lib/pg_query/parse.rb
 - lib/pg_query/parse_error.rb
-- lib/pg_query/parse_normalized.rb
 - lib/pg_query/version.rb
 homepage: http://github.com/pganalyze/pg_query
 licenses:
data/lib/pg_query/parse_normalized.rb DELETED
@@ -1,51 +0,0 @@
-class PgQuery
-  # Parses a query that has been normalized by pg_stat_statements
-  def self.parse_normalized(original_query)
-    # Transform ? into \uFFED
-    query = normalized_to_parseable_query(original_query)
-
-    # Parse it!
-    result = parse(query)
-
-    # Transform \uFFED references as if they were $0
-    parsed_to_normalized_parsetree!(result.parsetree)
-
-    PgQuery.new(original_query, result.parsetree, result.warnings)
-  end
-
-  protected
-  # The PostgreSQL parser doesn't understand pg_stat_statements replacement characters,
-  # change them into a fake column reference to an unusual unicode character \uFFED
-  def self.normalized_to_parseable_query(query)
-    regexps = [
-      'INTERVAL ?',
-      /\$[0-9]+\?/,
-      '?.?',
-      /(?<!\\)\?/, # Replace all ?, unless they are escaped by a backslash
-    ]
-    regexps.each do |re|
-      query = query.gsub(re) {|m| "\uFFED" * m.size }
-    end
-    query
-  end
-
-  # Modifies the passed in parsetree to have paramrefs to $0 instead of columnref to \uFFED
-  def self.parsed_to_normalized_parsetree!(parsetree)
-    expressions = parsetree.dup
-    loop do
-      break unless expression = expressions.shift
-
-      if expression.is_a?(Array)
-        expressions += expression.compact
-      elsif expression.is_a?(Hash)
-        value = expression['COLUMNREF'] && expression['COLUMNREF']['fields']
-        if value && value.size == 1 && value[0].is_a?(String) && value[0].chars.to_a.uniq == ["\uFFED"]
-          expression.replace('PARAMREF' => {'number' => 0,
-                                            'location' => expression['COLUMNREF']['location']})
-        else
-          expressions += expression.values.compact
-        end
-      end
-    end
-  end
-end
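
For contrast, the removed helper above never normalized queries itself; it made already-normalized queries (as emitted by pg_stat_statements) parseable by swapping '?' placeholders for a sentinel identifier before handing them to the parser. A minimal standalone sketch of that core transformation, simplified from the removed code:

    # Simplified from the removed normalized_to_parseable_query: every '?' that
    # is not escaped by a backslash becomes the sentinel character \uFFED, so the
    # PostgreSQL parser sees a (fake) column reference instead of a syntax error.
    SENTINEL = "\uFFED"
    query = "SELECT * FROM users WHERE id = ? AND name = ?"
    parseable = query.gsub(/(?<!\\)\?/) { |m| SENTINEL * m.size }
    # => "SELECT * FROM users WHERE id = \uFFED AND name = \uFFED"
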