pg_query 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWQ2ODM2MzRjNWVkODM4ZWJhMjliN2Y1Yjg4NzE4MzljYmUzM2ZkYw==
4
+ Zjc4NWU4NWY5MzMzNjYxMTA4NzU0ZDg2OWQ2OGQ4ZTQwZDBlMTRhYw==
5
5
  data.tar.gz: !binary |-
6
- MDMwMjdmODc1MjM1YzAwMTZhNDg4ZDk2Y2YxMjVhZWMzOGNlODNmNw==
6
+ YzEzNzc3YjlkNzcxNTUxOGEwOGYyZjdjMWM5YzE4Y2EwY2QwODc0NA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MTAzODFhNjEyNWJmNmVhNzIxNGYwYWMzZWY0ZGE3YTAzOWYzOTMzZmVjZTA5
10
- YWUyMGViNTgxMGIzOGU3NjRmZDMyZmUwMDBlNDUxMmZiYmYwZTMzNGY1MzE1
11
- M2M3YTU3NGJkY2Y2N2I0NDI1NzIzZTEzY2ZlYTAyZWFiNTFmNGQ=
9
+ ZmFkNDYwYjk1OGU1MGYzNjdkZDJjNzRmOGFjNWVkYzMxNTBlNjk1ZTI2YjQ3
10
+ OTQxNTYxZTZkNzZhYjEyNzczMDYwMGFlMjdlNmRmNGNlZmU1MGFjMjg5MjU5
11
+ ZGI5OGFlYWUzYjZhNGQ3MDFmMjQ5NzdjMGM1OGJlNWNiYTI5OGQ=
12
12
  data.tar.gz: !binary |-
13
- ZWI2YzI0Mjc5NzIzMDU1MGJiNzIyNzIzMGI1Y2UwMmI1MGE4NTk5ZThjYjE4
14
- YTliZTkxMDM3MjM2OGQxODZjY2JhZGZkYTNkNWQ1MjMwNGVlMGM4OWEyMzc0
15
- YTk4NmRmOTRmOTc0ZTJjOTQ3NDZiMGQxNGYxYjVkMWYzNzIyZWE=
13
+ MDY3N2U2NGUzNzg2ZjRjMTdmMzhkMDBmYjY4ODBjM2MxMGFhZGYwM2UzMzAx
14
+ OTFkMzIxMDExODM0MjhhZGY5MzhiNjMxZDg2OTNiMTU4NjE2MzQzMDEzNjhl
15
+ YTE5ODc2ODRjNDM1ZTI1NTM2YjdkZTQxNDE4ZjljOGRiNThkYjA=
data/Rakefile CHANGED
@@ -11,4 +11,10 @@ RSpec::Core::RakeTask.new
11
11
  task spec: :compile
12
12
 
13
13
  task default: :spec
14
- task test: :spec
14
+ task test: :spec
15
+
16
+ task :clean do
17
+ FileUtils.rm_rf File.join(File.dirname(__FILE__), "tmp/")
18
+ FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), "ext/pg_query/*.o"))
19
+ FileUtils.rm_f File.join(File.dirname(__FILE__), "lib/pg_query/pg_query.bundle")
20
+ end
@@ -8,10 +8,10 @@ pgdir = File.join(workdir, "postgres")
8
8
  # Note: We intentionally use a patched version that fixes bugs in outfuncs.c
9
9
  if !Dir.exists?(pgdir)
10
10
  unless File.exists?("#{workdir}/postgres.zip")
11
- system("curl https://codeload.github.com/pganalyze/postgres/zip/more-outfuncs -o #{workdir}/postgres.zip") || raise("ERROR")
11
+ system("curl https://codeload.github.com/pganalyze/postgres/zip/pg_query -o #{workdir}/postgres.zip") || raise("ERROR")
12
12
  end
13
13
  system("unzip -q #{workdir}/postgres.zip -d #{workdir}") || raise("ERROR")
14
- system("mv #{workdir}/postgres-more-outfuncs #{pgdir}") || raise("ERROR")
14
+ system("mv #{workdir}/postgres-pg_query #{pgdir}") || raise("ERROR")
15
15
  system("cd #{pgdir}; CFLAGS=-fPIC ./configure") || raise("ERROR")
16
16
  system("cd #{pgdir}; make") || raise("ERROR")
17
17
  end
@@ -1,7 +1,10 @@
1
1
  #include "postgres.h"
2
2
  #include "utils/memutils.h"
3
3
  #include "parser/parser.h"
4
+ #include "parser/scanner.h"
4
5
  #include "nodes/print.h"
6
+ #include "nodes/nodeFuncs.h"
7
+ #include "mb/pg_wchar.h"
5
8
 
6
9
  #include <unistd.h>
7
10
  #include <fcntl.h>
@@ -40,7 +43,7 @@ static VALUE pg_query_raw_parse(VALUE self, VALUE input)
40
43
  int stderr_pipe[2];
41
44
 
42
45
  ctx = AllocSetContextCreate(TopMemoryContext,
43
- "RootContext",
46
+ "pg_query_raw_parse",
44
47
  ALLOCSET_DEFAULT_MINSIZE,
45
48
  ALLOCSET_DEFAULT_INITSIZE,
46
49
  ALLOCSET_DEFAULT_MAXSIZE);
@@ -96,6 +99,333 @@ static VALUE pg_query_raw_parse(VALUE self, VALUE input)
96
99
  return result;
97
100
  }
98
101
 
102
+ /*
103
+ * Struct for tracking locations/lengths of constants during normalization
104
+ */
105
+ typedef struct pgssLocationLen
106
+ {
107
+ int location; /* start offset in query text */
108
+ int length; /* length in bytes, or -1 to ignore */
109
+ } pgssLocationLen;
110
+
111
+ /*
112
+ * Working state for constant tree walker
113
+ */
114
+ typedef struct pgssConstLocations
115
+ {
116
+ /* Array of locations of constants that should be removed */
117
+ pgssLocationLen *clocations;
118
+
119
+ /* Allocated length of clocations array */
120
+ int clocations_buf_size;
121
+
122
+ /* Current number of valid entries in clocations array */
123
+ int clocations_count;
124
+ } pgssConstLocations;
125
+
126
+ /*
127
+ * comp_location: comparator for qsorting pgssLocationLen structs by location
128
+ */
129
+ static int
130
+ comp_location(const void *a, const void *b)
131
+ {
132
+ int l = ((const pgssLocationLen *) a)->location;
133
+ int r = ((const pgssLocationLen *) b)->location;
134
+
135
+ if (l < r)
136
+ return -1;
137
+ else if (l > r)
138
+ return +1;
139
+ else
140
+ return 0;
141
+ }
142
+
143
+ /*
144
+ * Given a valid SQL string and an array of constant-location records,
145
+ * fill in the textual lengths of those constants.
146
+ *
147
+ * The constants may use any allowed constant syntax, such as float literals,
148
+ * bit-strings, single-quoted strings and dollar-quoted strings. This is
149
+ * accomplished by using the public API for the core scanner.
150
+ *
151
+ * It is the caller's job to ensure that the string is a valid SQL statement
152
+ * with constants at the indicated locations. Since in practice the string
153
+ * has already been parsed, and the locations that the caller provides will
154
+ * have originated from within the authoritative parser, this should not be
155
+ * a problem.
156
+ *
157
+ * Duplicate constant pointers are possible, and will have their lengths
158
+ * marked as '-1', so that they are later ignored. (Actually, we assume the
159
+ * lengths were initialized as -1 to start with, and don't change them here.)
160
+ *
161
+ * N.B. There is an assumption that a '-' character at a Const location begins
162
+ * a negative numeric constant. This precludes there ever being another
163
+ * reason for a constant to start with a '-'.
164
+ */
165
+ static void
166
+ fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
167
+ {
168
+ pgssLocationLen *locs;
169
+ core_yyscan_t yyscanner;
170
+ core_yy_extra_type yyextra;
171
+ core_YYSTYPE yylval;
172
+ YYLTYPE yylloc;
173
+ int last_loc = -1;
174
+ int i;
175
+
176
+ /*
177
+ * Sort the records by location so that we can process them in order while
178
+ * scanning the query text.
179
+ */
180
+ if (jstate->clocations_count > 1)
181
+ qsort(jstate->clocations, jstate->clocations_count,
182
+ sizeof(pgssLocationLen), comp_location);
183
+ locs = jstate->clocations;
184
+
185
+ /* initialize the flex scanner --- should match raw_parser() */
186
+ yyscanner = scanner_init(query,
187
+ &yyextra,
188
+ ScanKeywords,
189
+ NumScanKeywords);
190
+
191
+ /* Search for each constant, in sequence */
192
+ for (i = 0; i < jstate->clocations_count; i++)
193
+ {
194
+ int loc = locs[i].location;
195
+ int tok;
196
+
197
+ Assert(loc >= 0);
198
+
199
+ if (loc <= last_loc)
200
+ continue; /* Duplicate constant, ignore */
201
+
202
+ /* Lex tokens until we find the desired constant */
203
+ for (;;)
204
+ {
205
+ tok = core_yylex(&yylval, &yylloc, yyscanner);
206
+
207
+ /* We should not hit end-of-string, but if we do, behave sanely */
208
+ if (tok == 0)
209
+ break; /* out of inner for-loop */
210
+
211
+ /*
212
+ * We should find the token position exactly, but if we somehow
213
+ * run past it, work with that.
214
+ */
215
+ if (yylloc >= loc)
216
+ {
217
+ if (query[loc] == '-')
218
+ {
219
+ /*
220
+ * It's a negative value - this is the one and only case
221
+ * where we replace more than a single token.
222
+ *
223
+ * Do not compensate for the core system's special-case
224
+ * adjustment of location to that of the leading '-'
225
+ * operator in the event of a negative constant. It is
226
+ * also useful for our purposes to start from the minus
227
+ * symbol. In this way, queries like "select * from foo
228
+ * where bar = 1" and "select * from foo where bar = -2"
229
+ * will have identical normalized query strings.
230
+ */
231
+ tok = core_yylex(&yylval, &yylloc, yyscanner);
232
+ if (tok == 0)
233
+ break; /* out of inner for-loop */
234
+ }
235
+
236
+ /*
237
+ * We now rely on the assumption that flex has placed a zero
238
+ * byte after the text of the current token in scanbuf.
239
+ */
240
+ locs[i].length = (int) strlen(yyextra.scanbuf + loc);
241
+ break; /* out of inner for-loop */
242
+ }
243
+ }
244
+
245
+ /* If we hit end-of-string, give up, leaving remaining lengths -1 */
246
+ if (tok == 0)
247
+ break;
248
+
249
+ last_loc = loc;
250
+ }
251
+
252
+ scanner_finish(yyscanner);
253
+ }
254
+
255
+ /*
256
+ * Generate a normalized version of the query string that will be used to
257
+ * represent all similar queries.
258
+ *
259
+ * Note that the normalized representation may well vary depending on
260
+ * just which "equivalent" query is used to create the hashtable entry.
261
+ * We assume this is OK.
262
+ *
263
+ * *query_len_p contains the input string length, and is updated with
264
+ * the result string length (which cannot be longer) on exit.
265
+ *
266
+ * Returns a palloc'd string.
267
+ */
268
+ static char *
269
+ generate_normalized_query(pgssConstLocations *jstate, const char *query,
270
+ int *query_len_p, int encoding)
271
+ {
272
+ char *norm_query;
273
+ int query_len = *query_len_p;
274
+ int i,
275
+ len_to_wrt, /* Length (in bytes) to write */
276
+ quer_loc = 0, /* Source query byte location */
277
+ n_quer_loc = 0, /* Normalized query byte location */
278
+ last_off = 0, /* Offset from start for previous tok */
279
+ last_tok_len = 0; /* Length (in bytes) of that tok */
280
+
281
+ /*
282
+ * Get constants' lengths (core system only gives us locations). Note
283
+ * this also ensures the items are sorted by location.
284
+ */
285
+ fill_in_constant_lengths(jstate, query);
286
+
287
+ /* Allocate result buffer */
288
+ norm_query = palloc(query_len + 1);
289
+
290
+ for (i = 0; i < jstate->clocations_count; i++)
291
+ {
292
+ int off, /* Offset from start for cur tok */
293
+ tok_len; /* Length (in bytes) of that tok */
294
+
295
+ off = jstate->clocations[i].location;
296
+ tok_len = jstate->clocations[i].length;
297
+
298
+ if (tok_len < 0)
299
+ continue; /* ignore any duplicates */
300
+
301
+ /* Copy next chunk (what precedes the next constant) */
302
+ len_to_wrt = off - last_off;
303
+ len_to_wrt -= last_tok_len;
304
+
305
+ Assert(len_to_wrt >= 0);
306
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
307
+ n_quer_loc += len_to_wrt;
308
+
309
+ /* And insert a '?' in place of the constant token */
310
+ norm_query[n_quer_loc++] = '?';
311
+
312
+ quer_loc = off + tok_len;
313
+ last_off = off;
314
+ last_tok_len = tok_len;
315
+ }
316
+
317
+ /*
318
+ * We've copied up until the last ignorable constant. Copy over the
319
+ * remaining bytes of the original query string.
320
+ */
321
+ len_to_wrt = query_len - quer_loc;
322
+
323
+ Assert(len_to_wrt >= 0);
324
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
325
+ n_quer_loc += len_to_wrt;
326
+
327
+ Assert(n_quer_loc <= query_len);
328
+ norm_query[n_quer_loc] = '\0';
329
+
330
+ *query_len_p = n_quer_loc;
331
+ return norm_query;
332
+ }
333
+
334
+ bool const_record_walker(Node *node, pgssConstLocations *jstate)
335
+ {
336
+ if (node == NULL) return false;
337
+
338
+ if (IsA(node, A_Const) && ((A_Const *) node)->location >= 0)
339
+ {
340
+ /* enlarge array if needed */
341
+ if (jstate->clocations_count >= jstate->clocations_buf_size)
342
+ {
343
+ jstate->clocations_buf_size *= 2;
344
+ jstate->clocations = (pgssLocationLen *)
345
+ repalloc(jstate->clocations,
346
+ jstate->clocations_buf_size *
347
+ sizeof(pgssLocationLen));
348
+ }
349
+ jstate->clocations[jstate->clocations_count].location = ((A_Const *) node)->location;
350
+ /* initialize lengths to -1 to simplify fill_in_constant_lengths */
351
+ jstate->clocations[jstate->clocations_count].length = -1;
352
+ jstate->clocations_count++;
353
+ }
354
+ //else if (isA(node, Query))
355
+ //{
356
+ // return query_tree_walker(node, const_record_walker, jstate, 0);
357
+ //}
358
+
359
+ PG_TRY();
360
+ {
361
+ return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
362
+ }
363
+ PG_CATCH();
364
+ {
365
+ return false;
366
+ }
367
+ PG_END_TRY();
368
+ }
369
+
370
+ static VALUE pg_query_normalize(VALUE self, VALUE input)
371
+ {
372
+ Check_Type(input, T_STRING);
373
+
374
+ MemoryContext ctx = NULL;
375
+ VALUE result;
376
+ ErrorData* error = NULL;
377
+
378
+ ctx = AllocSetContextCreate(TopMemoryContext,
379
+ "pg_query_normalize",
380
+ ALLOCSET_DEFAULT_MINSIZE,
381
+ ALLOCSET_DEFAULT_INITSIZE,
382
+ ALLOCSET_DEFAULT_MAXSIZE);
383
+ MemoryContextSwitchTo(ctx);
384
+
385
+ PG_TRY();
386
+ {
387
+ List *tree;
388
+ char *str;
389
+ pgssConstLocations jstate;
390
+ int query_len;
391
+
392
+ /* Parse query */
393
+ str = StringValueCStr(input);
394
+ tree = raw_parser(str);
395
+
396
+ /* Set up workspace for constant recording */
397
+ jstate.clocations_buf_size = 32;
398
+ jstate.clocations = (pgssLocationLen *)
399
+ palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
400
+ jstate.clocations_count = 0;
401
+
402
+ /* Walk tree and record const locations */
403
+ const_record_walker((Node *) tree, &jstate);
404
+
405
+ /* Normalize query */
406
+ query_len = (int) strlen(str);
407
+ str = generate_normalized_query(&jstate, str, &query_len, PG_UTF8);
408
+
409
+ result = rb_tainted_str_new_cstr(str);
410
+
411
+ pfree(str);
412
+ }
413
+ PG_CATCH();
414
+ {
415
+ error = CopyErrorData();
416
+ FlushErrorState();
417
+ }
418
+ PG_END_TRY();
419
+
420
+ MemoryContextSwitchTo(TopMemoryContext);
421
+ MemoryContextDelete(ctx);
422
+
423
+ // If we got an error, throw a ParseError exception
424
+ if (error) raise_parse_error(error);
425
+
426
+ return result;
427
+ }
428
+
99
429
  void Init_pg_query(void)
100
430
  {
101
431
  VALUE cPgQuery;
@@ -105,4 +435,5 @@ void Init_pg_query(void)
105
435
  cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
106
436
 
107
437
  rb_define_singleton_method(cPgQuery, "_raw_parse", pg_query_raw_parse, 1);
438
+ rb_define_singleton_method(cPgQuery, "normalize", pg_query_normalize, 1);
108
439
  }
data/lib/pg_query.rb CHANGED
@@ -2,5 +2,4 @@ require 'pg_query/version'
2
2
  require 'pg_query/parse_error'
3
3
 
4
4
  require 'pg_query/pg_query'
5
- require 'pg_query/parse'
6
- require 'pg_query/parse_normalized'
5
+ require 'pg_query/parse'
@@ -1,3 +1,3 @@
1
1
  class PgQuery
2
- VERSION = '0.1.2'
2
+ VERSION = '0.2.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_query
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lukas Fittl
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-13 00:00:00.000000000 Z
11
+ date: 2014-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -67,7 +67,6 @@ files:
67
67
  - lib/pg_query.rb
68
68
  - lib/pg_query/parse.rb
69
69
  - lib/pg_query/parse_error.rb
70
- - lib/pg_query/parse_normalized.rb
71
70
  - lib/pg_query/version.rb
72
71
  homepage: http://github.com/pganalyze/pg_query
73
72
  licenses:
@@ -1,51 +0,0 @@
1
- class PgQuery
2
- # Parses a query that has been normalized by pg_stat_statements
3
- def self.parse_normalized(original_query)
4
- # Transform ? into \uFFED
5
- query = normalized_to_parseable_query(original_query)
6
-
7
- # Parse it!
8
- result = parse(query)
9
-
10
- # Transform \uFFED references as if they were $0
11
- parsed_to_normalized_parsetree!(result.parsetree)
12
-
13
- PgQuery.new(original_query, result.parsetree, result.warnings)
14
- end
15
-
16
- protected
17
- # The PostgreSQL parser doesn't understand pg_stat_statements replacement characters,
18
- # change them into a fake column reference to an unusual unicode character \uFFED
19
- def self.normalized_to_parseable_query(query)
20
- regexps = [
21
- 'INTERVAL ?',
22
- /\$[0-9]+\?/,
23
- '?.?',
24
- /(?<!\\)\?/, # Replace all ?, unless they are escaped by a backslash
25
- ]
26
- regexps.each do |re|
27
- query = query.gsub(re) {|m| "\uFFED" * m.size }
28
- end
29
- query
30
- end
31
-
32
- # Modifies the passed in parsetree to have paramrefs to $0 instead of columnref to \uFFED
33
- def self.parsed_to_normalized_parsetree!(parsetree)
34
- expressions = parsetree.dup
35
- loop do
36
- break unless expression = expressions.shift
37
-
38
- if expression.is_a?(Array)
39
- expressions += expression.compact
40
- elsif expression.is_a?(Hash)
41
- value = expression['COLUMNREF'] && expression['COLUMNREF']['fields']
42
- if value && value.size == 1 && value[0].is_a?(String) && value[0].chars.to_a.uniq == ["\uFFED"]
43
- expression.replace('PARAMREF' => {'number' => 0,
44
- 'location' => expression['COLUMNREF']['location']})
45
- else
46
- expressions += expression.values.compact
47
- end
48
- end
49
- end
50
- end
51
- end