pg_query 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/README.md +12 -0
  4. data/Rakefile +5 -19
  5. data/ext/pg_query/extconf.rb +3 -1
  6. data/ext/pg_query/include/c.h +12 -0
  7. data/ext/pg_query/include/executor/executor.h +6 -0
  8. data/ext/pg_query/include/nodes/execnodes.h +9 -6
  9. data/ext/pg_query/include/nodes/pathnodes.h +1 -1
  10. data/ext/pg_query/include/optimizer/paths.h +8 -0
  11. data/ext/pg_query/include/pg_config.h +6 -6
  12. data/ext/pg_query/include/pg_config_manual.h +7 -0
  13. data/ext/pg_query/include/pg_query.h +2 -2
  14. data/ext/pg_query/include/pg_query_outfuncs_defs.c +1 -0
  15. data/ext/pg_query/include/pg_query_readfuncs_defs.c +1 -0
  16. data/ext/pg_query/include/protobuf-c.h +7 -3
  17. data/ext/pg_query/include/protobuf-c/protobuf-c.h +7 -3
  18. data/ext/pg_query/include/protobuf/pg_query.pb-c.h +472 -467
  19. data/ext/pg_query/include/utils/array.h +1 -0
  20. data/ext/pg_query/include/utils/lsyscache.h +1 -0
  21. data/ext/pg_query/include/utils/probes.h +57 -57
  22. data/ext/pg_query/pg_query.pb-c.c +502 -487
  23. data/ext/pg_query/pg_query_deparse.c +6 -0
  24. data/ext/pg_query/pg_query_fingerprint.c +104 -32
  25. data/ext/pg_query/pg_query_normalize.c +112 -60
  26. data/ext/pg_query/protobuf-c.c +34 -27
  27. data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +36 -0
  28. data/ext/pg_query/src_common_hashfn.c +420 -0
  29. data/lib/pg_query.rb +0 -1
  30. data/lib/pg_query/filter_columns.rb +1 -1
  31. data/lib/pg_query/fingerprint.rb +1 -3
  32. data/lib/pg_query/parse.rb +60 -5
  33. data/lib/pg_query/pg_query_pb.rb +1385 -1383
  34. data/lib/pg_query/version.rb +1 -1
  35. metadata +7 -7
  36. data/lib/pg_query/json_field_names.rb +0 -1402
@@ -28,6 +28,7 @@
28
28
  * - CurrentMemoryContext
29
29
  * - MemoryContextDelete
30
30
  * - palloc0
31
+ * - MemoryContextAllocExtended
31
32
  *--------------------------------------------------------------------
32
33
  */
33
34
 
@@ -840,7 +841,42 @@ MemoryContextAllocZeroAligned(MemoryContext context, Size size)
840
841
  * MemoryContextAllocExtended
841
842
  * Allocate space within the specified context using the given flags.
842
843
  */
844
+ void *
845
+ MemoryContextAllocExtended(MemoryContext context, Size size, int flags)
846
+ {
847
+ void *ret;
848
+
849
+ AssertArg(MemoryContextIsValid(context));
850
+ AssertNotInCriticalSection(context);
851
+
852
+ if (((flags & MCXT_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) ||
853
+ ((flags & MCXT_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size)))
854
+ elog(ERROR, "invalid memory alloc request size %zu", size);
855
+
856
+ context->isReset = false;
857
+
858
+ ret = context->methods->alloc(context, size);
859
+ if (unlikely(ret == NULL))
860
+ {
861
+ if ((flags & MCXT_ALLOC_NO_OOM) == 0)
862
+ {
863
+ MemoryContextStats(TopMemoryContext);
864
+ ereport(ERROR,
865
+ (errcode(ERRCODE_OUT_OF_MEMORY),
866
+ errmsg("out of memory"),
867
+ errdetail("Failed on request of size %zu in memory context \"%s\".",
868
+ size, context->name)));
869
+ }
870
+ return NULL;
871
+ }
843
872
 
873
+ VALGRIND_MEMPOOL_ALLOC(context, ret, size);
874
+
875
+ if ((flags & MCXT_ALLOC_ZERO) != 0)
876
+ MemSetAligned(ret, 0, size);
877
+
878
+ return ret;
879
+ }
844
880
 
845
881
  void *
846
882
  palloc(Size size)
@@ -0,0 +1,420 @@
1
+ /*--------------------------------------------------------------------
2
+ * Symbols referenced in this file:
3
+ * - hash_bytes
4
+ *--------------------------------------------------------------------
5
+ */
6
+
7
+ /*-------------------------------------------------------------------------
8
+ *
9
+ * hashfn.c
10
+ * Generic hashing functions, and hash functions for use in dynahash.c
11
+ * hashtables
12
+ *
13
+ *
14
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
15
+ * Portions Copyright (c) 1994, Regents of the University of California
16
+ *
17
+ *
18
+ * IDENTIFICATION
19
+ * src/common/hashfn.c
20
+ *
21
+ * NOTES
22
+ * It is expected that every bit of a hash function's 32-bit result is
23
+ * as random as every other; failure to ensure this is likely to lead
24
+ * to poor performance of hash tables. In most cases a hash
25
+ * function should use hash_bytes() or its variant hash_bytes_uint32(),
26
+ * or the wrappers hash_any() and hash_uint32 defined in hashfn.h.
27
+ *
28
+ *-------------------------------------------------------------------------
29
+ */
30
+ #include "postgres.h"
31
+
32
+ #include "common/hashfn.h"
33
+
34
+
35
+ /*
36
+ * This hash function was written by Bob Jenkins
37
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
38
+ * for PostgreSQL by Neil Conway. For more information on this
39
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
40
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
41
+ *
42
+ * In the current code, we have adopted Bob's 2006 update of his hash
43
+ * function to fetch the data a word at a time when it is suitably aligned.
44
+ * This makes for a useful speedup, at the cost of having to maintain
45
+ * four code paths (aligned vs unaligned, and little-endian vs big-endian).
46
+ * It also uses two separate mixing functions mix() and final(), instead
47
+ * of a slower multi-purpose function.
48
+ */
49
+
50
+ /* Get a bit mask of the bits set in non-uint32 aligned addresses */
51
+ #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
52
+
53
+ /* Rotate a uint32 value left by k bits - note multiple evaluation! */
54
+ #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
55
+
56
+ /*----------
57
+ * mix -- mix 3 32-bit values reversibly.
58
+ *
59
+ * This is reversible, so any information in (a,b,c) before mix() is
60
+ * still in (a,b,c) after mix().
61
+ *
62
+ * If four pairs of (a,b,c) inputs are run through mix(), or through
63
+ * mix() in reverse, there are at least 32 bits of the output that
64
+ * are sometimes the same for one pair and different for another pair.
65
+ * This was tested for:
66
+ * * pairs that differed by one bit, by two bits, in any combination
67
+ * of top bits of (a,b,c), or in any combination of bottom bits of
68
+ * (a,b,c).
69
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
70
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
71
+ * is commonly produced by subtraction) look like a single 1-bit
72
+ * difference.
73
+ * * the base values were pseudorandom, all zero but one bit set, or
74
+ * all zero plus a counter that starts at zero.
75
+ *
76
+ * This does not achieve avalanche. There are input bits of (a,b,c)
77
+ * that fail to affect some output bits of (a,b,c), especially of a. The
78
+ * most thoroughly mixed value is c, but it doesn't really even achieve
79
+ * avalanche in c.
80
+ *
81
+ * This allows some parallelism. Read-after-writes are good at doubling
82
+ * the number of bits affected, so the goal of mixing pulls in the opposite
83
+ * direction from the goal of parallelism. I did what I could. Rotates
84
+ * seem to cost as much as shifts on every machine I could lay my hands on,
85
+ * and rotates are much kinder to the top and bottom bits, so I used rotates.
86
+ *----------
87
+ */
88
+ #define mix(a,b,c) \
89
+ { \
90
+ a -= c; a ^= rot(c, 4); c += b; \
91
+ b -= a; b ^= rot(a, 6); a += c; \
92
+ c -= b; c ^= rot(b, 8); b += a; \
93
+ a -= c; a ^= rot(c,16); c += b; \
94
+ b -= a; b ^= rot(a,19); a += c; \
95
+ c -= b; c ^= rot(b, 4); b += a; \
96
+ }
97
+
98
+ /*----------
99
+ * final -- final mixing of 3 32-bit values (a,b,c) into c
100
+ *
101
+ * Pairs of (a,b,c) values differing in only a few bits will usually
102
+ * produce values of c that look totally different. This was tested for
103
+ * * pairs that differed by one bit, by two bits, in any combination
104
+ * of top bits of (a,b,c), or in any combination of bottom bits of
105
+ * (a,b,c).
106
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
107
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
108
+ * is commonly produced by subtraction) look like a single 1-bit
109
+ * difference.
110
+ * * the base values were pseudorandom, all zero but one bit set, or
111
+ * all zero plus a counter that starts at zero.
112
+ *
113
+ * The use of separate functions for mix() and final() allow for a
114
+ * substantial performance increase since final() does not need to
115
+ * do well in reverse, but is does need to affect all output bits.
116
+ * mix(), on the other hand, does not need to affect all output
117
+ * bits (affecting 32 bits is enough). The original hash function had
118
+ * a single mixing operation that had to satisfy both sets of requirements
119
+ * and was slower as a result.
120
+ *----------
121
+ */
122
+ #define final(a,b,c) \
123
+ { \
124
+ c ^= b; c -= rot(b,14); \
125
+ a ^= c; a -= rot(c,11); \
126
+ b ^= a; b -= rot(a,25); \
127
+ c ^= b; c -= rot(b,16); \
128
+ a ^= c; a -= rot(c, 4); \
129
+ b ^= a; b -= rot(a,14); \
130
+ c ^= b; c -= rot(b,24); \
131
+ }
132
+
133
+ /*
134
+ * hash_bytes() -- hash a variable-length key into a 32-bit value
135
+ * k : the key (the unaligned variable-length array of bytes)
136
+ * len : the length of the key, counting by bytes
137
+ *
138
+ * Returns a uint32 value. Every bit of the key affects every bit of
139
+ * the return value. Every 1-bit and 2-bit delta achieves avalanche.
140
+ * About 6*len+35 instructions. The best hash table sizes are powers
141
+ * of 2. There is no need to do mod a prime (mod is sooo slow!).
142
+ * If you need less than 32 bits, use a bitmask.
143
+ *
144
+ * This procedure must never throw elog(ERROR); the ResourceOwner code
145
+ * relies on this not to fail.
146
+ *
147
+ * Note: we could easily change this function to return a 64-bit hash value
148
+ * by using the final values of both b and c. b is perhaps a little less
149
+ * well mixed than c, however.
150
+ */
151
+ uint32
152
+ hash_bytes(const unsigned char *k, int keylen)
153
+ {
154
+ uint32 a,
155
+ b,
156
+ c,
157
+ len;
158
+
159
+ /* Set up the internal state */
160
+ len = keylen;
161
+ a = b = c = 0x9e3779b9 + len + 3923095;
162
+
163
+ /* If the source pointer is word-aligned, we use word-wide fetches */
164
+ if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
165
+ {
166
+ /* Code path for aligned source data */
167
+ const uint32 *ka = (const uint32 *) k;
168
+
169
+ /* handle most of the key */
170
+ while (len >= 12)
171
+ {
172
+ a += ka[0];
173
+ b += ka[1];
174
+ c += ka[2];
175
+ mix(a, b, c);
176
+ ka += 3;
177
+ len -= 12;
178
+ }
179
+
180
+ /* handle the last 11 bytes */
181
+ k = (const unsigned char *) ka;
182
+ #ifdef WORDS_BIGENDIAN
183
+ switch (len)
184
+ {
185
+ case 11:
186
+ c += ((uint32) k[10] << 8);
187
+ /* fall through */
188
+ case 10:
189
+ c += ((uint32) k[9] << 16);
190
+ /* fall through */
191
+ case 9:
192
+ c += ((uint32) k[8] << 24);
193
+ /* fall through */
194
+ case 8:
195
+ /* the lowest byte of c is reserved for the length */
196
+ b += ka[1];
197
+ a += ka[0];
198
+ break;
199
+ case 7:
200
+ b += ((uint32) k[6] << 8);
201
+ /* fall through */
202
+ case 6:
203
+ b += ((uint32) k[5] << 16);
204
+ /* fall through */
205
+ case 5:
206
+ b += ((uint32) k[4] << 24);
207
+ /* fall through */
208
+ case 4:
209
+ a += ka[0];
210
+ break;
211
+ case 3:
212
+ a += ((uint32) k[2] << 8);
213
+ /* fall through */
214
+ case 2:
215
+ a += ((uint32) k[1] << 16);
216
+ /* fall through */
217
+ case 1:
218
+ a += ((uint32) k[0] << 24);
219
+ /* case 0: nothing left to add */
220
+ }
221
+ #else /* !WORDS_BIGENDIAN */
222
+ switch (len)
223
+ {
224
+ case 11:
225
+ c += ((uint32) k[10] << 24);
226
+ /* fall through */
227
+ case 10:
228
+ c += ((uint32) k[9] << 16);
229
+ /* fall through */
230
+ case 9:
231
+ c += ((uint32) k[8] << 8);
232
+ /* fall through */
233
+ case 8:
234
+ /* the lowest byte of c is reserved for the length */
235
+ b += ka[1];
236
+ a += ka[0];
237
+ break;
238
+ case 7:
239
+ b += ((uint32) k[6] << 16);
240
+ /* fall through */
241
+ case 6:
242
+ b += ((uint32) k[5] << 8);
243
+ /* fall through */
244
+ case 5:
245
+ b += k[4];
246
+ /* fall through */
247
+ case 4:
248
+ a += ka[0];
249
+ break;
250
+ case 3:
251
+ a += ((uint32) k[2] << 16);
252
+ /* fall through */
253
+ case 2:
254
+ a += ((uint32) k[1] << 8);
255
+ /* fall through */
256
+ case 1:
257
+ a += k[0];
258
+ /* case 0: nothing left to add */
259
+ }
260
+ #endif /* WORDS_BIGENDIAN */
261
+ }
262
+ else
263
+ {
264
+ /* Code path for non-aligned source data */
265
+
266
+ /* handle most of the key */
267
+ while (len >= 12)
268
+ {
269
+ #ifdef WORDS_BIGENDIAN
270
+ a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
271
+ b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
272
+ c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
273
+ #else /* !WORDS_BIGENDIAN */
274
+ a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
275
+ b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
276
+ c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
277
+ #endif /* WORDS_BIGENDIAN */
278
+ mix(a, b, c);
279
+ k += 12;
280
+ len -= 12;
281
+ }
282
+
283
+ /* handle the last 11 bytes */
284
+ #ifdef WORDS_BIGENDIAN
285
+ switch (len)
286
+ {
287
+ case 11:
288
+ c += ((uint32) k[10] << 8);
289
+ /* fall through */
290
+ case 10:
291
+ c += ((uint32) k[9] << 16);
292
+ /* fall through */
293
+ case 9:
294
+ c += ((uint32) k[8] << 24);
295
+ /* fall through */
296
+ case 8:
297
+ /* the lowest byte of c is reserved for the length */
298
+ b += k[7];
299
+ /* fall through */
300
+ case 7:
301
+ b += ((uint32) k[6] << 8);
302
+ /* fall through */
303
+ case 6:
304
+ b += ((uint32) k[5] << 16);
305
+ /* fall through */
306
+ case 5:
307
+ b += ((uint32) k[4] << 24);
308
+ /* fall through */
309
+ case 4:
310
+ a += k[3];
311
+ /* fall through */
312
+ case 3:
313
+ a += ((uint32) k[2] << 8);
314
+ /* fall through */
315
+ case 2:
316
+ a += ((uint32) k[1] << 16);
317
+ /* fall through */
318
+ case 1:
319
+ a += ((uint32) k[0] << 24);
320
+ /* case 0: nothing left to add */
321
+ }
322
+ #else /* !WORDS_BIGENDIAN */
323
+ switch (len)
324
+ {
325
+ case 11:
326
+ c += ((uint32) k[10] << 24);
327
+ /* fall through */
328
+ case 10:
329
+ c += ((uint32) k[9] << 16);
330
+ /* fall through */
331
+ case 9:
332
+ c += ((uint32) k[8] << 8);
333
+ /* fall through */
334
+ case 8:
335
+ /* the lowest byte of c is reserved for the length */
336
+ b += ((uint32) k[7] << 24);
337
+ /* fall through */
338
+ case 7:
339
+ b += ((uint32) k[6] << 16);
340
+ /* fall through */
341
+ case 6:
342
+ b += ((uint32) k[5] << 8);
343
+ /* fall through */
344
+ case 5:
345
+ b += k[4];
346
+ /* fall through */
347
+ case 4:
348
+ a += ((uint32) k[3] << 24);
349
+ /* fall through */
350
+ case 3:
351
+ a += ((uint32) k[2] << 16);
352
+ /* fall through */
353
+ case 2:
354
+ a += ((uint32) k[1] << 8);
355
+ /* fall through */
356
+ case 1:
357
+ a += k[0];
358
+ /* case 0: nothing left to add */
359
+ }
360
+ #endif /* WORDS_BIGENDIAN */
361
+ }
362
+
363
+ final(a, b, c);
364
+
365
+ /* report the result */
366
+ return c;
367
+ }
368
+
369
+ /*
370
+ * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
371
+ * k : the key (the unaligned variable-length array of bytes)
372
+ * len : the length of the key, counting by bytes
373
+ * seed : a 64-bit seed (0 means no seed)
374
+ *
375
+ * Returns a uint64 value. Otherwise similar to hash_bytes.
376
+ */
377
+ #ifdef WORDS_BIGENDIAN
378
+ #else /* !WORDS_BIGENDIAN */
379
+ #endif /* WORDS_BIGENDIAN */
380
+ #ifdef WORDS_BIGENDIAN
381
+ #else /* !WORDS_BIGENDIAN */
382
+ #endif /* WORDS_BIGENDIAN */
383
+ #ifdef WORDS_BIGENDIAN
384
+ #else /* !WORDS_BIGENDIAN */
385
+ #endif /* WORDS_BIGENDIAN */
386
+
387
+ /*
388
+ * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
389
+ *
390
+ * This has the same result as
391
+ * hash_bytes(&k, sizeof(uint32))
392
+ * but is faster and doesn't force the caller to store k into memory.
393
+ */
394
+
395
+
396
+ /*
397
+ * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
398
+ *
399
+ * Like hash_bytes_uint32, this is a convenience function.
400
+ */
401
+
402
+
403
+ /*
404
+ * string_hash: hash function for keys that are NUL-terminated strings.
405
+ *
406
+ * NOTE: this is the default hash function if none is specified.
407
+ */
408
+
409
+
410
+ /*
411
+ * tag_hash: hash function for fixed-size tag values
412
+ */
413
+
414
+
415
+ /*
416
+ * uint32_hash: hash function for keys that are uint32 or int32
417
+ *
418
+ * (tag_hash works for this case too, but is slower)
419
+ */
420
+