pg_query 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +220 -114
  3. data/README.md +12 -0
  4. data/Rakefile +6 -21
  5. data/ext/pg_query/extconf.rb +5 -2
  6. data/ext/pg_query/include/c.h +12 -0
  7. data/ext/pg_query/include/executor/executor.h +6 -0
  8. data/ext/pg_query/include/nodes/execnodes.h +9 -6
  9. data/ext/pg_query/include/nodes/pathnodes.h +1 -1
  10. data/ext/pg_query/include/optimizer/paths.h +8 -0
  11. data/ext/pg_query/include/pg_config.h +10 -6
  12. data/ext/pg_query/include/pg_config_manual.h +7 -0
  13. data/ext/pg_query/include/pg_query.h +2 -2
  14. data/ext/pg_query/include/pg_query_outfuncs_defs.c +1 -0
  15. data/ext/pg_query/include/pg_query_readfuncs_defs.c +1 -0
  16. data/ext/pg_query/include/protobuf/pg_query.pb-c.h +472 -467
  17. data/ext/pg_query/include/protobuf-c/protobuf-c.h +7 -3
  18. data/ext/pg_query/include/protobuf-c.h +7 -3
  19. data/ext/pg_query/include/utils/array.h +1 -0
  20. data/ext/pg_query/include/utils/lsyscache.h +1 -0
  21. data/ext/pg_query/include/utils/probes.h +57 -57
  22. data/ext/pg_query/pg_query.pb-c.c +502 -487
  23. data/ext/pg_query/pg_query_deparse.c +33 -21
  24. data/ext/pg_query/pg_query_fingerprint.c +123 -33
  25. data/ext/pg_query/pg_query_fingerprint.h +3 -1
  26. data/ext/pg_query/pg_query_normalize.c +222 -61
  27. data/ext/pg_query/pg_query_parse_plpgsql.c +21 -1
  28. data/ext/pg_query/pg_query_ruby.sym +1 -0
  29. data/ext/pg_query/protobuf-c.c +34 -27
  30. data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +36 -0
  31. data/ext/pg_query/src_common_hashfn.c +420 -0
  32. data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +1 -1
  33. data/lib/pg_query/filter_columns.rb +3 -1
  34. data/lib/pg_query/fingerprint.rb +1 -3
  35. data/lib/pg_query/parse.rb +101 -46
  36. data/lib/pg_query/pg_query_pb.rb +1385 -1383
  37. data/lib/pg_query/truncate.rb +12 -4
  38. data/lib/pg_query/version.rb +1 -1
  39. data/lib/pg_query.rb +0 -1
  40. metadata +8 -8
  41. data/lib/pg_query/json_field_names.rb +0 -1402
@@ -2247,6 +2247,12 @@ static void deparseRangeVar(StringInfo str, RangeVar *range_var, DeparseNodeCont
2247
2247
  if (!range_var->inh && context != DEPARSE_NODE_CONTEXT_CREATE_TYPE && context != DEPARSE_NODE_CONTEXT_ALTER_TYPE)
2248
2248
  appendStringInfoString(str, "ONLY ");
2249
2249
 
2250
+ if (range_var->catalogname != NULL)
2251
+ {
2252
+ appendStringInfoString(str, quote_identifier(range_var->catalogname));
2253
+ appendStringInfoChar(str, '.');
2254
+ }
2255
+
2250
2256
  if (range_var->schemaname != NULL)
2251
2257
  {
2252
2258
  appendStringInfoString(str, quote_identifier(range_var->schemaname));
@@ -4018,23 +4024,25 @@ static void deparseTableLikeClause(StringInfo str, TableLikeClause *table_like_c
4018
4024
 
4019
4025
  if (table_like_clause->options == CREATE_TABLE_LIKE_ALL)
4020
4026
  appendStringInfoString(str, "INCLUDING ALL ");
4021
- if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS)
4022
- appendStringInfoString(str, "INCLUDING COMMENTS ");
4023
- if (table_like_clause->options & CREATE_TABLE_LIKE_CONSTRAINTS)
4024
- appendStringInfoString(str, "INCLUDING CONSTRAINTS ");
4025
- if (table_like_clause->options & CREATE_TABLE_LIKE_DEFAULTS)
4026
- appendStringInfoString(str, "INCLUDING DEFAULTS ");
4027
- if (table_like_clause->options & CREATE_TABLE_LIKE_IDENTITY)
4028
- appendStringInfoString(str, "INCLUDING IDENTITY ");
4029
- if (table_like_clause->options & CREATE_TABLE_LIKE_GENERATED)
4030
- appendStringInfoString(str, "INCLUDING GENERATED ");
4031
- if (table_like_clause->options & CREATE_TABLE_LIKE_INDEXES)
4032
- appendStringInfoString(str, "INCLUDING INDEXES ");
4033
- if (table_like_clause->options & CREATE_TABLE_LIKE_STATISTICS)
4034
- appendStringInfoString(str, "INCLUDING STATISTICS ");
4035
- if (table_like_clause->options & CREATE_TABLE_LIKE_STORAGE)
4036
- appendStringInfoString(str, "INCLUDING STORAGE ");
4037
-
4027
+ else
4028
+ {
4029
+ if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS)
4030
+ appendStringInfoString(str, "INCLUDING COMMENTS ");
4031
+ if (table_like_clause->options & CREATE_TABLE_LIKE_CONSTRAINTS)
4032
+ appendStringInfoString(str, "INCLUDING CONSTRAINTS ");
4033
+ if (table_like_clause->options & CREATE_TABLE_LIKE_DEFAULTS)
4034
+ appendStringInfoString(str, "INCLUDING DEFAULTS ");
4035
+ if (table_like_clause->options & CREATE_TABLE_LIKE_IDENTITY)
4036
+ appendStringInfoString(str, "INCLUDING IDENTITY ");
4037
+ if (table_like_clause->options & CREATE_TABLE_LIKE_GENERATED)
4038
+ appendStringInfoString(str, "INCLUDING GENERATED ");
4039
+ if (table_like_clause->options & CREATE_TABLE_LIKE_INDEXES)
4040
+ appendStringInfoString(str, "INCLUDING INDEXES ");
4041
+ if (table_like_clause->options & CREATE_TABLE_LIKE_STATISTICS)
4042
+ appendStringInfoString(str, "INCLUDING STATISTICS ");
4043
+ if (table_like_clause->options & CREATE_TABLE_LIKE_STORAGE)
4044
+ appendStringInfoString(str, "INCLUDING STORAGE ");
4045
+ }
4038
4046
  removeTrailingSpace(str);
4039
4047
  }
4040
4048
 
@@ -6676,9 +6684,11 @@ static void deparseCopyStmt(StringInfo str, CopyStmt *copy_stmt)
6676
6684
  else
6677
6685
  Assert(false);
6678
6686
  }
6679
- else if (strcmp(def_elem->defname, "freeze") == 0 && intVal(def_elem->arg) == 1)
6687
+ else if (strcmp(def_elem->defname, "freeze") == 0 && (def_elem->arg == NULL || intVal(def_elem->arg) == 1))
6680
6688
  {
6681
- appendStringInfoString(str, "FREEZE 1");
6689
+ appendStringInfoString(str, "FREEZE");
6690
+ if (def_elem->arg != NULL && intVal(def_elem->arg) == 1)
6691
+ appendStringInfoString(str, " 1");
6682
6692
  }
6683
6693
  else if (strcmp(def_elem->defname, "delimiter") == 0)
6684
6694
  {
@@ -6690,9 +6700,11 @@ static void deparseCopyStmt(StringInfo str, CopyStmt *copy_stmt)
6690
6700
  appendStringInfoString(str, "NULL ");
6691
6701
  deparseStringLiteral(str, strVal(def_elem->arg));
6692
6702
  }
6693
- else if (strcmp(def_elem->defname, "header") == 0 && intVal(def_elem->arg) == 1)
6703
+ else if (strcmp(def_elem->defname, "header") == 0 && (def_elem->arg == NULL || intVal(def_elem->arg) == 1))
6694
6704
  {
6695
- appendStringInfoString(str, "HEADER 1");
6705
+ appendStringInfoString(str, "HEADER");
6706
+ if (def_elem->arg != NULL && intVal(def_elem->arg) == 1)
6707
+ appendStringInfoString(str, " 1");
6696
6708
  }
6697
6709
  else if (strcmp(def_elem->defname, "quote") == 0)
6698
6710
  {
@@ -1,3 +1,7 @@
1
+ // Ensure we have asprintf's definition on glibc-based platforms to avoid compiler warnings
2
+ #define _GNU_SOURCE
3
+ #include <stdio.h>
4
+
1
5
  #include "pg_query.h"
2
6
  #include "pg_query_internal.h"
3
7
  #include "pg_query_fingerprint.h"
@@ -13,7 +17,8 @@
13
17
  #include "nodes/parsenodes.h"
14
18
  #include "nodes/value.h"
15
19
 
16
- #include <stdio.h>
20
+ #include "common/hashfn.h"
21
+
17
22
  #include <unistd.h>
18
23
  #include <fcntl.h>
19
24
 
@@ -23,15 +28,41 @@ typedef struct FingerprintContext
23
28
  {
24
29
  XXH3_state_t *xxh_state;
25
30
 
31
+ struct listsort_cache_hash *listsort_cache;
32
+
26
33
  bool write_tokens;
27
34
  dlist_head tokens;
28
35
  } FingerprintContext;
29
36
 
30
- typedef struct FingerprintListContext
37
+ typedef struct FingerprintListsortItem
31
38
  {
32
39
  XXH64_hash_t hash;
33
40
  size_t list_pos;
34
- } FingerprintListContext;
41
+ } FingerprintListsortItem;
42
+
43
+ typedef struct FingerprintListsortItemCacheEntry /* One listsort_cache entry: the sorted per-item hashes already computed for a given List node */
44
+ {
45
+ /* Hash key: address of the List node this cache entry is for, stored as an integer */
46
+ uintptr_t node;
47
+
48
+ /* Hashes of all list items, kept in the hash-sorted order they were stored in -- this is expensive to calculate, hence cached */
49
+ FingerprintListsortItem **listsort_items;
50
+ size_t listsort_items_size; /* number of entries in listsort_items */
51
+
52
+ /* hash entry status -- presumably the per-entry state field lib/simplehash.h expects; confirm against that header */
53
+ char status;
54
+ } FingerprintListsortItemCacheEntry;
55
+
56
+ #define SH_PREFIX listsort_cache
57
+ #define SH_ELEMENT_TYPE FingerprintListsortItemCacheEntry
58
+ #define SH_KEY_TYPE uintptr_t
59
+ #define SH_KEY node
60
+ #define SH_HASH_KEY(tb, key) hash_bytes((const unsigned char *) &key, sizeof(uintptr_t))
61
+ #define SH_EQUAL(tb, a, b) a == b
62
+ #define SH_SCOPE static inline
63
+ #define SH_DEFINE
64
+ #define SH_DECLARE
65
+ #include "lib/simplehash.h"
35
66
 
36
67
  typedef struct FingerprintToken
37
68
  {
@@ -40,7 +71,7 @@ typedef struct FingerprintToken
40
71
  } FingerprintToken;
41
72
 
42
73
  static void _fingerprintNode(FingerprintContext *ctx, const void *obj, const void *parent, char *parent_field_name, unsigned int depth);
43
- static void _fingerprintInitContext(FingerprintContext *ctx, bool write_tokens);
74
+ static void _fingerprintInitContext(FingerprintContext *ctx, FingerprintContext *parent, bool write_tokens);
44
75
  static void _fingerprintFreeContext(FingerprintContext *ctx);
45
76
 
46
77
  #define PG_QUERY_FINGERPRINT_VERSION 3
@@ -93,10 +124,10 @@ _fingerprintBitString(FingerprintContext *ctx, const Value *node)
93
124
  }
94
125
  }
95
126
 
96
- static int compareFingerprintListContext(const void *a, const void *b)
127
+ static int compareFingerprintListsortItem(const void *a, const void *b)
97
128
  {
98
- FingerprintListContext *ca = *(FingerprintListContext**) a;
99
- FingerprintListContext *cb = *(FingerprintListContext**) b;
129
+ FingerprintListsortItem *ca = *(FingerprintListsortItem**) a;
130
+ FingerprintListsortItem *cb = *(FingerprintListsortItem**) b;
100
131
  if (ca->hash > cb->hash)
101
132
  return 1;
102
133
  else if (ca->hash < cb->hash)
@@ -108,38 +139,69 @@ static void
108
139
  _fingerprintList(FingerprintContext *ctx, const List *node, const void *parent, char *field_name, unsigned int depth)
109
140
  {
110
141
  if (field_name != NULL && (strcmp(field_name, "fromClause") == 0 || strcmp(field_name, "targetList") == 0 ||
111
- strcmp(field_name, "cols") == 0 || strcmp(field_name, "rexpr") == 0 || strcmp(field_name, "valuesLists") == 0 ||
112
- strcmp(field_name, "args") == 0)) {
142
+ strcmp(field_name, "cols") == 0 || strcmp(field_name, "rexpr") == 0 || strcmp(field_name, "valuesLists") == 0 ||
143
+ strcmp(field_name, "args") == 0))
144
+ {
145
+ /*
146
+ * Check for cached values for the hashes of subnodes
147
+ *
148
+ * Note this cache is important so we avoid exponential runtime behavior,
149
+ * which would be the case if we fingerprinted each node twice, which
150
+ * then would also again have to fingerprint each of its subnodes twice,
151
+ * etc., leading to deep nodes to be fingerprinted many many times over.
152
+ *
153
+ * We have seen real-world problems with this logic here without
154
+ * a cache in place.
155
+ */
156
+ FingerprintListsortItem** listsort_items = NULL;
157
+ size_t listsort_items_size = 0;
158
+ FingerprintListsortItemCacheEntry *entry = listsort_cache_lookup(ctx->listsort_cache, (uintptr_t) node);
159
+ if (entry != NULL)
160
+ {
161
+ listsort_items = entry->listsort_items;
162
+ listsort_items_size = entry->listsort_items_size;
163
+ }
164
+ else
165
+ {
166
+ listsort_items = palloc0(node->length * sizeof(FingerprintListsortItem*));
167
+ listsort_items_size = 0;
168
+ ListCell *lc;
169
+ bool found;
113
170
 
114
- FingerprintListContext** listCtxArr = palloc0(node->length * sizeof(FingerprintListContext*));
115
- size_t listCtxCount = 0;
116
- const ListCell *lc;
171
+ foreach(lc, node)
172
+ {
173
+ FingerprintContext fctx;
174
+ FingerprintListsortItem* lctx = palloc0(sizeof(FingerprintListsortItem));
117
175
 
118
- foreach(lc, node)
119
- {
120
- FingerprintContext subCtx;
121
- FingerprintListContext* listCtx = palloc0(sizeof(FingerprintListContext));
176
+ _fingerprintInitContext(&fctx, ctx, false);
177
+ _fingerprintNode(&fctx, lfirst(lc), parent, field_name, depth + 1);
178
+ lctx->hash = XXH3_64bits_digest(fctx.xxh_state);
179
+ lctx->list_pos = listsort_items_size;
180
+ _fingerprintFreeContext(&fctx);
122
181
 
123
- _fingerprintInitContext(&subCtx, false);
124
- _fingerprintNode(&subCtx, lfirst(lc), parent, field_name, depth + 1);
125
- listCtx->hash = XXH3_64bits_digest(subCtx.xxh_state);
126
- listCtx->list_pos = listCtxCount;
127
- _fingerprintFreeContext(&subCtx);
182
+ listsort_items[listsort_items_size] = lctx;
183
+ listsort_items_size += 1;
184
+ }
128
185
 
129
- listCtxArr[listCtxCount] = listCtx;
130
- listCtxCount += 1;
131
- }
186
+ pg_qsort(listsort_items, listsort_items_size, sizeof(FingerprintListsortItem*), compareFingerprintListsortItem);
132
187
 
133
- pg_qsort(listCtxArr, listCtxCount, sizeof(FingerprintListContext*), compareFingerprintListContext);
188
+ FingerprintListsortItemCacheEntry *entry = listsort_cache_insert(ctx->listsort_cache, (uintptr_t) node, &found);
189
+ Assert(!found);
190
+
191
+ entry->listsort_items = listsort_items;
192
+ entry->listsort_items_size = listsort_items_size;
193
+ }
134
194
 
135
- for (size_t i = 0; i < listCtxCount; i++)
195
+ for (size_t i = 0; i < listsort_items_size; i++)
136
196
  {
137
- if (i > 0 && listCtxArr[i - 1]->hash == listCtxArr[i]->hash)
197
+ if (i > 0 && listsort_items[i - 1]->hash == listsort_items[i]->hash)
138
198
  continue; // Ignore duplicates
139
199
 
140
- _fingerprintNode(ctx, lfirst(list_nth_cell(node, listCtxArr[i]->list_pos)), parent, field_name, depth + 1);
200
+ _fingerprintNode(ctx, lfirst(list_nth_cell(node, listsort_items[i]->list_pos)), parent, field_name, depth + 1);
141
201
  }
142
- } else {
202
+ }
203
+ else
204
+ {
143
205
  const ListCell *lc;
144
206
 
145
207
  foreach(lc, node)
@@ -152,15 +214,28 @@ _fingerprintList(FingerprintContext *ctx, const List *node, const void *parent,
152
214
  }
153
215
 
154
216
  static void
155
- _fingerprintInitContext(FingerprintContext *ctx, bool write_tokens) {
217
+ _fingerprintInitContext(FingerprintContext *ctx, FingerprintContext *parent, bool write_tokens)
218
+ {
156
219
  ctx->xxh_state = XXH3_createState();
157
220
  if (ctx->xxh_state == NULL) abort();
158
221
  if (XXH3_64bits_reset_withSeed(ctx->xxh_state, PG_QUERY_FINGERPRINT_VERSION) == XXH_ERROR) abort();
159
222
 
160
- if (write_tokens) {
223
+ if (parent != NULL)
224
+ {
225
+ ctx->listsort_cache = parent->listsort_cache;
226
+ }
227
+ else
228
+ {
229
+ ctx->listsort_cache = listsort_cache_create(CurrentMemoryContext, 128, NULL);
230
+ }
231
+
232
+ if (write_tokens)
233
+ {
161
234
  ctx->write_tokens = true;
162
235
  dlist_init(&ctx->tokens);
163
- } else {
236
+ }
237
+ else
238
+ {
164
239
  ctx->write_tokens = false;
165
240
  }
166
241
  }
@@ -216,6 +291,21 @@ _fingerprintNode(FingerprintContext *ctx, const void *obj, const void *parent, c
216
291
  }
217
292
  }
218
293
 
294
+ uint64_t pg_query_fingerprint_node(const void *node) /* Fingerprints a single node (and whatever _fingerprintNode visits beneath it) and returns the 64-bit XXH3 digest */
295
+ {
296
+ FingerprintContext ctx;
297
+ uint64 result;
298
+
299
+ _fingerprintInitContext(&ctx, NULL, false); /* NULL parent: a fresh listsort cache is created for this run; false: tokens are not recorded */
300
+ _fingerprintNode(&ctx, node, NULL, NULL, 0); /* start at the given node with no parent, no parent field name, depth 0 */
301
+
302
+ result = XXH3_64bits_digest(ctx.xxh_state); /* read the digest while ctx is still live, i.e. before it is freed below */
303
+
304
+ _fingerprintFreeContext(&ctx);
305
+
306
+ return result;
307
+ }
308
+
219
309
  PgQueryFingerprintResult pg_query_fingerprint_with_opts(const char* input, bool printTokens)
220
310
  {
221
311
  MemoryContext ctx = NULL;
@@ -234,7 +324,7 @@ PgQueryFingerprintResult pg_query_fingerprint_with_opts(const char* input, bool
234
324
  FingerprintContext ctx;
235
325
  XXH64_canonical_t chash;
236
326
 
237
- _fingerprintInitContext(&ctx, printTokens);
327
+ _fingerprintInitContext(&ctx, NULL, printTokens);
238
328
 
239
329
  if (parsetree_and_error.tree != NULL) {
240
330
  _fingerprintNode(&ctx, parsetree_and_error.tree, NULL, NULL, 0);
@@ -3,6 +3,8 @@
3
3
 
4
4
  #include <stdbool.h>
5
5
 
6
- PgQueryFingerprintResult pg_query_fingerprint_with_opts(const char* input, bool printTokens);
6
+ extern PgQueryFingerprintResult pg_query_fingerprint_with_opts(const char* input, bool printTokens);
7
+
8
+ extern uint64_t pg_query_fingerprint_node(const void * node);
7
9
 
8
10
  #endif