pg_query 6.0.0 → 6.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -1
- data/README.md +1 -1
- data/Rakefile +5 -4
- data/ext/pg_query/ext_symbols_freebsd_with_ruby_abi_version.sym +2 -0
- data/ext/pg_query/ext_symbols_openbsd.sym +1 -0
- data/ext/pg_query/ext_symbols_openbsd_with_ruby_abi_version.sym +2 -0
- data/ext/pg_query/ext_symbols_with_ruby_abi_version.sym +2 -0
- data/ext/pg_query/extconf.rb +21 -7
- data/ext/pg_query/include/pg_query.h +32 -2
- data/ext/pg_query/include/postgres/access/amapi.h +5 -1
- data/ext/pg_query/include/postgres/access/genam.h +9 -0
- data/ext/pg_query/include/postgres/access/slru.h +4 -7
- data/ext/pg_query/include/postgres/access/tableam.h +13 -5
- data/ext/pg_query/include/postgres/access/transam.h +43 -0
- data/ext/pg_query/include/postgres/access/xlog.h +1 -0
- data/ext/pg_query/include/postgres/access/xlogdefs.h +2 -1
- data/ext/pg_query/include/postgres/c.h +1 -1
- data/ext/pg_query/include/postgres/catalog/objectaddress.h +4 -0
- data/ext/pg_query/include/postgres/commands/defrem.h +1 -1
- data/ext/pg_query/include/postgres/commands/event_trigger.h +6 -0
- data/ext/pg_query/include/postgres/commands/trigger.h +18 -0
- data/ext/pg_query/include/postgres/common/hashfn_unstable.h +6 -52
- data/ext/pg_query/include/postgres/datatype/timestamp.h +1 -1
- data/ext/pg_query/include/postgres/executor/execdesc.h +1 -1
- data/ext/pg_query/include/postgres/executor/executor.h +4 -0
- data/ext/pg_query/include/postgres/libpq/libpq-be.h +6 -3
- data/ext/pg_query/include/postgres/mb/pg_wchar.h +3 -0
- data/ext/pg_query/include/postgres/miscadmin.h +12 -3
- data/ext/pg_query/include/postgres/nodes/execnodes.h +9 -8
- data/ext/pg_query/include/postgres/nodes/pathnodes.h +3 -1
- data/ext/pg_query/include/postgres/nodes/pg_list.h +1 -1
- data/ext/pg_query/include/postgres/nodes/primnodes.h +9 -5
- data/ext/pg_query/include/postgres/parser/parse_coerce.h +3 -0
- data/ext/pg_query/include/postgres/pg_config.h +32 -23
- data/ext/pg_query/include/postgres/pg_config_manual.h +2 -0
- data/ext/pg_query/include/postgres/port/atomics/generic-gcc.h +10 -2
- data/ext/pg_query/include/postgres/port/pg_iovec.h +11 -5
- data/ext/pg_query/include/postgres/port/win32_port.h +0 -2
- data/ext/pg_query/include/postgres/port.h +32 -1
- data/ext/pg_query/include/postgres/replication/reorderbuffer.h +38 -27
- data/ext/pg_query/include/postgres/replication/slot.h +7 -1
- data/ext/pg_query/include/postgres/storage/lockdefs.h +2 -0
- data/ext/pg_query/include/postgres/storage/proc.h +13 -16
- data/ext/pg_query/include/postgres/storage/smgr.h +5 -2
- data/ext/pg_query/include/postgres/utils/catcache.h +1 -0
- data/ext/pg_query/include/postgres/utils/elog.h +1 -0
- data/ext/pg_query/include/postgres/utils/guc.h +1 -1
- data/ext/pg_query/include/postgres/utils/guc_hooks.h +0 -2
- data/ext/pg_query/include/postgres/utils/pg_locale.h +5 -0
- data/ext/pg_query/include/postgres/utils/pgstat_internal.h +19 -0
- data/ext/pg_query/include/postgres/utils/portal.h +1 -1
- data/ext/pg_query/include/postgres/utils/syscache.h +5 -0
- data/ext/pg_query/include/postgres_deparse.h +34 -0
- data/ext/pg_query/include/protobuf/pg_query.pb-c.h +673 -516
- data/ext/pg_query/pg_query.pb-c.c +488 -0
- data/ext/pg_query/pg_query_deparse.c +148 -15
- data/ext/pg_query/pg_query_internal.h +9 -8
- data/ext/pg_query/pg_query_is_utility_stmt.c +70 -0
- data/ext/pg_query/pg_query_normalize.c +3 -0
- data/ext/pg_query/pg_query_raw_tree_walker_supports.c +117 -0
- data/ext/pg_query/pg_query_ruby.c +150 -0
- data/ext/pg_query/pg_query_summary.c +941 -0
- data/ext/pg_query/pg_query_summary.h +109 -0
- data/ext/pg_query/pg_query_summary_statement_type.c +797 -0
- data/ext/pg_query/pg_query_summary_truncate.c +530 -0
- data/ext/pg_query/postgres_deparse.c +4531 -3879
- data/ext/pg_query/src_backend_catalog_namespace.c +29 -0
- data/ext/pg_query/src_backend_nodes_bitmapset.c +84 -1
- data/ext/pg_query/src_backend_nodes_list.c +60 -1
- data/ext/pg_query/src_backend_parser_gram.c +740 -733
- data/ext/pg_query/src_backend_utils_activity_pgstat_database.c +2 -2
- data/ext/pg_query/src_backend_utils_error_elog.c +11 -0
- data/ext/pg_query/src_backend_utils_mb_mbutils.c +43 -4
- data/ext/pg_query/src_backend_utils_mmgr_alignedalloc.c +22 -7
- data/ext/pg_query/src_backend_utils_mmgr_aset.c +3 -3
- data/ext/pg_query/src_backend_utils_mmgr_bump.c +1 -1
- data/ext/pg_query/src_common_stringinfo.c +20 -0
- data/ext/pg_query/src_common_wchar.c +93 -7
- data/ext/pg_query/src_port_snprintf.c +14 -17
- data/lib/pg_query/deparse.rb +29 -8
- data/lib/pg_query/fingerprint.rb +3 -5
- data/lib/pg_query/param_refs.rb +1 -1
- data/lib/pg_query/parse.rb +24 -7
- data/lib/pg_query/parse_error.rb +1 -0
- data/lib/pg_query/pg_query_pb.rb +8 -24
- data/lib/pg_query/scan.rb +1 -0
- data/lib/pg_query/split.rb +20 -0
- data/lib/pg_query/treewalker.rb +11 -13
- data/lib/pg_query/truncate.rb +17 -19
- data/lib/pg_query/version.rb +1 -1
- data/lib/pg_query.rb +1 -0
- metadata +23 -81
- data/ext/pg_query/postgres_deparse.h +0 -9
- /data/ext/pg_query/{pg_query_ruby.sym → ext_symbols.sym} +0 -0
- /data/ext/pg_query/{pg_query_ruby_freebsd.sym → ext_symbols_freebsd.sym} +0 -0
|
@@ -130,8 +130,8 @@ __thread SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
|
|
|
130
130
|
/*
|
|
131
131
|
* Flush out pending stats for the entry
|
|
132
132
|
*
|
|
133
|
-
* If nowait is true
|
|
134
|
-
*
|
|
133
|
+
* If nowait is true and the lock could not be immediately acquired, returns
|
|
134
|
+
* false without flushing the entry. Otherwise returns true.
|
|
135
135
|
*/
|
|
136
136
|
#define PGSTAT_ACCUM_DBCOUNT(item) \
|
|
137
137
|
(sharedent)->stats.item += (pendingent)->item
|
|
@@ -1939,6 +1939,17 @@ write_stderr(const char *fmt,...)
|
|
|
1939
1939
|
}
|
|
1940
1940
|
|
|
1941
1941
|
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
/*
|
|
1945
|
+
* Write errors to stderr (or by equal means when stderr is
|
|
1946
|
+
* not available) - va_list version
|
|
1947
|
+
*/
|
|
1948
|
+
#ifdef WIN32
|
|
1949
|
+
#endif
|
|
1950
|
+
#ifndef WIN32
|
|
1951
|
+
#else
|
|
1952
|
+
#endif
|
|
1942
1953
|
#ifdef WIN32
|
|
1943
1954
|
__thread volatile int pg_signal_queue;
|
|
1944
1955
|
|
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
* - pg_mbstrlen_with_len
|
|
18
18
|
* - pg_mblen
|
|
19
19
|
* - SetDatabaseEncoding
|
|
20
|
+
* - pg_mbcharcliplen
|
|
21
|
+
* - pg_mbstrlen
|
|
20
22
|
* - GetMessageEncoding
|
|
21
23
|
* - MessageEncoding
|
|
22
24
|
*--------------------------------------------------------------------
|
|
@@ -386,7 +388,22 @@ pg_mblen(const char *mbstr)
|
|
|
386
388
|
|
|
387
389
|
|
|
388
390
|
/* returns the length (counted in wchars) of a multibyte string */
|
|
391
|
+
int
|
|
392
|
+
pg_mbstrlen(const char *mbstr)
|
|
393
|
+
{
|
|
394
|
+
int len = 0;
|
|
389
395
|
|
|
396
|
+
/* optimization for single byte encoding */
|
|
397
|
+
if (pg_database_encoding_max_length() == 1)
|
|
398
|
+
return strlen(mbstr);
|
|
399
|
+
|
|
400
|
+
while (*mbstr)
|
|
401
|
+
{
|
|
402
|
+
mbstr += pg_mblen(mbstr);
|
|
403
|
+
len++;
|
|
404
|
+
}
|
|
405
|
+
return len;
|
|
406
|
+
}
|
|
390
407
|
|
|
391
408
|
/* returns the length (counted in wchars) of a multibyte string
|
|
392
409
|
* (not necessarily NULL terminated)
|
|
@@ -425,7 +442,7 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
|
|
|
425
442
|
}
|
|
426
443
|
|
|
427
444
|
/*
|
|
428
|
-
* pg_mbcliplen with specified encoding
|
|
445
|
+
* pg_mbcliplen with specified encoding; string must be valid in encoding
|
|
429
446
|
*/
|
|
430
447
|
int
|
|
431
448
|
pg_encoding_mbcliplen(int encoding, const char *mbstr,
|
|
@@ -459,7 +476,29 @@ pg_encoding_mbcliplen(int encoding, const char *mbstr,
|
|
|
459
476
|
* Similar to pg_mbcliplen except the limit parameter specifies the
|
|
460
477
|
* character length, not the byte length.
|
|
461
478
|
*/
|
|
479
|
+
int
|
|
480
|
+
pg_mbcharcliplen(const char *mbstr, int len, int limit)
|
|
481
|
+
{
|
|
482
|
+
int clen = 0;
|
|
483
|
+
int nch = 0;
|
|
484
|
+
int l;
|
|
462
485
|
|
|
486
|
+
/* optimization for single byte encoding */
|
|
487
|
+
if (pg_database_encoding_max_length() == 1)
|
|
488
|
+
return cliplen(mbstr, len, limit);
|
|
489
|
+
|
|
490
|
+
while (len > 0 && *mbstr)
|
|
491
|
+
{
|
|
492
|
+
l = pg_mblen(mbstr);
|
|
493
|
+
nch++;
|
|
494
|
+
if (nch > limit)
|
|
495
|
+
break;
|
|
496
|
+
clen += l;
|
|
497
|
+
len -= l;
|
|
498
|
+
mbstr += l;
|
|
499
|
+
}
|
|
500
|
+
return clen;
|
|
501
|
+
}
|
|
463
502
|
|
|
464
503
|
/* mbcliplen for any single-byte encoding */
|
|
465
504
|
static int
|
|
@@ -726,12 +765,12 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
|
|
|
726
765
|
* report_invalid_encoding: complain about invalid multibyte character
|
|
727
766
|
*
|
|
728
767
|
* note: len is remaining length of string, not length of character;
|
|
729
|
-
* len must be greater than zero
|
|
768
|
+
* len must be greater than zero (or we'd neglect initializing "buf").
|
|
730
769
|
*/
|
|
731
770
|
void
|
|
732
771
|
report_invalid_encoding(int encoding, const char *mbstr, int len)
|
|
733
772
|
{
|
|
734
|
-
int l =
|
|
773
|
+
int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
|
|
735
774
|
char buf[8 * 5 + 1];
|
|
736
775
|
char *p = buf;
|
|
737
776
|
int j,
|
|
@@ -758,7 +797,7 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
|
|
|
758
797
|
* report_untranslatable_char: complain about untranslatable character
|
|
759
798
|
*
|
|
760
799
|
* note: len is remaining length of string, not length of character;
|
|
761
|
-
* len must be greater than zero
|
|
800
|
+
* len must be greater than zero (or we'd neglect initializing "buf").
|
|
762
801
|
*/
|
|
763
802
|
|
|
764
803
|
|
|
@@ -54,6 +54,7 @@ AlignedAllocFree(void *pointer)
|
|
|
54
54
|
GetMemoryChunkContext(unaligned)->name, chunk);
|
|
55
55
|
#endif
|
|
56
56
|
|
|
57
|
+
/* Recursively pfree the unaligned chunk */
|
|
57
58
|
pfree(unaligned);
|
|
58
59
|
}
|
|
59
60
|
|
|
@@ -105,18 +106,32 @@ AlignedAllocRealloc(void *pointer, Size size, int flags)
|
|
|
105
106
|
Assert(old_size >= redirchunk->requested_size);
|
|
106
107
|
#endif
|
|
107
108
|
|
|
109
|
+
/*
|
|
110
|
+
* To keep things simple, we always allocate a new aligned chunk and copy
|
|
111
|
+
* data into it. Because of the above inaccuracy, this may end in copying
|
|
112
|
+
* more data than was in the original allocation request size, but that
|
|
113
|
+
* should be OK.
|
|
114
|
+
*/
|
|
108
115
|
ctx = GetMemoryChunkContext(unaligned);
|
|
109
116
|
newptr = MemoryContextAllocAligned(ctx, size, alignto, flags);
|
|
110
117
|
|
|
111
|
-
/*
|
|
112
|
-
|
|
113
|
-
* so we must mark the entire allocation as defined.
|
|
114
|
-
*/
|
|
115
|
-
if (likely(newptr != NULL))
|
|
118
|
+
/* Cope cleanly with OOM */
|
|
119
|
+
if (unlikely(newptr == NULL))
|
|
116
120
|
{
|
|
117
|
-
|
|
118
|
-
|
|
121
|
+
VALGRIND_MAKE_MEM_NOACCESS(redirchunk, sizeof(MemoryChunk));
|
|
122
|
+
return MemoryContextAllocationFailure(ctx, size, flags);
|
|
119
123
|
}
|
|
124
|
+
|
|
125
|
+
/*
|
|
126
|
+
* We may memcpy more than the original allocation request size, which
|
|
127
|
+
* would result in trying to copy trailing bytes that the original
|
|
128
|
+
* MemoryContextAllocAligned call marked NOACCESS. So we must mark the
|
|
129
|
+
* entire old_size as defined. That's slightly annoying, but probably not
|
|
130
|
+
* worth improving.
|
|
131
|
+
*/
|
|
132
|
+
VALGRIND_MAKE_MEM_DEFINED(pointer, old_size);
|
|
133
|
+
memcpy(newptr, pointer, Min(size, old_size));
|
|
134
|
+
|
|
120
135
|
pfree(unaligned);
|
|
121
136
|
|
|
122
137
|
return newptr;
|
|
@@ -1632,9 +1632,9 @@ AllocSetCheck(MemoryContext context)
|
|
|
1632
1632
|
prevblock = block, block = block->next)
|
|
1633
1633
|
{
|
|
1634
1634
|
char *bpoz = ((char *) block) + ALLOC_BLOCKHDRSZ;
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1635
|
+
Size blk_used = block->freeptr - bpoz;
|
|
1636
|
+
Size blk_data = 0;
|
|
1637
|
+
Size nchunks = 0;
|
|
1638
1638
|
bool has_external_chunk = false;
|
|
1639
1639
|
|
|
1640
1640
|
if (IsKeeperBlock(set, block))
|
|
@@ -236,7 +236,7 @@ BumpAllocLarge(MemoryContext context, Size size, int flags)
|
|
|
236
236
|
|
|
237
237
|
block = (BumpBlock *) malloc(blksize);
|
|
238
238
|
if (block == NULL)
|
|
239
|
-
return
|
|
239
|
+
return MemoryContextAllocationFailure(context, size, flags);
|
|
240
240
|
|
|
241
241
|
context->mem_allocated += blksize;
|
|
242
242
|
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* - enlargeStringInfo
|
|
10
10
|
* - appendStringInfo
|
|
11
11
|
* - appendStringInfoSpaces
|
|
12
|
+
* - makeStringInfo
|
|
13
|
+
* - destroyStringInfo
|
|
12
14
|
*--------------------------------------------------------------------
|
|
13
15
|
*/
|
|
14
16
|
|
|
@@ -51,7 +53,17 @@
|
|
|
51
53
|
*
|
|
52
54
|
* Create an empty 'StringInfoData' & return a pointer to it.
|
|
53
55
|
*/
|
|
56
|
+
StringInfo
|
|
57
|
+
makeStringInfo(void)
|
|
58
|
+
{
|
|
59
|
+
StringInfo res;
|
|
60
|
+
|
|
61
|
+
res = (StringInfo) palloc(sizeof(StringInfoData));
|
|
54
62
|
|
|
63
|
+
initStringInfo(res);
|
|
64
|
+
|
|
65
|
+
return res;
|
|
66
|
+
}
|
|
55
67
|
|
|
56
68
|
/*
|
|
57
69
|
* initStringInfo
|
|
@@ -350,4 +362,12 @@ enlargeStringInfo(StringInfo str, int needed)
|
|
|
350
362
|
* Frees a StringInfo and its buffer (opposite of makeStringInfo()).
|
|
351
363
|
* This must only be called on palloc'd StringInfos.
|
|
352
364
|
*/
|
|
365
|
+
void
|
|
366
|
+
destroyStringInfo(StringInfo str)
|
|
367
|
+
{
|
|
368
|
+
/* don't allow destroys of read-only StringInfos */
|
|
369
|
+
Assert(str->maxlen != 0);
|
|
353
370
|
|
|
371
|
+
pfree(str->data);
|
|
372
|
+
pfree(str);
|
|
373
|
+
}
|
|
@@ -76,6 +76,7 @@
|
|
|
76
76
|
* - pg_johab_dsplen
|
|
77
77
|
* - pg_johab_verifychar
|
|
78
78
|
* - pg_johab_verifystr
|
|
79
|
+
* - pg_encoding_mblen_or_incomplete
|
|
79
80
|
* - pg_encoding_mblen
|
|
80
81
|
*--------------------------------------------------------------------
|
|
81
82
|
*/
|
|
@@ -94,10 +95,31 @@
|
|
|
94
95
|
*/
|
|
95
96
|
#include "c.h"
|
|
96
97
|
|
|
98
|
+
#include <limits.h>
|
|
99
|
+
|
|
97
100
|
#include "mb/pg_wchar.h"
|
|
98
101
|
#include "utils/ascii.h"
|
|
99
102
|
|
|
100
103
|
|
|
104
|
+
/*
|
|
105
|
+
* In today's multibyte encodings other than UTF8, this two-byte sequence
|
|
106
|
+
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
|
|
107
|
+
*
|
|
108
|
+
* For historical reasons, several verifychar implementations opt to reject
|
|
109
|
+
* this pair specifically. Byte pair range constraints, in encoding
|
|
110
|
+
* originator documentation, always excluded this pair. No core conversion
|
|
111
|
+
* could translate it. However, longstanding verifychar implementations
|
|
112
|
+
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
|
|
113
|
+
* pairs not valid per encoding originator documentation. To avoid tightening
|
|
114
|
+
* core or non-core conversions in a security patch, we sought this one pair.
|
|
115
|
+
*
|
|
116
|
+
* PQescapeString() historically used spaces for BYTE1; many other values
|
|
117
|
+
* could suffice for BYTE1.
|
|
118
|
+
*/
|
|
119
|
+
#define NONUTF8_INVALID_BYTE0 (0x8d)
|
|
120
|
+
#define NONUTF8_INVALID_BYTE1 (' ')
|
|
121
|
+
|
|
122
|
+
|
|
101
123
|
/*
|
|
102
124
|
* Operations on multi-byte encodings are driven by a table of helper
|
|
103
125
|
* functions.
|
|
@@ -1547,6 +1569,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
|
|
|
1547
1569
|
if (len < l)
|
|
1548
1570
|
return -1;
|
|
1549
1571
|
|
|
1572
|
+
if (l == 2 &&
|
|
1573
|
+
s[0] == NONUTF8_INVALID_BYTE0 &&
|
|
1574
|
+
s[1] == NONUTF8_INVALID_BYTE1)
|
|
1575
|
+
return -1;
|
|
1576
|
+
|
|
1550
1577
|
while (--l > 0)
|
|
1551
1578
|
{
|
|
1552
1579
|
if (*++s == '\0')
|
|
@@ -1596,6 +1623,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
|
|
|
1596
1623
|
if (len < l)
|
|
1597
1624
|
return -1;
|
|
1598
1625
|
|
|
1626
|
+
if (l == 2 &&
|
|
1627
|
+
s[0] == NONUTF8_INVALID_BYTE0 &&
|
|
1628
|
+
s[1] == NONUTF8_INVALID_BYTE1)
|
|
1629
|
+
return -1;
|
|
1630
|
+
|
|
1599
1631
|
while (--l > 0)
|
|
1600
1632
|
{
|
|
1601
1633
|
if (*++s == '\0')
|
|
@@ -1645,6 +1677,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
|
|
|
1645
1677
|
if (len < l)
|
|
1646
1678
|
return -1;
|
|
1647
1679
|
|
|
1680
|
+
if (l == 2 &&
|
|
1681
|
+
s[0] == NONUTF8_INVALID_BYTE0 &&
|
|
1682
|
+
s[1] == NONUTF8_INVALID_BYTE1)
|
|
1683
|
+
return -1;
|
|
1684
|
+
|
|
1648
1685
|
while (--l > 0)
|
|
1649
1686
|
{
|
|
1650
1687
|
if (*++s == '\0')
|
|
@@ -2089,6 +2126,12 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
|
|
2089
2126
|
}
|
|
2090
2127
|
|
|
2091
2128
|
|
|
2129
|
+
/*
|
|
2130
|
+
* Fills the provided buffer with two bytes such that:
|
|
2131
|
+
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
|
|
2132
|
+
*/
|
|
2133
|
+
|
|
2134
|
+
|
|
2092
2135
|
/*
|
|
2093
2136
|
*-------------------------------------------------------------------
|
|
2094
2137
|
* encoding info table
|
|
@@ -2142,10 +2185,27 @@ const pg_wchar_tbl pg_wchar_table[] = {
|
|
|
2142
2185
|
/*
|
|
2143
2186
|
* Returns the byte length of a multibyte character.
|
|
2144
2187
|
*
|
|
2145
|
-
*
|
|
2146
|
-
*
|
|
2147
|
-
*
|
|
2148
|
-
*
|
|
2188
|
+
* Choose "mblen" functions based on the input string characteristics.
|
|
2189
|
+
* pg_encoding_mblen() can be used when ANY of these conditions are met:
|
|
2190
|
+
*
|
|
2191
|
+
* - The input string is zero-terminated
|
|
2192
|
+
*
|
|
2193
|
+
* - The input string is known to be valid in the encoding (e.g., string
|
|
2194
|
+
* converted from database encoding)
|
|
2195
|
+
*
|
|
2196
|
+
* - The encoding is not GB18030 (e.g., when only database encodings are
|
|
2197
|
+
* passed to 'encoding' parameter)
|
|
2198
|
+
*
|
|
2199
|
+
* encoding==GB18030 requires examining up to two bytes to determine character
|
|
2200
|
+
* length. Therefore, callers satisfying none of those conditions must use
|
|
2201
|
+
* pg_encoding_mblen_or_incomplete() instead, as access to mbstr[1] cannot be
|
|
2202
|
+
* guaranteed to be within allocation bounds.
|
|
2203
|
+
*
|
|
2204
|
+
* When dealing with text that is not certainly valid in the specified
|
|
2205
|
+
* encoding, the result may exceed the actual remaining string length.
|
|
2206
|
+
* Callers that are not prepared to deal with that should use Min(remaining,
|
|
2207
|
+
* pg_encoding_mblen_or_incomplete()). For zero-terminated strings, that and
|
|
2208
|
+
* pg_encoding_mblen_bounded() are interchangeable.
|
|
2149
2209
|
*/
|
|
2150
2210
|
int
|
|
2151
2211
|
pg_encoding_mblen(int encoding, const char *mbstr)
|
|
@@ -2156,8 +2216,28 @@ pg_encoding_mblen(int encoding, const char *mbstr)
|
|
|
2156
2216
|
}
|
|
2157
2217
|
|
|
2158
2218
|
/*
|
|
2159
|
-
* Returns the byte length of a multibyte character
|
|
2160
|
-
*
|
|
2219
|
+
* Returns the byte length of a multibyte character (possibly not
|
|
2220
|
+
* zero-terminated), or INT_MAX if too few bytes remain to determine a length.
|
|
2221
|
+
*/
|
|
2222
|
+
int
|
|
2223
|
+
pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr,
|
|
2224
|
+
size_t remaining)
|
|
2225
|
+
{
|
|
2226
|
+
/*
|
|
2227
|
+
* Define zero remaining as too few, even for single-byte encodings.
|
|
2228
|
+
* pg_gb18030_mblen() reads one or two bytes; single-byte encodings read
|
|
2229
|
+
* zero; others read one.
|
|
2230
|
+
*/
|
|
2231
|
+
if (remaining < 1 ||
|
|
2232
|
+
(encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2))
|
|
2233
|
+
return INT_MAX;
|
|
2234
|
+
return pg_encoding_mblen(encoding, mbstr);
|
|
2235
|
+
}
|
|
2236
|
+
|
|
2237
|
+
/*
|
|
2238
|
+
* Returns the byte length of a multibyte character; but not more than the
|
|
2239
|
+
* distance to the terminating zero byte. For input that might lack a
|
|
2240
|
+
* terminating zero, use Min(remaining, pg_encoding_mblen_or_incomplete()).
|
|
2161
2241
|
*/
|
|
2162
2242
|
|
|
2163
2243
|
|
|
@@ -2188,5 +2268,11 @@ pg_encoding_max_length(int encoding)
|
|
|
2188
2268
|
{
|
|
2189
2269
|
Assert(PG_VALID_ENCODING(encoding));
|
|
2190
2270
|
|
|
2191
|
-
|
|
2271
|
+
/*
|
|
2272
|
+
* Check for the encoding despite the assert, due to some mingw versions
|
|
2273
|
+
* otherwise issuing bogus warnings.
|
|
2274
|
+
*/
|
|
2275
|
+
return PG_VALID_ENCODING(encoding) ?
|
|
2276
|
+
pg_wchar_table[encoding].maxmblen :
|
|
2277
|
+
pg_wchar_table[PG_SQL_ASCII].maxmblen;
|
|
2192
2278
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* - pg_vsnprintf
|
|
4
4
|
* - dopr
|
|
5
5
|
* - pg_snprintf
|
|
6
|
-
* -
|
|
6
|
+
* - pg_strchrnul
|
|
7
7
|
* - dostr
|
|
8
8
|
* - flushbuffer
|
|
9
9
|
* - find_arguments
|
|
@@ -362,13 +362,22 @@ static void leading_pad(int zpad, int signvalue, int *padlen,
|
|
|
362
362
|
static void trailing_pad(int padlen, PrintfTarget *target);
|
|
363
363
|
|
|
364
364
|
/*
|
|
365
|
-
* If strchrnul exists (it's a glibc-ism
|
|
366
|
-
*
|
|
365
|
+
* If strchrnul exists (it's a glibc-ism, but since adopted by some other
|
|
366
|
+
* platforms), it's a good bit faster than the equivalent manual loop.
|
|
367
|
+
* Use it if possible, and if it doesn't exist, use this replacement.
|
|
367
368
|
*
|
|
368
369
|
* Note: glibc declares this as returning "char *", but that would require
|
|
369
370
|
* casting away const internally, so we don't follow that detail.
|
|
371
|
+
*
|
|
372
|
+
* Note: macOS has this too as of Sequoia 15.4, but it's hidden behind
|
|
373
|
+
* a deployment-target check that causes compile errors if the deployment
|
|
374
|
+
* target isn't high enough. So !HAVE_DECL_STRCHRNUL may mean "yes it's
|
|
375
|
+
* declared, but it doesn't compile". To avoid failing in that scenario,
|
|
376
|
+
* use a macro to avoid matching <string.h>'s name.
|
|
370
377
|
*/
|
|
371
|
-
#
|
|
378
|
+
#if !HAVE_DECL_STRCHRNUL
|
|
379
|
+
|
|
380
|
+
#define strchrnul pg_strchrnul
|
|
372
381
|
|
|
373
382
|
static inline const char *
|
|
374
383
|
strchrnul(const char *s, int c)
|
|
@@ -378,19 +387,7 @@ strchrnul(const char *s, int c)
|
|
|
378
387
|
return s;
|
|
379
388
|
}
|
|
380
389
|
|
|
381
|
-
#
|
|
382
|
-
|
|
383
|
-
/*
|
|
384
|
-
* glibc's <string.h> declares strchrnul only if _GNU_SOURCE is defined.
|
|
385
|
-
* While we typically use that on glibc platforms, configure will set
|
|
386
|
-
* HAVE_STRCHRNUL whether it's used or not. Fill in the missing declaration
|
|
387
|
-
* so that this file will compile cleanly with or without _GNU_SOURCE.
|
|
388
|
-
*/
|
|
389
|
-
#ifndef _GNU_SOURCE
|
|
390
|
-
extern char *strchrnul(const char *s, int c);
|
|
391
|
-
#endif
|
|
392
|
-
|
|
393
|
-
#endif /* HAVE_STRCHRNUL */
|
|
390
|
+
#endif /* !HAVE_DECL_STRCHRNUL */
|
|
394
391
|
|
|
395
392
|
|
|
396
393
|
/*
|
data/lib/pg_query/deparse.rb
CHANGED
|
@@ -1,18 +1,39 @@
|
|
|
1
1
|
module PgQuery
|
|
2
2
|
class ParserResult
|
|
3
|
-
def deparse
|
|
4
|
-
PgQuery.deparse(@tree)
|
|
3
|
+
def deparse(opts: nil)
|
|
4
|
+
PgQuery.deparse(@tree, opts: opts)
|
|
5
5
|
end
|
|
6
6
|
end
|
|
7
7
|
|
|
8
|
+
class DeparseComment
|
|
9
|
+
attr_accessor :match_location, :newlines_before_comment, :newlines_after_comment, :str
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
DeparseOpts = Struct.new(:pretty_print, :comments, :indent_size, :max_line_length,
|
|
13
|
+
:trailing_newline, :commas_start_of_line, keyword_init: true)
|
|
14
|
+
|
|
8
15
|
# Reconstruct all of the parsed queries into their original form
|
|
9
|
-
def self.deparse(tree)
|
|
10
|
-
if PgQuery::ParseResult.method(:encode).arity == 1
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
16
|
+
def self.deparse(tree, opts: nil)
|
|
17
|
+
protobuf_encoded = if PgQuery::ParseResult.method(:encode).arity == 1
|
|
18
|
+
PgQuery::ParseResult.encode(tree)
|
|
19
|
+
elsif PgQuery::ParseResult.method(:encode).arity == -1
|
|
20
|
+
PgQuery::ParseResult.encode(tree, recursion_limit: 1_000)
|
|
21
|
+
else
|
|
22
|
+
raise ArgumentError, 'Unsupported protobuf Ruby API'
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
if opts
|
|
26
|
+
PgQuery.deparse_protobuf_opts(
|
|
27
|
+
protobuf_encoded,
|
|
28
|
+
opts.pretty_print,
|
|
29
|
+
opts.comments || [],
|
|
30
|
+
opts.indent_size || 0,
|
|
31
|
+
opts.max_line_length || 0,
|
|
32
|
+
opts.trailing_newline,
|
|
33
|
+
opts.commas_start_of_line
|
|
34
|
+
).force_encoding('UTF-8')
|
|
14
35
|
else
|
|
15
|
-
|
|
36
|
+
PgQuery.deparse_protobuf(protobuf_encoded).force_encoding('UTF-8')
|
|
16
37
|
end
|
|
17
38
|
end
|
|
18
39
|
|
data/lib/pg_query/fingerprint.rb
CHANGED
|
@@ -61,7 +61,7 @@ module PgQuery
|
|
|
61
61
|
|
|
62
62
|
def ignored_node_type?(node)
|
|
63
63
|
[A_Const, Alias, ParamRef, SetToDefault, IntList, OidList].include?(node.class) ||
|
|
64
|
-
node.is_a?(TypeCast) &&
|
|
64
|
+
(node.is_a?(TypeCast) && %i[a_const param_ref].include?(node.arg.node))
|
|
65
65
|
end
|
|
66
66
|
|
|
67
67
|
def node_protobuf_field_name_to_json_name(node_class, field)
|
|
@@ -112,12 +112,10 @@ module PgQuery
|
|
|
112
112
|
fingerprint_value(val.gsub(/\d{2,}/, ''), hash, postgres_node_name, postgres_field_name, true)
|
|
113
113
|
next
|
|
114
114
|
end
|
|
115
|
-
when 'stmt_len'
|
|
116
|
-
next if node.is_a?(RawStmt)
|
|
117
|
-
when 'stmt_location'
|
|
115
|
+
when 'stmt_len', 'stmt_location'
|
|
118
116
|
next if node.is_a?(RawStmt)
|
|
119
117
|
when 'kind'
|
|
120
|
-
if node.is_a?(A_Expr) &&
|
|
118
|
+
if node.is_a?(A_Expr) && %i[AEXPR_OP_ANY AEXPR_IN].include?(val)
|
|
121
119
|
fingerprint_value(:AEXPR_OP, hash, postgres_node_name, postgres_field_name, true)
|
|
122
120
|
next
|
|
123
121
|
end
|
data/lib/pg_query/param_refs.rb
CHANGED
|
@@ -7,7 +7,7 @@ module PgQuery
|
|
|
7
7
|
case node
|
|
8
8
|
when PgQuery::ParamRef
|
|
9
9
|
# Ignore param refs inside type casts, as these are already handled
|
|
10
|
-
next if location[-3
|
|
10
|
+
next if location[-3..] == %i[type_cast arg param_ref]
|
|
11
11
|
|
|
12
12
|
results << { 'location' => node.location,
|
|
13
13
|
'length' => param_ref_length(node) }
|
data/lib/pg_query/parse.rb
CHANGED
|
@@ -24,9 +24,7 @@ module PgQuery
|
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
class ParserResult
|
|
27
|
-
attr_reader :query
|
|
28
|
-
attr_reader :tree
|
|
29
|
-
attr_reader :warnings
|
|
27
|
+
attr_reader :query, :tree, :warnings
|
|
30
28
|
|
|
31
29
|
def initialize(query, tree, warnings = [])
|
|
32
30
|
@query = query
|
|
@@ -108,6 +106,7 @@ module PgQuery
|
|
|
108
106
|
statements = @tree.stmts.dup.to_a.map(&:stmt)
|
|
109
107
|
from_clause_items = [] # types: select, dml, ddl
|
|
110
108
|
subselect_items = []
|
|
109
|
+
call_items = [] # CALL fn()
|
|
111
110
|
|
|
112
111
|
loop do
|
|
113
112
|
statement = statements.shift
|
|
@@ -173,10 +172,12 @@ module PgQuery
|
|
|
173
172
|
when :copy_stmt
|
|
174
173
|
from_clause_items << { item: PgQuery::Node.new(range_var: statement.copy_stmt.relation), type: :dml } if statement.copy_stmt.relation
|
|
175
174
|
statements << statement.copy_stmt.query
|
|
175
|
+
when :call_stmt
|
|
176
|
+
call_items << statement.call_stmt
|
|
176
177
|
# The following statement types are DDL (changing table structure)
|
|
177
178
|
when :alter_table_stmt
|
|
178
179
|
case statement.alter_table_stmt.objtype
|
|
179
|
-
when :OBJECT_INDEX # Index
|
|
180
|
+
when :OBJECT_INDEX # Index
|
|
180
181
|
# ignore `ALTER INDEX index_name`
|
|
181
182
|
else
|
|
182
183
|
from_clause_items << { item: PgQuery::Node.new(range_var: statement.alter_table_stmt.relation), type: :ddl }
|
|
@@ -230,11 +231,11 @@ module PgQuery
|
|
|
230
231
|
when :grant_stmt
|
|
231
232
|
objects = statement.grant_stmt.objects
|
|
232
233
|
case statement.grant_stmt.objtype
|
|
233
|
-
when :OBJECT_COLUMN # Column
|
|
234
|
+
when :OBJECT_COLUMN # Column
|
|
234
235
|
# FIXME
|
|
235
236
|
when :OBJECT_TABLE # Table
|
|
236
237
|
from_clause_items += objects.map { |o| { item: o, type: :ddl } }
|
|
237
|
-
when :OBJECT_SEQUENCE # Sequence
|
|
238
|
+
when :OBJECT_SEQUENCE # Sequence
|
|
238
239
|
# FIXME
|
|
239
240
|
end
|
|
240
241
|
when :lock_stmt
|
|
@@ -278,15 +279,20 @@ module PgQuery
|
|
|
278
279
|
end
|
|
279
280
|
when :bool_expr
|
|
280
281
|
subselect_items.concat(next_item.bool_expr.args.to_ary)
|
|
282
|
+
when :boolean_test
|
|
283
|
+
subselect_items << next_item.boolean_test.arg
|
|
281
284
|
when :coalesce_expr
|
|
282
285
|
subselect_items.concat(next_item.coalesce_expr.args.to_ary)
|
|
283
286
|
when :min_max_expr
|
|
284
287
|
subselect_items.concat(next_item.min_max_expr.args.to_ary)
|
|
288
|
+
when :null_test
|
|
289
|
+
subselect_items << next_item.null_test.arg
|
|
285
290
|
when :res_target
|
|
286
291
|
subselect_items << next_item.res_target.val
|
|
287
292
|
when :sub_link
|
|
288
293
|
statements << next_item.sub_link.subselect
|
|
289
294
|
when :func_call
|
|
295
|
+
# See also CALL below
|
|
290
296
|
subselect_items.concat(next_item.func_call.args.to_ary)
|
|
291
297
|
@functions << {
|
|
292
298
|
function: next_item.func_call.funcname.map { |f| f.string.sval }.join('.'),
|
|
@@ -301,6 +307,17 @@ module PgQuery
|
|
|
301
307
|
end
|
|
302
308
|
end
|
|
303
309
|
|
|
310
|
+
# CALL fn()
|
|
311
|
+
next_item = call_items.shift
|
|
312
|
+
if next_item
|
|
313
|
+
# Treat as a sub-select. Note the difference in underscore in func_call versus the above.
|
|
314
|
+
subselect_items.concat(next_item.funccall.args.to_ary)
|
|
315
|
+
@functions << {
|
|
316
|
+
function: next_item.funccall.funcname.map { |f| f.string.sval }.join('.'),
|
|
317
|
+
type: :call
|
|
318
|
+
}
|
|
319
|
+
end
|
|
320
|
+
|
|
304
321
|
next_item = from_clause_items.shift
|
|
305
322
|
if next_item && next_item[:item]
|
|
306
323
|
case next_item[:item].node
|
|
@@ -339,7 +356,7 @@ module PgQuery
|
|
|
339
356
|
@cte_names.uniq!
|
|
340
357
|
end
|
|
341
358
|
|
|
342
|
-
def statements_and_cte_names_for_with_clause(with_clause)
|
|
359
|
+
def statements_and_cte_names_for_with_clause(with_clause)
|
|
343
360
|
statements = []
|
|
344
361
|
cte_names = []
|
|
345
362
|
|