pg_query 2.2.0 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +59 -31
- data/Rakefile +2 -2
- data/ext/pg_query/include/access/amapi.h +45 -1
- data/ext/pg_query/include/access/attmap.h +1 -1
- data/ext/pg_query/include/access/attnum.h +2 -2
- data/ext/pg_query/include/access/clog.h +4 -2
- data/ext/pg_query/include/access/commit_ts.h +6 -9
- data/ext/pg_query/include/access/detoast.h +1 -11
- data/ext/pg_query/include/access/genam.h +15 -12
- data/ext/pg_query/include/access/gin.h +2 -2
- data/ext/pg_query/include/access/htup.h +1 -1
- data/ext/pg_query/include/access/htup_details.h +75 -87
- data/ext/pg_query/include/access/itup.h +7 -1
- data/ext/pg_query/include/access/parallel.h +2 -2
- data/ext/pg_query/include/access/printtup.h +1 -1
- data/ext/pg_query/include/access/relation.h +1 -1
- data/ext/pg_query/include/access/relscan.h +17 -2
- data/ext/pg_query/include/access/rmgr.h +30 -3
- data/ext/pg_query/include/access/rmgrlist.h +23 -23
- data/ext/pg_query/include/access/sdir.h +1 -1
- data/ext/pg_query/include/access/skey.h +1 -1
- data/ext/pg_query/include/access/stratnum.h +4 -2
- data/ext/pg_query/include/access/sysattr.h +1 -1
- data/ext/pg_query/include/access/table.h +2 -1
- data/ext/pg_query/include/access/tableam.h +272 -20
- data/ext/pg_query/include/access/toast_compression.h +73 -0
- data/ext/pg_query/include/access/transam.h +123 -13
- data/ext/pg_query/include/access/tupconvert.h +1 -1
- data/ext/pg_query/include/access/tupdesc.h +1 -1
- data/ext/pg_query/include/access/tupmacs.h +3 -3
- data/ext/pg_query/include/access/twophase.h +3 -1
- data/ext/pg_query/include/access/xact.h +73 -19
- data/ext/pg_query/include/access/xlog.h +60 -155
- data/ext/pg_query/include/access/xlog_internal.h +40 -13
- data/ext/pg_query/include/access/xlogdefs.h +8 -16
- data/ext/pg_query/include/access/xlogprefetcher.h +55 -0
- data/ext/pg_query/include/access/xlogreader.h +145 -39
- data/ext/pg_query/include/access/xlogrecord.h +18 -9
- data/ext/pg_query/include/access/xlogrecovery.h +157 -0
- data/ext/pg_query/include/c.h +101 -44
- data/ext/pg_query/include/catalog/catalog.h +3 -1
- data/ext/pg_query/include/catalog/catversion.h +2 -2
- data/ext/pg_query/include/catalog/dependency.h +8 -16
- data/ext/pg_query/include/catalog/genbki.h +83 -5
- data/ext/pg_query/include/catalog/index.h +18 -3
- data/ext/pg_query/include/catalog/indexing.h +12 -324
- data/ext/pg_query/include/catalog/namespace.h +4 -2
- data/ext/pg_query/include/catalog/objectaccess.h +70 -2
- data/ext/pg_query/include/catalog/objectaddress.h +11 -6
- data/ext/pg_query/include/catalog/pg_aggregate.h +14 -10
- data/ext/pg_query/include/catalog/pg_aggregate_d.h +2 -1
- data/ext/pg_query/include/catalog/pg_am.h +4 -1
- data/ext/pg_query/include/catalog/pg_am_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_attribute.h +27 -10
- data/ext/pg_query/include/catalog/pg_attribute_d.h +21 -18
- data/ext/pg_query/include/catalog/pg_authid.h +7 -2
- data/ext/pg_query/include/catalog/pg_authid_d.h +17 -9
- data/ext/pg_query/include/catalog/pg_class.h +44 -14
- data/ext/pg_query/include/catalog/pg_class_d.h +30 -1
- data/ext/pg_query/include/catalog/pg_collation.h +33 -8
- data/ext/pg_query/include/catalog/pg_collation_d.h +20 -3
- data/ext/pg_query/include/catalog/pg_constraint.h +38 -12
- data/ext/pg_query/include/catalog/pg_constraint_d.h +10 -4
- data/ext/pg_query/include/catalog/pg_control.h +3 -5
- data/ext/pg_query/include/catalog/pg_conversion.h +7 -4
- data/ext/pg_query/include/catalog/pg_conversion_d.h +4 -1
- data/ext/pg_query/include/catalog/pg_depend.h +11 -7
- data/ext/pg_query/include/catalog/pg_depend_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_event_trigger.h +9 -3
- data/ext/pg_query/include/catalog/pg_event_trigger_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_index.h +17 -7
- data/ext/pg_query/include/catalog/pg_index_d.h +20 -17
- data/ext/pg_query/include/catalog/pg_language.h +10 -5
- data/ext/pg_query/include/catalog/pg_language_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_namespace.h +7 -2
- data/ext/pg_query/include/catalog/pg_namespace_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_opclass.h +8 -5
- data/ext/pg_query/include/catalog/pg_opclass_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_operator.h +18 -15
- data/ext/pg_query/include/catalog/pg_operator_d.h +37 -1
- data/ext/pg_query/include/catalog/pg_opfamily.h +6 -3
- data/ext/pg_query/include/catalog/pg_opfamily_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_parameter_acl.h +60 -0
- data/ext/pg_query/include/catalog/pg_parameter_acl_d.h +34 -0
- data/ext/pg_query/include/catalog/pg_partitioned_table.h +20 -9
- data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +2 -1
- data/ext/pg_query/include/catalog/pg_proc.h +20 -11
- data/ext/pg_query/include/catalog/pg_proc_d.h +10 -8
- data/ext/pg_query/include/catalog/pg_publication.h +50 -7
- data/ext/pg_query/include/catalog/pg_publication_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_replication_origin.h +6 -1
- data/ext/pg_query/include/catalog/pg_replication_origin_d.h +5 -1
- data/ext/pg_query/include/catalog/pg_statistic.h +19 -12
- data/ext/pg_query/include/catalog/pg_statistic_d.h +2 -1
- data/ext/pg_query/include/catalog/pg_statistic_ext.h +19 -5
- data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +7 -2
- data/ext/pg_query/include/catalog/pg_transform.h +8 -5
- data/ext/pg_query/include/catalog/pg_transform_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_trigger.h +24 -8
- data/ext/pg_query/include/catalog/pg_trigger_d.h +4 -1
- data/ext/pg_query/include/catalog/pg_ts_config.h +6 -3
- data/ext/pg_query/include/catalog/pg_ts_config_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_ts_dict.h +8 -3
- data/ext/pg_query/include/catalog/pg_ts_dict_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_ts_parser.h +6 -3
- data/ext/pg_query/include/catalog/pg_ts_parser_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_ts_template.h +6 -3
- data/ext/pg_query/include/catalog/pg_ts_template_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_type.h +55 -24
- data/ext/pg_query/include/catalog/pg_type_d.h +70 -31
- data/ext/pg_query/include/catalog/storage.h +5 -3
- data/ext/pg_query/include/commands/async.h +3 -4
- data/ext/pg_query/include/commands/dbcommands.h +2 -1
- data/ext/pg_query/include/commands/defrem.h +11 -24
- data/ext/pg_query/include/commands/event_trigger.h +2 -2
- data/ext/pg_query/include/commands/explain.h +1 -1
- data/ext/pg_query/include/commands/prepare.h +1 -1
- data/ext/pg_query/include/commands/tablespace.h +2 -2
- data/ext/pg_query/include/commands/trigger.h +18 -16
- data/ext/pg_query/include/commands/user.h +2 -2
- data/ext/pg_query/include/commands/vacuum.h +88 -41
- data/ext/pg_query/include/commands/variable.h +1 -1
- data/ext/pg_query/include/common/file_perm.h +4 -4
- data/ext/pg_query/include/common/hashfn.h +1 -1
- data/ext/pg_query/include/common/ip.h +1 -7
- data/ext/pg_query/include/common/keywords.h +2 -6
- data/ext/pg_query/include/common/kwlookup.h +1 -1
- data/ext/pg_query/include/common/pg_prng.h +60 -0
- data/ext/pg_query/include/common/relpath.h +2 -2
- data/ext/pg_query/include/common/string.h +24 -1
- data/ext/pg_query/include/common/unicode_combining_table.h +114 -2
- data/ext/pg_query/include/common/unicode_east_asian_fw_table.h +125 -0
- data/ext/pg_query/include/datatype/timestamp.h +40 -1
- data/ext/pg_query/include/executor/execdesc.h +1 -1
- data/ext/pg_query/include/executor/executor.h +65 -22
- data/ext/pg_query/include/executor/functions.h +17 -3
- data/ext/pg_query/include/executor/instrument.h +33 -16
- data/ext/pg_query/include/executor/spi.h +41 -3
- data/ext/pg_query/include/executor/tablefunc.h +1 -1
- data/ext/pg_query/include/executor/tuptable.h +1 -1
- data/ext/pg_query/include/fmgr.h +13 -7
- data/ext/pg_query/include/funcapi.h +16 -4
- data/ext/pg_query/include/getaddrinfo.h +1 -1
- data/ext/pg_query/include/jit/jit.h +11 -11
- data/ext/pg_query/include/kwlist_d.h +517 -494
- data/ext/pg_query/include/lib/dshash.h +112 -0
- data/ext/pg_query/include/lib/ilist.h +20 -1
- data/ext/pg_query/include/lib/pairingheap.h +1 -1
- data/ext/pg_query/include/lib/simplehash.h +140 -15
- data/ext/pg_query/include/lib/sort_template.h +432 -0
- data/ext/pg_query/include/lib/stringinfo.h +1 -1
- data/ext/pg_query/include/libpq/auth.h +6 -4
- data/ext/pg_query/include/libpq/crypt.h +5 -4
- data/ext/pg_query/include/libpq/hba.h +43 -4
- data/ext/pg_query/include/libpq/libpq-be.h +23 -6
- data/ext/pg_query/include/libpq/libpq.h +30 -20
- data/ext/pg_query/include/libpq/pqcomm.h +17 -31
- data/ext/pg_query/include/libpq/pqformat.h +1 -1
- data/ext/pg_query/include/libpq/pqsignal.h +4 -4
- data/ext/pg_query/include/mb/pg_wchar.h +105 -23
- data/ext/pg_query/include/mb/stringinfo_mb.h +1 -1
- data/ext/pg_query/include/miscadmin.h +47 -41
- data/ext/pg_query/include/nodes/bitmapset.h +1 -1
- data/ext/pg_query/include/nodes/execnodes.h +270 -78
- data/ext/pg_query/include/nodes/extensible.h +4 -2
- data/ext/pg_query/include/nodes/lockoptions.h +1 -1
- data/ext/pg_query/include/nodes/makefuncs.h +7 -6
- data/ext/pg_query/include/nodes/memnodes.h +5 -3
- data/ext/pg_query/include/nodes/nodeFuncs.h +1 -1
- data/ext/pg_query/include/nodes/nodes.h +30 -11
- data/ext/pg_query/include/nodes/params.h +1 -1
- data/ext/pg_query/include/nodes/parsenodes.h +322 -90
- data/ext/pg_query/include/nodes/pathnodes.h +243 -66
- data/ext/pg_query/include/nodes/pg_list.h +75 -69
- data/ext/pg_query/include/nodes/plannodes.h +111 -28
- data/ext/pg_query/include/nodes/primnodes.h +99 -47
- data/ext/pg_query/include/nodes/print.h +1 -1
- data/ext/pg_query/include/nodes/tidbitmap.h +1 -1
- data/ext/pg_query/include/nodes/value.h +58 -39
- data/ext/pg_query/include/optimizer/cost.h +9 -2
- data/ext/pg_query/include/optimizer/geqo.h +9 -7
- data/ext/pg_query/include/optimizer/geqo_gene.h +1 -1
- data/ext/pg_query/include/optimizer/optimizer.h +25 -17
- data/ext/pg_query/include/optimizer/paths.h +6 -6
- data/ext/pg_query/include/optimizer/planmain.h +15 -14
- data/ext/pg_query/include/parser/analyze.h +19 -5
- data/ext/pg_query/include/parser/gram.h +947 -913
- data/ext/pg_query/include/parser/gramparse.h +1 -1
- data/ext/pg_query/include/parser/kwlist.h +463 -453
- data/ext/pg_query/include/parser/parse_agg.h +2 -7
- data/ext/pg_query/include/parser/parse_coerce.h +3 -1
- data/ext/pg_query/include/parser/parse_expr.h +2 -3
- data/ext/pg_query/include/parser/parse_func.h +2 -1
- data/ext/pg_query/include/parser/parse_node.h +21 -9
- data/ext/pg_query/include/parser/parse_oper.h +1 -3
- data/ext/pg_query/include/parser/parse_relation.h +5 -4
- data/ext/pg_query/include/parser/parse_type.h +1 -1
- data/ext/pg_query/include/parser/parser.h +31 -4
- data/ext/pg_query/include/parser/parsetree.h +1 -1
- data/ext/pg_query/include/parser/scanner.h +1 -1
- data/ext/pg_query/include/parser/scansup.h +2 -5
- data/ext/pg_query/include/partitioning/partdefs.h +1 -1
- data/ext/pg_query/include/pg_config.h +83 -41
- data/ext/pg_query/include/pg_config_manual.h +74 -21
- data/ext/pg_query/include/pg_getopt.h +6 -6
- data/ext/pg_query/include/pg_query.h +5 -4
- data/ext/pg_query/include/pg_query_enum_defs.c +358 -241
- data/ext/pg_query/include/pg_query_fingerprint_conds.c +44 -7
- data/ext/pg_query/include/pg_query_fingerprint_defs.c +939 -113
- data/ext/pg_query/include/pg_query_outfuncs_conds.c +43 -13
- data/ext/pg_query/include/pg_query_outfuncs_defs.c +151 -26
- data/ext/pg_query/include/pg_query_readfuncs_conds.c +11 -2
- data/ext/pg_query/include/pg_query_readfuncs_defs.c +173 -30
- data/ext/pg_query/include/pg_trace.h +1 -1
- data/ext/pg_query/include/pgstat.h +449 -1238
- data/ext/pg_query/include/pgtime.h +14 -4
- data/ext/pg_query/include/pl_gram.h +126 -128
- data/ext/pg_query/include/pl_reserved_kwlist.h +1 -1
- data/ext/pg_query/include/pl_reserved_kwlist_d.h +10 -10
- data/ext/pg_query/include/pl_unreserved_kwlist.h +2 -3
- data/ext/pg_query/include/pl_unreserved_kwlist_d.h +54 -56
- data/ext/pg_query/include/plerrcodes.h +9 -1
- data/ext/pg_query/include/plpgsql.h +52 -54
- data/ext/pg_query/include/port/atomics/arch-arm.h +7 -1
- data/ext/pg_query/include/port/atomics/arch-ppc.h +1 -1
- data/ext/pg_query/include/port/atomics/arch-x86.h +1 -1
- data/ext/pg_query/include/port/atomics/fallback.h +1 -1
- data/ext/pg_query/include/port/atomics/generic-gcc.h +3 -3
- data/ext/pg_query/include/port/atomics/generic.h +1 -1
- data/ext/pg_query/include/port/atomics.h +1 -1
- data/ext/pg_query/include/port/pg_bitutils.h +40 -10
- data/ext/pg_query/include/port/pg_bswap.h +1 -1
- data/ext/pg_query/include/port/pg_crc32c.h +1 -1
- data/ext/pg_query/include/port.h +71 -46
- data/ext/pg_query/include/portability/instr_time.h +1 -1
- data/ext/pg_query/include/postgres.h +60 -16
- data/ext/pg_query/include/postmaster/autovacuum.h +17 -17
- data/ext/pg_query/include/postmaster/auxprocess.h +20 -0
- data/ext/pg_query/include/postmaster/bgworker.h +2 -1
- data/ext/pg_query/include/postmaster/bgworker_internals.h +2 -2
- data/ext/pg_query/include/postmaster/bgwriter.h +5 -5
- data/ext/pg_query/include/postmaster/fork_process.h +1 -1
- data/ext/pg_query/include/postmaster/interrupt.h +1 -1
- data/ext/pg_query/include/postmaster/pgarch.h +42 -8
- data/ext/pg_query/include/postmaster/postmaster.h +18 -17
- data/ext/pg_query/include/postmaster/startup.h +39 -0
- data/ext/pg_query/include/postmaster/syslogger.h +15 -10
- data/ext/pg_query/include/postmaster/walwriter.h +3 -3
- data/ext/pg_query/include/protobuf/pg_query.pb-c.h +1419 -914
- data/ext/pg_query/include/protobuf/pg_query.pb.h +43678 -32769
- data/ext/pg_query/include/regex/regex.h +18 -16
- data/ext/pg_query/include/replication/logicallauncher.h +3 -5
- data/ext/pg_query/include/replication/logicalproto.h +161 -17
- data/ext/pg_query/include/replication/logicalworker.h +1 -1
- data/ext/pg_query/include/replication/origin.h +7 -7
- data/ext/pg_query/include/replication/reorderbuffer.h +259 -42
- data/ext/pg_query/include/replication/slot.h +22 -11
- data/ext/pg_query/include/replication/syncrep.h +5 -5
- data/ext/pg_query/include/replication/walreceiver.h +145 -13
- data/ext/pg_query/include/replication/walsender.h +8 -8
- data/ext/pg_query/include/rewrite/prs2lock.h +1 -1
- data/ext/pg_query/include/rewrite/rewriteHandler.h +1 -3
- data/ext/pg_query/include/rewrite/rewriteManip.h +1 -1
- data/ext/pg_query/include/rewrite/rewriteSupport.h +1 -1
- data/ext/pg_query/include/storage/backendid.h +3 -3
- data/ext/pg_query/include/storage/block.h +4 -10
- data/ext/pg_query/include/storage/buf.h +1 -1
- data/ext/pg_query/include/storage/bufmgr.h +19 -14
- data/ext/pg_query/include/storage/bufpage.h +6 -8
- data/ext/pg_query/include/storage/condition_variable.h +13 -2
- data/ext/pg_query/include/storage/dsm.h +4 -1
- data/ext/pg_query/include/storage/dsm_impl.h +3 -2
- data/ext/pg_query/include/storage/fd.h +33 -3
- data/ext/pg_query/include/storage/fileset.h +40 -0
- data/ext/pg_query/include/storage/ipc.h +4 -1
- data/ext/pg_query/include/storage/item.h +1 -1
- data/ext/pg_query/include/storage/itemid.h +1 -1
- data/ext/pg_query/include/storage/itemptr.h +3 -1
- data/ext/pg_query/include/storage/large_object.h +2 -2
- data/ext/pg_query/include/storage/latch.h +9 -13
- data/ext/pg_query/include/storage/lmgr.h +2 -1
- data/ext/pg_query/include/storage/lock.h +11 -8
- data/ext/pg_query/include/storage/lockdefs.h +2 -2
- data/ext/pg_query/include/storage/lwlock.h +5 -32
- data/ext/pg_query/include/storage/lwlocknames.h +0 -1
- data/ext/pg_query/include/storage/off.h +1 -1
- data/ext/pg_query/include/storage/pg_sema.h +1 -1
- data/ext/pg_query/include/storage/pg_shmem.h +9 -7
- data/ext/pg_query/include/storage/pmsignal.h +15 -4
- data/ext/pg_query/include/storage/predicate.h +4 -4
- data/ext/pg_query/include/storage/proc.h +173 -59
- data/ext/pg_query/include/storage/procarray.h +98 -0
- data/ext/pg_query/include/storage/proclist_types.h +1 -1
- data/ext/pg_query/include/storage/procsignal.h +3 -7
- data/ext/pg_query/include/storage/relfilenode.h +1 -1
- data/ext/pg_query/include/storage/s_lock.h +60 -21
- data/ext/pg_query/include/storage/sharedfileset.h +3 -11
- data/ext/pg_query/include/storage/shm_mq.h +5 -4
- data/ext/pg_query/include/storage/shm_toc.h +1 -1
- data/ext/pg_query/include/storage/shmem.h +1 -1
- data/ext/pg_query/include/storage/sinval.h +3 -3
- data/ext/pg_query/include/storage/sinvaladt.h +1 -1
- data/ext/pg_query/include/storage/smgr.h +10 -8
- data/ext/pg_query/include/storage/spin.h +2 -2
- data/ext/pg_query/include/storage/standby.h +13 -6
- data/ext/pg_query/include/storage/standbydefs.h +2 -2
- data/ext/pg_query/include/storage/sync.h +7 -3
- data/ext/pg_query/include/tcop/cmdtag.h +1 -1
- data/ext/pg_query/include/tcop/cmdtaglist.h +3 -2
- data/ext/pg_query/include/tcop/deparse_utility.h +1 -1
- data/ext/pg_query/include/tcop/dest.h +1 -1
- data/ext/pg_query/include/tcop/fastpath.h +1 -2
- data/ext/pg_query/include/tcop/pquery.h +1 -1
- data/ext/pg_query/include/tcop/tcopprot.h +19 -11
- data/ext/pg_query/include/tcop/utility.h +7 -3
- data/ext/pg_query/include/tsearch/ts_cache.h +2 -2
- data/ext/pg_query/include/utils/acl.h +24 -3
- data/ext/pg_query/include/utils/aclchk_internal.h +1 -1
- data/ext/pg_query/include/utils/array.h +7 -2
- data/ext/pg_query/include/utils/backend_progress.h +44 -0
- data/ext/pg_query/include/utils/backend_status.h +321 -0
- data/ext/pg_query/include/utils/builtins.h +10 -11
- data/ext/pg_query/include/utils/bytea.h +3 -2
- data/ext/pg_query/include/utils/catcache.h +1 -1
- data/ext/pg_query/include/utils/date.h +1 -1
- data/ext/pg_query/include/utils/datetime.h +8 -7
- data/ext/pg_query/include/utils/datum.h +9 -1
- data/ext/pg_query/include/utils/dsa.h +1 -1
- data/ext/pg_query/include/utils/dynahash.h +4 -3
- data/ext/pg_query/include/utils/elog.h +52 -21
- data/ext/pg_query/include/utils/errcodes.h +2 -0
- data/ext/pg_query/include/utils/expandeddatum.h +1 -1
- data/ext/pg_query/include/utils/expandedrecord.h +1 -1
- data/ext/pg_query/include/utils/float.h +7 -7
- data/ext/pg_query/include/utils/fmgroids.h +1300 -696
- data/ext/pg_query/include/utils/fmgrprotos.h +199 -16
- data/ext/pg_query/include/utils/fmgrtab.h +6 -5
- data/ext/pg_query/include/utils/guc.h +69 -43
- data/ext/pg_query/include/utils/guc_tables.h +23 -19
- data/ext/pg_query/include/utils/hsearch.h +15 -11
- data/ext/pg_query/include/utils/inval.h +4 -1
- data/ext/pg_query/include/utils/lsyscache.h +11 -1
- data/ext/pg_query/include/utils/memdebug.h +1 -1
- data/ext/pg_query/include/utils/memutils.h +8 -3
- data/ext/pg_query/include/utils/numeric.h +19 -5
- data/ext/pg_query/include/utils/palloc.h +25 -3
- data/ext/pg_query/include/utils/partcache.h +1 -1
- data/ext/pg_query/include/utils/pg_locale.h +17 -9
- data/ext/pg_query/include/utils/pg_lsn.h +1 -1
- data/ext/pg_query/include/utils/pgstat_internal.h +784 -0
- data/ext/pg_query/include/utils/pidfile.h +1 -1
- data/ext/pg_query/include/utils/plancache.h +6 -5
- data/ext/pg_query/include/utils/portal.h +10 -12
- data/ext/pg_query/include/utils/ps_status.h +1 -1
- data/ext/pg_query/include/utils/queryenvironment.h +1 -1
- data/ext/pg_query/include/utils/queryjumble.h +88 -0
- data/ext/pg_query/include/utils/regproc.h +14 -3
- data/ext/pg_query/include/utils/rel.h +71 -19
- data/ext/pg_query/include/utils/relcache.h +8 -5
- data/ext/pg_query/include/utils/reltrigger.h +1 -1
- data/ext/pg_query/include/utils/resowner.h +1 -1
- data/ext/pg_query/include/utils/rls.h +2 -2
- data/ext/pg_query/include/utils/ruleutils.h +4 -1
- data/ext/pg_query/include/utils/sharedtuplestore.h +1 -1
- data/ext/pg_query/include/utils/snapmgr.h +34 -14
- data/ext/pg_query/include/utils/snapshot.h +14 -1
- data/ext/pg_query/include/utils/sortsupport.h +117 -2
- data/ext/pg_query/include/utils/syscache.h +6 -1
- data/ext/pg_query/include/utils/timeout.h +11 -4
- data/ext/pg_query/include/utils/timestamp.h +6 -5
- data/ext/pg_query/include/utils/tuplesort.h +25 -11
- data/ext/pg_query/include/utils/tuplestore.h +2 -2
- data/ext/pg_query/include/utils/typcache.h +24 -17
- data/ext/pg_query/include/utils/tzparser.h +1 -1
- data/ext/pg_query/include/utils/varlena.h +5 -3
- data/ext/pg_query/include/utils/wait_event.h +289 -0
- data/ext/pg_query/include/utils/xml.h +4 -4
- data/ext/pg_query/pg_query.pb-c.c +4302 -2304
- data/ext/pg_query/pg_query_deparse.c +1106 -373
- data/ext/pg_query/pg_query_fingerprint.c +30 -10
- data/ext/pg_query/pg_query_json_plpgsql.c +0 -25
- data/ext/pg_query/pg_query_normalize.c +1 -1
- data/ext/pg_query/pg_query_outfuncs_json.c +54 -16
- data/ext/pg_query/pg_query_outfuncs_protobuf.c +70 -10
- data/ext/pg_query/pg_query_parse.c +1 -1
- data/ext/pg_query/pg_query_readfuncs_protobuf.c +42 -8
- data/ext/pg_query/pg_query_scan.c +2 -1
- data/ext/pg_query/pg_query_split.c +3 -2
- data/ext/pg_query/src_backend_catalog_namespace.c +20 -9
- data/ext/pg_query/src_backend_catalog_pg_proc.c +4 -1
- data/ext/pg_query/src_backend_commands_define.c +11 -1
- data/ext/pg_query/src_backend_nodes_bitmapset.c +3 -1
- data/ext/pg_query/src_backend_nodes_copyfuncs.c +401 -76
- data/ext/pg_query/src_backend_nodes_equalfuncs.c +290 -46
- data/ext/pg_query/src_backend_nodes_extensible.c +1 -1
- data/ext/pg_query/src_backend_nodes_list.c +74 -11
- data/ext/pg_query/src_backend_nodes_makefuncs.c +5 -4
- data/ext/pg_query/src_backend_nodes_nodeFuncs.c +55 -12
- data/ext/pg_query/src_backend_nodes_value.c +28 -19
- data/ext/pg_query/src_backend_parser_gram.c +33874 -31261
- data/ext/pg_query/src_backend_parser_parser.c +26 -7
- data/ext/pg_query/src_backend_parser_scan.c +172 -209
- data/ext/pg_query/src_backend_parser_scansup.c +4 -28
- data/ext/pg_query/src_backend_postmaster_postmaster.c +77 -106
- data/ext/pg_query/src_backend_storage_ipc_ipc.c +13 -4
- data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +5 -4
- data/ext/pg_query/src_backend_tcop_postgres.c +62 -23
- data/ext/pg_query/src_backend_utils_activity_pgstat_database.c +140 -0
- data/ext/pg_query/src_backend_utils_adt_datum.c +13 -1
- data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +1 -1
- data/ext/pg_query/src_backend_utils_adt_format_type.c +6 -2
- data/ext/pg_query/src_backend_utils_adt_ruleutils.c +71 -5
- data/ext/pg_query/src_backend_utils_error_assert.c +16 -14
- data/ext/pg_query/src_backend_utils_error_elog.c +172 -99
- data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +12 -17
- data/ext/pg_query/src_backend_utils_hash_dynahash.c +40 -10
- data/ext/pg_query/src_backend_utils_init_globals.c +5 -5
- data/ext/pg_query/src_backend_utils_mb_mbutils.c +55 -66
- data/ext/pg_query/src_backend_utils_misc_guc.c +206 -45
- data/ext/pg_query/src_backend_utils_mmgr_aset.c +7 -5
- data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +123 -35
- data/ext/pg_query/src_common_encnames.c +1 -1
- data/ext/pg_query/src_common_hashfn.c +3 -3
- data/ext/pg_query/src_common_keywords.c +15 -2
- data/ext/pg_query/src_common_kwlist_d.h +517 -494
- data/ext/pg_query/src_common_kwlookup.c +1 -1
- data/ext/pg_query/src_common_pg_prng.c +152 -0
- data/ext/pg_query/src_common_psprintf.c +1 -1
- data/ext/pg_query/src_common_string.c +7 -1
- data/ext/pg_query/src_common_stringinfo.c +1 -1
- data/ext/pg_query/src_common_wchar.c +701 -109
- data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +45 -20
- data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +1 -18
- data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +1233 -1259
- data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +1 -1
- data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +10 -10
- data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +2 -2
- data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +54 -56
- data/ext/pg_query/src_port_pg_bitutils.c +41 -31
- data/ext/pg_query/src_port_pgsleep.c +1 -1
- data/ext/pg_query/src_port_pgstrcasecmp.c +1 -1
- data/ext/pg_query/src_port_qsort.c +12 -224
- data/ext/pg_query/src_port_snprintf.c +37 -13
- data/ext/pg_query/src_port_strerror.c +9 -19
- data/ext/pg_query/src_port_strnlen.c +1 -1
- data/lib/pg_query/filter_columns.rb +1 -1
- data/lib/pg_query/fingerprint.rb +5 -1
- data/lib/pg_query/node.rb +2 -2
- data/lib/pg_query/param_refs.rb +1 -1
- data/lib/pg_query/parse.rb +20 -8
- data/lib/pg_query/pg_query_pb.rb +1108 -942
- data/lib/pg_query/treewalker.rb +6 -0
- data/lib/pg_query/truncate.rb +1 -1
- data/lib/pg_query/version.rb +1 -1
- metadata +27 -17
- data/ext/pg_query/include/access/xloginsert.h +0 -64
- data/ext/pg_query/include/bootstrap/bootstrap.h +0 -62
- data/ext/pg_query/include/parser/parse_clause.h +0 -54
- data/ext/pg_query/include/parser/parse_collate.h +0 -27
- data/ext/pg_query/include/parser/parse_target.h +0 -46
- data/ext/pg_query/pg_query_ruby_freebsd.sym +0 -2
- data/ext/pg_query/src_backend_libpq_pqcomm.c +0 -659
- data/ext/pg_query/src_backend_parser_parse_expr.c +0 -313
- data/ext/pg_query/src_port_erand48.c +0 -127
- data/ext/pg_query/src_port_random.c +0 -31
@@ -8,18 +8,21 @@
|
|
8
8
|
* - pg_wchar2single_with_len
|
9
9
|
* - pg_ascii_mblen
|
10
10
|
* - pg_ascii_dsplen
|
11
|
-
* -
|
11
|
+
* - pg_ascii_verifychar
|
12
|
+
* - pg_ascii_verifystr
|
12
13
|
* - pg_eucjp2wchar_with_len
|
13
14
|
* - pg_euc2wchar_with_len
|
14
15
|
* - pg_wchar2euc_with_len
|
15
16
|
* - pg_eucjp_mblen
|
16
17
|
* - pg_euc_mblen
|
17
18
|
* - pg_eucjp_dsplen
|
18
|
-
* -
|
19
|
+
* - pg_eucjp_verifychar
|
20
|
+
* - pg_eucjp_verifystr
|
19
21
|
* - pg_euccn2wchar_with_len
|
20
22
|
* - pg_euccn_mblen
|
21
23
|
* - pg_euccn_dsplen
|
22
|
-
* -
|
24
|
+
* - pg_euckr_verifychar
|
25
|
+
* - pg_euckr_verifystr
|
23
26
|
* - pg_euckr2wchar_with_len
|
24
27
|
* - pg_euckr_mblen
|
25
28
|
* - pg_euckr_dsplen
|
@@ -27,7 +30,8 @@
|
|
27
30
|
* - pg_euctw2wchar_with_len
|
28
31
|
* - pg_euctw_mblen
|
29
32
|
* - pg_euctw_dsplen
|
30
|
-
* -
|
33
|
+
* - pg_euctw_verifychar
|
34
|
+
* - pg_euctw_verifystr
|
31
35
|
* - pg_utf2wchar_with_len
|
32
36
|
* - pg_wchar2utf_with_len
|
33
37
|
* - unicode_to_utf8
|
@@ -35,34 +39,45 @@
|
|
35
39
|
* - utf8_to_unicode
|
36
40
|
* - ucs_wcwidth
|
37
41
|
* - mbbisearch
|
38
|
-
* -
|
42
|
+
* - pg_utf8_verifychar
|
39
43
|
* - pg_utf8_islegal
|
44
|
+
* - pg_utf8_verifystr
|
45
|
+
* - utf8_advance
|
46
|
+
* - Utf8Transition
|
40
47
|
* - pg_mule2wchar_with_len
|
41
48
|
* - pg_wchar2mule_with_len
|
42
49
|
* - pg_mule_dsplen
|
43
|
-
* -
|
50
|
+
* - pg_mule_verifychar
|
51
|
+
* - pg_mule_verifystr
|
44
52
|
* - pg_latin12wchar_with_len
|
45
53
|
* - pg_latin1_mblen
|
46
54
|
* - pg_latin1_dsplen
|
47
|
-
* -
|
55
|
+
* - pg_latin1_verifychar
|
56
|
+
* - pg_latin1_verifystr
|
48
57
|
* - pg_sjis_mblen
|
49
58
|
* - pg_sjis_dsplen
|
50
|
-
* -
|
59
|
+
* - pg_sjis_verifychar
|
60
|
+
* - pg_sjis_verifystr
|
51
61
|
* - pg_big5_mblen
|
52
62
|
* - pg_big5_dsplen
|
53
|
-
* -
|
63
|
+
* - pg_big5_verifychar
|
64
|
+
* - pg_big5_verifystr
|
54
65
|
* - pg_gbk_mblen
|
55
66
|
* - pg_gbk_dsplen
|
56
|
-
* -
|
67
|
+
* - pg_gbk_verifychar
|
68
|
+
* - pg_gbk_verifystr
|
57
69
|
* - pg_uhc_mblen
|
58
70
|
* - pg_uhc_dsplen
|
59
|
-
* -
|
71
|
+
* - pg_uhc_verifychar
|
72
|
+
* - pg_uhc_verifystr
|
60
73
|
* - pg_gb18030_mblen
|
61
74
|
* - pg_gb18030_dsplen
|
62
|
-
* -
|
75
|
+
* - pg_gb18030_verifychar
|
76
|
+
* - pg_gb18030_verifystr
|
63
77
|
* - pg_johab_mblen
|
64
78
|
* - pg_johab_dsplen
|
65
|
-
* -
|
79
|
+
* - pg_johab_verifychar
|
80
|
+
* - pg_johab_verifystr
|
66
81
|
* - pg_encoding_mblen
|
67
82
|
*--------------------------------------------------------------------
|
68
83
|
*/
|
@@ -72,7 +87,7 @@
|
|
72
87
|
* wchar.c
|
73
88
|
* Functions for working with multibyte characters in various encodings.
|
74
89
|
*
|
75
|
-
* Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
|
90
|
+
* Portions Copyright (c) 1998-2022, PostgreSQL Global Development Group
|
76
91
|
*
|
77
92
|
* IDENTIFICATION
|
78
93
|
* src/common/wchar.c
|
@@ -88,9 +103,9 @@
|
|
88
103
|
* Operations on multi-byte encodings are driven by a table of helper
|
89
104
|
* functions.
|
90
105
|
*
|
91
|
-
* To add an encoding support, define mblen(), dsplen() and verifier() for
|
92
|
-
* the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
|
93
|
-
* conversion functions.
|
106
|
+
* To add an encoding support, define mblen(), dsplen(), verifychar() and
|
107
|
+
* verifystr() for the encoding. For server-encodings, also define mb2wchar()
|
108
|
+
* and wchar2mb() conversion functions.
|
94
109
|
*
|
95
110
|
* These functions generally assume that their input is validly formed.
|
96
111
|
* The "verifier" functions, further down in the file, have to be more
|
@@ -652,8 +667,8 @@ pg_utf_mblen(const unsigned char *s)
|
|
652
667
|
|
653
668
|
struct mbinterval
|
654
669
|
{
|
655
|
-
unsigned short first;
|
656
|
-
unsigned short last;
|
670
|
+
unsigned int first;
|
671
|
+
unsigned int last;
|
657
672
|
};
|
658
673
|
|
659
674
|
/* auxiliary function for binary search in interval table */
|
@@ -692,12 +707,6 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
|
|
692
707
|
* category code Mn or Me in the Unicode database) have a
|
693
708
|
* column width of 0.
|
694
709
|
*
|
695
|
-
* - Other format characters (general category code Cf in the Unicode
|
696
|
-
* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
|
697
|
-
*
|
698
|
-
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
|
699
|
-
* have a column width of 0.
|
700
|
-
*
|
701
710
|
* - Spacing characters in the East Asian Wide (W) or East Asian
|
702
711
|
* FullWidth (F) category as defined in Unicode Technical
|
703
712
|
* Report #11 have a column width of 2.
|
@@ -714,6 +723,7 @@ static int
|
|
714
723
|
ucs_wcwidth(pg_wchar ucs)
|
715
724
|
{
|
716
725
|
#include "common/unicode_combining_table.h"
|
726
|
+
#include "common/unicode_east_asian_fw_table.h"
|
717
727
|
|
718
728
|
/* test for 8-bit control characters */
|
719
729
|
if (ucs == 0)
|
@@ -722,27 +732,25 @@ ucs_wcwidth(pg_wchar ucs)
|
|
722
732
|
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
|
723
733
|
return -1;
|
724
734
|
|
725
|
-
/* binary search in table of non-spacing characters */
|
735
|
+
/*
|
736
|
+
* binary search in table of non-spacing characters
|
737
|
+
*
|
738
|
+
* XXX: In the official Unicode sources, it is possible for a character to
|
739
|
+
* be described as both non-spacing and wide at the same time. As of
|
740
|
+
* Unicode 13.0, treating the non-spacing property as the determining
|
741
|
+
* factor for display width leads to the correct behavior, so do that
|
742
|
+
* search first.
|
743
|
+
*/
|
726
744
|
if (mbbisearch(ucs, combining,
|
727
745
|
sizeof(combining) / sizeof(struct mbinterval) - 1))
|
728
746
|
return 0;
|
729
747
|
|
730
|
-
/*
|
731
|
-
|
732
|
-
|
748
|
+
/* binary search in table of wide characters */
|
749
|
+
if (mbbisearch(ucs, east_asian_fw,
|
750
|
+
sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
|
751
|
+
return 2;
|
733
752
|
|
734
|
-
return 1 +
|
735
|
-
(ucs >= 0x1100 &&
|
736
|
-
(ucs <= 0x115f || /* Hangul Jamo init. consonants */
|
737
|
-
(ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
|
738
|
-
ucs != 0x303f) || /* CJK ... Yi */
|
739
|
-
(ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
|
740
|
-
(ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
|
741
|
-
* Ideographs */
|
742
|
-
(ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
|
743
|
-
(ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
|
744
|
-
(ucs >= 0xffe0 && ucs <= 0xffe6) ||
|
745
|
-
(ucs >= 0x20000 && ucs <= 0x2ffff)));
|
753
|
+
return 1;
|
746
754
|
}
|
747
755
|
|
748
756
|
/*
|
@@ -1156,29 +1164,45 @@ pg_gb18030_dsplen(const unsigned char *s)
|
|
1156
1164
|
*-------------------------------------------------------------------
|
1157
1165
|
* multibyte sequence validators
|
1158
1166
|
*
|
1159
|
-
*
|
1160
|
-
* and "len", the remaining length of the string. If there is a
|
1161
|
-
* encoded character beginning at *s, return its length in bytes;
|
1162
|
-
* return -1.
|
1167
|
+
* The verifychar functions accept "s", a pointer to the first byte of a
|
1168
|
+
* string, and "len", the remaining length of the string. If there is a
|
1169
|
+
* validly encoded character beginning at *s, return its length in bytes;
|
1170
|
+
* else return -1.
|
1163
1171
|
*
|
1164
|
-
* The functions
|
1165
|
-
*
|
1172
|
+
* The verifystr functions also accept "s", a pointer to a string and "len",
|
1173
|
+
* the length of the string. They verify the whole string, and return the
|
1174
|
+
* number of input bytes (<= len) that are valid. In other words, if the
|
1175
|
+
* whole string is valid, verifystr returns "len", otherwise it returns the
|
1176
|
+
* byte offset of the first invalid character. The verifystr functions must
|
1177
|
+
* test for and reject zeroes in the input.
|
1166
1178
|
*
|
1167
|
-
*
|
1168
|
-
*
|
1179
|
+
* The verifychar functions can assume that len > 0 and that *s != '\0', but
|
1180
|
+
* they must test for and reject zeroes in any additional bytes of a
|
1181
|
+
* multibyte character. Note that this definition allows the function for a
|
1182
|
+
* single-byte encoding to be just "return 1".
|
1169
1183
|
*-------------------------------------------------------------------
|
1170
1184
|
*/
|
1171
|
-
|
1172
1185
|
static int
|
1173
|
-
|
1186
|
+
pg_ascii_verifychar(const unsigned char *s, int len)
|
1174
1187
|
{
|
1175
1188
|
return 1;
|
1176
1189
|
}
|
1177
1190
|
|
1191
|
+
static int
|
1192
|
+
pg_ascii_verifystr(const unsigned char *s, int len)
|
1193
|
+
{
|
1194
|
+
const unsigned char *nullpos = memchr(s, 0, len);
|
1195
|
+
|
1196
|
+
if (nullpos == NULL)
|
1197
|
+
return len;
|
1198
|
+
else
|
1199
|
+
return nullpos - s;
|
1200
|
+
}
|
1201
|
+
|
1178
1202
|
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
|
1179
1203
|
|
1180
1204
|
static int
|
1181
|
-
|
1205
|
+
pg_eucjp_verifychar(const unsigned char *s, int len)
|
1182
1206
|
{
|
1183
1207
|
int l;
|
1184
1208
|
unsigned char c1,
|
@@ -1233,7 +1257,36 @@ pg_eucjp_verifier(const unsigned char *s, int len)
|
|
1233
1257
|
}
|
1234
1258
|
|
1235
1259
|
static int
|
1236
|
-
|
1260
|
+
pg_eucjp_verifystr(const unsigned char *s, int len)
|
1261
|
+
{
|
1262
|
+
const unsigned char *start = s;
|
1263
|
+
|
1264
|
+
while (len > 0)
|
1265
|
+
{
|
1266
|
+
int l;
|
1267
|
+
|
1268
|
+
/* fast path for ASCII-subset characters */
|
1269
|
+
if (!IS_HIGHBIT_SET(*s))
|
1270
|
+
{
|
1271
|
+
if (*s == '\0')
|
1272
|
+
break;
|
1273
|
+
l = 1;
|
1274
|
+
}
|
1275
|
+
else
|
1276
|
+
{
|
1277
|
+
l = pg_eucjp_verifychar(s, len);
|
1278
|
+
if (l == -1)
|
1279
|
+
break;
|
1280
|
+
}
|
1281
|
+
s += l;
|
1282
|
+
len -= l;
|
1283
|
+
}
|
1284
|
+
|
1285
|
+
return s - start;
|
1286
|
+
}
|
1287
|
+
|
1288
|
+
static int
|
1289
|
+
pg_euckr_verifychar(const unsigned char *s, int len)
|
1237
1290
|
{
|
1238
1291
|
int l;
|
1239
1292
|
unsigned char c1,
|
@@ -1261,11 +1314,41 @@ pg_euckr_verifier(const unsigned char *s, int len)
|
|
1261
1314
|
return l;
|
1262
1315
|
}
|
1263
1316
|
|
1317
|
+
static int
|
1318
|
+
pg_euckr_verifystr(const unsigned char *s, int len)
|
1319
|
+
{
|
1320
|
+
const unsigned char *start = s;
|
1321
|
+
|
1322
|
+
while (len > 0)
|
1323
|
+
{
|
1324
|
+
int l;
|
1325
|
+
|
1326
|
+
/* fast path for ASCII-subset characters */
|
1327
|
+
if (!IS_HIGHBIT_SET(*s))
|
1328
|
+
{
|
1329
|
+
if (*s == '\0')
|
1330
|
+
break;
|
1331
|
+
l = 1;
|
1332
|
+
}
|
1333
|
+
else
|
1334
|
+
{
|
1335
|
+
l = pg_euckr_verifychar(s, len);
|
1336
|
+
if (l == -1)
|
1337
|
+
break;
|
1338
|
+
}
|
1339
|
+
s += l;
|
1340
|
+
len -= l;
|
1341
|
+
}
|
1342
|
+
|
1343
|
+
return s - start;
|
1344
|
+
}
|
1345
|
+
|
1264
1346
|
/* EUC-CN byte sequences are exactly same as EUC-KR */
|
1265
|
-
#define
|
1347
|
+
#define pg_euccn_verifychar pg_euckr_verifychar
|
1348
|
+
#define pg_euccn_verifystr pg_euckr_verifystr
|
1266
1349
|
|
1267
1350
|
static int
|
1268
|
-
|
1351
|
+
pg_euctw_verifychar(const unsigned char *s, int len)
|
1269
1352
|
{
|
1270
1353
|
int l;
|
1271
1354
|
unsigned char c1,
|
@@ -1315,7 +1398,36 @@ pg_euctw_verifier(const unsigned char *s, int len)
|
|
1315
1398
|
}
|
1316
1399
|
|
1317
1400
|
static int
|
1318
|
-
|
1401
|
+
pg_euctw_verifystr(const unsigned char *s, int len)
|
1402
|
+
{
|
1403
|
+
const unsigned char *start = s;
|
1404
|
+
|
1405
|
+
while (len > 0)
|
1406
|
+
{
|
1407
|
+
int l;
|
1408
|
+
|
1409
|
+
/* fast path for ASCII-subset characters */
|
1410
|
+
if (!IS_HIGHBIT_SET(*s))
|
1411
|
+
{
|
1412
|
+
if (*s == '\0')
|
1413
|
+
break;
|
1414
|
+
l = 1;
|
1415
|
+
}
|
1416
|
+
else
|
1417
|
+
{
|
1418
|
+
l = pg_euctw_verifychar(s, len);
|
1419
|
+
if (l == -1)
|
1420
|
+
break;
|
1421
|
+
}
|
1422
|
+
s += l;
|
1423
|
+
len -= l;
|
1424
|
+
}
|
1425
|
+
|
1426
|
+
return s - start;
|
1427
|
+
}
|
1428
|
+
|
1429
|
+
static int
|
1430
|
+
pg_johab_verifychar(const unsigned char *s, int len)
|
1319
1431
|
{
|
1320
1432
|
int l,
|
1321
1433
|
mbl;
|
@@ -1339,7 +1451,36 @@ pg_johab_verifier(const unsigned char *s, int len)
|
|
1339
1451
|
}
|
1340
1452
|
|
1341
1453
|
static int
|
1342
|
-
|
1454
|
+
pg_johab_verifystr(const unsigned char *s, int len)
|
1455
|
+
{
|
1456
|
+
const unsigned char *start = s;
|
1457
|
+
|
1458
|
+
while (len > 0)
|
1459
|
+
{
|
1460
|
+
int l;
|
1461
|
+
|
1462
|
+
/* fast path for ASCII-subset characters */
|
1463
|
+
if (!IS_HIGHBIT_SET(*s))
|
1464
|
+
{
|
1465
|
+
if (*s == '\0')
|
1466
|
+
break;
|
1467
|
+
l = 1;
|
1468
|
+
}
|
1469
|
+
else
|
1470
|
+
{
|
1471
|
+
l = pg_johab_verifychar(s, len);
|
1472
|
+
if (l == -1)
|
1473
|
+
break;
|
1474
|
+
}
|
1475
|
+
s += l;
|
1476
|
+
len -= l;
|
1477
|
+
}
|
1478
|
+
|
1479
|
+
return s - start;
|
1480
|
+
}
|
1481
|
+
|
1482
|
+
static int
|
1483
|
+
pg_mule_verifychar(const unsigned char *s, int len)
|
1343
1484
|
{
|
1344
1485
|
int l,
|
1345
1486
|
mbl;
|
@@ -1360,13 +1501,53 @@ pg_mule_verifier(const unsigned char *s, int len)
|
|
1360
1501
|
}
|
1361
1502
|
|
1362
1503
|
static int
|
1363
|
-
|
1504
|
+
pg_mule_verifystr(const unsigned char *s, int len)
|
1505
|
+
{
|
1506
|
+
const unsigned char *start = s;
|
1507
|
+
|
1508
|
+
while (len > 0)
|
1509
|
+
{
|
1510
|
+
int l;
|
1511
|
+
|
1512
|
+
/* fast path for ASCII-subset characters */
|
1513
|
+
if (!IS_HIGHBIT_SET(*s))
|
1514
|
+
{
|
1515
|
+
if (*s == '\0')
|
1516
|
+
break;
|
1517
|
+
l = 1;
|
1518
|
+
}
|
1519
|
+
else
|
1520
|
+
{
|
1521
|
+
l = pg_mule_verifychar(s, len);
|
1522
|
+
if (l == -1)
|
1523
|
+
break;
|
1524
|
+
}
|
1525
|
+
s += l;
|
1526
|
+
len -= l;
|
1527
|
+
}
|
1528
|
+
|
1529
|
+
return s - start;
|
1530
|
+
}
|
1531
|
+
|
1532
|
+
static int
|
1533
|
+
pg_latin1_verifychar(const unsigned char *s, int len)
|
1364
1534
|
{
|
1365
1535
|
return 1;
|
1366
1536
|
}
|
1367
1537
|
|
1368
1538
|
static int
|
1369
|
-
|
1539
|
+
pg_latin1_verifystr(const unsigned char *s, int len)
|
1540
|
+
{
|
1541
|
+
const unsigned char *nullpos = memchr(s, 0, len);
|
1542
|
+
|
1543
|
+
if (nullpos == NULL)
|
1544
|
+
return len;
|
1545
|
+
else
|
1546
|
+
return nullpos - s;
|
1547
|
+
}
|
1548
|
+
|
1549
|
+
static int
|
1550
|
+
pg_sjis_verifychar(const unsigned char *s, int len)
|
1370
1551
|
{
|
1371
1552
|
int l,
|
1372
1553
|
mbl;
|
@@ -1389,7 +1570,36 @@ pg_sjis_verifier(const unsigned char *s, int len)
|
|
1389
1570
|
}
|
1390
1571
|
|
1391
1572
|
static int
|
1392
|
-
|
1573
|
+
pg_sjis_verifystr(const unsigned char *s, int len)
|
1574
|
+
{
|
1575
|
+
const unsigned char *start = s;
|
1576
|
+
|
1577
|
+
while (len > 0)
|
1578
|
+
{
|
1579
|
+
int l;
|
1580
|
+
|
1581
|
+
/* fast path for ASCII-subset characters */
|
1582
|
+
if (!IS_HIGHBIT_SET(*s))
|
1583
|
+
{
|
1584
|
+
if (*s == '\0')
|
1585
|
+
break;
|
1586
|
+
l = 1;
|
1587
|
+
}
|
1588
|
+
else
|
1589
|
+
{
|
1590
|
+
l = pg_sjis_verifychar(s, len);
|
1591
|
+
if (l == -1)
|
1592
|
+
break;
|
1593
|
+
}
|
1594
|
+
s += l;
|
1595
|
+
len -= l;
|
1596
|
+
}
|
1597
|
+
|
1598
|
+
return s - start;
|
1599
|
+
}
|
1600
|
+
|
1601
|
+
static int
|
1602
|
+
pg_big5_verifychar(const unsigned char *s, int len)
|
1393
1603
|
{
|
1394
1604
|
int l,
|
1395
1605
|
mbl;
|
@@ -1409,7 +1619,36 @@ pg_big5_verifier(const unsigned char *s, int len)
|
|
1409
1619
|
}
|
1410
1620
|
|
1411
1621
|
static int
|
1412
|
-
|
1622
|
+
pg_big5_verifystr(const unsigned char *s, int len)
|
1623
|
+
{
|
1624
|
+
const unsigned char *start = s;
|
1625
|
+
|
1626
|
+
while (len > 0)
|
1627
|
+
{
|
1628
|
+
int l;
|
1629
|
+
|
1630
|
+
/* fast path for ASCII-subset characters */
|
1631
|
+
if (!IS_HIGHBIT_SET(*s))
|
1632
|
+
{
|
1633
|
+
if (*s == '\0')
|
1634
|
+
break;
|
1635
|
+
l = 1;
|
1636
|
+
}
|
1637
|
+
else
|
1638
|
+
{
|
1639
|
+
l = pg_big5_verifychar(s, len);
|
1640
|
+
if (l == -1)
|
1641
|
+
break;
|
1642
|
+
}
|
1643
|
+
s += l;
|
1644
|
+
len -= l;
|
1645
|
+
}
|
1646
|
+
|
1647
|
+
return s - start;
|
1648
|
+
}
|
1649
|
+
|
1650
|
+
static int
|
1651
|
+
pg_gbk_verifychar(const unsigned char *s, int len)
|
1413
1652
|
{
|
1414
1653
|
int l,
|
1415
1654
|
mbl;
|
@@ -1429,7 +1668,36 @@ pg_gbk_verifier(const unsigned char *s, int len)
|
|
1429
1668
|
}
|
1430
1669
|
|
1431
1670
|
static int
|
1432
|
-
|
1671
|
+
pg_gbk_verifystr(const unsigned char *s, int len)
|
1672
|
+
{
|
1673
|
+
const unsigned char *start = s;
|
1674
|
+
|
1675
|
+
while (len > 0)
|
1676
|
+
{
|
1677
|
+
int l;
|
1678
|
+
|
1679
|
+
/* fast path for ASCII-subset characters */
|
1680
|
+
if (!IS_HIGHBIT_SET(*s))
|
1681
|
+
{
|
1682
|
+
if (*s == '\0')
|
1683
|
+
break;
|
1684
|
+
l = 1;
|
1685
|
+
}
|
1686
|
+
else
|
1687
|
+
{
|
1688
|
+
l = pg_gbk_verifychar(s, len);
|
1689
|
+
if (l == -1)
|
1690
|
+
break;
|
1691
|
+
}
|
1692
|
+
s += l;
|
1693
|
+
len -= l;
|
1694
|
+
}
|
1695
|
+
|
1696
|
+
return s - start;
|
1697
|
+
}
|
1698
|
+
|
1699
|
+
static int
|
1700
|
+
pg_uhc_verifychar(const unsigned char *s, int len)
|
1433
1701
|
{
|
1434
1702
|
int l,
|
1435
1703
|
mbl;
|
@@ -1449,7 +1717,36 @@ pg_uhc_verifier(const unsigned char *s, int len)
|
|
1449
1717
|
}
|
1450
1718
|
|
1451
1719
|
static int
|
1452
|
-
|
1720
|
+
pg_uhc_verifystr(const unsigned char *s, int len)
|
1721
|
+
{
|
1722
|
+
const unsigned char *start = s;
|
1723
|
+
|
1724
|
+
while (len > 0)
|
1725
|
+
{
|
1726
|
+
int l;
|
1727
|
+
|
1728
|
+
/* fast path for ASCII-subset characters */
|
1729
|
+
if (!IS_HIGHBIT_SET(*s))
|
1730
|
+
{
|
1731
|
+
if (*s == '\0')
|
1732
|
+
break;
|
1733
|
+
l = 1;
|
1734
|
+
}
|
1735
|
+
else
|
1736
|
+
{
|
1737
|
+
l = pg_uhc_verifychar(s, len);
|
1738
|
+
if (l == -1)
|
1739
|
+
break;
|
1740
|
+
}
|
1741
|
+
s += l;
|
1742
|
+
len -= l;
|
1743
|
+
}
|
1744
|
+
|
1745
|
+
return s - start;
|
1746
|
+
}
|
1747
|
+
|
1748
|
+
static int
|
1749
|
+
pg_gb18030_verifychar(const unsigned char *s, int len)
|
1453
1750
|
{
|
1454
1751
|
int l;
|
1455
1752
|
|
@@ -1480,11 +1777,55 @@ pg_gb18030_verifier(const unsigned char *s, int len)
|
|
1480
1777
|
}
|
1481
1778
|
|
1482
1779
|
static int
|
1483
|
-
|
1780
|
+
pg_gb18030_verifystr(const unsigned char *s, int len)
|
1484
1781
|
{
|
1485
|
-
|
1782
|
+
const unsigned char *start = s;
|
1486
1783
|
|
1487
|
-
|
1784
|
+
while (len > 0)
|
1785
|
+
{
|
1786
|
+
int l;
|
1787
|
+
|
1788
|
+
/* fast path for ASCII-subset characters */
|
1789
|
+
if (!IS_HIGHBIT_SET(*s))
|
1790
|
+
{
|
1791
|
+
if (*s == '\0')
|
1792
|
+
break;
|
1793
|
+
l = 1;
|
1794
|
+
}
|
1795
|
+
else
|
1796
|
+
{
|
1797
|
+
l = pg_gb18030_verifychar(s, len);
|
1798
|
+
if (l == -1)
|
1799
|
+
break;
|
1800
|
+
}
|
1801
|
+
s += l;
|
1802
|
+
len -= l;
|
1803
|
+
}
|
1804
|
+
|
1805
|
+
return s - start;
|
1806
|
+
}
|
1807
|
+
|
1808
|
+
static int
|
1809
|
+
pg_utf8_verifychar(const unsigned char *s, int len)
|
1810
|
+
{
|
1811
|
+
int l;
|
1812
|
+
|
1813
|
+
if ((*s & 0x80) == 0)
|
1814
|
+
{
|
1815
|
+
if (*s == '\0')
|
1816
|
+
return -1;
|
1817
|
+
return 1;
|
1818
|
+
}
|
1819
|
+
else if ((*s & 0xe0) == 0xc0)
|
1820
|
+
l = 2;
|
1821
|
+
else if ((*s & 0xf0) == 0xe0)
|
1822
|
+
l = 3;
|
1823
|
+
else if ((*s & 0xf8) == 0xf0)
|
1824
|
+
l = 4;
|
1825
|
+
else
|
1826
|
+
l = 1;
|
1827
|
+
|
1828
|
+
if (l > len)
|
1488
1829
|
return -1;
|
1489
1830
|
|
1490
1831
|
if (!pg_utf8_islegal(s, l))
|
@@ -1493,6 +1834,250 @@ pg_utf8_verifier(const unsigned char *s, int len)
|
|
1493
1834
|
return l;
|
1494
1835
|
}
|
1495
1836
|
|
1837
|
+
/*
|
1838
|
+
* The fast path of the UTF-8 verifier uses a deterministic finite automaton
|
1839
|
+
* (DFA) for multibyte characters. In a traditional table-driven DFA, the
|
1840
|
+
* input byte and current state are used to compute an index into an array of
|
1841
|
+
* state transitions. Since the address of the next transition is dependent
|
1842
|
+
* on this computation, there is latency in executing the load instruction,
|
1843
|
+
* and the CPU is not kept busy.
|
1844
|
+
*
|
1845
|
+
* Instead, we use a "shift-based" DFA as described by Per Vognsen:
|
1846
|
+
*
|
1847
|
+
* https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725
|
1848
|
+
*
|
1849
|
+
* In a shift-based DFA, the input byte is an index into array of integers
|
1850
|
+
* whose bit pattern encodes the state transitions. To compute the next
|
1851
|
+
* state, we simply right-shift the integer by the current state and apply a
|
1852
|
+
* mask. In this scheme, the address of the transition only depends on the
|
1853
|
+
* input byte, so there is better pipelining.
|
1854
|
+
*
|
1855
|
+
* The naming convention for states and transitions was adopted from a UTF-8
|
1856
|
+
* to UTF-16/32 transcoder, whose table is reproduced below:
|
1857
|
+
*
|
1858
|
+
* https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp
|
1859
|
+
*
|
1860
|
+
* ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS / STATE
|
1861
|
+
* ==========================================================================
|
1862
|
+
* err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, | BGN/END
|
1863
|
+
* err, err, err, err, err, err, err, err, err, err, err, err, | ERR
|
1864
|
+
* |
|
1865
|
+
* err, err, END, END, END, err, err, err, err, err, err, err, | CS1
|
1866
|
+
* err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, | CS2
|
1867
|
+
* err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, | CS3
|
1868
|
+
* |
|
1869
|
+
* err, err, err, err, CS1, err, err, err, err, err, err, err, | P3A
|
1870
|
+
* err, err, CS1, CS1, err, err, err, err, err, err, err, err, | P3B
|
1871
|
+
* |
|
1872
|
+
* err, err, err, CS2, CS2, err, err, err, err, err, err, err, | P4A
|
1873
|
+
* err, err, CS2, err, err, err, err, err, err, err, err, err, | P4B
|
1874
|
+
*
|
1875
|
+
* In the most straightforward implementation, a shift-based DFA for UTF-8
|
1876
|
+
* requires 64-bit integers to encode the transitions, but with an SMT solver
|
1877
|
+
* it's possible to find state numbers such that the transitions fit within
|
1878
|
+
* 32-bit integers, as Dougall Johnson demonstrated:
|
1879
|
+
*
|
1880
|
+
* https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f
|
1881
|
+
*
|
1882
|
+
* This packed representation is the reason for the seemingly odd choice of
|
1883
|
+
* state values below.
|
1884
|
+
*/
|
1885
|
+
|
1886
|
+
/* Error */
|
1887
|
+
#define ERR 0
|
1888
|
+
/* Begin */
|
1889
|
+
#define BGN 11
|
1890
|
+
/* Continuation states, expect 1/2/3 continuation bytes */
|
1891
|
+
#define CS1 16
|
1892
|
+
#define CS2 1
|
1893
|
+
#define CS3 5
|
1894
|
+
/* Partial states, where the first continuation byte has a restricted range */
|
1895
|
+
#define P3A 6 /* Lead was E0, check for 3-byte overlong */
|
1896
|
+
#define P3B 20 /* Lead was ED, check for surrogate */
|
1897
|
+
#define P4A 25 /* Lead was F0, check for 4-byte overlong */
|
1898
|
+
#define P4B 30 /* Lead was F4, check for too-large */
|
1899
|
+
/* Begin and End are the same state */
|
1900
|
+
#define END BGN
|
1901
|
+
|
1902
|
+
/* the encoded state transitions for the lookup table */
|
1903
|
+
|
1904
|
+
/* ASCII */
|
1905
|
+
#define ASC (END << BGN)
|
1906
|
+
/* 2-byte lead */
|
1907
|
+
#define L2A (CS1 << BGN)
|
1908
|
+
/* 3-byte lead */
|
1909
|
+
#define L3A (P3A << BGN)
|
1910
|
+
#define L3B (CS2 << BGN)
|
1911
|
+
#define L3C (P3B << BGN)
|
1912
|
+
/* 4-byte lead */
|
1913
|
+
#define L4A (P4A << BGN)
|
1914
|
+
#define L4B (CS3 << BGN)
|
1915
|
+
#define L4C (P4B << BGN)
|
1916
|
+
/* continuation byte */
|
1917
|
+
#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
|
1918
|
+
#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
|
1919
|
+
#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
|
1920
|
+
/* invalid byte */
|
1921
|
+
#define ILL ERR
|
1922
|
+
|
1923
|
+
static const uint32 Utf8Transition[256] =
|
1924
|
+
{
|
1925
|
+
/* ASCII */
|
1926
|
+
|
1927
|
+
ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1928
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1929
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1930
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1931
|
+
|
1932
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1933
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1934
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1935
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1936
|
+
|
1937
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1938
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1939
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1940
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1941
|
+
|
1942
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1943
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1944
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1945
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1946
|
+
|
1947
|
+
/* continuation bytes */
|
1948
|
+
|
1949
|
+
/* 80..8F */
|
1950
|
+
CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
|
1951
|
+
CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
|
1952
|
+
|
1953
|
+
/* 90..9F */
|
1954
|
+
CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
|
1955
|
+
CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
|
1956
|
+
|
1957
|
+
/* A0..BF */
|
1958
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1959
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1960
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1961
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1962
|
+
|
1963
|
+
/* leading bytes */
|
1964
|
+
|
1965
|
+
/* C0..DF */
|
1966
|
+
ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
|
1967
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
|
1968
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
|
1969
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
|
1970
|
+
|
1971
|
+
/* E0..EF */
|
1972
|
+
L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
|
1973
|
+
L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
|
1974
|
+
|
1975
|
+
/* F0..FF */
|
1976
|
+
L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
|
1977
|
+
ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
|
1978
|
+
};
|
1979
|
+
|
1980
|
+
static void
|
1981
|
+
utf8_advance(const unsigned char *s, uint32 *state, int len)
|
1982
|
+
{
|
1983
|
+
/* Note: We deliberately don't check the state's value here. */
|
1984
|
+
while (len > 0)
|
1985
|
+
{
|
1986
|
+
/*
|
1987
|
+
* It's important that the mask value is 31: In most instruction sets,
|
1988
|
+
* a shift by a 32-bit operand is understood to be a shift by its mod
|
1989
|
+
* 32, so the compiler should elide the mask operation.
|
1990
|
+
*/
|
1991
|
+
*state = Utf8Transition[*s++] >> (*state & 31);
|
1992
|
+
len--;
|
1993
|
+
}
|
1994
|
+
|
1995
|
+
*state &= 31;
|
1996
|
+
}
|
1997
|
+
|
1998
|
+
static int
|
1999
|
+
pg_utf8_verifystr(const unsigned char *s, int len)
|
2000
|
+
{
|
2001
|
+
const unsigned char *start = s;
|
2002
|
+
const int orig_len = len;
|
2003
|
+
uint32 state = BGN;
|
2004
|
+
|
2005
|
+
/*
|
2006
|
+
* Sixteen seems to give the best balance of performance across different
|
2007
|
+
* byte distributions.
|
2008
|
+
*/
|
2009
|
+
#define STRIDE_LENGTH 16
|
2010
|
+
|
2011
|
+
if (len >= STRIDE_LENGTH)
|
2012
|
+
{
|
2013
|
+
while (len >= STRIDE_LENGTH)
|
2014
|
+
{
|
2015
|
+
/*
|
2016
|
+
* If the chunk is all ASCII, we can skip the full UTF-8 check,
|
2017
|
+
* but we must first check for a non-END state, which means the
|
2018
|
+
* previous chunk ended in the middle of a multibyte sequence.
|
2019
|
+
*/
|
2020
|
+
if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
|
2021
|
+
utf8_advance(s, &state, STRIDE_LENGTH);
|
2022
|
+
|
2023
|
+
s += STRIDE_LENGTH;
|
2024
|
+
len -= STRIDE_LENGTH;
|
2025
|
+
}
|
2026
|
+
|
2027
|
+
/* The error state persists, so we only need to check for it here. */
|
2028
|
+
if (state == ERR)
|
2029
|
+
{
|
2030
|
+
/*
|
2031
|
+
* Start over from the beginning with the slow path so we can
|
2032
|
+
* count the valid bytes.
|
2033
|
+
*/
|
2034
|
+
len = orig_len;
|
2035
|
+
s = start;
|
2036
|
+
}
|
2037
|
+
else if (state != END)
|
2038
|
+
{
|
2039
|
+
/*
|
2040
|
+
* The fast path exited in the middle of a multibyte sequence.
|
2041
|
+
* Walk backwards to find the leading byte so that the slow path
|
2042
|
+
* can resume checking from there. We must always backtrack at
|
2043
|
+
* least one byte, since the current byte could be e.g. an ASCII
|
2044
|
+
* byte after a 2-byte lead, which is invalid.
|
2045
|
+
*/
|
2046
|
+
do
|
2047
|
+
{
|
2048
|
+
Assert(s > start);
|
2049
|
+
s--;
|
2050
|
+
len++;
|
2051
|
+
Assert(IS_HIGHBIT_SET(*s));
|
2052
|
+
} while (pg_utf_mblen(s) <= 1);
|
2053
|
+
}
|
2054
|
+
}
|
2055
|
+
|
2056
|
+
/* check remaining bytes */
|
2057
|
+
while (len > 0)
|
2058
|
+
{
|
2059
|
+
int l;
|
2060
|
+
|
2061
|
+
/* fast path for ASCII-subset characters */
|
2062
|
+
if (!IS_HIGHBIT_SET(*s))
|
2063
|
+
{
|
2064
|
+
if (*s == '\0')
|
2065
|
+
break;
|
2066
|
+
l = 1;
|
2067
|
+
}
|
2068
|
+
else
|
2069
|
+
{
|
2070
|
+
l = pg_utf8_verifychar(s, len);
|
2071
|
+
if (l == -1)
|
2072
|
+
break;
|
2073
|
+
}
|
2074
|
+
s += l;
|
2075
|
+
len -= l;
|
2076
|
+
}
|
2077
|
+
|
2078
|
+
return s - start;
|
2079
|
+
}
|
2080
|
+
|
1496
2081
|
/*
|
1497
2082
|
* Check for validity of a single UTF-8 encoded character
|
1498
2083
|
*
|
@@ -1572,48 +2157,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
|
1572
2157
|
*-------------------------------------------------------------------
|
1573
2158
|
*/
|
1574
2159
|
const pg_wchar_tbl pg_wchar_table[] = {
|
1575
|
-
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen,
|
1576
|
-
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen,
|
1577
|
-
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen,
|
1578
|
-
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen,
|
1579
|
-
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen,
|
1580
|
-
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen,
|
1581
|
-
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen,
|
1582
|
-
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen,
|
1583
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1584
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1585
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1586
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1587
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1588
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1589
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1590
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1591
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1592
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1593
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1594
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1595
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1596
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1597
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1598
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1599
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1600
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1601
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1602
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1603
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1604
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1605
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1606
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1607
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1608
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1609
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1610
|
-
{0, 0, pg_sjis_mblen, pg_sjis_dsplen,
|
1611
|
-
{0, 0, pg_big5_mblen, pg_big5_dsplen,
|
1612
|
-
{0, 0, pg_gbk_mblen, pg_gbk_dsplen,
|
1613
|
-
{0, 0, pg_uhc_mblen, pg_uhc_dsplen,
|
1614
|
-
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen,
|
1615
|
-
{0, 0, pg_johab_mblen, pg_johab_dsplen,
|
1616
|
-
{0, 0, pg_sjis_mblen, pg_sjis_dsplen,
|
2160
|
+
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */
|
2161
|
+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */
|
2162
|
+
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */
|
2163
|
+
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */
|
2164
|
+
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */
|
2165
|
+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */
|
2166
|
+
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */
|
2167
|
+
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */
|
2168
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */
|
2169
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */
|
2170
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */
|
2171
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */
|
2172
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */
|
2173
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */
|
2174
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */
|
2175
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */
|
2176
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */
|
2177
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */
|
2178
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */
|
2179
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */
|
2180
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */
|
2181
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */
|
2182
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */
|
2183
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */
|
2184
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */
|
2185
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */
|
2186
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */
|
2187
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */
|
2188
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */
|
2189
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */
|
2190
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */
|
2191
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */
|
2192
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */
|
2193
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */
|
2194
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */
|
2195
|
+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS */
|
2196
|
+
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */
|
2197
|
+
{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */
|
2198
|
+
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */
|
2199
|
+
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */
|
2200
|
+
{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */
|
2201
|
+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */
|
1617
2202
|
};
|
1618
2203
|
|
1619
2204
|
/*
|
@@ -1646,7 +2231,14 @@ pg_encoding_mblen(int encoding, const char *mbstr)
|
|
1646
2231
|
/*
|
1647
2232
|
* Verify the first multibyte character of the given string.
|
1648
2233
|
* Return its byte length if good, -1 if bad. (See comments above for
|
1649
|
-
* full details of the
|
2234
|
+
* full details of the mbverifychar API.)
|
2235
|
+
*/
|
2236
|
+
|
2237
|
+
|
2238
|
+
/*
|
2239
|
+
* Verify that a string is valid for the given encoding.
|
2240
|
+
* Returns the number of input bytes (<= len) that form a valid string.
|
2241
|
+
* (See comments above for full details of the mbverifystr API.)
|
1650
2242
|
*/
|
1651
2243
|
|
1652
2244
|
|