pg_query 2.2.0 → 4.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +59 -31
- data/Rakefile +2 -2
- data/ext/pg_query/include/access/amapi.h +45 -1
- data/ext/pg_query/include/access/attmap.h +1 -1
- data/ext/pg_query/include/access/attnum.h +2 -2
- data/ext/pg_query/include/access/clog.h +4 -2
- data/ext/pg_query/include/access/commit_ts.h +6 -9
- data/ext/pg_query/include/access/detoast.h +1 -11
- data/ext/pg_query/include/access/genam.h +15 -12
- data/ext/pg_query/include/access/gin.h +2 -2
- data/ext/pg_query/include/access/htup.h +1 -1
- data/ext/pg_query/include/access/htup_details.h +75 -87
- data/ext/pg_query/include/access/itup.h +7 -1
- data/ext/pg_query/include/access/parallel.h +2 -2
- data/ext/pg_query/include/access/printtup.h +1 -1
- data/ext/pg_query/include/access/relation.h +1 -1
- data/ext/pg_query/include/access/relscan.h +17 -2
- data/ext/pg_query/include/access/rmgr.h +30 -3
- data/ext/pg_query/include/access/rmgrlist.h +23 -23
- data/ext/pg_query/include/access/sdir.h +1 -1
- data/ext/pg_query/include/access/skey.h +1 -1
- data/ext/pg_query/include/access/stratnum.h +4 -2
- data/ext/pg_query/include/access/sysattr.h +1 -1
- data/ext/pg_query/include/access/table.h +2 -1
- data/ext/pg_query/include/access/tableam.h +272 -20
- data/ext/pg_query/include/access/toast_compression.h +73 -0
- data/ext/pg_query/include/access/transam.h +123 -13
- data/ext/pg_query/include/access/tupconvert.h +1 -1
- data/ext/pg_query/include/access/tupdesc.h +1 -1
- data/ext/pg_query/include/access/tupmacs.h +3 -3
- data/ext/pg_query/include/access/twophase.h +3 -1
- data/ext/pg_query/include/access/xact.h +73 -19
- data/ext/pg_query/include/access/xlog.h +60 -155
- data/ext/pg_query/include/access/xlog_internal.h +40 -13
- data/ext/pg_query/include/access/xlogdefs.h +8 -16
- data/ext/pg_query/include/access/xlogprefetcher.h +55 -0
- data/ext/pg_query/include/access/xlogreader.h +145 -39
- data/ext/pg_query/include/access/xlogrecord.h +18 -9
- data/ext/pg_query/include/access/xlogrecovery.h +157 -0
- data/ext/pg_query/include/c.h +101 -44
- data/ext/pg_query/include/catalog/catalog.h +3 -1
- data/ext/pg_query/include/catalog/catversion.h +2 -2
- data/ext/pg_query/include/catalog/dependency.h +8 -16
- data/ext/pg_query/include/catalog/genbki.h +83 -5
- data/ext/pg_query/include/catalog/index.h +18 -3
- data/ext/pg_query/include/catalog/indexing.h +12 -324
- data/ext/pg_query/include/catalog/namespace.h +4 -2
- data/ext/pg_query/include/catalog/objectaccess.h +70 -2
- data/ext/pg_query/include/catalog/objectaddress.h +11 -6
- data/ext/pg_query/include/catalog/pg_aggregate.h +14 -10
- data/ext/pg_query/include/catalog/pg_aggregate_d.h +2 -1
- data/ext/pg_query/include/catalog/pg_am.h +4 -1
- data/ext/pg_query/include/catalog/pg_am_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_attribute.h +27 -10
- data/ext/pg_query/include/catalog/pg_attribute_d.h +21 -18
- data/ext/pg_query/include/catalog/pg_authid.h +7 -2
- data/ext/pg_query/include/catalog/pg_authid_d.h +17 -9
- data/ext/pg_query/include/catalog/pg_class.h +44 -14
- data/ext/pg_query/include/catalog/pg_class_d.h +30 -1
- data/ext/pg_query/include/catalog/pg_collation.h +33 -8
- data/ext/pg_query/include/catalog/pg_collation_d.h +20 -3
- data/ext/pg_query/include/catalog/pg_constraint.h +38 -12
- data/ext/pg_query/include/catalog/pg_constraint_d.h +10 -4
- data/ext/pg_query/include/catalog/pg_control.h +3 -5
- data/ext/pg_query/include/catalog/pg_conversion.h +7 -4
- data/ext/pg_query/include/catalog/pg_conversion_d.h +4 -1
- data/ext/pg_query/include/catalog/pg_depend.h +11 -7
- data/ext/pg_query/include/catalog/pg_depend_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_event_trigger.h +9 -3
- data/ext/pg_query/include/catalog/pg_event_trigger_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_index.h +17 -7
- data/ext/pg_query/include/catalog/pg_index_d.h +20 -17
- data/ext/pg_query/include/catalog/pg_language.h +10 -5
- data/ext/pg_query/include/catalog/pg_language_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_namespace.h +7 -2
- data/ext/pg_query/include/catalog/pg_namespace_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_opclass.h +8 -5
- data/ext/pg_query/include/catalog/pg_opclass_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_operator.h +18 -15
- data/ext/pg_query/include/catalog/pg_operator_d.h +37 -1
- data/ext/pg_query/include/catalog/pg_opfamily.h +6 -3
- data/ext/pg_query/include/catalog/pg_opfamily_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_parameter_acl.h +60 -0
- data/ext/pg_query/include/catalog/pg_parameter_acl_d.h +34 -0
- data/ext/pg_query/include/catalog/pg_partitioned_table.h +20 -9
- data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +2 -1
- data/ext/pg_query/include/catalog/pg_proc.h +20 -11
- data/ext/pg_query/include/catalog/pg_proc_d.h +10 -8
- data/ext/pg_query/include/catalog/pg_publication.h +50 -7
- data/ext/pg_query/include/catalog/pg_publication_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_replication_origin.h +6 -1
- data/ext/pg_query/include/catalog/pg_replication_origin_d.h +5 -1
- data/ext/pg_query/include/catalog/pg_statistic.h +19 -12
- data/ext/pg_query/include/catalog/pg_statistic_d.h +2 -1
- data/ext/pg_query/include/catalog/pg_statistic_ext.h +19 -5
- data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +7 -2
- data/ext/pg_query/include/catalog/pg_transform.h +8 -5
- data/ext/pg_query/include/catalog/pg_transform_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_trigger.h +24 -8
- data/ext/pg_query/include/catalog/pg_trigger_d.h +4 -1
- data/ext/pg_query/include/catalog/pg_ts_config.h +6 -3
- data/ext/pg_query/include/catalog/pg_ts_config_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_ts_dict.h +8 -3
- data/ext/pg_query/include/catalog/pg_ts_dict_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_ts_parser.h +6 -3
- data/ext/pg_query/include/catalog/pg_ts_parser_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_ts_template.h +6 -3
- data/ext/pg_query/include/catalog/pg_ts_template_d.h +3 -1
- data/ext/pg_query/include/catalog/pg_type.h +55 -24
- data/ext/pg_query/include/catalog/pg_type_d.h +70 -31
- data/ext/pg_query/include/catalog/storage.h +5 -3
- data/ext/pg_query/include/commands/async.h +3 -4
- data/ext/pg_query/include/commands/dbcommands.h +2 -1
- data/ext/pg_query/include/commands/defrem.h +11 -24
- data/ext/pg_query/include/commands/event_trigger.h +2 -2
- data/ext/pg_query/include/commands/explain.h +1 -1
- data/ext/pg_query/include/commands/prepare.h +1 -1
- data/ext/pg_query/include/commands/tablespace.h +2 -2
- data/ext/pg_query/include/commands/trigger.h +18 -16
- data/ext/pg_query/include/commands/user.h +2 -2
- data/ext/pg_query/include/commands/vacuum.h +88 -41
- data/ext/pg_query/include/commands/variable.h +1 -1
- data/ext/pg_query/include/common/file_perm.h +4 -4
- data/ext/pg_query/include/common/hashfn.h +1 -1
- data/ext/pg_query/include/common/ip.h +1 -7
- data/ext/pg_query/include/common/keywords.h +2 -6
- data/ext/pg_query/include/common/kwlookup.h +1 -1
- data/ext/pg_query/include/common/pg_prng.h +60 -0
- data/ext/pg_query/include/common/relpath.h +2 -2
- data/ext/pg_query/include/common/string.h +24 -1
- data/ext/pg_query/include/common/unicode_combining_table.h +114 -2
- data/ext/pg_query/include/common/unicode_east_asian_fw_table.h +125 -0
- data/ext/pg_query/include/datatype/timestamp.h +40 -1
- data/ext/pg_query/include/executor/execdesc.h +1 -1
- data/ext/pg_query/include/executor/executor.h +65 -22
- data/ext/pg_query/include/executor/functions.h +17 -3
- data/ext/pg_query/include/executor/instrument.h +33 -16
- data/ext/pg_query/include/executor/spi.h +41 -3
- data/ext/pg_query/include/executor/tablefunc.h +1 -1
- data/ext/pg_query/include/executor/tuptable.h +1 -1
- data/ext/pg_query/include/fmgr.h +13 -7
- data/ext/pg_query/include/funcapi.h +16 -4
- data/ext/pg_query/include/getaddrinfo.h +1 -1
- data/ext/pg_query/include/jit/jit.h +11 -11
- data/ext/pg_query/include/kwlist_d.h +517 -494
- data/ext/pg_query/include/lib/dshash.h +112 -0
- data/ext/pg_query/include/lib/ilist.h +20 -1
- data/ext/pg_query/include/lib/pairingheap.h +1 -1
- data/ext/pg_query/include/lib/simplehash.h +140 -15
- data/ext/pg_query/include/lib/sort_template.h +432 -0
- data/ext/pg_query/include/lib/stringinfo.h +1 -1
- data/ext/pg_query/include/libpq/auth.h +6 -4
- data/ext/pg_query/include/libpq/crypt.h +5 -4
- data/ext/pg_query/include/libpq/hba.h +43 -4
- data/ext/pg_query/include/libpq/libpq-be.h +23 -6
- data/ext/pg_query/include/libpq/libpq.h +30 -20
- data/ext/pg_query/include/libpq/pqcomm.h +17 -31
- data/ext/pg_query/include/libpq/pqformat.h +1 -1
- data/ext/pg_query/include/libpq/pqsignal.h +4 -4
- data/ext/pg_query/include/mb/pg_wchar.h +105 -23
- data/ext/pg_query/include/mb/stringinfo_mb.h +1 -1
- data/ext/pg_query/include/miscadmin.h +47 -41
- data/ext/pg_query/include/nodes/bitmapset.h +1 -1
- data/ext/pg_query/include/nodes/execnodes.h +270 -78
- data/ext/pg_query/include/nodes/extensible.h +4 -2
- data/ext/pg_query/include/nodes/lockoptions.h +1 -1
- data/ext/pg_query/include/nodes/makefuncs.h +7 -6
- data/ext/pg_query/include/nodes/memnodes.h +5 -3
- data/ext/pg_query/include/nodes/nodeFuncs.h +1 -1
- data/ext/pg_query/include/nodes/nodes.h +30 -11
- data/ext/pg_query/include/nodes/params.h +1 -1
- data/ext/pg_query/include/nodes/parsenodes.h +322 -90
- data/ext/pg_query/include/nodes/pathnodes.h +243 -66
- data/ext/pg_query/include/nodes/pg_list.h +75 -69
- data/ext/pg_query/include/nodes/plannodes.h +111 -28
- data/ext/pg_query/include/nodes/primnodes.h +99 -47
- data/ext/pg_query/include/nodes/print.h +1 -1
- data/ext/pg_query/include/nodes/tidbitmap.h +1 -1
- data/ext/pg_query/include/nodes/value.h +58 -39
- data/ext/pg_query/include/optimizer/cost.h +9 -2
- data/ext/pg_query/include/optimizer/geqo.h +9 -7
- data/ext/pg_query/include/optimizer/geqo_gene.h +1 -1
- data/ext/pg_query/include/optimizer/optimizer.h +25 -17
- data/ext/pg_query/include/optimizer/paths.h +6 -6
- data/ext/pg_query/include/optimizer/planmain.h +15 -14
- data/ext/pg_query/include/parser/analyze.h +19 -5
- data/ext/pg_query/include/parser/gram.h +947 -913
- data/ext/pg_query/include/parser/gramparse.h +1 -1
- data/ext/pg_query/include/parser/kwlist.h +463 -453
- data/ext/pg_query/include/parser/parse_agg.h +2 -7
- data/ext/pg_query/include/parser/parse_coerce.h +3 -1
- data/ext/pg_query/include/parser/parse_expr.h +2 -3
- data/ext/pg_query/include/parser/parse_func.h +2 -1
- data/ext/pg_query/include/parser/parse_node.h +21 -9
- data/ext/pg_query/include/parser/parse_oper.h +1 -3
- data/ext/pg_query/include/parser/parse_relation.h +5 -4
- data/ext/pg_query/include/parser/parse_type.h +1 -1
- data/ext/pg_query/include/parser/parser.h +31 -4
- data/ext/pg_query/include/parser/parsetree.h +1 -1
- data/ext/pg_query/include/parser/scanner.h +1 -1
- data/ext/pg_query/include/parser/scansup.h +2 -5
- data/ext/pg_query/include/partitioning/partdefs.h +1 -1
- data/ext/pg_query/include/pg_config.h +83 -41
- data/ext/pg_query/include/pg_config_manual.h +74 -21
- data/ext/pg_query/include/pg_getopt.h +6 -6
- data/ext/pg_query/include/pg_query.h +5 -4
- data/ext/pg_query/include/pg_query_enum_defs.c +358 -241
- data/ext/pg_query/include/pg_query_fingerprint_conds.c +44 -7
- data/ext/pg_query/include/pg_query_fingerprint_defs.c +939 -113
- data/ext/pg_query/include/pg_query_outfuncs_conds.c +43 -13
- data/ext/pg_query/include/pg_query_outfuncs_defs.c +151 -26
- data/ext/pg_query/include/pg_query_readfuncs_conds.c +11 -2
- data/ext/pg_query/include/pg_query_readfuncs_defs.c +173 -30
- data/ext/pg_query/include/pg_trace.h +1 -1
- data/ext/pg_query/include/pgstat.h +449 -1238
- data/ext/pg_query/include/pgtime.h +14 -4
- data/ext/pg_query/include/pl_gram.h +126 -128
- data/ext/pg_query/include/pl_reserved_kwlist.h +1 -1
- data/ext/pg_query/include/pl_reserved_kwlist_d.h +10 -10
- data/ext/pg_query/include/pl_unreserved_kwlist.h +2 -3
- data/ext/pg_query/include/pl_unreserved_kwlist_d.h +54 -56
- data/ext/pg_query/include/plerrcodes.h +9 -1
- data/ext/pg_query/include/plpgsql.h +52 -54
- data/ext/pg_query/include/port/atomics/arch-arm.h +7 -1
- data/ext/pg_query/include/port/atomics/arch-ppc.h +1 -1
- data/ext/pg_query/include/port/atomics/arch-x86.h +1 -1
- data/ext/pg_query/include/port/atomics/fallback.h +1 -1
- data/ext/pg_query/include/port/atomics/generic-gcc.h +3 -3
- data/ext/pg_query/include/port/atomics/generic.h +1 -1
- data/ext/pg_query/include/port/atomics.h +1 -1
- data/ext/pg_query/include/port/pg_bitutils.h +40 -10
- data/ext/pg_query/include/port/pg_bswap.h +1 -1
- data/ext/pg_query/include/port/pg_crc32c.h +1 -1
- data/ext/pg_query/include/port.h +71 -46
- data/ext/pg_query/include/portability/instr_time.h +1 -1
- data/ext/pg_query/include/postgres.h +60 -16
- data/ext/pg_query/include/postmaster/autovacuum.h +17 -17
- data/ext/pg_query/include/postmaster/auxprocess.h +20 -0
- data/ext/pg_query/include/postmaster/bgworker.h +2 -1
- data/ext/pg_query/include/postmaster/bgworker_internals.h +2 -2
- data/ext/pg_query/include/postmaster/bgwriter.h +5 -5
- data/ext/pg_query/include/postmaster/fork_process.h +1 -1
- data/ext/pg_query/include/postmaster/interrupt.h +1 -1
- data/ext/pg_query/include/postmaster/pgarch.h +42 -8
- data/ext/pg_query/include/postmaster/postmaster.h +18 -17
- data/ext/pg_query/include/postmaster/startup.h +39 -0
- data/ext/pg_query/include/postmaster/syslogger.h +15 -10
- data/ext/pg_query/include/postmaster/walwriter.h +3 -3
- data/ext/pg_query/include/protobuf/pg_query.pb-c.h +1419 -914
- data/ext/pg_query/include/protobuf/pg_query.pb.h +43678 -32769
- data/ext/pg_query/include/regex/regex.h +18 -16
- data/ext/pg_query/include/replication/logicallauncher.h +3 -5
- data/ext/pg_query/include/replication/logicalproto.h +161 -17
- data/ext/pg_query/include/replication/logicalworker.h +1 -1
- data/ext/pg_query/include/replication/origin.h +7 -7
- data/ext/pg_query/include/replication/reorderbuffer.h +259 -42
- data/ext/pg_query/include/replication/slot.h +22 -11
- data/ext/pg_query/include/replication/syncrep.h +5 -5
- data/ext/pg_query/include/replication/walreceiver.h +145 -13
- data/ext/pg_query/include/replication/walsender.h +8 -8
- data/ext/pg_query/include/rewrite/prs2lock.h +1 -1
- data/ext/pg_query/include/rewrite/rewriteHandler.h +1 -3
- data/ext/pg_query/include/rewrite/rewriteManip.h +1 -1
- data/ext/pg_query/include/rewrite/rewriteSupport.h +1 -1
- data/ext/pg_query/include/storage/backendid.h +3 -3
- data/ext/pg_query/include/storage/block.h +4 -10
- data/ext/pg_query/include/storage/buf.h +1 -1
- data/ext/pg_query/include/storage/bufmgr.h +19 -14
- data/ext/pg_query/include/storage/bufpage.h +6 -8
- data/ext/pg_query/include/storage/condition_variable.h +13 -2
- data/ext/pg_query/include/storage/dsm.h +4 -1
- data/ext/pg_query/include/storage/dsm_impl.h +3 -2
- data/ext/pg_query/include/storage/fd.h +33 -3
- data/ext/pg_query/include/storage/fileset.h +40 -0
- data/ext/pg_query/include/storage/ipc.h +4 -1
- data/ext/pg_query/include/storage/item.h +1 -1
- data/ext/pg_query/include/storage/itemid.h +1 -1
- data/ext/pg_query/include/storage/itemptr.h +3 -1
- data/ext/pg_query/include/storage/large_object.h +2 -2
- data/ext/pg_query/include/storage/latch.h +9 -13
- data/ext/pg_query/include/storage/lmgr.h +2 -1
- data/ext/pg_query/include/storage/lock.h +11 -8
- data/ext/pg_query/include/storage/lockdefs.h +2 -2
- data/ext/pg_query/include/storage/lwlock.h +5 -32
- data/ext/pg_query/include/storage/lwlocknames.h +0 -1
- data/ext/pg_query/include/storage/off.h +1 -1
- data/ext/pg_query/include/storage/pg_sema.h +1 -1
- data/ext/pg_query/include/storage/pg_shmem.h +9 -7
- data/ext/pg_query/include/storage/pmsignal.h +15 -4
- data/ext/pg_query/include/storage/predicate.h +4 -4
- data/ext/pg_query/include/storage/proc.h +173 -59
- data/ext/pg_query/include/storage/procarray.h +98 -0
- data/ext/pg_query/include/storage/proclist_types.h +1 -1
- data/ext/pg_query/include/storage/procsignal.h +3 -7
- data/ext/pg_query/include/storage/relfilenode.h +1 -1
- data/ext/pg_query/include/storage/s_lock.h +60 -21
- data/ext/pg_query/include/storage/sharedfileset.h +3 -11
- data/ext/pg_query/include/storage/shm_mq.h +5 -4
- data/ext/pg_query/include/storage/shm_toc.h +1 -1
- data/ext/pg_query/include/storage/shmem.h +1 -1
- data/ext/pg_query/include/storage/sinval.h +3 -3
- data/ext/pg_query/include/storage/sinvaladt.h +1 -1
- data/ext/pg_query/include/storage/smgr.h +10 -8
- data/ext/pg_query/include/storage/spin.h +2 -2
- data/ext/pg_query/include/storage/standby.h +13 -6
- data/ext/pg_query/include/storage/standbydefs.h +2 -2
- data/ext/pg_query/include/storage/sync.h +7 -3
- data/ext/pg_query/include/tcop/cmdtag.h +1 -1
- data/ext/pg_query/include/tcop/cmdtaglist.h +3 -2
- data/ext/pg_query/include/tcop/deparse_utility.h +1 -1
- data/ext/pg_query/include/tcop/dest.h +1 -1
- data/ext/pg_query/include/tcop/fastpath.h +1 -2
- data/ext/pg_query/include/tcop/pquery.h +1 -1
- data/ext/pg_query/include/tcop/tcopprot.h +19 -11
- data/ext/pg_query/include/tcop/utility.h +7 -3
- data/ext/pg_query/include/tsearch/ts_cache.h +2 -2
- data/ext/pg_query/include/utils/acl.h +24 -3
- data/ext/pg_query/include/utils/aclchk_internal.h +1 -1
- data/ext/pg_query/include/utils/array.h +7 -2
- data/ext/pg_query/include/utils/backend_progress.h +44 -0
- data/ext/pg_query/include/utils/backend_status.h +321 -0
- data/ext/pg_query/include/utils/builtins.h +10 -11
- data/ext/pg_query/include/utils/bytea.h +3 -2
- data/ext/pg_query/include/utils/catcache.h +1 -1
- data/ext/pg_query/include/utils/date.h +1 -1
- data/ext/pg_query/include/utils/datetime.h +8 -7
- data/ext/pg_query/include/utils/datum.h +9 -1
- data/ext/pg_query/include/utils/dsa.h +1 -1
- data/ext/pg_query/include/utils/dynahash.h +4 -3
- data/ext/pg_query/include/utils/elog.h +52 -21
- data/ext/pg_query/include/utils/errcodes.h +2 -0
- data/ext/pg_query/include/utils/expandeddatum.h +1 -1
- data/ext/pg_query/include/utils/expandedrecord.h +1 -1
- data/ext/pg_query/include/utils/float.h +7 -7
- data/ext/pg_query/include/utils/fmgroids.h +1300 -696
- data/ext/pg_query/include/utils/fmgrprotos.h +199 -16
- data/ext/pg_query/include/utils/fmgrtab.h +6 -5
- data/ext/pg_query/include/utils/guc.h +69 -43
- data/ext/pg_query/include/utils/guc_tables.h +23 -19
- data/ext/pg_query/include/utils/hsearch.h +15 -11
- data/ext/pg_query/include/utils/inval.h +4 -1
- data/ext/pg_query/include/utils/lsyscache.h +11 -1
- data/ext/pg_query/include/utils/memdebug.h +1 -1
- data/ext/pg_query/include/utils/memutils.h +8 -3
- data/ext/pg_query/include/utils/numeric.h +19 -5
- data/ext/pg_query/include/utils/palloc.h +25 -3
- data/ext/pg_query/include/utils/partcache.h +1 -1
- data/ext/pg_query/include/utils/pg_locale.h +17 -9
- data/ext/pg_query/include/utils/pg_lsn.h +1 -1
- data/ext/pg_query/include/utils/pgstat_internal.h +784 -0
- data/ext/pg_query/include/utils/pidfile.h +1 -1
- data/ext/pg_query/include/utils/plancache.h +6 -5
- data/ext/pg_query/include/utils/portal.h +10 -12
- data/ext/pg_query/include/utils/ps_status.h +1 -1
- data/ext/pg_query/include/utils/queryenvironment.h +1 -1
- data/ext/pg_query/include/utils/queryjumble.h +88 -0
- data/ext/pg_query/include/utils/regproc.h +14 -3
- data/ext/pg_query/include/utils/rel.h +71 -19
- data/ext/pg_query/include/utils/relcache.h +8 -5
- data/ext/pg_query/include/utils/reltrigger.h +1 -1
- data/ext/pg_query/include/utils/resowner.h +1 -1
- data/ext/pg_query/include/utils/rls.h +2 -2
- data/ext/pg_query/include/utils/ruleutils.h +4 -1
- data/ext/pg_query/include/utils/sharedtuplestore.h +1 -1
- data/ext/pg_query/include/utils/snapmgr.h +34 -14
- data/ext/pg_query/include/utils/snapshot.h +14 -1
- data/ext/pg_query/include/utils/sortsupport.h +117 -2
- data/ext/pg_query/include/utils/syscache.h +6 -1
- data/ext/pg_query/include/utils/timeout.h +11 -4
- data/ext/pg_query/include/utils/timestamp.h +6 -5
- data/ext/pg_query/include/utils/tuplesort.h +25 -11
- data/ext/pg_query/include/utils/tuplestore.h +2 -2
- data/ext/pg_query/include/utils/typcache.h +24 -17
- data/ext/pg_query/include/utils/tzparser.h +1 -1
- data/ext/pg_query/include/utils/varlena.h +5 -3
- data/ext/pg_query/include/utils/wait_event.h +289 -0
- data/ext/pg_query/include/utils/xml.h +4 -4
- data/ext/pg_query/pg_query.pb-c.c +4302 -2304
- data/ext/pg_query/pg_query_deparse.c +1106 -373
- data/ext/pg_query/pg_query_fingerprint.c +30 -10
- data/ext/pg_query/pg_query_json_plpgsql.c +0 -25
- data/ext/pg_query/pg_query_normalize.c +1 -1
- data/ext/pg_query/pg_query_outfuncs_json.c +54 -16
- data/ext/pg_query/pg_query_outfuncs_protobuf.c +70 -10
- data/ext/pg_query/pg_query_parse.c +1 -1
- data/ext/pg_query/pg_query_readfuncs_protobuf.c +42 -8
- data/ext/pg_query/pg_query_scan.c +2 -1
- data/ext/pg_query/pg_query_split.c +3 -2
- data/ext/pg_query/src_backend_catalog_namespace.c +20 -9
- data/ext/pg_query/src_backend_catalog_pg_proc.c +4 -1
- data/ext/pg_query/src_backend_commands_define.c +11 -1
- data/ext/pg_query/src_backend_nodes_bitmapset.c +3 -1
- data/ext/pg_query/src_backend_nodes_copyfuncs.c +401 -76
- data/ext/pg_query/src_backend_nodes_equalfuncs.c +290 -46
- data/ext/pg_query/src_backend_nodes_extensible.c +1 -1
- data/ext/pg_query/src_backend_nodes_list.c +74 -11
- data/ext/pg_query/src_backend_nodes_makefuncs.c +5 -4
- data/ext/pg_query/src_backend_nodes_nodeFuncs.c +55 -12
- data/ext/pg_query/src_backend_nodes_value.c +28 -19
- data/ext/pg_query/src_backend_parser_gram.c +33874 -31261
- data/ext/pg_query/src_backend_parser_parser.c +26 -7
- data/ext/pg_query/src_backend_parser_scan.c +172 -209
- data/ext/pg_query/src_backend_parser_scansup.c +4 -28
- data/ext/pg_query/src_backend_postmaster_postmaster.c +77 -106
- data/ext/pg_query/src_backend_storage_ipc_ipc.c +13 -4
- data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +5 -4
- data/ext/pg_query/src_backend_tcop_postgres.c +62 -23
- data/ext/pg_query/src_backend_utils_activity_pgstat_database.c +140 -0
- data/ext/pg_query/src_backend_utils_adt_datum.c +13 -1
- data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +1 -1
- data/ext/pg_query/src_backend_utils_adt_format_type.c +6 -2
- data/ext/pg_query/src_backend_utils_adt_ruleutils.c +71 -5
- data/ext/pg_query/src_backend_utils_error_assert.c +16 -14
- data/ext/pg_query/src_backend_utils_error_elog.c +172 -99
- data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +12 -17
- data/ext/pg_query/src_backend_utils_hash_dynahash.c +40 -10
- data/ext/pg_query/src_backend_utils_init_globals.c +5 -5
- data/ext/pg_query/src_backend_utils_mb_mbutils.c +55 -66
- data/ext/pg_query/src_backend_utils_misc_guc.c +206 -45
- data/ext/pg_query/src_backend_utils_mmgr_aset.c +7 -5
- data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +123 -35
- data/ext/pg_query/src_common_encnames.c +1 -1
- data/ext/pg_query/src_common_hashfn.c +3 -3
- data/ext/pg_query/src_common_keywords.c +15 -2
- data/ext/pg_query/src_common_kwlist_d.h +517 -494
- data/ext/pg_query/src_common_kwlookup.c +1 -1
- data/ext/pg_query/src_common_pg_prng.c +152 -0
- data/ext/pg_query/src_common_psprintf.c +1 -1
- data/ext/pg_query/src_common_string.c +7 -1
- data/ext/pg_query/src_common_stringinfo.c +1 -1
- data/ext/pg_query/src_common_wchar.c +701 -109
- data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +45 -20
- data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +1 -18
- data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +1233 -1259
- data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +1 -1
- data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +10 -10
- data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +2 -2
- data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +54 -56
- data/ext/pg_query/src_port_pg_bitutils.c +41 -31
- data/ext/pg_query/src_port_pgsleep.c +1 -1
- data/ext/pg_query/src_port_pgstrcasecmp.c +1 -1
- data/ext/pg_query/src_port_qsort.c +12 -224
- data/ext/pg_query/src_port_snprintf.c +37 -13
- data/ext/pg_query/src_port_strerror.c +9 -19
- data/ext/pg_query/src_port_strnlen.c +1 -1
- data/lib/pg_query/filter_columns.rb +1 -1
- data/lib/pg_query/fingerprint.rb +5 -1
- data/lib/pg_query/node.rb +2 -2
- data/lib/pg_query/param_refs.rb +1 -1
- data/lib/pg_query/parse.rb +20 -8
- data/lib/pg_query/pg_query_pb.rb +1108 -942
- data/lib/pg_query/treewalker.rb +6 -0
- data/lib/pg_query/truncate.rb +1 -1
- data/lib/pg_query/version.rb +1 -1
- metadata +27 -17
- data/ext/pg_query/include/access/xloginsert.h +0 -64
- data/ext/pg_query/include/bootstrap/bootstrap.h +0 -62
- data/ext/pg_query/include/parser/parse_clause.h +0 -54
- data/ext/pg_query/include/parser/parse_collate.h +0 -27
- data/ext/pg_query/include/parser/parse_target.h +0 -46
- data/ext/pg_query/pg_query_ruby_freebsd.sym +0 -2
- data/ext/pg_query/src_backend_libpq_pqcomm.c +0 -659
- data/ext/pg_query/src_backend_parser_parse_expr.c +0 -313
- data/ext/pg_query/src_port_erand48.c +0 -127
- data/ext/pg_query/src_port_random.c +0 -31
@@ -8,18 +8,21 @@
|
|
8
8
|
* - pg_wchar2single_with_len
|
9
9
|
* - pg_ascii_mblen
|
10
10
|
* - pg_ascii_dsplen
|
11
|
-
* - pg_ascii_verifier
|
11
|
+
* - pg_ascii_verifychar
|
12
|
+
* - pg_ascii_verifystr
|
12
13
|
* - pg_eucjp2wchar_with_len
|
13
14
|
* - pg_euc2wchar_with_len
|
14
15
|
* - pg_wchar2euc_with_len
|
15
16
|
* - pg_eucjp_mblen
|
16
17
|
* - pg_euc_mblen
|
17
18
|
* - pg_eucjp_dsplen
|
18
|
-
* - pg_eucjp_verifier
|
19
|
+
* - pg_eucjp_verifychar
|
20
|
+
* - pg_eucjp_verifystr
|
19
21
|
* - pg_euccn2wchar_with_len
|
20
22
|
* - pg_euccn_mblen
|
21
23
|
* - pg_euccn_dsplen
|
22
|
-
* - pg_euckr_verifier
|
24
|
+
* - pg_euckr_verifychar
|
25
|
+
* - pg_euckr_verifystr
|
23
26
|
* - pg_euckr2wchar_with_len
|
24
27
|
* - pg_euckr_mblen
|
25
28
|
* - pg_euckr_dsplen
|
@@ -27,7 +30,8 @@
|
|
27
30
|
* - pg_euctw2wchar_with_len
|
28
31
|
* - pg_euctw_mblen
|
29
32
|
* - pg_euctw_dsplen
|
30
|
-
* - pg_euctw_verifier
|
33
|
+
* - pg_euctw_verifychar
|
34
|
+
* - pg_euctw_verifystr
|
31
35
|
* - pg_utf2wchar_with_len
|
32
36
|
* - pg_wchar2utf_with_len
|
33
37
|
* - unicode_to_utf8
|
@@ -35,34 +39,45 @@
|
|
35
39
|
* - utf8_to_unicode
|
36
40
|
* - ucs_wcwidth
|
37
41
|
* - mbbisearch
|
38
|
-
* - pg_utf8_verifier
|
42
|
+
* - pg_utf8_verifychar
|
39
43
|
* - pg_utf8_islegal
|
44
|
+
* - pg_utf8_verifystr
|
45
|
+
* - utf8_advance
|
46
|
+
* - Utf8Transition
|
40
47
|
* - pg_mule2wchar_with_len
|
41
48
|
* - pg_wchar2mule_with_len
|
42
49
|
* - pg_mule_dsplen
|
43
|
-
* - pg_mule_verifier
|
50
|
+
* - pg_mule_verifychar
|
51
|
+
* - pg_mule_verifystr
|
44
52
|
* - pg_latin12wchar_with_len
|
45
53
|
* - pg_latin1_mblen
|
46
54
|
* - pg_latin1_dsplen
|
47
|
-
* - pg_latin1_verifier
|
55
|
+
* - pg_latin1_verifychar
|
56
|
+
* - pg_latin1_verifystr
|
48
57
|
* - pg_sjis_mblen
|
49
58
|
* - pg_sjis_dsplen
|
50
|
-
* - pg_sjis_verifier
|
59
|
+
* - pg_sjis_verifychar
|
60
|
+
* - pg_sjis_verifystr
|
51
61
|
* - pg_big5_mblen
|
52
62
|
* - pg_big5_dsplen
|
53
|
-
* - pg_big5_verifier
|
63
|
+
* - pg_big5_verifychar
|
64
|
+
* - pg_big5_verifystr
|
54
65
|
* - pg_gbk_mblen
|
55
66
|
* - pg_gbk_dsplen
|
56
|
-
* - pg_gbk_verifier
|
67
|
+
* - pg_gbk_verifychar
|
68
|
+
* - pg_gbk_verifystr
|
57
69
|
* - pg_uhc_mblen
|
58
70
|
* - pg_uhc_dsplen
|
59
|
-
* - pg_uhc_verifier
|
71
|
+
* - pg_uhc_verifychar
|
72
|
+
* - pg_uhc_verifystr
|
60
73
|
* - pg_gb18030_mblen
|
61
74
|
* - pg_gb18030_dsplen
|
62
|
-
* - pg_gb18030_verifier
|
75
|
+
* - pg_gb18030_verifychar
|
76
|
+
* - pg_gb18030_verifystr
|
63
77
|
* - pg_johab_mblen
|
64
78
|
* - pg_johab_dsplen
|
65
|
-
* - pg_johab_verifier
|
79
|
+
* - pg_johab_verifychar
|
80
|
+
* - pg_johab_verifystr
|
66
81
|
* - pg_encoding_mblen
|
67
82
|
*--------------------------------------------------------------------
|
68
83
|
*/
|
@@ -72,7 +87,7 @@
|
|
72
87
|
* wchar.c
|
73
88
|
* Functions for working with multibyte characters in various encodings.
|
74
89
|
*
|
75
|
-
* Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
|
90
|
+
* Portions Copyright (c) 1998-2022, PostgreSQL Global Development Group
|
76
91
|
*
|
77
92
|
* IDENTIFICATION
|
78
93
|
* src/common/wchar.c
|
@@ -88,9 +103,9 @@
|
|
88
103
|
* Operations on multi-byte encodings are driven by a table of helper
|
89
104
|
* functions.
|
90
105
|
*
|
91
|
-
* To add an encoding support, define mblen(), dsplen() and verifier() for
|
92
|
-
* the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
|
93
|
-
* conversion functions.
|
106
|
+
* To add an encoding support, define mblen(), dsplen(), verifychar() and
|
107
|
+
* verifystr() for the encoding. For server-encodings, also define mb2wchar()
|
108
|
+
* and wchar2mb() conversion functions.
|
94
109
|
*
|
95
110
|
* These functions generally assume that their input is validly formed.
|
96
111
|
* The "verifier" functions, further down in the file, have to be more
|
@@ -652,8 +667,8 @@ pg_utf_mblen(const unsigned char *s)
|
|
652
667
|
|
653
668
|
struct mbinterval
|
654
669
|
{
|
655
|
-
unsigned short first;
|
656
|
-
unsigned short last;
|
670
|
+
unsigned int first;
|
671
|
+
unsigned int last;
|
657
672
|
};
|
658
673
|
|
659
674
|
/* auxiliary function for binary search in interval table */
|
@@ -692,12 +707,6 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
|
|
692
707
|
* category code Mn or Me in the Unicode database) have a
|
693
708
|
* column width of 0.
|
694
709
|
*
|
695
|
-
* - Other format characters (general category code Cf in the Unicode
|
696
|
-
* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
|
697
|
-
*
|
698
|
-
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
|
699
|
-
* have a column width of 0.
|
700
|
-
*
|
701
710
|
* - Spacing characters in the East Asian Wide (W) or East Asian
|
702
711
|
* FullWidth (F) category as defined in Unicode Technical
|
703
712
|
* Report #11 have a column width of 2.
|
@@ -714,6 +723,7 @@ static int
|
|
714
723
|
ucs_wcwidth(pg_wchar ucs)
|
715
724
|
{
|
716
725
|
#include "common/unicode_combining_table.h"
|
726
|
+
#include "common/unicode_east_asian_fw_table.h"
|
717
727
|
|
718
728
|
/* test for 8-bit control characters */
|
719
729
|
if (ucs == 0)
|
@@ -722,27 +732,25 @@ ucs_wcwidth(pg_wchar ucs)
|
|
722
732
|
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
|
723
733
|
return -1;
|
724
734
|
|
725
|
-
/* binary search in table of non-spacing characters */
|
735
|
+
/*
|
736
|
+
* binary search in table of non-spacing characters
|
737
|
+
*
|
738
|
+
* XXX: In the official Unicode sources, it is possible for a character to
|
739
|
+
* be described as both non-spacing and wide at the same time. As of
|
740
|
+
* Unicode 13.0, treating the non-spacing property as the determining
|
741
|
+
* factor for display width leads to the correct behavior, so do that
|
742
|
+
* search first.
|
743
|
+
*/
|
726
744
|
if (mbbisearch(ucs, combining,
|
727
745
|
sizeof(combining) / sizeof(struct mbinterval) - 1))
|
728
746
|
return 0;
|
729
747
|
|
730
|
-
/*
|
731
|
-
|
732
|
-
|
748
|
+
/* binary search in table of wide characters */
|
749
|
+
if (mbbisearch(ucs, east_asian_fw,
|
750
|
+
sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
|
751
|
+
return 2;
|
733
752
|
|
734
|
-
return 1 +
|
735
|
-
(ucs >= 0x1100 &&
|
736
|
-
(ucs <= 0x115f || /* Hangul Jamo init. consonants */
|
737
|
-
(ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
|
738
|
-
ucs != 0x303f) || /* CJK ... Yi */
|
739
|
-
(ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
|
740
|
-
(ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
|
741
|
-
* Ideographs */
|
742
|
-
(ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
|
743
|
-
(ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
|
744
|
-
(ucs >= 0xffe0 && ucs <= 0xffe6) ||
|
745
|
-
(ucs >= 0x20000 && ucs <= 0x2ffff)));
|
753
|
+
return 1;
|
746
754
|
}
|
747
755
|
|
748
756
|
/*
|
@@ -1156,29 +1164,45 @@ pg_gb18030_dsplen(const unsigned char *s)
|
|
1156
1164
|
*-------------------------------------------------------------------
|
1157
1165
|
* multibyte sequence validators
|
1158
1166
|
*
|
1159
|
-
*
|
1160
|
-
* and "len", the remaining length of the string. If there is a
|
1161
|
-
* encoded character beginning at *s, return its length in bytes;
|
1162
|
-
* return -1.
|
1167
|
+
* The verifychar functions accept "s", a pointer to the first byte of a
|
1168
|
+
* string, and "len", the remaining length of the string. If there is a
|
1169
|
+
* validly encoded character beginning at *s, return its length in bytes;
|
1170
|
+
* else return -1.
|
1163
1171
|
*
|
1164
|
-
* The functions
|
1165
|
-
*
|
1172
|
+
* The verifystr functions also accept "s", a pointer to a string and "len",
|
1173
|
+
* the length of the string. They verify the whole string, and return the
|
1174
|
+
* number of input bytes (<= len) that are valid. In other words, if the
|
1175
|
+
* whole string is valid, verifystr returns "len", otherwise it returns the
|
1176
|
+
* byte offset of the first invalid character. The verifystr functions must
|
1177
|
+
* test for and reject zeroes in the input.
|
1166
1178
|
*
|
1167
|
-
*
|
1168
|
-
*
|
1179
|
+
* The verifychar functions can assume that len > 0 and that *s != '\0', but
|
1180
|
+
* they must test for and reject zeroes in any additional bytes of a
|
1181
|
+
* multibyte character. Note that this definition allows the function for a
|
1182
|
+
* single-byte encoding to be just "return 1".
|
1169
1183
|
*-------------------------------------------------------------------
|
1170
1184
|
*/
|
1171
|
-
|
1172
1185
|
/*
 * Verify one character of a single-byte (ASCII) encoding.
 *
 * Always reports a length of one byte; neither "s" nor "len" is inspected.
 */
static int
pg_ascii_verifychar(const unsigned char *s, int len)
{
	return 1;
}
|
1177
1190
|
|
1191
|
+
/*
 * Verify a whole string in a single-byte (ASCII) encoding.
 *
 * s:   pointer to the first byte of the string
 * len: number of bytes to examine
 *
 * Returns the number of leading bytes that are valid: "len" when the input
 * contains no zero byte, otherwise the offset of the first zero byte.
 * A non-positive "len" yields 0 instead of being handed to memchr(), where
 * a negative value would be converted to a huge size_t.
 */
static int
pg_ascii_verifystr(const unsigned char *s, int len)
{
	const unsigned char *nullpos;

	if (len <= 0)
		return 0;

	nullpos = memchr(s, 0, (size_t) len);
	if (nullpos == NULL)
		return len;

	/* the offset is < len, so it always fits in an int */
	return (int) (nullpos - s);
}
|
1201
|
+
|
1178
1202
|
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
|
1179
1203
|
|
1180
1204
|
static int
|
1181
|
-
|
1205
|
+
pg_eucjp_verifychar(const unsigned char *s, int len)
|
1182
1206
|
{
|
1183
1207
|
int l;
|
1184
1208
|
unsigned char c1,
|
@@ -1233,7 +1257,36 @@ pg_eucjp_verifier(const unsigned char *s, int len)
|
|
1233
1257
|
}
|
1234
1258
|
|
1235
1259
|
/*
 * Verify a whole EUC-JP string.
 *
 * Walks the input one character at a time.  Bytes without the high bit set
 * are accepted directly unless they are zero; everything else is checked by
 * pg_eucjp_verifychar().  Scanning stops at the first zero byte or invalid
 * character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_eucjp_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_eucjp_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1287
|
+
|
1288
|
+
static int
|
1289
|
+
pg_euckr_verifychar(const unsigned char *s, int len)
|
1237
1290
|
{
|
1238
1291
|
int l;
|
1239
1292
|
unsigned char c1,
|
@@ -1261,11 +1314,41 @@ pg_euckr_verifier(const unsigned char *s, int len)
|
|
1261
1314
|
return l;
|
1262
1315
|
}
|
1263
1316
|
|
1317
|
+
/*
 * Verify a whole EUC-KR string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_euckr_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_euckr_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_euckr_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1345
|
+
|
1264
1346
|
/* EUC-CN byte sequences are exactly same as EUC-KR */
|
1265
|
-
#define
|
1347
|
+
#define pg_euccn_verifychar pg_euckr_verifychar
|
1348
|
+
#define pg_euccn_verifystr pg_euckr_verifystr
|
1266
1349
|
|
1267
1350
|
static int
|
1268
|
-
|
1351
|
+
pg_euctw_verifychar(const unsigned char *s, int len)
|
1269
1352
|
{
|
1270
1353
|
int l;
|
1271
1354
|
unsigned char c1,
|
@@ -1315,7 +1398,36 @@ pg_euctw_verifier(const unsigned char *s, int len)
|
|
1315
1398
|
}
|
1316
1399
|
|
1317
1400
|
/*
 * Verify a whole EUC-TW string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_euctw_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_euctw_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_euctw_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1428
|
+
|
1429
|
+
static int
|
1430
|
+
pg_johab_verifychar(const unsigned char *s, int len)
|
1319
1431
|
{
|
1320
1432
|
int l,
|
1321
1433
|
mbl;
|
@@ -1339,7 +1451,36 @@ pg_johab_verifier(const unsigned char *s, int len)
|
|
1339
1451
|
}
|
1340
1452
|
|
1341
1453
|
/*
 * Verify a whole JOHAB string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_johab_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_johab_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_johab_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1481
|
+
|
1482
|
+
static int
|
1483
|
+
pg_mule_verifychar(const unsigned char *s, int len)
|
1343
1484
|
{
|
1344
1485
|
int l,
|
1345
1486
|
mbl;
|
@@ -1360,13 +1501,53 @@ pg_mule_verifier(const unsigned char *s, int len)
|
|
1360
1501
|
}
|
1361
1502
|
|
1362
1503
|
/*
 * Verify a whole MULE_INTERNAL string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_mule_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_mule_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_mule_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1531
|
+
|
1532
|
+
/*
 * Verify one character of a single-byte (LATIN1-style) encoding.
 *
 * Always reports a length of one byte; neither "s" nor "len" is inspected.
 */
static int
pg_latin1_verifychar(const unsigned char *s, int len)
{
	return 1;
}
|
1367
1537
|
|
1368
1538
|
/*
 * Verify a whole string in a single-byte (LATIN1-style) encoding.
 *
 * s:   pointer to the first byte of the string
 * len: number of bytes to examine
 *
 * Returns the number of leading bytes that are valid: "len" when the input
 * contains no zero byte, otherwise the offset of the first zero byte.
 * A non-positive "len" yields 0 instead of being handed to memchr(), where
 * a negative value would be converted to a huge size_t.
 */
static int
pg_latin1_verifystr(const unsigned char *s, int len)
{
	const unsigned char *nullpos;

	if (len <= 0)
		return 0;

	nullpos = memchr(s, 0, (size_t) len);
	if (nullpos == NULL)
		return len;

	/* the offset is < len, so it always fits in an int */
	return (int) (nullpos - s);
}
|
1548
|
+
|
1549
|
+
static int
|
1550
|
+
pg_sjis_verifychar(const unsigned char *s, int len)
|
1370
1551
|
{
|
1371
1552
|
int l,
|
1372
1553
|
mbl;
|
@@ -1389,7 +1570,36 @@ pg_sjis_verifier(const unsigned char *s, int len)
|
|
1389
1570
|
}
|
1390
1571
|
|
1391
1572
|
/*
 * Verify a whole Shift-JIS string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_sjis_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_sjis_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_sjis_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1600
|
+
|
1601
|
+
static int
|
1602
|
+
pg_big5_verifychar(const unsigned char *s, int len)
|
1393
1603
|
{
|
1394
1604
|
int l,
|
1395
1605
|
mbl;
|
@@ -1409,7 +1619,36 @@ pg_big5_verifier(const unsigned char *s, int len)
|
|
1409
1619
|
}
|
1410
1620
|
|
1411
1621
|
/*
 * Verify a whole Big5 string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_big5_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_big5_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_big5_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1649
|
+
|
1650
|
+
static int
|
1651
|
+
pg_gbk_verifychar(const unsigned char *s, int len)
|
1413
1652
|
{
|
1414
1653
|
int l,
|
1415
1654
|
mbl;
|
@@ -1429,7 +1668,36 @@ pg_gbk_verifier(const unsigned char *s, int len)
|
|
1429
1668
|
}
|
1430
1669
|
|
1431
1670
|
/*
 * Verify a whole GBK string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_gbk_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_gbk_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_gbk_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1698
|
+
|
1699
|
+
static int
|
1700
|
+
pg_uhc_verifychar(const unsigned char *s, int len)
|
1433
1701
|
{
|
1434
1702
|
int l,
|
1435
1703
|
mbl;
|
@@ -1449,7 +1717,36 @@ pg_uhc_verifier(const unsigned char *s, int len)
|
|
1449
1717
|
}
|
1450
1718
|
|
1451
1719
|
/*
 * Verify a whole UHC string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_uhc_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_uhc_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_uhc_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1747
|
+
|
1748
|
+
static int
|
1749
|
+
pg_gb18030_verifychar(const unsigned char *s, int len)
|
1453
1750
|
{
|
1454
1751
|
int l;
|
1455
1752
|
|
@@ -1480,11 +1777,55 @@ pg_gb18030_verifier(const unsigned char *s, int len)
|
|
1480
1777
|
}
|
1481
1778
|
|
1482
1779
|
/*
 * Verify a whole GB18030 string.
 *
 * Bytes without the high bit set are accepted directly unless they are
 * zero; everything else is checked by pg_gb18030_verifychar().  Scanning
 * stops at the first zero byte or invalid character.
 *
 * Returns the number of leading bytes (<= len) that are valid.
 */
static int
pg_gb18030_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			charlen = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* multibyte character: defer to the per-character verifier */
			charlen = pg_gb18030_verifychar(cur, remaining);
			if (charlen == -1)
				break;
		}
		else if (*cur == '\0')
			break;				/* embedded zero byte is rejected */

		cur += charlen;
		remaining -= charlen;
	}

	return cur - s;
}
|
1807
|
+
|
1808
|
+
static int
|
1809
|
+
pg_utf8_verifychar(const unsigned char *s, int len)
|
1810
|
+
{
|
1811
|
+
int l;
|
1812
|
+
|
1813
|
+
if ((*s & 0x80) == 0)
|
1814
|
+
{
|
1815
|
+
if (*s == '\0')
|
1816
|
+
return -1;
|
1817
|
+
return 1;
|
1818
|
+
}
|
1819
|
+
else if ((*s & 0xe0) == 0xc0)
|
1820
|
+
l = 2;
|
1821
|
+
else if ((*s & 0xf0) == 0xe0)
|
1822
|
+
l = 3;
|
1823
|
+
else if ((*s & 0xf8) == 0xf0)
|
1824
|
+
l = 4;
|
1825
|
+
else
|
1826
|
+
l = 1;
|
1827
|
+
|
1828
|
+
if (l > len)
|
1488
1829
|
return -1;
|
1489
1830
|
|
1490
1831
|
if (!pg_utf8_islegal(s, l))
|
@@ -1493,6 +1834,250 @@ pg_utf8_verifier(const unsigned char *s, int len)
|
|
1493
1834
|
return l;
|
1494
1835
|
}
|
1495
1836
|
|
1837
|
+
/*
|
1838
|
+
* The fast path of the UTF-8 verifier uses a deterministic finite automaton
|
1839
|
+
* (DFA) for multibyte characters. In a traditional table-driven DFA, the
|
1840
|
+
* input byte and current state are used to compute an index into an array of
|
1841
|
+
* state transitions. Since the address of the next transition is dependent
|
1842
|
+
* on this computation, there is latency in executing the load instruction,
|
1843
|
+
* and the CPU is not kept busy.
|
1844
|
+
*
|
1845
|
+
* Instead, we use a "shift-based" DFA as described by Per Vognsen:
|
1846
|
+
*
|
1847
|
+
* https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725
|
1848
|
+
*
|
1849
|
+
* In a shift-based DFA, the input byte is an index into an array of integers
|
1850
|
+
* whose bit pattern encodes the state transitions. To compute the next
|
1851
|
+
* state, we simply right-shift the integer by the current state and apply a
|
1852
|
+
* mask. In this scheme, the address of the transition only depends on the
|
1853
|
+
* input byte, so there is better pipelining.
|
1854
|
+
*
|
1855
|
+
* The naming convention for states and transitions was adopted from a UTF-8
|
1856
|
+
* to UTF-16/32 transcoder, whose table is reproduced below:
|
1857
|
+
*
|
1858
|
+
* https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp
|
1859
|
+
*
|
1860
|
+
* ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS / STATE
|
1861
|
+
* ==========================================================================
|
1862
|
+
* err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, | BGN/END
|
1863
|
+
* err, err, err, err, err, err, err, err, err, err, err, err, | ERR
|
1864
|
+
* |
|
1865
|
+
* err, err, END, END, END, err, err, err, err, err, err, err, | CS1
|
1866
|
+
* err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, | CS2
|
1867
|
+
* err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, | CS3
|
1868
|
+
* |
|
1869
|
+
* err, err, err, err, CS1, err, err, err, err, err, err, err, | P3A
|
1870
|
+
* err, err, CS1, CS1, err, err, err, err, err, err, err, err, | P3B
|
1871
|
+
* |
|
1872
|
+
* err, err, err, CS2, CS2, err, err, err, err, err, err, err, | P4A
|
1873
|
+
* err, err, CS2, err, err, err, err, err, err, err, err, err, | P4B
|
1874
|
+
*
|
1875
|
+
* In the most straightforward implementation, a shift-based DFA for UTF-8
|
1876
|
+
* requires 64-bit integers to encode the transitions, but with an SMT solver
|
1877
|
+
* it's possible to find state numbers such that the transitions fit within
|
1878
|
+
* 32-bit integers, as Dougall Johnson demonstrated:
|
1879
|
+
*
|
1880
|
+
* https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f
|
1881
|
+
*
|
1882
|
+
* This packed representation is the reason for the seemingly odd choice of
|
1883
|
+
* state values below.
|
1884
|
+
*/
|
1885
|
+
|
1886
|
+
/*
 * DFA states.  Each state's value is also the bit offset at which its
 * successor is stored inside a packed transition word, which is why the
 * numbers are not sequential.
 */

/* Error */
#define ERR  0
/* Begin */
#define BGN 11
/* Continuation states, expect 1/2/3 continuation bytes */
#define CS1 16
#define CS2  1
#define CS3  5
/* Partial states, where the first continuation byte has a restricted range */
#define P3A  6					/* Lead was E0, check for 3-byte overlong */
#define P3B 20					/* Lead was ED, check for surrogate */
#define P4A 25					/* Lead was F0, check for 4-byte overlong */
#define P4B 30					/* Lead was F4, check for too-large */
/* Begin and End are the same state */
#define END BGN

/*
 * The encoded state transitions for the lookup table.  Each word stores,
 * at bit offset <current state>, the state to move to.  Every expansion is
 * fully parenthesized so the macros stay correct inside larger expressions.
 */

/* ASCII */
#define ASC (END << BGN)
/* 2-byte lead */
#define L2A (CS1 << BGN)
/* 3-byte lead */
#define L3A (P3A << BGN)
#define L3B (CS2 << BGN)
#define L3C (P3B << BGN)
/* 4-byte lead */
#define L4A (P4A << BGN)
#define L4B (CS3 << BGN)
#define L4C (P4B << BGN)
/* continuation byte */
#define CR1 ((END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B))
#define CR2 ((END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A))
#define CR3 ((END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A))
/* invalid byte */
#define ILL ERR
|
1922
|
+
|
1923
|
+
static const uint32 Utf8Transition[256] =
|
1924
|
+
{
|
1925
|
+
/* ASCII */
|
1926
|
+
|
1927
|
+
ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1928
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1929
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1930
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1931
|
+
|
1932
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1933
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1934
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1935
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1936
|
+
|
1937
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1938
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1939
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1940
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1941
|
+
|
1942
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1943
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1944
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1945
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
|
1946
|
+
|
1947
|
+
/* continuation bytes */
|
1948
|
+
|
1949
|
+
/* 80..8F */
|
1950
|
+
CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
|
1951
|
+
CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
|
1952
|
+
|
1953
|
+
/* 90..9F */
|
1954
|
+
CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
|
1955
|
+
CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
|
1956
|
+
|
1957
|
+
/* A0..BF */
|
1958
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1959
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1960
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1961
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
|
1962
|
+
|
1963
|
+
/* leading bytes */
|
1964
|
+
|
1965
|
+
/* C0..DF */
|
1966
|
+
ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
|
1967
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
|
1968
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
|
1969
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
|
1970
|
+
|
1971
|
+
/* E0..EF */
|
1972
|
+
L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
|
1973
|
+
L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
|
1974
|
+
|
1975
|
+
/* F0..FF */
|
1976
|
+
L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
|
1977
|
+
ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
|
1978
|
+
};
|
1979
|
+
|
1980
|
+
static void
|
1981
|
+
utf8_advance(const unsigned char *s, uint32 *state, int len)
|
1982
|
+
{
|
1983
|
+
/* Note: We deliberately don't check the state's value here. */
|
1984
|
+
while (len > 0)
|
1985
|
+
{
|
1986
|
+
/*
|
1987
|
+
* It's important that the mask value is 31: In most instruction sets,
|
1988
|
+
* a shift by a 32-bit operand is understood to be a shift by its mod
|
1989
|
+
* 32, so the compiler should elide the mask operation.
|
1990
|
+
*/
|
1991
|
+
*state = Utf8Transition[*s++] >> (*state & 31);
|
1992
|
+
len--;
|
1993
|
+
}
|
1994
|
+
|
1995
|
+
*state &= 31;
|
1996
|
+
}
|
1997
|
+
|
1998
|
+
static int
|
1999
|
+
pg_utf8_verifystr(const unsigned char *s, int len)
|
2000
|
+
{
|
2001
|
+
const unsigned char *start = s;
|
2002
|
+
const int orig_len = len;
|
2003
|
+
uint32 state = BGN;
|
2004
|
+
|
2005
|
+
/*
|
2006
|
+
* Sixteen seems to give the best balance of performance across different
|
2007
|
+
* byte distributions.
|
2008
|
+
*/
|
2009
|
+
#define STRIDE_LENGTH 16
|
2010
|
+
|
2011
|
+
if (len >= STRIDE_LENGTH)
|
2012
|
+
{
|
2013
|
+
while (len >= STRIDE_LENGTH)
|
2014
|
+
{
|
2015
|
+
/*
|
2016
|
+
* If the chunk is all ASCII, we can skip the full UTF-8 check,
|
2017
|
+
* but we must first check for a non-END state, which means the
|
2018
|
+
* previous chunk ended in the middle of a multibyte sequence.
|
2019
|
+
*/
|
2020
|
+
if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
|
2021
|
+
utf8_advance(s, &state, STRIDE_LENGTH);
|
2022
|
+
|
2023
|
+
s += STRIDE_LENGTH;
|
2024
|
+
len -= STRIDE_LENGTH;
|
2025
|
+
}
|
2026
|
+
|
2027
|
+
/* The error state persists, so we only need to check for it here. */
|
2028
|
+
if (state == ERR)
|
2029
|
+
{
|
2030
|
+
/*
|
2031
|
+
* Start over from the beginning with the slow path so we can
|
2032
|
+
* count the valid bytes.
|
2033
|
+
*/
|
2034
|
+
len = orig_len;
|
2035
|
+
s = start;
|
2036
|
+
}
|
2037
|
+
else if (state != END)
|
2038
|
+
{
|
2039
|
+
/*
|
2040
|
+
* The fast path exited in the middle of a multibyte sequence.
|
2041
|
+
* Walk backwards to find the leading byte so that the slow path
|
2042
|
+
* can resume checking from there. We must always backtrack at
|
2043
|
+
* least one byte, since the current byte could be e.g. an ASCII
|
2044
|
+
* byte after a 2-byte lead, which is invalid.
|
2045
|
+
*/
|
2046
|
+
do
|
2047
|
+
{
|
2048
|
+
Assert(s > start);
|
2049
|
+
s--;
|
2050
|
+
len++;
|
2051
|
+
Assert(IS_HIGHBIT_SET(*s));
|
2052
|
+
} while (pg_utf_mblen(s) <= 1);
|
2053
|
+
}
|
2054
|
+
}
|
2055
|
+
|
2056
|
+
/* check remaining bytes */
|
2057
|
+
while (len > 0)
|
2058
|
+
{
|
2059
|
+
int l;
|
2060
|
+
|
2061
|
+
/* fast path for ASCII-subset characters */
|
2062
|
+
if (!IS_HIGHBIT_SET(*s))
|
2063
|
+
{
|
2064
|
+
if (*s == '\0')
|
2065
|
+
break;
|
2066
|
+
l = 1;
|
2067
|
+
}
|
2068
|
+
else
|
2069
|
+
{
|
2070
|
+
l = pg_utf8_verifychar(s, len);
|
2071
|
+
if (l == -1)
|
2072
|
+
break;
|
2073
|
+
}
|
2074
|
+
s += l;
|
2075
|
+
len -= l;
|
2076
|
+
}
|
2077
|
+
|
2078
|
+
return s - start;
|
2079
|
+
}
|
2080
|
+
|
1496
2081
|
/*
|
1497
2082
|
* Check for validity of a single UTF-8 encoded character
|
1498
2083
|
*
|
@@ -1572,48 +2157,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
|
1572
2157
|
*-------------------------------------------------------------------
|
1573
2158
|
*/
|
1574
2159
|
const pg_wchar_tbl pg_wchar_table[] = {
|
1575
|
-
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen,
|
1576
|
-
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen,
|
1577
|
-
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen,
|
1578
|
-
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen,
|
1579
|
-
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen,
|
1580
|
-
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen,
|
1581
|
-
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen,
|
1582
|
-
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen,
|
1583
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1584
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1585
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1586
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1587
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1588
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1589
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1590
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1591
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1592
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1593
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1594
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1595
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1596
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1597
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1598
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1599
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1600
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1601
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1602
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1603
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1604
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1605
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1606
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1607
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1608
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1609
|
-
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen,
|
1610
|
-
{0, 0, pg_sjis_mblen, pg_sjis_dsplen,
|
1611
|
-
{0, 0, pg_big5_mblen, pg_big5_dsplen,
|
1612
|
-
{0, 0, pg_gbk_mblen, pg_gbk_dsplen,
|
1613
|
-
{0, 0, pg_uhc_mblen, pg_uhc_dsplen,
|
1614
|
-
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen,
|
1615
|
-
{0, 0, pg_johab_mblen, pg_johab_dsplen,
|
1616
|
-
{0, 0, pg_sjis_mblen, pg_sjis_dsplen,
|
2160
|
+
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */
|
2161
|
+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */
|
2162
|
+
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */
|
2163
|
+
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */
|
2164
|
+
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */
|
2165
|
+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */
|
2166
|
+
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */
|
2167
|
+
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */
|
2168
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */
|
2169
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */
|
2170
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */
|
2171
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */
|
2172
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */
|
2173
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */
|
2174
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */
|
2175
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */
|
2176
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */
|
2177
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */
|
2178
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */
|
2179
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */
|
2180
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */
|
2181
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */
|
2182
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */
|
2183
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */
|
2184
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */
|
2185
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */
|
2186
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */
|
2187
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */
|
2188
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */
|
2189
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */
|
2190
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */
|
2191
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */
|
2192
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */
|
2193
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */
|
2194
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */
|
2195
|
+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS */
|
2196
|
+
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */
|
2197
|
+
{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */
|
2198
|
+
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */
|
2199
|
+
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */
|
2200
|
+
{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */
|
2201
|
+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */
|
1617
2202
|
};
|
1618
2203
|
|
1619
2204
|
/*
|
@@ -1646,7 +2231,14 @@ pg_encoding_mblen(int encoding, const char *mbstr)
|
|
1646
2231
|
/*
|
1647
2232
|
* Verify the first multibyte character of the given string.
|
1648
2233
|
* Return its byte length if good, -1 if bad. (See comments above for
|
1649
|
-
* full details of the
|
2234
|
+
* full details of the mbverifychar API.)
|
2235
|
+
*/
|
2236
|
+
|
2237
|
+
|
2238
|
+
/*
|
2239
|
+
* Verify that a string is valid for the given encoding.
|
2240
|
+
* Returns the number of input bytes (<= len) that form a valid string.
|
2241
|
+
* (See comments above for full details of the mbverifystr API.)
|
1650
2242
|
*/
|
1651
2243
|
|
1652
2244
|
|