gitlab-pg_query 1.3.1 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +217 -99
- data/README.md +92 -69
- data/Rakefile +85 -5
- data/ext/pg_query/extconf.rb +3 -40
- data/ext/pg_query/guc-file.c +0 -0
- data/ext/pg_query/include/access/amapi.h +246 -0
- data/ext/pg_query/include/access/attmap.h +52 -0
- data/ext/pg_query/include/access/attnum.h +64 -0
- data/ext/pg_query/include/access/clog.h +61 -0
- data/ext/pg_query/include/access/commit_ts.h +77 -0
- data/ext/pg_query/include/access/detoast.h +92 -0
- data/ext/pg_query/include/access/genam.h +228 -0
- data/ext/pg_query/include/access/gin.h +78 -0
- data/ext/pg_query/include/access/htup.h +89 -0
- data/ext/pg_query/include/access/htup_details.h +819 -0
- data/ext/pg_query/include/access/itup.h +161 -0
- data/ext/pg_query/include/access/parallel.h +82 -0
- data/ext/pg_query/include/access/printtup.h +35 -0
- data/ext/pg_query/include/access/relation.h +28 -0
- data/ext/pg_query/include/access/relscan.h +176 -0
- data/ext/pg_query/include/access/rmgr.h +35 -0
- data/ext/pg_query/include/access/rmgrlist.h +49 -0
- data/ext/pg_query/include/access/sdir.h +58 -0
- data/ext/pg_query/include/access/skey.h +151 -0
- data/ext/pg_query/include/access/stratnum.h +83 -0
- data/ext/pg_query/include/access/sysattr.h +29 -0
- data/ext/pg_query/include/access/table.h +27 -0
- data/ext/pg_query/include/access/tableam.h +1825 -0
- data/ext/pg_query/include/access/transam.h +265 -0
- data/ext/pg_query/include/access/tupconvert.h +51 -0
- data/ext/pg_query/include/access/tupdesc.h +154 -0
- data/ext/pg_query/include/access/tupmacs.h +247 -0
- data/ext/pg_query/include/access/twophase.h +61 -0
- data/ext/pg_query/include/access/xact.h +463 -0
- data/ext/pg_query/include/access/xlog.h +398 -0
- data/ext/pg_query/include/access/xlog_internal.h +330 -0
- data/ext/pg_query/include/access/xlogdefs.h +109 -0
- data/ext/pg_query/include/access/xloginsert.h +64 -0
- data/ext/pg_query/include/access/xlogreader.h +327 -0
- data/ext/pg_query/include/access/xlogrecord.h +227 -0
- data/ext/pg_query/include/bootstrap/bootstrap.h +62 -0
- data/ext/pg_query/include/c.h +1322 -0
- data/ext/pg_query/include/catalog/catalog.h +42 -0
- data/ext/pg_query/include/catalog/catversion.h +58 -0
- data/ext/pg_query/include/catalog/dependency.h +275 -0
- data/ext/pg_query/include/catalog/genbki.h +64 -0
- data/ext/pg_query/include/catalog/index.h +199 -0
- data/ext/pg_query/include/catalog/indexing.h +366 -0
- data/ext/pg_query/include/catalog/namespace.h +188 -0
- data/ext/pg_query/include/catalog/objectaccess.h +197 -0
- data/ext/pg_query/include/catalog/objectaddress.h +84 -0
- data/ext/pg_query/include/catalog/pg_aggregate.h +176 -0
- data/ext/pg_query/include/catalog/pg_aggregate_d.h +77 -0
- data/ext/pg_query/include/catalog/pg_am.h +60 -0
- data/ext/pg_query/include/catalog/pg_am_d.h +45 -0
- data/ext/pg_query/include/catalog/pg_attribute.h +204 -0
- data/ext/pg_query/include/catalog/pg_attribute_d.h +59 -0
- data/ext/pg_query/include/catalog/pg_authid.h +58 -0
- data/ext/pg_query/include/catalog/pg_authid_d.h +49 -0
- data/ext/pg_query/include/catalog/pg_class.h +200 -0
- data/ext/pg_query/include/catalog/pg_class_d.h +103 -0
- data/ext/pg_query/include/catalog/pg_collation.h +73 -0
- data/ext/pg_query/include/catalog/pg_collation_d.h +45 -0
- data/ext/pg_query/include/catalog/pg_constraint.h +247 -0
- data/ext/pg_query/include/catalog/pg_constraint_d.h +67 -0
- data/ext/pg_query/include/catalog/pg_control.h +250 -0
- data/ext/pg_query/include/catalog/pg_conversion.h +72 -0
- data/ext/pg_query/include/catalog/pg_conversion_d.h +35 -0
- data/ext/pg_query/include/catalog/pg_depend.h +73 -0
- data/ext/pg_query/include/catalog/pg_depend_d.h +34 -0
- data/ext/pg_query/include/catalog/pg_event_trigger.h +51 -0
- data/ext/pg_query/include/catalog/pg_event_trigger_d.h +34 -0
- data/ext/pg_query/include/catalog/pg_index.h +80 -0
- data/ext/pg_query/include/catalog/pg_index_d.h +56 -0
- data/ext/pg_query/include/catalog/pg_language.h +67 -0
- data/ext/pg_query/include/catalog/pg_language_d.h +39 -0
- data/ext/pg_query/include/catalog/pg_namespace.h +59 -0
- data/ext/pg_query/include/catalog/pg_namespace_d.h +34 -0
- data/ext/pg_query/include/catalog/pg_opclass.h +85 -0
- data/ext/pg_query/include/catalog/pg_opclass_d.h +49 -0
- data/ext/pg_query/include/catalog/pg_operator.h +102 -0
- data/ext/pg_query/include/catalog/pg_operator_d.h +106 -0
- data/ext/pg_query/include/catalog/pg_opfamily.h +60 -0
- data/ext/pg_query/include/catalog/pg_opfamily_d.h +47 -0
- data/ext/pg_query/include/catalog/pg_partitioned_table.h +63 -0
- data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +35 -0
- data/ext/pg_query/include/catalog/pg_proc.h +211 -0
- data/ext/pg_query/include/catalog/pg_proc_d.h +99 -0
- data/ext/pg_query/include/catalog/pg_publication.h +115 -0
- data/ext/pg_query/include/catalog/pg_publication_d.h +36 -0
- data/ext/pg_query/include/catalog/pg_replication_origin.h +57 -0
- data/ext/pg_query/include/catalog/pg_replication_origin_d.h +29 -0
- data/ext/pg_query/include/catalog/pg_statistic.h +275 -0
- data/ext/pg_query/include/catalog/pg_statistic_d.h +194 -0
- data/ext/pg_query/include/catalog/pg_statistic_ext.h +74 -0
- data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +40 -0
- data/ext/pg_query/include/catalog/pg_transform.h +45 -0
- data/ext/pg_query/include/catalog/pg_transform_d.h +32 -0
- data/ext/pg_query/include/catalog/pg_trigger.h +137 -0
- data/ext/pg_query/include/catalog/pg_trigger_d.h +106 -0
- data/ext/pg_query/include/catalog/pg_ts_config.h +50 -0
- data/ext/pg_query/include/catalog/pg_ts_config_d.h +32 -0
- data/ext/pg_query/include/catalog/pg_ts_dict.h +54 -0
- data/ext/pg_query/include/catalog/pg_ts_dict_d.h +33 -0
- data/ext/pg_query/include/catalog/pg_ts_parser.h +57 -0
- data/ext/pg_query/include/catalog/pg_ts_parser_d.h +35 -0
- data/ext/pg_query/include/catalog/pg_ts_template.h +48 -0
- data/ext/pg_query/include/catalog/pg_ts_template_d.h +32 -0
- data/ext/pg_query/include/catalog/pg_type.h +372 -0
- data/ext/pg_query/include/catalog/pg_type_d.h +285 -0
- data/ext/pg_query/include/catalog/storage.h +48 -0
- data/ext/pg_query/include/commands/async.h +54 -0
- data/ext/pg_query/include/commands/dbcommands.h +35 -0
- data/ext/pg_query/include/commands/defrem.h +173 -0
- data/ext/pg_query/include/commands/event_trigger.h +88 -0
- data/ext/pg_query/include/commands/explain.h +127 -0
- data/ext/pg_query/include/commands/prepare.h +61 -0
- data/ext/pg_query/include/commands/tablespace.h +67 -0
- data/ext/pg_query/include/commands/trigger.h +277 -0
- data/ext/pg_query/include/commands/user.h +37 -0
- data/ext/pg_query/include/commands/vacuum.h +293 -0
- data/ext/pg_query/include/commands/variable.h +38 -0
- data/ext/pg_query/include/common/file_perm.h +56 -0
- data/ext/pg_query/include/common/hashfn.h +104 -0
- data/ext/pg_query/include/common/ip.h +37 -0
- data/ext/pg_query/include/common/keywords.h +33 -0
- data/ext/pg_query/include/common/kwlookup.h +44 -0
- data/ext/pg_query/include/common/relpath.h +90 -0
- data/ext/pg_query/include/common/string.h +19 -0
- data/ext/pg_query/include/common/unicode_combining_table.h +196 -0
- data/ext/pg_query/include/datatype/timestamp.h +197 -0
- data/ext/pg_query/include/executor/execdesc.h +70 -0
- data/ext/pg_query/include/executor/executor.h +614 -0
- data/ext/pg_query/include/executor/functions.h +41 -0
- data/ext/pg_query/include/executor/instrument.h +101 -0
- data/ext/pg_query/include/executor/spi.h +175 -0
- data/ext/pg_query/include/executor/tablefunc.h +67 -0
- data/ext/pg_query/include/executor/tuptable.h +487 -0
- data/ext/pg_query/include/fmgr.h +775 -0
- data/ext/pg_query/include/funcapi.h +348 -0
- data/ext/pg_query/include/getaddrinfo.h +162 -0
- data/ext/pg_query/include/jit/jit.h +105 -0
- data/ext/pg_query/include/kwlist_d.h +1072 -0
- data/ext/pg_query/include/lib/ilist.h +727 -0
- data/ext/pg_query/include/lib/pairingheap.h +102 -0
- data/ext/pg_query/include/lib/simplehash.h +1059 -0
- data/ext/pg_query/include/lib/stringinfo.h +161 -0
- data/ext/pg_query/include/libpq/auth.h +29 -0
- data/ext/pg_query/include/libpq/crypt.h +46 -0
- data/ext/pg_query/include/libpq/hba.h +140 -0
- data/ext/pg_query/include/libpq/libpq-be.h +326 -0
- data/ext/pg_query/include/libpq/libpq.h +133 -0
- data/ext/pg_query/include/libpq/pqcomm.h +208 -0
- data/ext/pg_query/include/libpq/pqformat.h +210 -0
- data/ext/pg_query/include/libpq/pqsignal.h +42 -0
- data/ext/pg_query/include/mb/pg_wchar.h +672 -0
- data/ext/pg_query/include/mb/stringinfo_mb.h +24 -0
- data/ext/pg_query/include/miscadmin.h +476 -0
- data/ext/pg_query/include/nodes/bitmapset.h +122 -0
- data/ext/pg_query/include/nodes/execnodes.h +2520 -0
- data/ext/pg_query/include/nodes/extensible.h +160 -0
- data/ext/pg_query/include/nodes/lockoptions.h +61 -0
- data/ext/pg_query/include/nodes/makefuncs.h +108 -0
- data/ext/pg_query/include/nodes/memnodes.h +108 -0
- data/ext/pg_query/include/nodes/nodeFuncs.h +162 -0
- data/ext/pg_query/include/nodes/nodes.h +842 -0
- data/ext/pg_query/include/nodes/params.h +170 -0
- data/ext/pg_query/include/nodes/parsenodes.h +3579 -0
- data/ext/pg_query/include/nodes/pathnodes.h +2556 -0
- data/ext/pg_query/include/nodes/pg_list.h +605 -0
- data/ext/pg_query/include/nodes/plannodes.h +1251 -0
- data/ext/pg_query/include/nodes/primnodes.h +1541 -0
- data/ext/pg_query/include/nodes/print.h +34 -0
- data/ext/pg_query/include/nodes/tidbitmap.h +75 -0
- data/ext/pg_query/include/nodes/value.h +61 -0
- data/ext/pg_query/include/optimizer/cost.h +206 -0
- data/ext/pg_query/include/optimizer/geqo.h +88 -0
- data/ext/pg_query/include/optimizer/geqo_gene.h +45 -0
- data/ext/pg_query/include/optimizer/optimizer.h +199 -0
- data/ext/pg_query/include/optimizer/paths.h +249 -0
- data/ext/pg_query/include/optimizer/planmain.h +119 -0
- data/ext/pg_query/include/parser/analyze.h +49 -0
- data/ext/pg_query/include/parser/gram.h +1067 -0
- data/ext/pg_query/include/parser/gramparse.h +75 -0
- data/ext/pg_query/include/parser/kwlist.h +477 -0
- data/ext/pg_query/include/parser/parse_agg.h +68 -0
- data/ext/pg_query/include/parser/parse_clause.h +54 -0
- data/ext/pg_query/include/parser/parse_coerce.h +97 -0
- data/ext/pg_query/include/parser/parse_collate.h +27 -0
- data/ext/pg_query/include/parser/parse_expr.h +26 -0
- data/ext/pg_query/include/parser/parse_func.h +73 -0
- data/ext/pg_query/include/parser/parse_node.h +327 -0
- data/ext/pg_query/include/parser/parse_oper.h +67 -0
- data/ext/pg_query/include/parser/parse_relation.h +123 -0
- data/ext/pg_query/include/parser/parse_target.h +46 -0
- data/ext/pg_query/include/parser/parse_type.h +60 -0
- data/ext/pg_query/include/parser/parser.h +41 -0
- data/ext/pg_query/include/parser/parsetree.h +61 -0
- data/ext/pg_query/include/parser/scanner.h +152 -0
- data/ext/pg_query/include/parser/scansup.h +30 -0
- data/ext/pg_query/include/partitioning/partdefs.h +26 -0
- data/ext/pg_query/include/pg_config.h +989 -0
- data/ext/pg_query/include/pg_config_ext.h +8 -0
- data/ext/pg_query/include/pg_config_manual.h +350 -0
- data/ext/pg_query/include/pg_config_os.h +8 -0
- data/ext/pg_query/include/pg_getopt.h +56 -0
- data/ext/pg_query/include/pg_query.h +121 -0
- data/ext/pg_query/include/pg_query_enum_defs.c +2454 -0
- data/ext/pg_query/include/pg_query_fingerprint_conds.c +875 -0
- data/ext/pg_query/include/pg_query_fingerprint_defs.c +12413 -0
- data/ext/pg_query/include/pg_query_json_helper.c +61 -0
- data/ext/pg_query/include/pg_query_outfuncs_conds.c +686 -0
- data/ext/pg_query/include/pg_query_outfuncs_defs.c +2437 -0
- data/ext/pg_query/include/pg_query_readfuncs_conds.c +222 -0
- data/ext/pg_query/include/pg_query_readfuncs_defs.c +2878 -0
- data/ext/pg_query/include/pg_trace.h +17 -0
- data/ext/pg_query/include/pgstat.h +1487 -0
- data/ext/pg_query/include/pgtime.h +84 -0
- data/ext/pg_query/include/pl_gram.h +385 -0
- data/ext/pg_query/include/pl_reserved_kwlist.h +52 -0
- data/ext/pg_query/include/pl_reserved_kwlist_d.h +114 -0
- data/ext/pg_query/include/pl_unreserved_kwlist.h +112 -0
- data/ext/pg_query/include/pl_unreserved_kwlist_d.h +246 -0
- data/ext/pg_query/include/plerrcodes.h +990 -0
- data/ext/pg_query/include/plpgsql.h +1347 -0
- data/ext/pg_query/include/port.h +524 -0
- data/ext/pg_query/include/port/atomics.h +524 -0
- data/ext/pg_query/include/port/atomics/arch-arm.h +26 -0
- data/ext/pg_query/include/port/atomics/arch-ppc.h +254 -0
- data/ext/pg_query/include/port/atomics/arch-x86.h +252 -0
- data/ext/pg_query/include/port/atomics/fallback.h +170 -0
- data/ext/pg_query/include/port/atomics/generic-gcc.h +286 -0
- data/ext/pg_query/include/port/atomics/generic.h +401 -0
- data/ext/pg_query/include/port/pg_bitutils.h +226 -0
- data/ext/pg_query/include/port/pg_bswap.h +161 -0
- data/ext/pg_query/include/port/pg_crc32c.h +101 -0
- data/ext/pg_query/include/portability/instr_time.h +256 -0
- data/ext/pg_query/include/postgres.h +764 -0
- data/ext/pg_query/include/postgres_ext.h +74 -0
- data/ext/pg_query/include/postmaster/autovacuum.h +83 -0
- data/ext/pg_query/include/postmaster/bgworker.h +161 -0
- data/ext/pg_query/include/postmaster/bgworker_internals.h +64 -0
- data/ext/pg_query/include/postmaster/bgwriter.h +45 -0
- data/ext/pg_query/include/postmaster/fork_process.h +17 -0
- data/ext/pg_query/include/postmaster/interrupt.h +32 -0
- data/ext/pg_query/include/postmaster/pgarch.h +39 -0
- data/ext/pg_query/include/postmaster/postmaster.h +77 -0
- data/ext/pg_query/include/postmaster/syslogger.h +98 -0
- data/ext/pg_query/include/postmaster/walwriter.h +21 -0
- data/ext/pg_query/include/protobuf-c.h +1106 -0
- data/ext/pg_query/include/protobuf-c/protobuf-c.h +1106 -0
- data/ext/pg_query/include/protobuf/pg_query.pb-c.h +10846 -0
- data/ext/pg_query/include/protobuf/pg_query.pb.h +124718 -0
- data/ext/pg_query/include/regex/regex.h +184 -0
- data/ext/pg_query/include/replication/logicallauncher.h +31 -0
- data/ext/pg_query/include/replication/logicalproto.h +110 -0
- data/ext/pg_query/include/replication/logicalworker.h +19 -0
- data/ext/pg_query/include/replication/origin.h +73 -0
- data/ext/pg_query/include/replication/reorderbuffer.h +467 -0
- data/ext/pg_query/include/replication/slot.h +219 -0
- data/ext/pg_query/include/replication/syncrep.h +115 -0
- data/ext/pg_query/include/replication/walreceiver.h +340 -0
- data/ext/pg_query/include/replication/walsender.h +74 -0
- data/ext/pg_query/include/rewrite/prs2lock.h +46 -0
- data/ext/pg_query/include/rewrite/rewriteHandler.h +40 -0
- data/ext/pg_query/include/rewrite/rewriteManip.h +87 -0
- data/ext/pg_query/include/rewrite/rewriteSupport.h +26 -0
- data/ext/pg_query/include/storage/backendid.h +37 -0
- data/ext/pg_query/include/storage/block.h +121 -0
- data/ext/pg_query/include/storage/buf.h +46 -0
- data/ext/pg_query/include/storage/bufmgr.h +292 -0
- data/ext/pg_query/include/storage/bufpage.h +459 -0
- data/ext/pg_query/include/storage/condition_variable.h +62 -0
- data/ext/pg_query/include/storage/dsm.h +61 -0
- data/ext/pg_query/include/storage/dsm_impl.h +75 -0
- data/ext/pg_query/include/storage/fd.h +168 -0
- data/ext/pg_query/include/storage/ipc.h +81 -0
- data/ext/pg_query/include/storage/item.h +19 -0
- data/ext/pg_query/include/storage/itemid.h +184 -0
- data/ext/pg_query/include/storage/itemptr.h +206 -0
- data/ext/pg_query/include/storage/large_object.h +100 -0
- data/ext/pg_query/include/storage/latch.h +190 -0
- data/ext/pg_query/include/storage/lmgr.h +114 -0
- data/ext/pg_query/include/storage/lock.h +612 -0
- data/ext/pg_query/include/storage/lockdefs.h +59 -0
- data/ext/pg_query/include/storage/lwlock.h +232 -0
- data/ext/pg_query/include/storage/lwlocknames.h +51 -0
- data/ext/pg_query/include/storage/off.h +57 -0
- data/ext/pg_query/include/storage/pg_sema.h +61 -0
- data/ext/pg_query/include/storage/pg_shmem.h +90 -0
- data/ext/pg_query/include/storage/pmsignal.h +94 -0
- data/ext/pg_query/include/storage/predicate.h +87 -0
- data/ext/pg_query/include/storage/proc.h +333 -0
- data/ext/pg_query/include/storage/proclist_types.h +51 -0
- data/ext/pg_query/include/storage/procsignal.h +75 -0
- data/ext/pg_query/include/storage/relfilenode.h +99 -0
- data/ext/pg_query/include/storage/s_lock.h +1047 -0
- data/ext/pg_query/include/storage/sharedfileset.h +45 -0
- data/ext/pg_query/include/storage/shm_mq.h +85 -0
- data/ext/pg_query/include/storage/shm_toc.h +58 -0
- data/ext/pg_query/include/storage/shmem.h +81 -0
- data/ext/pg_query/include/storage/sinval.h +153 -0
- data/ext/pg_query/include/storage/sinvaladt.h +43 -0
- data/ext/pg_query/include/storage/smgr.h +109 -0
- data/ext/pg_query/include/storage/spin.h +77 -0
- data/ext/pg_query/include/storage/standby.h +91 -0
- data/ext/pg_query/include/storage/standbydefs.h +74 -0
- data/ext/pg_query/include/storage/sync.h +62 -0
- data/ext/pg_query/include/tcop/cmdtag.h +58 -0
- data/ext/pg_query/include/tcop/cmdtaglist.h +217 -0
- data/ext/pg_query/include/tcop/deparse_utility.h +108 -0
- data/ext/pg_query/include/tcop/dest.h +149 -0
- data/ext/pg_query/include/tcop/fastpath.h +21 -0
- data/ext/pg_query/include/tcop/pquery.h +45 -0
- data/ext/pg_query/include/tcop/tcopprot.h +89 -0
- data/ext/pg_query/include/tcop/utility.h +108 -0
- data/ext/pg_query/include/tsearch/ts_cache.h +98 -0
- data/ext/pg_query/include/utils/acl.h +312 -0
- data/ext/pg_query/include/utils/aclchk_internal.h +45 -0
- data/ext/pg_query/include/utils/array.h +458 -0
- data/ext/pg_query/include/utils/builtins.h +127 -0
- data/ext/pg_query/include/utils/bytea.h +27 -0
- data/ext/pg_query/include/utils/catcache.h +231 -0
- data/ext/pg_query/include/utils/date.h +90 -0
- data/ext/pg_query/include/utils/datetime.h +343 -0
- data/ext/pg_query/include/utils/datum.h +68 -0
- data/ext/pg_query/include/utils/dsa.h +123 -0
- data/ext/pg_query/include/utils/dynahash.h +19 -0
- data/ext/pg_query/include/utils/elog.h +439 -0
- data/ext/pg_query/include/utils/errcodes.h +352 -0
- data/ext/pg_query/include/utils/expandeddatum.h +159 -0
- data/ext/pg_query/include/utils/expandedrecord.h +231 -0
- data/ext/pg_query/include/utils/float.h +356 -0
- data/ext/pg_query/include/utils/fmgroids.h +2657 -0
- data/ext/pg_query/include/utils/fmgrprotos.h +2646 -0
- data/ext/pg_query/include/utils/fmgrtab.h +48 -0
- data/ext/pg_query/include/utils/guc.h +443 -0
- data/ext/pg_query/include/utils/guc_tables.h +272 -0
- data/ext/pg_query/include/utils/hsearch.h +149 -0
- data/ext/pg_query/include/utils/inval.h +64 -0
- data/ext/pg_query/include/utils/lsyscache.h +197 -0
- data/ext/pg_query/include/utils/memdebug.h +82 -0
- data/ext/pg_query/include/utils/memutils.h +225 -0
- data/ext/pg_query/include/utils/numeric.h +76 -0
- data/ext/pg_query/include/utils/palloc.h +136 -0
- data/ext/pg_query/include/utils/partcache.h +102 -0
- data/ext/pg_query/include/utils/pg_locale.h +119 -0
- data/ext/pg_query/include/utils/pg_lsn.h +29 -0
- data/ext/pg_query/include/utils/pidfile.h +56 -0
- data/ext/pg_query/include/utils/plancache.h +235 -0
- data/ext/pg_query/include/utils/portal.h +241 -0
- data/ext/pg_query/include/utils/probes.h +114 -0
- data/ext/pg_query/include/utils/ps_status.h +25 -0
- data/ext/pg_query/include/utils/queryenvironment.h +74 -0
- data/ext/pg_query/include/utils/regproc.h +28 -0
- data/ext/pg_query/include/utils/rel.h +644 -0
- data/ext/pg_query/include/utils/relcache.h +151 -0
- data/ext/pg_query/include/utils/reltrigger.h +81 -0
- data/ext/pg_query/include/utils/resowner.h +86 -0
- data/ext/pg_query/include/utils/rls.h +50 -0
- data/ext/pg_query/include/utils/ruleutils.h +44 -0
- data/ext/pg_query/include/utils/sharedtuplestore.h +61 -0
- data/ext/pg_query/include/utils/snapmgr.h +158 -0
- data/ext/pg_query/include/utils/snapshot.h +206 -0
- data/ext/pg_query/include/utils/sortsupport.h +276 -0
- data/ext/pg_query/include/utils/syscache.h +219 -0
- data/ext/pg_query/include/utils/timeout.h +88 -0
- data/ext/pg_query/include/utils/timestamp.h +116 -0
- data/ext/pg_query/include/utils/tuplesort.h +277 -0
- data/ext/pg_query/include/utils/tuplestore.h +91 -0
- data/ext/pg_query/include/utils/typcache.h +202 -0
- data/ext/pg_query/include/utils/tzparser.h +39 -0
- data/ext/pg_query/include/utils/varlena.h +39 -0
- data/ext/pg_query/include/utils/xml.h +84 -0
- data/ext/pg_query/include/xxhash.h +5445 -0
- data/ext/pg_query/include/xxhash/xxhash.h +5445 -0
- data/ext/pg_query/pg_query.c +104 -0
- data/ext/pg_query/pg_query.pb-c.c +37628 -0
- data/ext/pg_query/pg_query_deparse.c +9959 -0
- data/ext/pg_query/pg_query_fingerprint.c +295 -0
- data/ext/pg_query/pg_query_fingerprint.h +8 -0
- data/ext/pg_query/pg_query_internal.h +24 -0
- data/ext/pg_query/pg_query_json_plpgsql.c +738 -0
- data/ext/pg_query/pg_query_json_plpgsql.h +9 -0
- data/ext/pg_query/pg_query_normalize.c +439 -0
- data/ext/pg_query/pg_query_outfuncs.h +10 -0
- data/ext/pg_query/pg_query_outfuncs_json.c +297 -0
- data/ext/pg_query/pg_query_outfuncs_protobuf.c +237 -0
- data/ext/pg_query/pg_query_parse.c +148 -0
- data/ext/pg_query/pg_query_parse_plpgsql.c +460 -0
- data/ext/pg_query/pg_query_readfuncs.h +11 -0
- data/ext/pg_query/pg_query_readfuncs_protobuf.c +142 -0
- data/ext/pg_query/pg_query_ruby.c +108 -12
- data/ext/pg_query/pg_query_scan.c +173 -0
- data/ext/pg_query/pg_query_split.c +221 -0
- data/ext/pg_query/protobuf-c.c +3660 -0
- data/ext/pg_query/src_backend_catalog_namespace.c +1051 -0
- data/ext/pg_query/src_backend_catalog_pg_proc.c +142 -0
- data/ext/pg_query/src_backend_commands_define.c +117 -0
- data/ext/pg_query/src_backend_libpq_pqcomm.c +651 -0
- data/ext/pg_query/src_backend_nodes_bitmapset.c +513 -0
- data/ext/pg_query/src_backend_nodes_copyfuncs.c +6013 -0
- data/ext/pg_query/src_backend_nodes_equalfuncs.c +4003 -0
- data/ext/pg_query/src_backend_nodes_extensible.c +99 -0
- data/ext/pg_query/src_backend_nodes_list.c +922 -0
- data/ext/pg_query/src_backend_nodes_makefuncs.c +417 -0
- data/ext/pg_query/src_backend_nodes_nodeFuncs.c +1363 -0
- data/ext/pg_query/src_backend_nodes_value.c +84 -0
- data/ext/pg_query/src_backend_parser_gram.c +47456 -0
- data/ext/pg_query/src_backend_parser_parse_expr.c +313 -0
- data/ext/pg_query/src_backend_parser_parser.c +497 -0
- data/ext/pg_query/src_backend_parser_scan.c +7091 -0
- data/ext/pg_query/src_backend_parser_scansup.c +160 -0
- data/ext/pg_query/src_backend_postmaster_postmaster.c +2230 -0
- data/ext/pg_query/src_backend_storage_ipc_ipc.c +192 -0
- data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +370 -0
- data/ext/pg_query/src_backend_tcop_postgres.c +776 -0
- data/ext/pg_query/src_backend_utils_adt_datum.c +326 -0
- data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +98 -0
- data/ext/pg_query/src_backend_utils_adt_format_type.c +136 -0
- data/ext/pg_query/src_backend_utils_adt_ruleutils.c +1683 -0
- data/ext/pg_query/src_backend_utils_error_assert.c +74 -0
- data/ext/pg_query/src_backend_utils_error_elog.c +1748 -0
- data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +570 -0
- data/ext/pg_query/src_backend_utils_hash_dynahash.c +1086 -0
- data/ext/pg_query/src_backend_utils_init_globals.c +168 -0
- data/ext/pg_query/src_backend_utils_mb_mbutils.c +839 -0
- data/ext/pg_query/src_backend_utils_misc_guc.c +1831 -0
- data/ext/pg_query/src_backend_utils_mmgr_aset.c +1560 -0
- data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +1006 -0
- data/ext/pg_query/src_common_encnames.c +158 -0
- data/ext/pg_query/src_common_keywords.c +39 -0
- data/ext/pg_query/src_common_kwlist_d.h +1081 -0
- data/ext/pg_query/src_common_kwlookup.c +91 -0
- data/ext/pg_query/src_common_psprintf.c +158 -0
- data/ext/pg_query/src_common_string.c +86 -0
- data/ext/pg_query/src_common_stringinfo.c +336 -0
- data/ext/pg_query/src_common_wchar.c +1651 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +1133 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +877 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +6533 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +107 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +123 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +671 -0
- data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +255 -0
- data/ext/pg_query/src_port_erand48.c +127 -0
- data/ext/pg_query/src_port_pg_bitutils.c +246 -0
- data/ext/pg_query/src_port_pgsleep.c +69 -0
- data/ext/pg_query/src_port_pgstrcasecmp.c +83 -0
- data/ext/pg_query/src_port_qsort.c +240 -0
- data/ext/pg_query/src_port_random.c +31 -0
- data/ext/pg_query/src_port_snprintf.c +1449 -0
- data/ext/pg_query/src_port_strerror.c +324 -0
- data/ext/pg_query/src_port_strnlen.c +39 -0
- data/ext/pg_query/xxhash.c +43 -0
- data/lib/pg_query.rb +7 -4
- data/lib/pg_query/constants.rb +21 -0
- data/lib/pg_query/deparse.rb +15 -1581
- data/lib/pg_query/filter_columns.rb +88 -85
- data/lib/pg_query/fingerprint.rb +122 -87
- data/lib/pg_query/json_field_names.rb +1402 -0
- data/lib/pg_query/node.rb +31 -0
- data/lib/pg_query/param_refs.rb +42 -37
- data/lib/pg_query/parse.rb +220 -203
- data/lib/pg_query/parse_error.rb +1 -1
- data/lib/pg_query/pg_query_pb.rb +3211 -0
- data/lib/pg_query/scan.rb +23 -0
- data/lib/pg_query/treewalker.rb +24 -40
- data/lib/pg_query/truncate.rb +71 -42
- data/lib/pg_query/version.rb +2 -2
- metadata +472 -11
- data/ext/pg_query/pg_query_ruby.h +0 -10
- data/lib/pg_query/deep_dup.rb +0 -16
- data/lib/pg_query/deparse/alter_table.rb +0 -42
- data/lib/pg_query/deparse/interval.rb +0 -105
- data/lib/pg_query/deparse/keywords.rb +0 -159
- data/lib/pg_query/deparse/rename.rb +0 -41
- data/lib/pg_query/legacy_parsetree.rb +0 -109
- data/lib/pg_query/node_types.rb +0 -296
@@ -0,0 +1,1651 @@
|
|
1
|
+
/*--------------------------------------------------------------------
|
2
|
+
* Symbols referenced in this file:
|
3
|
+
* - pg_encoding_max_length
|
4
|
+
* - pg_wchar_table
|
5
|
+
* - pg_utf_mblen
|
6
|
+
* - pg_mule_mblen
|
7
|
+
* - pg_ascii2wchar_with_len
|
8
|
+
* - pg_wchar2single_with_len
|
9
|
+
* - pg_ascii_mblen
|
10
|
+
* - pg_ascii_dsplen
|
11
|
+
* - pg_ascii_verifier
|
12
|
+
* - pg_eucjp2wchar_with_len
|
13
|
+
* - pg_euc2wchar_with_len
|
14
|
+
* - pg_wchar2euc_with_len
|
15
|
+
* - pg_eucjp_mblen
|
16
|
+
* - pg_euc_mblen
|
17
|
+
* - pg_eucjp_dsplen
|
18
|
+
* - pg_eucjp_verifier
|
19
|
+
* - pg_euccn2wchar_with_len
|
20
|
+
* - pg_euccn_mblen
|
21
|
+
* - pg_euccn_dsplen
|
22
|
+
* - pg_euckr_verifier
|
23
|
+
* - pg_euckr2wchar_with_len
|
24
|
+
* - pg_euckr_mblen
|
25
|
+
* - pg_euckr_dsplen
|
26
|
+
* - pg_euc_dsplen
|
27
|
+
* - pg_euctw2wchar_with_len
|
28
|
+
* - pg_euctw_mblen
|
29
|
+
* - pg_euctw_dsplen
|
30
|
+
* - pg_euctw_verifier
|
31
|
+
* - pg_utf2wchar_with_len
|
32
|
+
* - pg_wchar2utf_with_len
|
33
|
+
* - unicode_to_utf8
|
34
|
+
* - pg_utf_dsplen
|
35
|
+
* - utf8_to_unicode
|
36
|
+
* - ucs_wcwidth
|
37
|
+
* - mbbisearch
|
38
|
+
* - pg_utf8_verifier
|
39
|
+
* - pg_utf8_islegal
|
40
|
+
* - pg_mule2wchar_with_len
|
41
|
+
* - pg_wchar2mule_with_len
|
42
|
+
* - pg_mule_dsplen
|
43
|
+
* - pg_mule_verifier
|
44
|
+
* - pg_latin12wchar_with_len
|
45
|
+
* - pg_latin1_mblen
|
46
|
+
* - pg_latin1_dsplen
|
47
|
+
* - pg_latin1_verifier
|
48
|
+
* - pg_sjis_mblen
|
49
|
+
* - pg_sjis_dsplen
|
50
|
+
* - pg_sjis_verifier
|
51
|
+
* - pg_big5_mblen
|
52
|
+
* - pg_big5_dsplen
|
53
|
+
* - pg_big5_verifier
|
54
|
+
* - pg_gbk_mblen
|
55
|
+
* - pg_gbk_dsplen
|
56
|
+
* - pg_gbk_verifier
|
57
|
+
* - pg_uhc_mblen
|
58
|
+
* - pg_uhc_dsplen
|
59
|
+
* - pg_uhc_verifier
|
60
|
+
* - pg_gb18030_mblen
|
61
|
+
* - pg_gb18030_dsplen
|
62
|
+
* - pg_gb18030_verifier
|
63
|
+
* - pg_johab_mblen
|
64
|
+
* - pg_johab_dsplen
|
65
|
+
* - pg_johab_verifier
|
66
|
+
* - pg_encoding_mblen
|
67
|
+
*--------------------------------------------------------------------
|
68
|
+
*/
|
69
|
+
|
70
|
+
/*-------------------------------------------------------------------------
|
71
|
+
*
|
72
|
+
* wchar.c
|
73
|
+
* Functions for working with multibyte characters in various encodings.
|
74
|
+
*
|
75
|
+
* Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
|
76
|
+
*
|
77
|
+
* IDENTIFICATION
|
78
|
+
* src/common/wchar.c
|
79
|
+
*
|
80
|
+
*-------------------------------------------------------------------------
|
81
|
+
*/
|
82
|
+
#include "c.h"
|
83
|
+
|
84
|
+
#include "mb/pg_wchar.h"
|
85
|
+
|
86
|
+
|
87
|
+
/*
|
88
|
+
* Operations on multi-byte encodings are driven by a table of helper
|
89
|
+
* functions.
|
90
|
+
*
|
91
|
+
* To add support for an encoding, define mblen(), dsplen() and verifier() for
|
92
|
+
* the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
|
93
|
+
* conversion functions.
|
94
|
+
*
|
95
|
+
* These functions generally assume that their input is validly formed.
|
96
|
+
* The "verifier" functions, further down in the file, have to be more
|
97
|
+
* paranoid.
|
98
|
+
*
|
99
|
+
* We expect that mblen() does not need to examine more than the first byte
|
100
|
+
* of the character to discover the correct length. GB18030 is an exception
|
101
|
+
* to that rule, though, as it also looks at the second byte. But even that
|
102
|
+
* behaves in a predictable way, if you only pass the first byte: it will
|
103
|
+
* treat 4-byte encoded characters as two 2-byte encoded characters, which is
|
104
|
+
* good enough for all current uses.
|
105
|
+
*
|
106
|
+
* Note: for the display output of psql to work properly, the return values
|
107
|
+
* of the dsplen functions must conform to the Unicode standard. In particular
|
108
|
+
* the NUL character is zero width and control characters are generally
|
109
|
+
* width -1. It is recommended that non-ASCII encodings refer their ASCII
|
110
|
+
* subset to the ASCII routines to ensure consistency.
|
111
|
+
*/
|
112
|
+
|
113
|
+
/*
|
114
|
+
* SQL/ASCII
|
115
|
+
*/
|
116
|
+
static int
|
117
|
+
pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
118
|
+
{
|
119
|
+
int cnt = 0;
|
120
|
+
|
121
|
+
while (len > 0 && *from)
|
122
|
+
{
|
123
|
+
*to++ = *from++;
|
124
|
+
len--;
|
125
|
+
cnt++;
|
126
|
+
}
|
127
|
+
*to = 0;
|
128
|
+
return cnt;
|
129
|
+
}
|
130
|
+
|
131
|
+
/*
 * Byte length of the character starting at "s".
 *
 * Always 1: the argument is not examined.
 */
static int
pg_ascii_mblen(const unsigned char *s)
{
	(void) s;					/* every character is a single byte */

	return 1;
}
|
136
|
+
|
137
|
+
/*
 * Display width of the single-byte character at "s".
 *
 * Returns 0 for the NUL byte, -1 for bytes below 0x20 and for 0x7f,
 * and 1 for every other byte value.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
	const unsigned char ch = *s;

	if (ch == '\0')
		return 0;

	return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}
|
147
|
+
|
148
|
+
/*
|
149
|
+
* EUC
|
150
|
+
*/
|
151
|
+
static int
|
152
|
+
pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
153
|
+
{
|
154
|
+
int cnt = 0;
|
155
|
+
|
156
|
+
while (len > 0 && *from)
|
157
|
+
{
|
158
|
+
if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
|
159
|
+
* KANA") */
|
160
|
+
{
|
161
|
+
from++;
|
162
|
+
*to = (SS2 << 8) | *from++;
|
163
|
+
len -= 2;
|
164
|
+
}
|
165
|
+
else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
|
166
|
+
{
|
167
|
+
from++;
|
168
|
+
*to = (SS3 << 16) | (*from++ << 8);
|
169
|
+
*to |= *from++;
|
170
|
+
len -= 3;
|
171
|
+
}
|
172
|
+
else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
|
173
|
+
{
|
174
|
+
*to = *from++ << 8;
|
175
|
+
*to |= *from++;
|
176
|
+
len -= 2;
|
177
|
+
}
|
178
|
+
else /* must be ASCII */
|
179
|
+
{
|
180
|
+
*to = *from++;
|
181
|
+
len--;
|
182
|
+
}
|
183
|
+
to++;
|
184
|
+
cnt++;
|
185
|
+
}
|
186
|
+
*to = 0;
|
187
|
+
return cnt;
|
188
|
+
}
|
189
|
+
|
190
|
+
static inline int
|
191
|
+
pg_euc_mblen(const unsigned char *s)
|
192
|
+
{
|
193
|
+
int len;
|
194
|
+
|
195
|
+
if (*s == SS2)
|
196
|
+
len = 2;
|
197
|
+
else if (*s == SS3)
|
198
|
+
len = 3;
|
199
|
+
else if (IS_HIGHBIT_SET(*s))
|
200
|
+
len = 2;
|
201
|
+
else
|
202
|
+
len = 1;
|
203
|
+
return len;
|
204
|
+
}
|
205
|
+
|
206
|
+
static inline int
|
207
|
+
pg_euc_dsplen(const unsigned char *s)
|
208
|
+
{
|
209
|
+
int len;
|
210
|
+
|
211
|
+
if (*s == SS2)
|
212
|
+
len = 2;
|
213
|
+
else if (*s == SS3)
|
214
|
+
len = 2;
|
215
|
+
else if (IS_HIGHBIT_SET(*s))
|
216
|
+
len = 2;
|
217
|
+
else
|
218
|
+
len = pg_ascii_dsplen(s);
|
219
|
+
return len;
|
220
|
+
}
|
221
|
+
|
222
|
+
/*
|
223
|
+
* EUC_JP
|
224
|
+
*/
|
225
|
+
static int
|
226
|
+
pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
227
|
+
{
|
228
|
+
return pg_euc2wchar_with_len(from, to, len);
|
229
|
+
}
|
230
|
+
|
231
|
+
/*
 * Byte length of an EUC_JP character: same rule as generic EUC.
 */
static int
pg_eucjp_mblen(const unsigned char *s)
{
    int         nbytes = pg_euc_mblen(s);

    return nbytes;
}
|
236
|
+
|
237
|
+
static int
|
238
|
+
pg_eucjp_dsplen(const unsigned char *s)
|
239
|
+
{
|
240
|
+
int len;
|
241
|
+
|
242
|
+
if (*s == SS2)
|
243
|
+
len = 1;
|
244
|
+
else if (*s == SS3)
|
245
|
+
len = 2;
|
246
|
+
else if (IS_HIGHBIT_SET(*s))
|
247
|
+
len = 2;
|
248
|
+
else
|
249
|
+
len = pg_ascii_dsplen(s);
|
250
|
+
return len;
|
251
|
+
}
|
252
|
+
|
253
|
+
/*
|
254
|
+
* EUC_KR
|
255
|
+
*/
|
256
|
+
static int
|
257
|
+
pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
258
|
+
{
|
259
|
+
return pg_euc2wchar_with_len(from, to, len);
|
260
|
+
}
|
261
|
+
|
262
|
+
/*
 * Byte length of an EUC_KR character: same rule as generic EUC.
 */
static int
pg_euckr_mblen(const unsigned char *s)
{
    int         nbytes = pg_euc_mblen(s);

    return nbytes;
}
|
267
|
+
|
268
|
+
/*
 * Display width of an EUC_KR character: same rule as generic EUC.
 */
static int
pg_euckr_dsplen(const unsigned char *s)
{
    int         width = pg_euc_dsplen(s);

    return width;
}
|
273
|
+
|
274
|
+
/*
|
275
|
+
* EUC_CN
|
276
|
+
*
|
277
|
+
*/
|
278
|
+
static int
|
279
|
+
pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
280
|
+
{
|
281
|
+
int cnt = 0;
|
282
|
+
|
283
|
+
while (len > 0 && *from)
|
284
|
+
{
|
285
|
+
if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
|
286
|
+
{
|
287
|
+
from++;
|
288
|
+
*to = (SS2 << 16) | (*from++ << 8);
|
289
|
+
*to |= *from++;
|
290
|
+
len -= 3;
|
291
|
+
}
|
292
|
+
else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
|
293
|
+
{
|
294
|
+
from++;
|
295
|
+
*to = (SS3 << 16) | (*from++ << 8);
|
296
|
+
*to |= *from++;
|
297
|
+
len -= 3;
|
298
|
+
}
|
299
|
+
else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
|
300
|
+
{
|
301
|
+
*to = *from++ << 8;
|
302
|
+
*to |= *from++;
|
303
|
+
len -= 2;
|
304
|
+
}
|
305
|
+
else
|
306
|
+
{
|
307
|
+
*to = *from++;
|
308
|
+
len--;
|
309
|
+
}
|
310
|
+
to++;
|
311
|
+
cnt++;
|
312
|
+
}
|
313
|
+
*to = 0;
|
314
|
+
return cnt;
|
315
|
+
}
|
316
|
+
|
317
|
+
/*
 * Byte length of the EUC_CN character at "s": two bytes when the lead
 * byte has its high bit set, one byte otherwise.
 */
static int
pg_euccn_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
|
328
|
+
|
329
|
+
/*
 * Display width of the EUC_CN character at "s": two columns for a
 * high-bit lead byte, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_euccn_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
|
340
|
+
|
341
|
+
/*
|
342
|
+
* EUC_TW
|
343
|
+
*
|
344
|
+
*/
|
345
|
+
static int
|
346
|
+
pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
347
|
+
{
|
348
|
+
int cnt = 0;
|
349
|
+
|
350
|
+
while (len > 0 && *from)
|
351
|
+
{
|
352
|
+
if (*from == SS2 && len >= 4) /* code set 2 */
|
353
|
+
{
|
354
|
+
from++;
|
355
|
+
*to = (((uint32) SS2) << 24) | (*from++ << 16);
|
356
|
+
*to |= *from++ << 8;
|
357
|
+
*to |= *from++;
|
358
|
+
len -= 4;
|
359
|
+
}
|
360
|
+
else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
|
361
|
+
{
|
362
|
+
from++;
|
363
|
+
*to = (SS3 << 16) | (*from++ << 8);
|
364
|
+
*to |= *from++;
|
365
|
+
len -= 3;
|
366
|
+
}
|
367
|
+
else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
|
368
|
+
{
|
369
|
+
*to = *from++ << 8;
|
370
|
+
*to |= *from++;
|
371
|
+
len -= 2;
|
372
|
+
}
|
373
|
+
else
|
374
|
+
{
|
375
|
+
*to = *from++;
|
376
|
+
len--;
|
377
|
+
}
|
378
|
+
to++;
|
379
|
+
cnt++;
|
380
|
+
}
|
381
|
+
*to = 0;
|
382
|
+
return cnt;
|
383
|
+
}
|
384
|
+
|
385
|
+
static int
|
386
|
+
pg_euctw_mblen(const unsigned char *s)
|
387
|
+
{
|
388
|
+
int len;
|
389
|
+
|
390
|
+
if (*s == SS2)
|
391
|
+
len = 4;
|
392
|
+
else if (*s == SS3)
|
393
|
+
len = 3;
|
394
|
+
else if (IS_HIGHBIT_SET(*s))
|
395
|
+
len = 2;
|
396
|
+
else
|
397
|
+
len = 1;
|
398
|
+
return len;
|
399
|
+
}
|
400
|
+
|
401
|
+
static int
|
402
|
+
pg_euctw_dsplen(const unsigned char *s)
|
403
|
+
{
|
404
|
+
int len;
|
405
|
+
|
406
|
+
if (*s == SS2)
|
407
|
+
len = 2;
|
408
|
+
else if (*s == SS3)
|
409
|
+
len = 2;
|
410
|
+
else if (IS_HIGHBIT_SET(*s))
|
411
|
+
len = 2;
|
412
|
+
else
|
413
|
+
len = pg_ascii_dsplen(s);
|
414
|
+
return len;
|
415
|
+
}
|
416
|
+
|
417
|
+
/*
|
418
|
+
* Convert pg_wchar to EUC_* encoding.
|
419
|
+
* caller must allocate enough space for "to", including a trailing zero!
|
420
|
+
* len: length of from.
|
421
|
+
* "from" not necessarily null terminated.
|
422
|
+
*/
|
423
|
+
static int
|
424
|
+
pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
|
425
|
+
{
|
426
|
+
int cnt = 0;
|
427
|
+
|
428
|
+
while (len > 0 && *from)
|
429
|
+
{
|
430
|
+
unsigned char c;
|
431
|
+
|
432
|
+
if ((c = (*from >> 24)))
|
433
|
+
{
|
434
|
+
*to++ = c;
|
435
|
+
*to++ = (*from >> 16) & 0xff;
|
436
|
+
*to++ = (*from >> 8) & 0xff;
|
437
|
+
*to++ = *from & 0xff;
|
438
|
+
cnt += 4;
|
439
|
+
}
|
440
|
+
else if ((c = (*from >> 16)))
|
441
|
+
{
|
442
|
+
*to++ = c;
|
443
|
+
*to++ = (*from >> 8) & 0xff;
|
444
|
+
*to++ = *from & 0xff;
|
445
|
+
cnt += 3;
|
446
|
+
}
|
447
|
+
else if ((c = (*from >> 8)))
|
448
|
+
{
|
449
|
+
*to++ = c;
|
450
|
+
*to++ = *from & 0xff;
|
451
|
+
cnt += 2;
|
452
|
+
}
|
453
|
+
else
|
454
|
+
{
|
455
|
+
*to++ = *from;
|
456
|
+
cnt++;
|
457
|
+
}
|
458
|
+
from++;
|
459
|
+
len--;
|
460
|
+
}
|
461
|
+
*to = 0;
|
462
|
+
return cnt;
|
463
|
+
}
|
464
|
+
|
465
|
+
|
466
|
+
/*
|
467
|
+
* JOHAB
|
468
|
+
*/
|
469
|
+
/*
 * Byte length of a JOHAB character: computed with the generic EUC rule.
 */
static int
pg_johab_mblen(const unsigned char *s)
{
    int         nbytes = pg_euc_mblen(s);

    return nbytes;
}
|
474
|
+
|
475
|
+
/*
 * Display width of a JOHAB character: computed with the generic EUC rule.
 */
static int
pg_johab_dsplen(const unsigned char *s)
{
    int         width = pg_euc_dsplen(s);

    return width;
}
|
480
|
+
|
481
|
+
/*
|
482
|
+
* convert UTF8 string to pg_wchar (UCS-4)
|
483
|
+
* caller must allocate enough space for "to", including a trailing zero!
|
484
|
+
* len: length of from.
|
485
|
+
* "from" not necessarily null terminated.
|
486
|
+
*/
|
487
|
+
static int
|
488
|
+
pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
489
|
+
{
|
490
|
+
int cnt = 0;
|
491
|
+
uint32 c1,
|
492
|
+
c2,
|
493
|
+
c3,
|
494
|
+
c4;
|
495
|
+
|
496
|
+
while (len > 0 && *from)
|
497
|
+
{
|
498
|
+
if ((*from & 0x80) == 0)
|
499
|
+
{
|
500
|
+
*to = *from++;
|
501
|
+
len--;
|
502
|
+
}
|
503
|
+
else if ((*from & 0xe0) == 0xc0)
|
504
|
+
{
|
505
|
+
if (len < 2)
|
506
|
+
break; /* drop trailing incomplete char */
|
507
|
+
c1 = *from++ & 0x1f;
|
508
|
+
c2 = *from++ & 0x3f;
|
509
|
+
*to = (c1 << 6) | c2;
|
510
|
+
len -= 2;
|
511
|
+
}
|
512
|
+
else if ((*from & 0xf0) == 0xe0)
|
513
|
+
{
|
514
|
+
if (len < 3)
|
515
|
+
break; /* drop trailing incomplete char */
|
516
|
+
c1 = *from++ & 0x0f;
|
517
|
+
c2 = *from++ & 0x3f;
|
518
|
+
c3 = *from++ & 0x3f;
|
519
|
+
*to = (c1 << 12) | (c2 << 6) | c3;
|
520
|
+
len -= 3;
|
521
|
+
}
|
522
|
+
else if ((*from & 0xf8) == 0xf0)
|
523
|
+
{
|
524
|
+
if (len < 4)
|
525
|
+
break; /* drop trailing incomplete char */
|
526
|
+
c1 = *from++ & 0x07;
|
527
|
+
c2 = *from++ & 0x3f;
|
528
|
+
c3 = *from++ & 0x3f;
|
529
|
+
c4 = *from++ & 0x3f;
|
530
|
+
*to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
|
531
|
+
len -= 4;
|
532
|
+
}
|
533
|
+
else
|
534
|
+
{
|
535
|
+
/* treat a bogus char as length 1; not ours to raise error */
|
536
|
+
*to = *from++;
|
537
|
+
len--;
|
538
|
+
}
|
539
|
+
to++;
|
540
|
+
cnt++;
|
541
|
+
}
|
542
|
+
*to = 0;
|
543
|
+
return cnt;
|
544
|
+
}
|
545
|
+
|
546
|
+
|
547
|
+
/*
|
548
|
+
* Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
|
549
|
+
* space allocated.
|
550
|
+
*/
|
551
|
+
unsigned char *
|
552
|
+
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
|
553
|
+
{
|
554
|
+
if (c <= 0x7F)
|
555
|
+
{
|
556
|
+
utf8string[0] = c;
|
557
|
+
}
|
558
|
+
else if (c <= 0x7FF)
|
559
|
+
{
|
560
|
+
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
561
|
+
utf8string[1] = 0x80 | (c & 0x3F);
|
562
|
+
}
|
563
|
+
else if (c <= 0xFFFF)
|
564
|
+
{
|
565
|
+
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
566
|
+
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
567
|
+
utf8string[2] = 0x80 | (c & 0x3F);
|
568
|
+
}
|
569
|
+
else
|
570
|
+
{
|
571
|
+
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
572
|
+
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
573
|
+
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
574
|
+
utf8string[3] = 0x80 | (c & 0x3F);
|
575
|
+
}
|
576
|
+
|
577
|
+
return utf8string;
|
578
|
+
}
|
579
|
+
|
580
|
+
/*
|
581
|
+
* Trivial conversion from pg_wchar to UTF-8.
|
582
|
+
* caller should allocate enough space for "to"
|
583
|
+
* len: length of from.
|
584
|
+
* "from" not necessarily null terminated.
|
585
|
+
*/
|
586
|
+
static int
|
587
|
+
pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
|
588
|
+
{
|
589
|
+
int cnt = 0;
|
590
|
+
|
591
|
+
while (len > 0 && *from)
|
592
|
+
{
|
593
|
+
int char_len;
|
594
|
+
|
595
|
+
unicode_to_utf8(*from, to);
|
596
|
+
char_len = pg_utf_mblen(to);
|
597
|
+
cnt += char_len;
|
598
|
+
to += char_len;
|
599
|
+
from++;
|
600
|
+
len--;
|
601
|
+
}
|
602
|
+
*to = 0;
|
603
|
+
return cnt;
|
604
|
+
}
|
605
|
+
|
606
|
+
/*
|
607
|
+
* Return the byte length of a UTF8 character pointed to by s
|
608
|
+
*
|
609
|
+
* Note: in the current implementation we do not support UTF8 sequences
|
610
|
+
* of more than 4 bytes; hence do NOT return a value larger than 4.
|
611
|
+
* We return "1" for any leading byte that is either flat-out illegal or
|
612
|
+
* indicates a length larger than we support.
|
613
|
+
*
|
614
|
+
* pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
|
615
|
+
* other places would need to be fixed to change this.
|
616
|
+
*/
|
617
|
+
/*
 * Byte length of the UTF-8 character whose lead byte is *s.
 *
 * Only sequences of up to four bytes are supported, so the result is never
 * larger than 4 (unless NOT_USED is defined, which enables the 5- and
 * 6-byte forms).  Any lead byte that matches no supported pattern,
 * including a stray continuation byte, is reported as length 1.
 */
int
pg_utf_mblen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if ((lead & 0x80) == 0)
        return 1;
    if ((lead & 0xe0) == 0xc0)
        return 2;
    if ((lead & 0xf0) == 0xe0)
        return 3;
    if ((lead & 0xf8) == 0xf0)
        return 4;
#ifdef NOT_USED
    if ((lead & 0xfc) == 0xf8)
        return 5;
    if ((lead & 0xfe) == 0xfc)
        return 6;
#endif

    return 1;
}
|
640
|
+
|
641
|
+
/*
|
642
|
+
* This is an implementation of wcwidth() and wcswidth() as defined in
|
643
|
+
* "The Single UNIX Specification, Version 2, The Open Group, 1997"
|
644
|
+
* <http://www.unix.org/online.html>
|
645
|
+
*
|
646
|
+
* Markus Kuhn -- 2001-09-08 -- public domain
|
647
|
+
*
|
648
|
+
* customised for PostgreSQL
|
649
|
+
*
|
650
|
+
* original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
651
|
+
*/
|
652
|
+
|
653
|
+
/*
 * One closed range [first, last] of character codes, as stored in the
 * interval tables searched by mbbisearch().
 */
struct mbinterval
{
    unsigned short first;       /* lowest code in the range (inclusive) */
    unsigned short last;        /* highest code in the range (inclusive) */
};
|
658
|
+
|
659
|
+
/* auxiliary function for binary search in interval table */
|
660
|
+
static int
|
661
|
+
mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
|
662
|
+
{
|
663
|
+
int min = 0;
|
664
|
+
int mid;
|
665
|
+
|
666
|
+
if (ucs < table[0].first || ucs > table[max].last)
|
667
|
+
return 0;
|
668
|
+
while (max >= min)
|
669
|
+
{
|
670
|
+
mid = (min + max) / 2;
|
671
|
+
if (ucs > table[mid].last)
|
672
|
+
min = mid + 1;
|
673
|
+
else if (ucs < table[mid].first)
|
674
|
+
max = mid - 1;
|
675
|
+
else
|
676
|
+
return 1;
|
677
|
+
}
|
678
|
+
|
679
|
+
return 0;
|
680
|
+
}
|
681
|
+
|
682
|
+
|
683
|
+
/* The following functions define the column width of an ISO 10646
|
684
|
+
* character as follows:
|
685
|
+
*
|
686
|
+
* - The null character (U+0000) has a column width of 0.
|
687
|
+
*
|
688
|
+
* - Other C0/C1 control characters and DEL will lead to a return
|
689
|
+
* value of -1.
|
690
|
+
*
|
691
|
+
* - Non-spacing and enclosing combining characters (general
|
692
|
+
* category code Mn or Me in the Unicode database) have a
|
693
|
+
* column width of 0.
|
694
|
+
*
|
695
|
+
* - Other format characters (general category code Cf in the Unicode
|
696
|
+
* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
|
697
|
+
*
|
698
|
+
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
|
699
|
+
* have a column width of 0.
|
700
|
+
*
|
701
|
+
* - Spacing characters in the East Asian Wide (W) or East Asian
|
702
|
+
* FullWidth (F) category as defined in Unicode Technical
|
703
|
+
* Report #11 have a column width of 2.
|
704
|
+
*
|
705
|
+
* - All remaining characters (including all printable
|
706
|
+
* ISO 8859-1 and WGL4 characters, Unicode control characters,
|
707
|
+
* etc.) have a column width of 1.
|
708
|
+
*
|
709
|
+
* This implementation assumes that wchar_t characters are encoded
|
710
|
+
* in ISO 10646.
|
711
|
+
*/
|
712
|
+
|
713
|
+
static int
|
714
|
+
ucs_wcwidth(pg_wchar ucs)
|
715
|
+
{
|
716
|
+
#include "common/unicode_combining_table.h"
|
717
|
+
|
718
|
+
/* test for 8-bit control characters */
|
719
|
+
if (ucs == 0)
|
720
|
+
return 0;
|
721
|
+
|
722
|
+
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
|
723
|
+
return -1;
|
724
|
+
|
725
|
+
/* binary search in table of non-spacing characters */
|
726
|
+
if (mbbisearch(ucs, combining,
|
727
|
+
sizeof(combining) / sizeof(struct mbinterval) - 1))
|
728
|
+
return 0;
|
729
|
+
|
730
|
+
/*
|
731
|
+
* if we arrive here, ucs is not a combining or C0/C1 control character
|
732
|
+
*/
|
733
|
+
|
734
|
+
return 1 +
|
735
|
+
(ucs >= 0x1100 &&
|
736
|
+
(ucs <= 0x115f || /* Hangul Jamo init. consonants */
|
737
|
+
(ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
|
738
|
+
ucs != 0x303f) || /* CJK ... Yi */
|
739
|
+
(ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
|
740
|
+
(ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
|
741
|
+
* Ideographs */
|
742
|
+
(ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
|
743
|
+
(ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
|
744
|
+
(ucs >= 0xffe0 && ucs <= 0xffe6) ||
|
745
|
+
(ucs >= 0x20000 && ucs <= 0x2ffff)));
|
746
|
+
}
|
747
|
+
|
748
|
+
/*
|
749
|
+
* Convert a UTF-8 character to a Unicode code point.
|
750
|
+
* This is a one-character version of pg_utf2wchar_with_len.
|
751
|
+
*
|
752
|
+
* No error checks here, c must point to a long-enough string.
|
753
|
+
*/
|
754
|
+
pg_wchar
|
755
|
+
utf8_to_unicode(const unsigned char *c)
|
756
|
+
{
|
757
|
+
if ((*c & 0x80) == 0)
|
758
|
+
return (pg_wchar) c[0];
|
759
|
+
else if ((*c & 0xe0) == 0xc0)
|
760
|
+
return (pg_wchar) (((c[0] & 0x1f) << 6) |
|
761
|
+
(c[1] & 0x3f));
|
762
|
+
else if ((*c & 0xf0) == 0xe0)
|
763
|
+
return (pg_wchar) (((c[0] & 0x0f) << 12) |
|
764
|
+
((c[1] & 0x3f) << 6) |
|
765
|
+
(c[2] & 0x3f));
|
766
|
+
else if ((*c & 0xf8) == 0xf0)
|
767
|
+
return (pg_wchar) (((c[0] & 0x07) << 18) |
|
768
|
+
((c[1] & 0x3f) << 12) |
|
769
|
+
((c[2] & 0x3f) << 6) |
|
770
|
+
(c[3] & 0x3f));
|
771
|
+
else
|
772
|
+
/* that is an invalid code on purpose */
|
773
|
+
return 0xffffffff;
|
774
|
+
}
|
775
|
+
|
776
|
+
static int
|
777
|
+
pg_utf_dsplen(const unsigned char *s)
|
778
|
+
{
|
779
|
+
return ucs_wcwidth(utf8_to_unicode(s));
|
780
|
+
}
|
781
|
+
|
782
|
+
/*
|
783
|
+
* convert mule internal code to pg_wchar
|
784
|
+
* caller should allocate enough space for "to"
|
785
|
+
* len: length of from.
|
786
|
+
* "from" not necessarily null terminated.
|
787
|
+
*/
|
788
|
+
static int
|
789
|
+
pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
790
|
+
{
|
791
|
+
int cnt = 0;
|
792
|
+
|
793
|
+
while (len > 0 && *from)
|
794
|
+
{
|
795
|
+
if (IS_LC1(*from) && len >= 2)
|
796
|
+
{
|
797
|
+
*to = *from++ << 16;
|
798
|
+
*to |= *from++;
|
799
|
+
len -= 2;
|
800
|
+
}
|
801
|
+
else if (IS_LCPRV1(*from) && len >= 3)
|
802
|
+
{
|
803
|
+
from++;
|
804
|
+
*to = *from++ << 16;
|
805
|
+
*to |= *from++;
|
806
|
+
len -= 3;
|
807
|
+
}
|
808
|
+
else if (IS_LC2(*from) && len >= 3)
|
809
|
+
{
|
810
|
+
*to = *from++ << 16;
|
811
|
+
*to |= *from++ << 8;
|
812
|
+
*to |= *from++;
|
813
|
+
len -= 3;
|
814
|
+
}
|
815
|
+
else if (IS_LCPRV2(*from) && len >= 4)
|
816
|
+
{
|
817
|
+
from++;
|
818
|
+
*to = *from++ << 16;
|
819
|
+
*to |= *from++ << 8;
|
820
|
+
*to |= *from++;
|
821
|
+
len -= 4;
|
822
|
+
}
|
823
|
+
else
|
824
|
+
{ /* assume ASCII */
|
825
|
+
*to = (unsigned char) *from++;
|
826
|
+
len--;
|
827
|
+
}
|
828
|
+
to++;
|
829
|
+
cnt++;
|
830
|
+
}
|
831
|
+
*to = 0;
|
832
|
+
return cnt;
|
833
|
+
}
|
834
|
+
|
835
|
+
/*
|
836
|
+
* convert pg_wchar to mule internal code
|
837
|
+
* caller should allocate enough space for "to"
|
838
|
+
* len: length of from.
|
839
|
+
* "from" not necessarily null terminated.
|
840
|
+
*/
|
841
|
+
static int
|
842
|
+
pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
|
843
|
+
{
|
844
|
+
int cnt = 0;
|
845
|
+
|
846
|
+
while (len > 0 && *from)
|
847
|
+
{
|
848
|
+
unsigned char lb;
|
849
|
+
|
850
|
+
lb = (*from >> 16) & 0xff;
|
851
|
+
if (IS_LC1(lb))
|
852
|
+
{
|
853
|
+
*to++ = lb;
|
854
|
+
*to++ = *from & 0xff;
|
855
|
+
cnt += 2;
|
856
|
+
}
|
857
|
+
else if (IS_LC2(lb))
|
858
|
+
{
|
859
|
+
*to++ = lb;
|
860
|
+
*to++ = (*from >> 8) & 0xff;
|
861
|
+
*to++ = *from & 0xff;
|
862
|
+
cnt += 3;
|
863
|
+
}
|
864
|
+
else if (IS_LCPRV1_A_RANGE(lb))
|
865
|
+
{
|
866
|
+
*to++ = LCPRV1_A;
|
867
|
+
*to++ = lb;
|
868
|
+
*to++ = *from & 0xff;
|
869
|
+
cnt += 3;
|
870
|
+
}
|
871
|
+
else if (IS_LCPRV1_B_RANGE(lb))
|
872
|
+
{
|
873
|
+
*to++ = LCPRV1_B;
|
874
|
+
*to++ = lb;
|
875
|
+
*to++ = *from & 0xff;
|
876
|
+
cnt += 3;
|
877
|
+
}
|
878
|
+
else if (IS_LCPRV2_A_RANGE(lb))
|
879
|
+
{
|
880
|
+
*to++ = LCPRV2_A;
|
881
|
+
*to++ = lb;
|
882
|
+
*to++ = (*from >> 8) & 0xff;
|
883
|
+
*to++ = *from & 0xff;
|
884
|
+
cnt += 4;
|
885
|
+
}
|
886
|
+
else if (IS_LCPRV2_B_RANGE(lb))
|
887
|
+
{
|
888
|
+
*to++ = LCPRV2_B;
|
889
|
+
*to++ = lb;
|
890
|
+
*to++ = (*from >> 8) & 0xff;
|
891
|
+
*to++ = *from & 0xff;
|
892
|
+
cnt += 4;
|
893
|
+
}
|
894
|
+
else
|
895
|
+
{
|
896
|
+
*to++ = *from & 0xff;
|
897
|
+
cnt += 1;
|
898
|
+
}
|
899
|
+
from++;
|
900
|
+
len--;
|
901
|
+
}
|
902
|
+
*to = 0;
|
903
|
+
return cnt;
|
904
|
+
}
|
905
|
+
|
906
|
+
/* exported for direct use by conv.c */
|
907
|
+
/*
 * Byte length of the MULE internal-code character whose lead byte is *s:
 * LC1 -> 2, LCPRV1 -> 3, LC2 -> 3, LCPRV2 -> 4, anything else -> 1.
 * (exported for direct use by conv.c)
 */
int
pg_mule_mblen(const unsigned char *s)
{
    if (IS_LC1(*s))
        return 2;
    if (IS_LCPRV1(*s))
        return 3;
    if (IS_LC2(*s))
        return 3;
    if (IS_LCPRV2(*s))
        return 4;

    return 1;                   /* assume ASCII */
}
|
924
|
+
|
925
|
+
/*
 * Display width of the MULE internal-code character at "s".
 *
 * Note: it's not really appropriate to assume that all multibyte charsets
 * are double-wide on screen.  But this seems an okay approximation for
 * the MULE charsets we currently support.
 *
 * LC1 and LCPRV1 lead bytes give one column, LC2 and LCPRV2 give two, and
 * anything else is taken to be ASCII and given one column.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
    if (IS_LC1(*s) || IS_LCPRV1(*s))
        return 1;
    if (IS_LC2(*s) || IS_LCPRV2(*s))
        return 2;

    return 1;                   /* assume ASCII */
}
|
949
|
+
|
950
|
+
/*
|
951
|
+
* ISO8859-1
|
952
|
+
*/
|
953
|
+
static int
|
954
|
+
pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
955
|
+
{
|
956
|
+
int cnt = 0;
|
957
|
+
|
958
|
+
while (len > 0 && *from)
|
959
|
+
{
|
960
|
+
*to++ = *from++;
|
961
|
+
len--;
|
962
|
+
cnt++;
|
963
|
+
}
|
964
|
+
*to = 0;
|
965
|
+
return cnt;
|
966
|
+
}
|
967
|
+
|
968
|
+
/*
|
969
|
+
* Trivial conversion from pg_wchar to single byte encoding. Just ignores
|
970
|
+
* high bits.
|
971
|
+
* caller should allocate enough space for "to"
|
972
|
+
* len: length of from.
|
973
|
+
* "from" not necessarily null terminated.
|
974
|
+
*/
|
975
|
+
static int
|
976
|
+
pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
|
977
|
+
{
|
978
|
+
int cnt = 0;
|
979
|
+
|
980
|
+
while (len > 0 && *from)
|
981
|
+
{
|
982
|
+
*to++ = *from++;
|
983
|
+
len--;
|
984
|
+
cnt++;
|
985
|
+
}
|
986
|
+
*to = 0;
|
987
|
+
return cnt;
|
988
|
+
}
|
989
|
+
|
990
|
+
/*
 * Byte length of an ISO8859-1 character: always one byte.  The pointed-to
 * data is not inspected.
 */
static int
pg_latin1_mblen(const unsigned char *s)
{
    const int   one_byte = 1;

    return one_byte;
}
|
995
|
+
|
996
|
+
/*
 * Display width of an ISO8859-1 character: same rule as pg_ascii_dsplen().
 */
static int
pg_latin1_dsplen(const unsigned char *s)
{
    int         width = pg_ascii_dsplen(s);

    return width;
}
|
1001
|
+
|
1002
|
+
/*
|
1003
|
+
* SJIS
|
1004
|
+
*/
|
1005
|
+
/*
 * Byte length of the SJIS character whose lead byte is *s: bytes in
 * 0xa1..0xdf are one byte long (1 byte kana?), any other high-bit byte
 * starts a two-byte character (kanji?), and the rest should be ASCII.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if (lead >= 0xa1 && lead <= 0xdf)
        return 1;

    return IS_HIGHBIT_SET(lead) ? 2 : 1;
}
|
1018
|
+
|
1019
|
+
/*
 * Display width of the SJIS character at "s": one column for lead bytes
 * in 0xa1..0xdf (1 byte kana?), two columns for any other high-bit lead
 * byte (kanji?), otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if (lead >= 0xa1 && lead <= 0xdf)
        return 1;
    if (IS_HIGHBIT_SET(lead))
        return 2;

    return pg_ascii_dsplen(s);
}
|
1032
|
+
|
1033
|
+
/*
|
1034
|
+
* Big5
|
1035
|
+
*/
|
1036
|
+
/*
 * Byte length of the Big5 character at "s": two bytes when the lead byte
 * has its high bit set, one byte (should be ASCII) otherwise.
 */
static int
pg_big5_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
|
1047
|
+
|
1048
|
+
/*
 * Display width of the Big5 character at "s": two columns for a high-bit
 * lead byte, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_big5_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
|
1059
|
+
|
1060
|
+
/*
|
1061
|
+
* GBK
|
1062
|
+
*/
|
1063
|
+
/*
 * Byte length of the GBK character at "s": two bytes when the lead byte
 * has its high bit set, one byte (should be ASCII) otherwise.
 */
static int
pg_gbk_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
|
1074
|
+
|
1075
|
+
/*
 * Display width of the GBK character at "s": two columns for a high-bit
 * lead byte, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_gbk_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
|
1086
|
+
|
1087
|
+
/*
|
1088
|
+
* UHC
|
1089
|
+
*/
|
1090
|
+
/*
 * Byte length of the UHC character at "s": two bytes when the lead byte
 * has its high bit set, one byte (should be ASCII) otherwise.
 */
static int
pg_uhc_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
|
1101
|
+
|
1102
|
+
/*
 * Display width of the UHC character at "s": two columns for a high-bit
 * lead byte, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_uhc_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
|
1113
|
+
|
1114
|
+
/*
|
1115
|
+
* GB18030
|
1116
|
+
* Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
|
1117
|
+
*/
|
1118
|
+
|
1119
|
+
/*
|
1120
|
+
* Unlike all other mblen() functions, this also looks at the second byte of
|
1121
|
+
* the input. However, if you only pass the first byte of a multi-byte
|
1122
|
+
* string, and \0 as the second byte, this still works in a predictable way:
|
1123
|
+
* a 4-byte character will be reported as two 2-byte characters. That's
|
1124
|
+
* enough for all current uses, as a client-only encoding. It works that
|
1125
|
+
* way, because in any valid 4-byte GB18030-encoded character, the third and
|
1126
|
+
* fourth byte look like a 2-byte encoded character, when looked at
|
1127
|
+
* separately.
|
1128
|
+
*/
|
1129
|
+
/*
 * Byte length of the GB18030 character at "s".
 *
 * A lead byte without the high bit is ASCII (length 1).  Otherwise the
 * SECOND byte decides: a value in 0x30..0x39 marks a four-byte character,
 * anything else a two-byte one.  Note that s[1] is read whenever the lead
 * byte has its high bit set, so the caller must make that byte readable.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
    if (!IS_HIGHBIT_SET(*s))
        return 1;               /* ASCII */

    return (s[1] >= 0x30 && s[1] <= 0x39) ? 4 : 2;
}
|
1142
|
+
|
1143
|
+
/*
 * Display width of the GB18030 character at "s": two columns for a
 * high-bit lead byte, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);  /* ASCII */
}
|
1154
|
+
|
1155
|
+
/*
|
1156
|
+
*-------------------------------------------------------------------
|
1157
|
+
* multibyte sequence validators
|
1158
|
+
*
|
1159
|
+
* These functions accept "s", a pointer to the first byte of a string,
|
1160
|
+
* and "len", the remaining length of the string. If there is a validly
|
1161
|
+
* encoded character beginning at *s, return its length in bytes; else
|
1162
|
+
* return -1.
|
1163
|
+
*
|
1164
|
+
* The functions can assume that len > 0 and that *s != '\0', but they must
|
1165
|
+
* test for and reject zeroes in any additional bytes of a multibyte character.
|
1166
|
+
*
|
1167
|
+
* Note that this definition allows the function for a single-byte
|
1168
|
+
* encoding to be just "return 1".
|
1169
|
+
*-------------------------------------------------------------------
|
1170
|
+
*/
|
1171
|
+
|
1172
|
+
/*
 * Validate one SQL_ASCII character: every byte is accepted as a valid
 * one-byte character, so neither "s" nor "len" is examined.
 */
static int
pg_ascii_verifier(const unsigned char *s, int len)
{
    const int   one_byte = 1;

    return one_byte;
}
|
1177
|
+
|
1178
|
+
/* True when c lies in 0xa1..0xfe inclusive.  Note: c is evaluated twice. */
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
|
1179
|
+
|
1180
|
+
static int
|
1181
|
+
pg_eucjp_verifier(const unsigned char *s, int len)
|
1182
|
+
{
|
1183
|
+
int l;
|
1184
|
+
unsigned char c1,
|
1185
|
+
c2;
|
1186
|
+
|
1187
|
+
c1 = *s++;
|
1188
|
+
|
1189
|
+
switch (c1)
|
1190
|
+
{
|
1191
|
+
case SS2: /* JIS X 0201 */
|
1192
|
+
l = 2;
|
1193
|
+
if (l > len)
|
1194
|
+
return -1;
|
1195
|
+
c2 = *s++;
|
1196
|
+
if (c2 < 0xa1 || c2 > 0xdf)
|
1197
|
+
return -1;
|
1198
|
+
break;
|
1199
|
+
|
1200
|
+
case SS3: /* JIS X 0212 */
|
1201
|
+
l = 3;
|
1202
|
+
if (l > len)
|
1203
|
+
return -1;
|
1204
|
+
c2 = *s++;
|
1205
|
+
if (!IS_EUC_RANGE_VALID(c2))
|
1206
|
+
return -1;
|
1207
|
+
c2 = *s++;
|
1208
|
+
if (!IS_EUC_RANGE_VALID(c2))
|
1209
|
+
return -1;
|
1210
|
+
break;
|
1211
|
+
|
1212
|
+
default:
|
1213
|
+
if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
|
1214
|
+
{
|
1215
|
+
l = 2;
|
1216
|
+
if (l > len)
|
1217
|
+
return -1;
|
1218
|
+
if (!IS_EUC_RANGE_VALID(c1))
|
1219
|
+
return -1;
|
1220
|
+
c2 = *s++;
|
1221
|
+
if (!IS_EUC_RANGE_VALID(c2))
|
1222
|
+
return -1;
|
1223
|
+
}
|
1224
|
+
else
|
1225
|
+
/* must be ASCII */
|
1226
|
+
{
|
1227
|
+
l = 1;
|
1228
|
+
}
|
1229
|
+
break;
|
1230
|
+
}
|
1231
|
+
|
1232
|
+
return l;
|
1233
|
+
}
|
1234
|
+
|
1235
|
+
/*
 * Validate the EUC_KR character starting at "s", with "len" bytes
 * available.  A high-bit lead byte starts a two-byte character whose
 * bytes must both lie in 0xa1..0xfe; anything else is a one-byte
 * character.  Returns the byte length, or -1 on a truncated or
 * out-of-range sequence.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
    const unsigned char lead = s[0];

    if (!IS_HIGHBIT_SET(lead))
        return 1;               /* must be ASCII */

    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(lead) || !IS_EUC_RANGE_VALID(s[1]))
        return -1;

    return 2;
}
|
1263
|
+
|
1264
|
+
/* EUC-CN byte sequences are exactly the same as EUC-KR */
|
1265
|
+
/* Not a separate function: every use of this name expands to pg_euckr_verifier. */
#define pg_euccn_verifier pg_euckr_verifier
|
1266
|
+
|
1267
|
+
static int
|
1268
|
+
pg_euctw_verifier(const unsigned char *s, int len)
|
1269
|
+
{
|
1270
|
+
int l;
|
1271
|
+
unsigned char c1,
|
1272
|
+
c2;
|
1273
|
+
|
1274
|
+
c1 = *s++;
|
1275
|
+
|
1276
|
+
switch (c1)
|
1277
|
+
{
|
1278
|
+
case SS2: /* CNS 11643 Plane 1-7 */
|
1279
|
+
l = 4;
|
1280
|
+
if (l > len)
|
1281
|
+
return -1;
|
1282
|
+
c2 = *s++;
|
1283
|
+
if (c2 < 0xa1 || c2 > 0xa7)
|
1284
|
+
return -1;
|
1285
|
+
c2 = *s++;
|
1286
|
+
if (!IS_EUC_RANGE_VALID(c2))
|
1287
|
+
return -1;
|
1288
|
+
c2 = *s++;
|
1289
|
+
if (!IS_EUC_RANGE_VALID(c2))
|
1290
|
+
return -1;
|
1291
|
+
break;
|
1292
|
+
|
1293
|
+
case SS3: /* unused */
|
1294
|
+
return -1;
|
1295
|
+
|
1296
|
+
default:
|
1297
|
+
if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
|
1298
|
+
{
|
1299
|
+
l = 2;
|
1300
|
+
if (l > len)
|
1301
|
+
return -1;
|
1302
|
+
/* no further range check on c1? */
|
1303
|
+
c2 = *s++;
|
1304
|
+
if (!IS_EUC_RANGE_VALID(c2))
|
1305
|
+
return -1;
|
1306
|
+
}
|
1307
|
+
else
|
1308
|
+
/* must be ASCII */
|
1309
|
+
{
|
1310
|
+
l = 1;
|
1311
|
+
}
|
1312
|
+
break;
|
1313
|
+
}
|
1314
|
+
return l;
|
1315
|
+
}
|
1316
|
+
|
1317
|
+
/*
 * Validate the JOHAB character starting at "s", with "len" bytes
 * available.  The expected length comes from pg_johab_mblen().  A lead
 * byte without the high bit is accepted as is; otherwise every trailing
 * byte must lie in 0xa1..0xfe.  Returns the byte length, or -1 on a
 * truncated or out-of-range sequence.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_johab_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    if (!IS_HIGHBIT_SET(*s))
        return mbl;

    for (i = 1; i < mbl; i++)
    {
        const unsigned char c = s[i];

        if (!IS_EUC_RANGE_VALID(c))
            return -1;
    }

    return mbl;
}
|
1340
|
+
|
1341
|
+
/*
 * Validate the MULE internal-code character starting at "s", with "len"
 * bytes available.  The expected length comes from pg_mule_mblen(); every
 * trailing byte must have its high bit set.  Returns the byte length, or
 * -1 on a truncated sequence or an offending trailing byte.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_mule_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        const unsigned char c = s[i];

        if (!IS_HIGHBIT_SET(c))
            return -1;
    }

    return mbl;
}
|
1361
|
+
|
1362
|
+
/*
 * Validate one ISO8859-1 character: every byte is accepted as a valid
 * one-byte character, so neither "s" nor "len" is examined.
 */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
    const int   one_byte = 1;

    return one_byte;
}
|
1367
|
+
|
1368
|
+
/*
 * Validate the SJIS character starting at "s", with "len" bytes
 * available.  The expected length comes from pg_sjis_mblen().  One-byte
 * characters need no further checking; for two-byte characters the first
 * byte must satisfy ISSJISHEAD and the second ISSJISTAIL.  Returns the
 * byte length, or -1 on a truncated or invalid sequence.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_sjis_mblen(s);
    unsigned char head,
                tail;

    if (len < mbl)
        return -1;

    if (mbl == 1)               /* pg_sjis_mblen already verified it */
        return mbl;

    head = s[0];
    tail = s[1];
    if (ISSJISHEAD(head) && ISSJISTAIL(tail))
        return mbl;

    return -1;
}
|
1390
|
+
|
1391
|
+
/*
 * Validate the Big5 character starting at "s", with "len" bytes
 * available.  The expected length comes from pg_big5_mblen(); the only
 * check on trailing bytes is that none of them is NUL.  Returns the byte
 * length, or -1 on a truncated sequence or an embedded NUL.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_big5_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
|
1410
|
+
|
1411
|
+
/*
 * Verify the first GBK character in s, of which len bytes are available.
 *
 * Returns the character's byte length as reported by pg_gbk_mblen(), or
 * -1 if the character is truncated or contains a zero byte after the
 * first byte.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_gbk_mblen(s);
	int			i;

	/* The whole character must fit in the supplied buffer */
	if (charlen > len)
		return -1;

	/* No byte after the first may be NUL */
	for (i = 1; i < charlen; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return charlen;
}
|
1430
|
+
|
1431
|
+
/*
 * Verify the first UHC character in s, of which len bytes are available.
 *
 * Returns the character's byte length as reported by pg_uhc_mblen(), or
 * -1 if the character is truncated or contains a zero byte after the
 * first byte.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_uhc_mblen(s);
	int			i;

	/* The whole character must fit in the supplied buffer */
	if (charlen > len)
		return -1;

	/* No byte after the first may be NUL */
	for (i = 1; i < charlen; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return charlen;
}
|
1450
|
+
|
1451
|
+
/*
 * Verify the first GB18030 character in s, of which len bytes are
 * available.
 *
 * Returns 1, 2 or 4 (the character's byte length) when the bytes form an
 * acceptable sequence, and -1 otherwise.  The length is derived here from
 * the byte values rather than from pg_gb18030_mblen().
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
	/* A byte without the high bit is a one-byte (ASCII) character */
	if (!IS_HIGHBIT_SET(s[0]))
		return 1;

	/*
	 * A digit-range second byte marks a four-byte character, provided four
	 * bytes are available; then the other three bytes must be in range too.
	 */
	if (len >= 4 && s[1] >= 0x30 && s[1] <= 0x39)
	{
		if (s[0] >= 0x81 && s[0] <= 0xfe &&
			s[2] >= 0x81 && s[2] <= 0xfe &&
			s[3] >= 0x30 && s[3] <= 0x39)
			return 4;
		return -1;
	}

	/*
	 * Otherwise treat it as a two-byte character: the second byte may be
	 * anything in 0x40..0xfe except 0x7f.
	 */
	if (len >= 2 && s[0] >= 0x81 && s[0] <= 0xfe)
	{
		if (s[1] >= 0x40 && s[1] <= 0xfe && s[1] != 0x7f)
			return 2;
		return -1;
	}

	/* Invalid lead byte, or not enough bytes for any multibyte form */
	return -1;
}
|
1481
|
+
|
1482
|
+
/*
 * Verify the first UTF-8 character in s, of which len bytes are available.
 *
 * Returns the character's byte length as reported by pg_utf_mblen(), or
 * -1 if the character is truncated or rejected by pg_utf8_islegal().
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_utf_mblen(s);

	/*
	 * The length test must come first: pg_utf8_islegal() is only called
	 * once charlen bytes are known to be present.
	 */
	if (charlen > len || !pg_utf8_islegal(s, charlen))
		return -1;

	return charlen;
}
|
1495
|
+
|
1496
|
+
/*
 * Decide whether a single UTF-8 encoded character is well-formed.
 *
 * The checks follow RFC3629.  The special-case bounds on the second byte
 * exist so that each Unicode code point has exactly one accepted encoding:
 * a longer-than-necessary sequence padded with high-order zero bits is
 * rejected, because accepting it would be a security hazard (e.g. an
 * apparently non-ASCII sequence that decodes to plain ASCII).
 *
 * length is expected to come from pg_utf_mblen(), and the caller must
 * already have ensured that this many bytes are present at source.
 * Lengths outside 1..4 (including 5 and 6) are rejected.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
	unsigned char lead;
	unsigned char second_lo = 0x80;
	unsigned char second_hi = 0xBF;
	int			i;

	/* Only sequences of one to four bytes can be legal */
	if (length < 1 || length > 4)
		return false;

	/*
	 * The first byte may not be in 0x80..0xC1 (continuation bytes and
	 * overlong two-byte leads) nor above 0xF4.
	 */
	lead = source[0];
	if ((lead >= 0x80 && lead < 0xC2) || lead > 0xF4)
		return false;

	if (length == 1)
		return true;

	/* Some lead bytes narrow the range allowed for the second byte */
	switch (lead)
	{
		case 0xE0:
			second_lo = 0xA0;	/* no overlong three-byte forms */
			break;
		case 0xED:
			second_hi = 0x9F;	/* no UTF-16 surrogates */
			break;
		case 0xF0:
			second_lo = 0x90;	/* no overlong four-byte forms */
			break;
		case 0xF4:
			second_hi = 0x8F;	/* nothing above U+10FFFF */
			break;
		default:
			break;
	}
	if (source[1] < second_lo || source[1] > second_hi)
		return false;

	/* Third and fourth bytes, when present, are plain continuation bytes */
	for (i = 2; i < length; i++)
	{
		if (source[i] < 0x80 || source[i] > 0xBF)
			return false;
	}

	return true;
}
|
1566
|
+
|
1567
|
+
|
1568
|
+
/*
|
1569
|
+
*-------------------------------------------------------------------
|
1570
|
+
* encoding info table
|
1571
|
+
* XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
|
1572
|
+
*-------------------------------------------------------------------
|
1573
|
+
*/
|
1574
|
+
const pg_wchar_tbl pg_wchar_table[] = {
|
1575
|
+
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
|
1576
|
+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
|
1577
|
+
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
|
1578
|
+
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
|
1579
|
+
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
|
1580
|
+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
|
1581
|
+
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
|
1582
|
+
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
|
1583
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
|
1584
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
|
1585
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
|
1586
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
|
1587
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
|
1588
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
|
1589
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
|
1590
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
|
1591
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
|
1592
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
|
1593
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
|
1594
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
|
1595
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
|
1596
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
|
1597
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
|
1598
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
|
1599
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
|
1600
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
|
1601
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
|
1602
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
|
1603
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
|
1604
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
|
1605
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
|
1606
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
|
1607
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
|
1608
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
|
1609
|
+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
|
1610
|
+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
|
1611
|
+
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
|
1612
|
+
{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
|
1613
|
+
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
|
1614
|
+
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
|
1615
|
+
{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
|
1616
|
+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
|
1617
|
+
};
|
1618
|
+
|
1619
|
+
/*
|
1620
|
+
* Returns the byte length of a multibyte character.
|
1621
|
+
*/
|
1622
|
+
int
|
1623
|
+
pg_encoding_mblen(int encoding, const char *mbstr)
|
1624
|
+
{
|
1625
|
+
return (PG_VALID_ENCODING(encoding) ?
|
1626
|
+
pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
|
1627
|
+
pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
|
1628
|
+
}
|
1629
|
+
|
1630
|
+
/*
|
1631
|
+
* Returns the display length of a multibyte character.
|
1632
|
+
*/
|
1633
|
+
|
1634
|
+
|
1635
|
+
/*
|
1636
|
+
* Verify the first multibyte character of the given string.
|
1637
|
+
* Return its byte length if good, -1 if bad. (See comments above for
|
1638
|
+
* full details of the mbverify API.)
|
1639
|
+
*/
|
1640
|
+
|
1641
|
+
|
1642
|
+
/*
|
1643
|
+
* fetch maximum length of a given encoding
|
1644
|
+
*/
|
1645
|
+
int
|
1646
|
+
pg_encoding_max_length(int encoding)
|
1647
|
+
{
|
1648
|
+
Assert(PG_VALID_ENCODING(encoding));
|
1649
|
+
|
1650
|
+
return pg_wchar_table[encoding].maxmblen;
|
1651
|
+
}
|