pg_query 2.2.0 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (467)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/README.md +59 -31
  4. data/Rakefile +2 -2
  5. data/ext/pg_query/include/access/amapi.h +45 -1
  6. data/ext/pg_query/include/access/attmap.h +1 -1
  7. data/ext/pg_query/include/access/attnum.h +2 -2
  8. data/ext/pg_query/include/access/clog.h +4 -2
  9. data/ext/pg_query/include/access/commit_ts.h +6 -9
  10. data/ext/pg_query/include/access/detoast.h +1 -11
  11. data/ext/pg_query/include/access/genam.h +15 -12
  12. data/ext/pg_query/include/access/gin.h +2 -2
  13. data/ext/pg_query/include/access/htup.h +1 -1
  14. data/ext/pg_query/include/access/htup_details.h +75 -87
  15. data/ext/pg_query/include/access/itup.h +7 -1
  16. data/ext/pg_query/include/access/parallel.h +2 -2
  17. data/ext/pg_query/include/access/printtup.h +1 -1
  18. data/ext/pg_query/include/access/relation.h +1 -1
  19. data/ext/pg_query/include/access/relscan.h +17 -2
  20. data/ext/pg_query/include/access/rmgr.h +30 -3
  21. data/ext/pg_query/include/access/rmgrlist.h +23 -23
  22. data/ext/pg_query/include/access/sdir.h +1 -1
  23. data/ext/pg_query/include/access/skey.h +1 -1
  24. data/ext/pg_query/include/access/stratnum.h +4 -2
  25. data/ext/pg_query/include/access/sysattr.h +1 -1
  26. data/ext/pg_query/include/access/table.h +2 -1
  27. data/ext/pg_query/include/access/tableam.h +272 -20
  28. data/ext/pg_query/include/access/toast_compression.h +73 -0
  29. data/ext/pg_query/include/access/transam.h +123 -13
  30. data/ext/pg_query/include/access/tupconvert.h +1 -1
  31. data/ext/pg_query/include/access/tupdesc.h +1 -1
  32. data/ext/pg_query/include/access/tupmacs.h +3 -3
  33. data/ext/pg_query/include/access/twophase.h +3 -1
  34. data/ext/pg_query/include/access/xact.h +73 -19
  35. data/ext/pg_query/include/access/xlog.h +60 -155
  36. data/ext/pg_query/include/access/xlog_internal.h +40 -13
  37. data/ext/pg_query/include/access/xlogdefs.h +8 -16
  38. data/ext/pg_query/include/access/xlogprefetcher.h +55 -0
  39. data/ext/pg_query/include/access/xlogreader.h +145 -39
  40. data/ext/pg_query/include/access/xlogrecord.h +18 -9
  41. data/ext/pg_query/include/access/xlogrecovery.h +157 -0
  42. data/ext/pg_query/include/c.h +101 -44
  43. data/ext/pg_query/include/catalog/catalog.h +3 -1
  44. data/ext/pg_query/include/catalog/catversion.h +2 -2
  45. data/ext/pg_query/include/catalog/dependency.h +8 -16
  46. data/ext/pg_query/include/catalog/genbki.h +83 -5
  47. data/ext/pg_query/include/catalog/index.h +18 -3
  48. data/ext/pg_query/include/catalog/indexing.h +12 -324
  49. data/ext/pg_query/include/catalog/namespace.h +4 -2
  50. data/ext/pg_query/include/catalog/objectaccess.h +70 -2
  51. data/ext/pg_query/include/catalog/objectaddress.h +11 -6
  52. data/ext/pg_query/include/catalog/pg_aggregate.h +14 -10
  53. data/ext/pg_query/include/catalog/pg_aggregate_d.h +2 -1
  54. data/ext/pg_query/include/catalog/pg_am.h +4 -1
  55. data/ext/pg_query/include/catalog/pg_am_d.h +3 -1
  56. data/ext/pg_query/include/catalog/pg_attribute.h +27 -10
  57. data/ext/pg_query/include/catalog/pg_attribute_d.h +21 -18
  58. data/ext/pg_query/include/catalog/pg_authid.h +7 -2
  59. data/ext/pg_query/include/catalog/pg_authid_d.h +17 -9
  60. data/ext/pg_query/include/catalog/pg_class.h +44 -14
  61. data/ext/pg_query/include/catalog/pg_class_d.h +30 -1
  62. data/ext/pg_query/include/catalog/pg_collation.h +33 -8
  63. data/ext/pg_query/include/catalog/pg_collation_d.h +20 -3
  64. data/ext/pg_query/include/catalog/pg_constraint.h +38 -12
  65. data/ext/pg_query/include/catalog/pg_constraint_d.h +10 -4
  66. data/ext/pg_query/include/catalog/pg_control.h +3 -5
  67. data/ext/pg_query/include/catalog/pg_conversion.h +7 -4
  68. data/ext/pg_query/include/catalog/pg_conversion_d.h +4 -1
  69. data/ext/pg_query/include/catalog/pg_depend.h +11 -7
  70. data/ext/pg_query/include/catalog/pg_depend_d.h +3 -1
  71. data/ext/pg_query/include/catalog/pg_event_trigger.h +9 -3
  72. data/ext/pg_query/include/catalog/pg_event_trigger_d.h +3 -1
  73. data/ext/pg_query/include/catalog/pg_index.h +17 -7
  74. data/ext/pg_query/include/catalog/pg_index_d.h +20 -17
  75. data/ext/pg_query/include/catalog/pg_language.h +10 -5
  76. data/ext/pg_query/include/catalog/pg_language_d.h +3 -1
  77. data/ext/pg_query/include/catalog/pg_namespace.h +7 -2
  78. data/ext/pg_query/include/catalog/pg_namespace_d.h +3 -1
  79. data/ext/pg_query/include/catalog/pg_opclass.h +8 -5
  80. data/ext/pg_query/include/catalog/pg_opclass_d.h +3 -1
  81. data/ext/pg_query/include/catalog/pg_operator.h +18 -15
  82. data/ext/pg_query/include/catalog/pg_operator_d.h +37 -1
  83. data/ext/pg_query/include/catalog/pg_opfamily.h +6 -3
  84. data/ext/pg_query/include/catalog/pg_opfamily_d.h +3 -1
  85. data/ext/pg_query/include/catalog/pg_parameter_acl.h +60 -0
  86. data/ext/pg_query/include/catalog/pg_parameter_acl_d.h +34 -0
  87. data/ext/pg_query/include/catalog/pg_partitioned_table.h +20 -9
  88. data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +2 -1
  89. data/ext/pg_query/include/catalog/pg_proc.h +20 -11
  90. data/ext/pg_query/include/catalog/pg_proc_d.h +10 -8
  91. data/ext/pg_query/include/catalog/pg_publication.h +50 -7
  92. data/ext/pg_query/include/catalog/pg_publication_d.h +3 -1
  93. data/ext/pg_query/include/catalog/pg_replication_origin.h +6 -1
  94. data/ext/pg_query/include/catalog/pg_replication_origin_d.h +5 -1
  95. data/ext/pg_query/include/catalog/pg_statistic.h +19 -12
  96. data/ext/pg_query/include/catalog/pg_statistic_d.h +2 -1
  97. data/ext/pg_query/include/catalog/pg_statistic_ext.h +19 -5
  98. data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +7 -2
  99. data/ext/pg_query/include/catalog/pg_transform.h +8 -5
  100. data/ext/pg_query/include/catalog/pg_transform_d.h +3 -1
  101. data/ext/pg_query/include/catalog/pg_trigger.h +24 -8
  102. data/ext/pg_query/include/catalog/pg_trigger_d.h +4 -1
  103. data/ext/pg_query/include/catalog/pg_ts_config.h +6 -3
  104. data/ext/pg_query/include/catalog/pg_ts_config_d.h +3 -1
  105. data/ext/pg_query/include/catalog/pg_ts_dict.h +8 -3
  106. data/ext/pg_query/include/catalog/pg_ts_dict_d.h +3 -1
  107. data/ext/pg_query/include/catalog/pg_ts_parser.h +6 -3
  108. data/ext/pg_query/include/catalog/pg_ts_parser_d.h +3 -1
  109. data/ext/pg_query/include/catalog/pg_ts_template.h +6 -3
  110. data/ext/pg_query/include/catalog/pg_ts_template_d.h +3 -1
  111. data/ext/pg_query/include/catalog/pg_type.h +55 -24
  112. data/ext/pg_query/include/catalog/pg_type_d.h +70 -31
  113. data/ext/pg_query/include/catalog/storage.h +5 -3
  114. data/ext/pg_query/include/commands/async.h +3 -4
  115. data/ext/pg_query/include/commands/dbcommands.h +2 -1
  116. data/ext/pg_query/include/commands/defrem.h +11 -24
  117. data/ext/pg_query/include/commands/event_trigger.h +2 -2
  118. data/ext/pg_query/include/commands/explain.h +1 -1
  119. data/ext/pg_query/include/commands/prepare.h +1 -1
  120. data/ext/pg_query/include/commands/tablespace.h +2 -2
  121. data/ext/pg_query/include/commands/trigger.h +18 -16
  122. data/ext/pg_query/include/commands/user.h +2 -2
  123. data/ext/pg_query/include/commands/vacuum.h +88 -41
  124. data/ext/pg_query/include/commands/variable.h +1 -1
  125. data/ext/pg_query/include/common/file_perm.h +4 -4
  126. data/ext/pg_query/include/common/hashfn.h +1 -1
  127. data/ext/pg_query/include/common/ip.h +1 -7
  128. data/ext/pg_query/include/common/keywords.h +2 -6
  129. data/ext/pg_query/include/common/kwlookup.h +1 -1
  130. data/ext/pg_query/include/common/pg_prng.h +60 -0
  131. data/ext/pg_query/include/common/relpath.h +2 -2
  132. data/ext/pg_query/include/common/string.h +24 -1
  133. data/ext/pg_query/include/common/unicode_combining_table.h +114 -2
  134. data/ext/pg_query/include/common/unicode_east_asian_fw_table.h +125 -0
  135. data/ext/pg_query/include/datatype/timestamp.h +40 -1
  136. data/ext/pg_query/include/executor/execdesc.h +1 -1
  137. data/ext/pg_query/include/executor/executor.h +65 -22
  138. data/ext/pg_query/include/executor/functions.h +17 -3
  139. data/ext/pg_query/include/executor/instrument.h +33 -16
  140. data/ext/pg_query/include/executor/spi.h +41 -3
  141. data/ext/pg_query/include/executor/tablefunc.h +1 -1
  142. data/ext/pg_query/include/executor/tuptable.h +1 -1
  143. data/ext/pg_query/include/fmgr.h +13 -7
  144. data/ext/pg_query/include/funcapi.h +16 -4
  145. data/ext/pg_query/include/getaddrinfo.h +1 -1
  146. data/ext/pg_query/include/jit/jit.h +11 -11
  147. data/ext/pg_query/include/kwlist_d.h +517 -494
  148. data/ext/pg_query/include/lib/dshash.h +112 -0
  149. data/ext/pg_query/include/lib/ilist.h +20 -1
  150. data/ext/pg_query/include/lib/pairingheap.h +1 -1
  151. data/ext/pg_query/include/lib/simplehash.h +140 -15
  152. data/ext/pg_query/include/lib/sort_template.h +432 -0
  153. data/ext/pg_query/include/lib/stringinfo.h +1 -1
  154. data/ext/pg_query/include/libpq/auth.h +6 -4
  155. data/ext/pg_query/include/libpq/crypt.h +5 -4
  156. data/ext/pg_query/include/libpq/hba.h +43 -4
  157. data/ext/pg_query/include/libpq/libpq-be.h +23 -6
  158. data/ext/pg_query/include/libpq/libpq.h +30 -20
  159. data/ext/pg_query/include/libpq/pqcomm.h +17 -31
  160. data/ext/pg_query/include/libpq/pqformat.h +1 -1
  161. data/ext/pg_query/include/libpq/pqsignal.h +4 -4
  162. data/ext/pg_query/include/mb/pg_wchar.h +105 -23
  163. data/ext/pg_query/include/mb/stringinfo_mb.h +1 -1
  164. data/ext/pg_query/include/miscadmin.h +47 -41
  165. data/ext/pg_query/include/nodes/bitmapset.h +1 -1
  166. data/ext/pg_query/include/nodes/execnodes.h +270 -78
  167. data/ext/pg_query/include/nodes/extensible.h +4 -2
  168. data/ext/pg_query/include/nodes/lockoptions.h +1 -1
  169. data/ext/pg_query/include/nodes/makefuncs.h +7 -6
  170. data/ext/pg_query/include/nodes/memnodes.h +5 -3
  171. data/ext/pg_query/include/nodes/nodeFuncs.h +1 -1
  172. data/ext/pg_query/include/nodes/nodes.h +30 -11
  173. data/ext/pg_query/include/nodes/params.h +1 -1
  174. data/ext/pg_query/include/nodes/parsenodes.h +322 -90
  175. data/ext/pg_query/include/nodes/pathnodes.h +243 -66
  176. data/ext/pg_query/include/nodes/pg_list.h +75 -69
  177. data/ext/pg_query/include/nodes/plannodes.h +111 -28
  178. data/ext/pg_query/include/nodes/primnodes.h +99 -47
  179. data/ext/pg_query/include/nodes/print.h +1 -1
  180. data/ext/pg_query/include/nodes/tidbitmap.h +1 -1
  181. data/ext/pg_query/include/nodes/value.h +58 -39
  182. data/ext/pg_query/include/optimizer/cost.h +9 -2
  183. data/ext/pg_query/include/optimizer/geqo.h +9 -7
  184. data/ext/pg_query/include/optimizer/geqo_gene.h +1 -1
  185. data/ext/pg_query/include/optimizer/optimizer.h +25 -17
  186. data/ext/pg_query/include/optimizer/paths.h +6 -6
  187. data/ext/pg_query/include/optimizer/planmain.h +15 -14
  188. data/ext/pg_query/include/parser/analyze.h +19 -5
  189. data/ext/pg_query/include/parser/gram.h +947 -913
  190. data/ext/pg_query/include/parser/gramparse.h +1 -1
  191. data/ext/pg_query/include/parser/kwlist.h +463 -453
  192. data/ext/pg_query/include/parser/parse_agg.h +2 -7
  193. data/ext/pg_query/include/parser/parse_coerce.h +3 -1
  194. data/ext/pg_query/include/parser/parse_expr.h +2 -3
  195. data/ext/pg_query/include/parser/parse_func.h +2 -1
  196. data/ext/pg_query/include/parser/parse_node.h +21 -9
  197. data/ext/pg_query/include/parser/parse_oper.h +1 -3
  198. data/ext/pg_query/include/parser/parse_relation.h +5 -4
  199. data/ext/pg_query/include/parser/parse_type.h +1 -1
  200. data/ext/pg_query/include/parser/parser.h +31 -4
  201. data/ext/pg_query/include/parser/parsetree.h +1 -1
  202. data/ext/pg_query/include/parser/scanner.h +1 -1
  203. data/ext/pg_query/include/parser/scansup.h +2 -5
  204. data/ext/pg_query/include/partitioning/partdefs.h +1 -1
  205. data/ext/pg_query/include/pg_config.h +83 -41
  206. data/ext/pg_query/include/pg_config_manual.h +74 -21
  207. data/ext/pg_query/include/pg_getopt.h +6 -6
  208. data/ext/pg_query/include/pg_query.h +5 -4
  209. data/ext/pg_query/include/pg_query_enum_defs.c +358 -241
  210. data/ext/pg_query/include/pg_query_fingerprint_conds.c +44 -7
  211. data/ext/pg_query/include/pg_query_fingerprint_defs.c +939 -113
  212. data/ext/pg_query/include/pg_query_outfuncs_conds.c +43 -13
  213. data/ext/pg_query/include/pg_query_outfuncs_defs.c +151 -26
  214. data/ext/pg_query/include/pg_query_readfuncs_conds.c +11 -2
  215. data/ext/pg_query/include/pg_query_readfuncs_defs.c +173 -30
  216. data/ext/pg_query/include/pg_trace.h +1 -1
  217. data/ext/pg_query/include/pgstat.h +449 -1238
  218. data/ext/pg_query/include/pgtime.h +14 -4
  219. data/ext/pg_query/include/pl_gram.h +126 -128
  220. data/ext/pg_query/include/pl_reserved_kwlist.h +1 -1
  221. data/ext/pg_query/include/pl_reserved_kwlist_d.h +10 -10
  222. data/ext/pg_query/include/pl_unreserved_kwlist.h +2 -3
  223. data/ext/pg_query/include/pl_unreserved_kwlist_d.h +54 -56
  224. data/ext/pg_query/include/plerrcodes.h +9 -1
  225. data/ext/pg_query/include/plpgsql.h +52 -54
  226. data/ext/pg_query/include/port/atomics/arch-arm.h +7 -1
  227. data/ext/pg_query/include/port/atomics/arch-ppc.h +1 -1
  228. data/ext/pg_query/include/port/atomics/arch-x86.h +1 -1
  229. data/ext/pg_query/include/port/atomics/fallback.h +1 -1
  230. data/ext/pg_query/include/port/atomics/generic-gcc.h +3 -3
  231. data/ext/pg_query/include/port/atomics/generic.h +1 -1
  232. data/ext/pg_query/include/port/atomics.h +1 -1
  233. data/ext/pg_query/include/port/pg_bitutils.h +40 -10
  234. data/ext/pg_query/include/port/pg_bswap.h +1 -1
  235. data/ext/pg_query/include/port/pg_crc32c.h +1 -1
  236. data/ext/pg_query/include/port.h +71 -46
  237. data/ext/pg_query/include/portability/instr_time.h +1 -1
  238. data/ext/pg_query/include/postgres.h +60 -16
  239. data/ext/pg_query/include/postmaster/autovacuum.h +17 -17
  240. data/ext/pg_query/include/postmaster/auxprocess.h +20 -0
  241. data/ext/pg_query/include/postmaster/bgworker.h +2 -1
  242. data/ext/pg_query/include/postmaster/bgworker_internals.h +2 -2
  243. data/ext/pg_query/include/postmaster/bgwriter.h +5 -5
  244. data/ext/pg_query/include/postmaster/fork_process.h +1 -1
  245. data/ext/pg_query/include/postmaster/interrupt.h +1 -1
  246. data/ext/pg_query/include/postmaster/pgarch.h +42 -8
  247. data/ext/pg_query/include/postmaster/postmaster.h +18 -17
  248. data/ext/pg_query/include/postmaster/startup.h +39 -0
  249. data/ext/pg_query/include/postmaster/syslogger.h +15 -10
  250. data/ext/pg_query/include/postmaster/walwriter.h +3 -3
  251. data/ext/pg_query/include/protobuf/pg_query.pb-c.h +1419 -914
  252. data/ext/pg_query/include/protobuf/pg_query.pb.h +43678 -32769
  253. data/ext/pg_query/include/regex/regex.h +18 -16
  254. data/ext/pg_query/include/replication/logicallauncher.h +3 -5
  255. data/ext/pg_query/include/replication/logicalproto.h +161 -17
  256. data/ext/pg_query/include/replication/logicalworker.h +1 -1
  257. data/ext/pg_query/include/replication/origin.h +7 -7
  258. data/ext/pg_query/include/replication/reorderbuffer.h +259 -42
  259. data/ext/pg_query/include/replication/slot.h +22 -11
  260. data/ext/pg_query/include/replication/syncrep.h +5 -5
  261. data/ext/pg_query/include/replication/walreceiver.h +145 -13
  262. data/ext/pg_query/include/replication/walsender.h +8 -8
  263. data/ext/pg_query/include/rewrite/prs2lock.h +1 -1
  264. data/ext/pg_query/include/rewrite/rewriteHandler.h +1 -3
  265. data/ext/pg_query/include/rewrite/rewriteManip.h +1 -1
  266. data/ext/pg_query/include/rewrite/rewriteSupport.h +1 -1
  267. data/ext/pg_query/include/storage/backendid.h +3 -3
  268. data/ext/pg_query/include/storage/block.h +4 -10
  269. data/ext/pg_query/include/storage/buf.h +1 -1
  270. data/ext/pg_query/include/storage/bufmgr.h +19 -14
  271. data/ext/pg_query/include/storage/bufpage.h +6 -8
  272. data/ext/pg_query/include/storage/condition_variable.h +13 -2
  273. data/ext/pg_query/include/storage/dsm.h +4 -1
  274. data/ext/pg_query/include/storage/dsm_impl.h +3 -2
  275. data/ext/pg_query/include/storage/fd.h +33 -3
  276. data/ext/pg_query/include/storage/fileset.h +40 -0
  277. data/ext/pg_query/include/storage/ipc.h +4 -1
  278. data/ext/pg_query/include/storage/item.h +1 -1
  279. data/ext/pg_query/include/storage/itemid.h +1 -1
  280. data/ext/pg_query/include/storage/itemptr.h +3 -1
  281. data/ext/pg_query/include/storage/large_object.h +2 -2
  282. data/ext/pg_query/include/storage/latch.h +9 -13
  283. data/ext/pg_query/include/storage/lmgr.h +2 -1
  284. data/ext/pg_query/include/storage/lock.h +11 -8
  285. data/ext/pg_query/include/storage/lockdefs.h +2 -2
  286. data/ext/pg_query/include/storage/lwlock.h +5 -32
  287. data/ext/pg_query/include/storage/lwlocknames.h +0 -1
  288. data/ext/pg_query/include/storage/off.h +1 -1
  289. data/ext/pg_query/include/storage/pg_sema.h +1 -1
  290. data/ext/pg_query/include/storage/pg_shmem.h +9 -7
  291. data/ext/pg_query/include/storage/pmsignal.h +15 -4
  292. data/ext/pg_query/include/storage/predicate.h +4 -4
  293. data/ext/pg_query/include/storage/proc.h +173 -59
  294. data/ext/pg_query/include/storage/procarray.h +98 -0
  295. data/ext/pg_query/include/storage/proclist_types.h +1 -1
  296. data/ext/pg_query/include/storage/procsignal.h +3 -7
  297. data/ext/pg_query/include/storage/relfilenode.h +1 -1
  298. data/ext/pg_query/include/storage/s_lock.h +60 -21
  299. data/ext/pg_query/include/storage/sharedfileset.h +3 -11
  300. data/ext/pg_query/include/storage/shm_mq.h +5 -4
  301. data/ext/pg_query/include/storage/shm_toc.h +1 -1
  302. data/ext/pg_query/include/storage/shmem.h +1 -1
  303. data/ext/pg_query/include/storage/sinval.h +3 -3
  304. data/ext/pg_query/include/storage/sinvaladt.h +1 -1
  305. data/ext/pg_query/include/storage/smgr.h +10 -8
  306. data/ext/pg_query/include/storage/spin.h +2 -2
  307. data/ext/pg_query/include/storage/standby.h +13 -6
  308. data/ext/pg_query/include/storage/standbydefs.h +2 -2
  309. data/ext/pg_query/include/storage/sync.h +7 -3
  310. data/ext/pg_query/include/tcop/cmdtag.h +1 -1
  311. data/ext/pg_query/include/tcop/cmdtaglist.h +3 -2
  312. data/ext/pg_query/include/tcop/deparse_utility.h +1 -1
  313. data/ext/pg_query/include/tcop/dest.h +1 -1
  314. data/ext/pg_query/include/tcop/fastpath.h +1 -2
  315. data/ext/pg_query/include/tcop/pquery.h +1 -1
  316. data/ext/pg_query/include/tcop/tcopprot.h +19 -11
  317. data/ext/pg_query/include/tcop/utility.h +7 -3
  318. data/ext/pg_query/include/tsearch/ts_cache.h +2 -2
  319. data/ext/pg_query/include/utils/acl.h +24 -3
  320. data/ext/pg_query/include/utils/aclchk_internal.h +1 -1
  321. data/ext/pg_query/include/utils/array.h +7 -2
  322. data/ext/pg_query/include/utils/backend_progress.h +44 -0
  323. data/ext/pg_query/include/utils/backend_status.h +321 -0
  324. data/ext/pg_query/include/utils/builtins.h +10 -11
  325. data/ext/pg_query/include/utils/bytea.h +3 -2
  326. data/ext/pg_query/include/utils/catcache.h +1 -1
  327. data/ext/pg_query/include/utils/date.h +1 -1
  328. data/ext/pg_query/include/utils/datetime.h +8 -7
  329. data/ext/pg_query/include/utils/datum.h +9 -1
  330. data/ext/pg_query/include/utils/dsa.h +1 -1
  331. data/ext/pg_query/include/utils/dynahash.h +4 -3
  332. data/ext/pg_query/include/utils/elog.h +52 -21
  333. data/ext/pg_query/include/utils/errcodes.h +2 -0
  334. data/ext/pg_query/include/utils/expandeddatum.h +1 -1
  335. data/ext/pg_query/include/utils/expandedrecord.h +1 -1
  336. data/ext/pg_query/include/utils/float.h +7 -7
  337. data/ext/pg_query/include/utils/fmgroids.h +1300 -696
  338. data/ext/pg_query/include/utils/fmgrprotos.h +199 -16
  339. data/ext/pg_query/include/utils/fmgrtab.h +6 -5
  340. data/ext/pg_query/include/utils/guc.h +69 -43
  341. data/ext/pg_query/include/utils/guc_tables.h +23 -19
  342. data/ext/pg_query/include/utils/hsearch.h +15 -11
  343. data/ext/pg_query/include/utils/inval.h +4 -1
  344. data/ext/pg_query/include/utils/lsyscache.h +11 -1
  345. data/ext/pg_query/include/utils/memdebug.h +1 -1
  346. data/ext/pg_query/include/utils/memutils.h +8 -3
  347. data/ext/pg_query/include/utils/numeric.h +19 -5
  348. data/ext/pg_query/include/utils/palloc.h +25 -3
  349. data/ext/pg_query/include/utils/partcache.h +1 -1
  350. data/ext/pg_query/include/utils/pg_locale.h +17 -9
  351. data/ext/pg_query/include/utils/pg_lsn.h +1 -1
  352. data/ext/pg_query/include/utils/pgstat_internal.h +784 -0
  353. data/ext/pg_query/include/utils/pidfile.h +1 -1
  354. data/ext/pg_query/include/utils/plancache.h +6 -5
  355. data/ext/pg_query/include/utils/portal.h +10 -12
  356. data/ext/pg_query/include/utils/ps_status.h +1 -1
  357. data/ext/pg_query/include/utils/queryenvironment.h +1 -1
  358. data/ext/pg_query/include/utils/queryjumble.h +88 -0
  359. data/ext/pg_query/include/utils/regproc.h +14 -3
  360. data/ext/pg_query/include/utils/rel.h +71 -19
  361. data/ext/pg_query/include/utils/relcache.h +8 -5
  362. data/ext/pg_query/include/utils/reltrigger.h +1 -1
  363. data/ext/pg_query/include/utils/resowner.h +1 -1
  364. data/ext/pg_query/include/utils/rls.h +2 -2
  365. data/ext/pg_query/include/utils/ruleutils.h +4 -1
  366. data/ext/pg_query/include/utils/sharedtuplestore.h +1 -1
  367. data/ext/pg_query/include/utils/snapmgr.h +34 -14
  368. data/ext/pg_query/include/utils/snapshot.h +14 -1
  369. data/ext/pg_query/include/utils/sortsupport.h +117 -2
  370. data/ext/pg_query/include/utils/syscache.h +6 -1
  371. data/ext/pg_query/include/utils/timeout.h +11 -4
  372. data/ext/pg_query/include/utils/timestamp.h +6 -5
  373. data/ext/pg_query/include/utils/tuplesort.h +25 -11
  374. data/ext/pg_query/include/utils/tuplestore.h +2 -2
  375. data/ext/pg_query/include/utils/typcache.h +24 -17
  376. data/ext/pg_query/include/utils/tzparser.h +1 -1
  377. data/ext/pg_query/include/utils/varlena.h +5 -3
  378. data/ext/pg_query/include/utils/wait_event.h +289 -0
  379. data/ext/pg_query/include/utils/xml.h +4 -4
  380. data/ext/pg_query/pg_query.pb-c.c +4302 -2304
  381. data/ext/pg_query/pg_query_deparse.c +1106 -373
  382. data/ext/pg_query/pg_query_fingerprint.c +30 -10
  383. data/ext/pg_query/pg_query_json_plpgsql.c +0 -25
  384. data/ext/pg_query/pg_query_normalize.c +1 -1
  385. data/ext/pg_query/pg_query_outfuncs_json.c +54 -16
  386. data/ext/pg_query/pg_query_outfuncs_protobuf.c +70 -10
  387. data/ext/pg_query/pg_query_parse.c +1 -1
  388. data/ext/pg_query/pg_query_readfuncs_protobuf.c +42 -8
  389. data/ext/pg_query/pg_query_scan.c +2 -1
  390. data/ext/pg_query/pg_query_split.c +3 -2
  391. data/ext/pg_query/src_backend_catalog_namespace.c +20 -9
  392. data/ext/pg_query/src_backend_catalog_pg_proc.c +4 -1
  393. data/ext/pg_query/src_backend_commands_define.c +11 -1
  394. data/ext/pg_query/src_backend_nodes_bitmapset.c +3 -1
  395. data/ext/pg_query/src_backend_nodes_copyfuncs.c +401 -76
  396. data/ext/pg_query/src_backend_nodes_equalfuncs.c +290 -46
  397. data/ext/pg_query/src_backend_nodes_extensible.c +1 -1
  398. data/ext/pg_query/src_backend_nodes_list.c +74 -11
  399. data/ext/pg_query/src_backend_nodes_makefuncs.c +5 -4
  400. data/ext/pg_query/src_backend_nodes_nodeFuncs.c +55 -12
  401. data/ext/pg_query/src_backend_nodes_value.c +28 -19
  402. data/ext/pg_query/src_backend_parser_gram.c +33874 -31261
  403. data/ext/pg_query/src_backend_parser_parser.c +26 -7
  404. data/ext/pg_query/src_backend_parser_scan.c +172 -209
  405. data/ext/pg_query/src_backend_parser_scansup.c +4 -28
  406. data/ext/pg_query/src_backend_postmaster_postmaster.c +77 -106
  407. data/ext/pg_query/src_backend_storage_ipc_ipc.c +13 -4
  408. data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +5 -4
  409. data/ext/pg_query/src_backend_tcop_postgres.c +62 -23
  410. data/ext/pg_query/src_backend_utils_activity_pgstat_database.c +140 -0
  411. data/ext/pg_query/src_backend_utils_adt_datum.c +13 -1
  412. data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +1 -1
  413. data/ext/pg_query/src_backend_utils_adt_format_type.c +6 -2
  414. data/ext/pg_query/src_backend_utils_adt_ruleutils.c +71 -5
  415. data/ext/pg_query/src_backend_utils_error_assert.c +16 -14
  416. data/ext/pg_query/src_backend_utils_error_elog.c +172 -99
  417. data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +12 -17
  418. data/ext/pg_query/src_backend_utils_hash_dynahash.c +40 -10
  419. data/ext/pg_query/src_backend_utils_init_globals.c +5 -5
  420. data/ext/pg_query/src_backend_utils_mb_mbutils.c +55 -66
  421. data/ext/pg_query/src_backend_utils_misc_guc.c +206 -45
  422. data/ext/pg_query/src_backend_utils_mmgr_aset.c +7 -5
  423. data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +123 -35
  424. data/ext/pg_query/src_common_encnames.c +1 -1
  425. data/ext/pg_query/src_common_hashfn.c +3 -3
  426. data/ext/pg_query/src_common_keywords.c +15 -2
  427. data/ext/pg_query/src_common_kwlist_d.h +517 -494
  428. data/ext/pg_query/src_common_kwlookup.c +1 -1
  429. data/ext/pg_query/src_common_pg_prng.c +152 -0
  430. data/ext/pg_query/src_common_psprintf.c +1 -1
  431. data/ext/pg_query/src_common_string.c +7 -1
  432. data/ext/pg_query/src_common_stringinfo.c +1 -1
  433. data/ext/pg_query/src_common_wchar.c +701 -109
  434. data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +45 -20
  435. data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +1 -18
  436. data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +1233 -1259
  437. data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +1 -1
  438. data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +10 -10
  439. data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +2 -2
  440. data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +54 -56
  441. data/ext/pg_query/src_port_pg_bitutils.c +41 -31
  442. data/ext/pg_query/src_port_pgsleep.c +1 -1
  443. data/ext/pg_query/src_port_pgstrcasecmp.c +1 -1
  444. data/ext/pg_query/src_port_qsort.c +12 -224
  445. data/ext/pg_query/src_port_snprintf.c +37 -13
  446. data/ext/pg_query/src_port_strerror.c +9 -19
  447. data/ext/pg_query/src_port_strnlen.c +1 -1
  448. data/lib/pg_query/filter_columns.rb +1 -1
  449. data/lib/pg_query/fingerprint.rb +5 -1
  450. data/lib/pg_query/node.rb +2 -2
  451. data/lib/pg_query/param_refs.rb +1 -1
  452. data/lib/pg_query/parse.rb +20 -8
  453. data/lib/pg_query/pg_query_pb.rb +1108 -942
  454. data/lib/pg_query/treewalker.rb +6 -0
  455. data/lib/pg_query/truncate.rb +1 -1
  456. data/lib/pg_query/version.rb +1 -1
  457. metadata +27 -17
  458. data/ext/pg_query/include/access/xloginsert.h +0 -64
  459. data/ext/pg_query/include/bootstrap/bootstrap.h +0 -62
  460. data/ext/pg_query/include/parser/parse_clause.h +0 -54
  461. data/ext/pg_query/include/parser/parse_collate.h +0 -27
  462. data/ext/pg_query/include/parser/parse_target.h +0 -46
  463. data/ext/pg_query/pg_query_ruby_freebsd.sym +0 -2
  464. data/ext/pg_query/src_backend_libpq_pqcomm.c +0 -659
  465. data/ext/pg_query/src_backend_parser_parse_expr.c +0 -313
  466. data/ext/pg_query/src_port_erand48.c +0 -127
  467. data/ext/pg_query/src_port_random.c +0 -31
@@ -8,18 +8,21 @@
8
8
  * - pg_wchar2single_with_len
9
9
  * - pg_ascii_mblen
10
10
  * - pg_ascii_dsplen
11
- * - pg_ascii_verifier
11
+ * - pg_ascii_verifychar
12
+ * - pg_ascii_verifystr
12
13
  * - pg_eucjp2wchar_with_len
13
14
  * - pg_euc2wchar_with_len
14
15
  * - pg_wchar2euc_with_len
15
16
  * - pg_eucjp_mblen
16
17
  * - pg_euc_mblen
17
18
  * - pg_eucjp_dsplen
18
- * - pg_eucjp_verifier
19
+ * - pg_eucjp_verifychar
20
+ * - pg_eucjp_verifystr
19
21
  * - pg_euccn2wchar_with_len
20
22
  * - pg_euccn_mblen
21
23
  * - pg_euccn_dsplen
22
- * - pg_euckr_verifier
24
+ * - pg_euckr_verifychar
25
+ * - pg_euckr_verifystr
23
26
  * - pg_euckr2wchar_with_len
24
27
  * - pg_euckr_mblen
25
28
  * - pg_euckr_dsplen
@@ -27,7 +30,8 @@
27
30
  * - pg_euctw2wchar_with_len
28
31
  * - pg_euctw_mblen
29
32
  * - pg_euctw_dsplen
30
- * - pg_euctw_verifier
33
+ * - pg_euctw_verifychar
34
+ * - pg_euctw_verifystr
31
35
  * - pg_utf2wchar_with_len
32
36
  * - pg_wchar2utf_with_len
33
37
  * - unicode_to_utf8
@@ -35,34 +39,45 @@
35
39
  * - utf8_to_unicode
36
40
  * - ucs_wcwidth
37
41
  * - mbbisearch
38
- * - pg_utf8_verifier
42
+ * - pg_utf8_verifychar
39
43
  * - pg_utf8_islegal
44
+ * - pg_utf8_verifystr
45
+ * - utf8_advance
46
+ * - Utf8Transition
40
47
  * - pg_mule2wchar_with_len
41
48
  * - pg_wchar2mule_with_len
42
49
  * - pg_mule_dsplen
43
- * - pg_mule_verifier
50
+ * - pg_mule_verifychar
51
+ * - pg_mule_verifystr
44
52
  * - pg_latin12wchar_with_len
45
53
  * - pg_latin1_mblen
46
54
  * - pg_latin1_dsplen
47
- * - pg_latin1_verifier
55
+ * - pg_latin1_verifychar
56
+ * - pg_latin1_verifystr
48
57
  * - pg_sjis_mblen
49
58
  * - pg_sjis_dsplen
50
- * - pg_sjis_verifier
59
+ * - pg_sjis_verifychar
60
+ * - pg_sjis_verifystr
51
61
  * - pg_big5_mblen
52
62
  * - pg_big5_dsplen
53
- * - pg_big5_verifier
63
+ * - pg_big5_verifychar
64
+ * - pg_big5_verifystr
54
65
  * - pg_gbk_mblen
55
66
  * - pg_gbk_dsplen
56
- * - pg_gbk_verifier
67
+ * - pg_gbk_verifychar
68
+ * - pg_gbk_verifystr
57
69
  * - pg_uhc_mblen
58
70
  * - pg_uhc_dsplen
59
- * - pg_uhc_verifier
71
+ * - pg_uhc_verifychar
72
+ * - pg_uhc_verifystr
60
73
  * - pg_gb18030_mblen
61
74
  * - pg_gb18030_dsplen
62
- * - pg_gb18030_verifier
75
+ * - pg_gb18030_verifychar
76
+ * - pg_gb18030_verifystr
63
77
  * - pg_johab_mblen
64
78
  * - pg_johab_dsplen
65
- * - pg_johab_verifier
79
+ * - pg_johab_verifychar
80
+ * - pg_johab_verifystr
66
81
  * - pg_encoding_mblen
67
82
  *--------------------------------------------------------------------
68
83
  */
@@ -72,7 +87,7 @@
72
87
  * wchar.c
73
88
  * Functions for working with multibyte characters in various encodings.
74
89
  *
75
- * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
90
+ * Portions Copyright (c) 1998-2022, PostgreSQL Global Development Group
76
91
  *
77
92
  * IDENTIFICATION
78
93
  * src/common/wchar.c
@@ -88,9 +103,9 @@
88
103
  * Operations on multi-byte encodings are driven by a table of helper
89
104
  * functions.
90
105
  *
91
- * To add an encoding support, define mblen(), dsplen() and verifier() for
92
- * the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
93
- * conversion functions.
106
+ * To add an encoding support, define mblen(), dsplen(), verifychar() and
107
+ * verifystr() for the encoding. For server-encodings, also define mb2wchar()
108
+ * and wchar2mb() conversion functions.
94
109
  *
95
110
  * These functions generally assume that their input is validly formed.
96
111
  * The "verifier" functions, further down in the file, have to be more
@@ -652,8 +667,8 @@ pg_utf_mblen(const unsigned char *s)
652
667
 
653
668
  struct mbinterval
654
669
  {
655
- unsigned short first;
656
- unsigned short last;
670
+ unsigned int first;
671
+ unsigned int last;
657
672
  };
658
673
 
659
674
  /* auxiliary function for binary search in interval table */
@@ -692,12 +707,6 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
692
707
  * category code Mn or Me in the Unicode database) have a
693
708
  * column width of 0.
694
709
  *
695
- * - Other format characters (general category code Cf in the Unicode
696
- * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
697
- *
698
- * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
699
- * have a column width of 0.
700
- *
701
710
  * - Spacing characters in the East Asian Wide (W) or East Asian
702
711
  * FullWidth (F) category as defined in Unicode Technical
703
712
  * Report #11 have a column width of 2.
@@ -714,6 +723,7 @@ static int
714
723
  ucs_wcwidth(pg_wchar ucs)
715
724
  {
716
725
  #include "common/unicode_combining_table.h"
726
+ #include "common/unicode_east_asian_fw_table.h"
717
727
 
718
728
  /* test for 8-bit control characters */
719
729
  if (ucs == 0)
@@ -722,27 +732,25 @@ ucs_wcwidth(pg_wchar ucs)
722
732
  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
723
733
  return -1;
724
734
 
725
- /* binary search in table of non-spacing characters */
735
+ /*
736
+ * binary search in table of non-spacing characters
737
+ *
738
+ * XXX: In the official Unicode sources, it is possible for a character to
739
+ * be described as both non-spacing and wide at the same time. As of
740
+ * Unicode 13.0, treating the non-spacing property as the determining
741
+ * factor for display width leads to the correct behavior, so do that
742
+ * search first.
743
+ */
726
744
  if (mbbisearch(ucs, combining,
727
745
  sizeof(combining) / sizeof(struct mbinterval) - 1))
728
746
  return 0;
729
747
 
730
- /*
731
- * if we arrive here, ucs is not a combining or C0/C1 control character
732
- */
748
+ /* binary search in table of wide characters */
749
+ if (mbbisearch(ucs, east_asian_fw,
750
+ sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
751
+ return 2;
733
752
 
734
- return 1 +
735
- (ucs >= 0x1100 &&
736
- (ucs <= 0x115f || /* Hangul Jamo init. consonants */
737
- (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
738
- ucs != 0x303f) || /* CJK ... Yi */
739
- (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
740
- (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
741
- * Ideographs */
742
- (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
743
- (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
744
- (ucs >= 0xffe0 && ucs <= 0xffe6) ||
745
- (ucs >= 0x20000 && ucs <= 0x2ffff)));
753
+ return 1;
746
754
  }
747
755
 
748
756
  /*
@@ -1156,29 +1164,45 @@ pg_gb18030_dsplen(const unsigned char *s)
1156
1164
  *-------------------------------------------------------------------
1157
1165
  * multibyte sequence validators
1158
1166
  *
1159
- * These functions accept "s", a pointer to the first byte of a string,
1160
- * and "len", the remaining length of the string. If there is a validly
1161
- * encoded character beginning at *s, return its length in bytes; else
1162
- * return -1.
1167
+ * The verifychar functions accept "s", a pointer to the first byte of a
1168
+ * string, and "len", the remaining length of the string. If there is a
1169
+ * validly encoded character beginning at *s, return its length in bytes;
1170
+ * else return -1.
1163
1171
  *
1164
- * The functions can assume that len > 0 and that *s != '\0', but they must
1165
- * test for and reject zeroes in any additional bytes of a multibyte character.
1172
+ * The verifystr functions also accept "s", a pointer to a string and "len",
1173
+ * the length of the string. They verify the whole string, and return the
1174
+ * number of input bytes (<= len) that are valid. In other words, if the
1175
+ * whole string is valid, verifystr returns "len", otherwise it returns the
1176
+ * byte offset of the first invalid character. The verifystr functions must
1177
+ * test for and reject zeroes in the input.
1166
1178
  *
1167
- * Note that this definition allows the function for a single-byte
1168
- * encoding to be just "return 1".
1179
+ * The verifychar functions can assume that len > 0 and that *s != '\0', but
1180
+ * they must test for and reject zeroes in any additional bytes of a
1181
+ * multibyte character. Note that this definition allows the function for a
1182
+ * single-byte encoding to be just "return 1".
1169
1183
  *-------------------------------------------------------------------
1170
1184
  */
1171
-
1172
1185
  static int
1173
- pg_ascii_verifier(const unsigned char *s, int len)
1186
+ pg_ascii_verifychar(const unsigned char *s, int len)
1174
1187
  {
1175
1188
  return 1;
1176
1189
  }
1177
1190
 
1191
+ static int
1192
+ pg_ascii_verifystr(const unsigned char *s, int len)
1193
+ {
1194
+ const unsigned char *nullpos = memchr(s, 0, len);
1195
+
1196
+ if (nullpos == NULL)
1197
+ return len;
1198
+ else
1199
+ return nullpos - s;
1200
+ }
1201
+
1178
1202
  #define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1179
1203
 
1180
1204
  static int
1181
- pg_eucjp_verifier(const unsigned char *s, int len)
1205
+ pg_eucjp_verifychar(const unsigned char *s, int len)
1182
1206
  {
1183
1207
  int l;
1184
1208
  unsigned char c1,
@@ -1233,7 +1257,36 @@ pg_eucjp_verifier(const unsigned char *s, int len)
1233
1257
  }
1234
1258
 
1235
1259
  static int
1236
- pg_euckr_verifier(const unsigned char *s, int len)
1260
+ pg_eucjp_verifystr(const unsigned char *s, int len)
1261
+ {
1262
+ const unsigned char *start = s;
1263
+
1264
+ while (len > 0)
1265
+ {
1266
+ int l;
1267
+
1268
+ /* fast path for ASCII-subset characters */
1269
+ if (!IS_HIGHBIT_SET(*s))
1270
+ {
1271
+ if (*s == '\0')
1272
+ break;
1273
+ l = 1;
1274
+ }
1275
+ else
1276
+ {
1277
+ l = pg_eucjp_verifychar(s, len);
1278
+ if (l == -1)
1279
+ break;
1280
+ }
1281
+ s += l;
1282
+ len -= l;
1283
+ }
1284
+
1285
+ return s - start;
1286
+ }
1287
+
1288
+ static int
1289
+ pg_euckr_verifychar(const unsigned char *s, int len)
1237
1290
  {
1238
1291
  int l;
1239
1292
  unsigned char c1,
@@ -1261,11 +1314,41 @@ pg_euckr_verifier(const unsigned char *s, int len)
1261
1314
  return l;
1262
1315
  }
1263
1316
 
1317
+ static int
1318
+ pg_euckr_verifystr(const unsigned char *s, int len)
1319
+ {
1320
+ const unsigned char *start = s;
1321
+
1322
+ while (len > 0)
1323
+ {
1324
+ int l;
1325
+
1326
+ /* fast path for ASCII-subset characters */
1327
+ if (!IS_HIGHBIT_SET(*s))
1328
+ {
1329
+ if (*s == '\0')
1330
+ break;
1331
+ l = 1;
1332
+ }
1333
+ else
1334
+ {
1335
+ l = pg_euckr_verifychar(s, len);
1336
+ if (l == -1)
1337
+ break;
1338
+ }
1339
+ s += l;
1340
+ len -= l;
1341
+ }
1342
+
1343
+ return s - start;
1344
+ }
1345
+
1264
1346
  /* EUC-CN byte sequences are exactly same as EUC-KR */
1265
- #define pg_euccn_verifier pg_euckr_verifier
1347
+ #define pg_euccn_verifychar pg_euckr_verifychar
1348
+ #define pg_euccn_verifystr pg_euckr_verifystr
1266
1349
 
1267
1350
  static int
1268
- pg_euctw_verifier(const unsigned char *s, int len)
1351
+ pg_euctw_verifychar(const unsigned char *s, int len)
1269
1352
  {
1270
1353
  int l;
1271
1354
  unsigned char c1,
@@ -1315,7 +1398,36 @@ pg_euctw_verifier(const unsigned char *s, int len)
1315
1398
  }
1316
1399
 
1317
1400
  static int
1318
- pg_johab_verifier(const unsigned char *s, int len)
1401
+ pg_euctw_verifystr(const unsigned char *s, int len)
1402
+ {
1403
+ const unsigned char *start = s;
1404
+
1405
+ while (len > 0)
1406
+ {
1407
+ int l;
1408
+
1409
+ /* fast path for ASCII-subset characters */
1410
+ if (!IS_HIGHBIT_SET(*s))
1411
+ {
1412
+ if (*s == '\0')
1413
+ break;
1414
+ l = 1;
1415
+ }
1416
+ else
1417
+ {
1418
+ l = pg_euctw_verifychar(s, len);
1419
+ if (l == -1)
1420
+ break;
1421
+ }
1422
+ s += l;
1423
+ len -= l;
1424
+ }
1425
+
1426
+ return s - start;
1427
+ }
1428
+
1429
+ static int
1430
+ pg_johab_verifychar(const unsigned char *s, int len)
1319
1431
  {
1320
1432
  int l,
1321
1433
  mbl;
@@ -1339,7 +1451,36 @@ pg_johab_verifier(const unsigned char *s, int len)
1339
1451
  }
1340
1452
 
1341
1453
  static int
1342
- pg_mule_verifier(const unsigned char *s, int len)
1454
+ pg_johab_verifystr(const unsigned char *s, int len)
1455
+ {
1456
+ const unsigned char *start = s;
1457
+
1458
+ while (len > 0)
1459
+ {
1460
+ int l;
1461
+
1462
+ /* fast path for ASCII-subset characters */
1463
+ if (!IS_HIGHBIT_SET(*s))
1464
+ {
1465
+ if (*s == '\0')
1466
+ break;
1467
+ l = 1;
1468
+ }
1469
+ else
1470
+ {
1471
+ l = pg_johab_verifychar(s, len);
1472
+ if (l == -1)
1473
+ break;
1474
+ }
1475
+ s += l;
1476
+ len -= l;
1477
+ }
1478
+
1479
+ return s - start;
1480
+ }
1481
+
1482
+ static int
1483
+ pg_mule_verifychar(const unsigned char *s, int len)
1343
1484
  {
1344
1485
  int l,
1345
1486
  mbl;
@@ -1360,13 +1501,53 @@ pg_mule_verifier(const unsigned char *s, int len)
1360
1501
  }
1361
1502
 
1362
1503
  static int
1363
- pg_latin1_verifier(const unsigned char *s, int len)
1504
+ pg_mule_verifystr(const unsigned char *s, int len)
1505
+ {
1506
+ const unsigned char *start = s;
1507
+
1508
+ while (len > 0)
1509
+ {
1510
+ int l;
1511
+
1512
+ /* fast path for ASCII-subset characters */
1513
+ if (!IS_HIGHBIT_SET(*s))
1514
+ {
1515
+ if (*s == '\0')
1516
+ break;
1517
+ l = 1;
1518
+ }
1519
+ else
1520
+ {
1521
+ l = pg_mule_verifychar(s, len);
1522
+ if (l == -1)
1523
+ break;
1524
+ }
1525
+ s += l;
1526
+ len -= l;
1527
+ }
1528
+
1529
+ return s - start;
1530
+ }
1531
+
1532
+ static int
1533
+ pg_latin1_verifychar(const unsigned char *s, int len)
1364
1534
  {
1365
1535
  return 1;
1366
1536
  }
1367
1537
 
1368
1538
  static int
1369
- pg_sjis_verifier(const unsigned char *s, int len)
1539
+ pg_latin1_verifystr(const unsigned char *s, int len)
1540
+ {
1541
+ const unsigned char *nullpos = memchr(s, 0, len);
1542
+
1543
+ if (nullpos == NULL)
1544
+ return len;
1545
+ else
1546
+ return nullpos - s;
1547
+ }
1548
+
1549
+ static int
1550
+ pg_sjis_verifychar(const unsigned char *s, int len)
1370
1551
  {
1371
1552
  int l,
1372
1553
  mbl;
@@ -1389,7 +1570,36 @@ pg_sjis_verifier(const unsigned char *s, int len)
1389
1570
  }
1390
1571
 
1391
1572
  static int
1392
- pg_big5_verifier(const unsigned char *s, int len)
1573
+ pg_sjis_verifystr(const unsigned char *s, int len)
1574
+ {
1575
+ const unsigned char *start = s;
1576
+
1577
+ while (len > 0)
1578
+ {
1579
+ int l;
1580
+
1581
+ /* fast path for ASCII-subset characters */
1582
+ if (!IS_HIGHBIT_SET(*s))
1583
+ {
1584
+ if (*s == '\0')
1585
+ break;
1586
+ l = 1;
1587
+ }
1588
+ else
1589
+ {
1590
+ l = pg_sjis_verifychar(s, len);
1591
+ if (l == -1)
1592
+ break;
1593
+ }
1594
+ s += l;
1595
+ len -= l;
1596
+ }
1597
+
1598
+ return s - start;
1599
+ }
1600
+
1601
+ static int
1602
+ pg_big5_verifychar(const unsigned char *s, int len)
1393
1603
  {
1394
1604
  int l,
1395
1605
  mbl;
@@ -1409,7 +1619,36 @@ pg_big5_verifier(const unsigned char *s, int len)
1409
1619
  }
1410
1620
 
1411
1621
  static int
1412
- pg_gbk_verifier(const unsigned char *s, int len)
1622
+ pg_big5_verifystr(const unsigned char *s, int len)
1623
+ {
1624
+ const unsigned char *start = s;
1625
+
1626
+ while (len > 0)
1627
+ {
1628
+ int l;
1629
+
1630
+ /* fast path for ASCII-subset characters */
1631
+ if (!IS_HIGHBIT_SET(*s))
1632
+ {
1633
+ if (*s == '\0')
1634
+ break;
1635
+ l = 1;
1636
+ }
1637
+ else
1638
+ {
1639
+ l = pg_big5_verifychar(s, len);
1640
+ if (l == -1)
1641
+ break;
1642
+ }
1643
+ s += l;
1644
+ len -= l;
1645
+ }
1646
+
1647
+ return s - start;
1648
+ }
1649
+
1650
+ static int
1651
+ pg_gbk_verifychar(const unsigned char *s, int len)
1413
1652
  {
1414
1653
  int l,
1415
1654
  mbl;
@@ -1429,7 +1668,36 @@ pg_gbk_verifier(const unsigned char *s, int len)
1429
1668
  }
1430
1669
 
1431
1670
  static int
1432
- pg_uhc_verifier(const unsigned char *s, int len)
1671
+ pg_gbk_verifystr(const unsigned char *s, int len)
1672
+ {
1673
+ const unsigned char *start = s;
1674
+
1675
+ while (len > 0)
1676
+ {
1677
+ int l;
1678
+
1679
+ /* fast path for ASCII-subset characters */
1680
+ if (!IS_HIGHBIT_SET(*s))
1681
+ {
1682
+ if (*s == '\0')
1683
+ break;
1684
+ l = 1;
1685
+ }
1686
+ else
1687
+ {
1688
+ l = pg_gbk_verifychar(s, len);
1689
+ if (l == -1)
1690
+ break;
1691
+ }
1692
+ s += l;
1693
+ len -= l;
1694
+ }
1695
+
1696
+ return s - start;
1697
+ }
1698
+
1699
+ static int
1700
+ pg_uhc_verifychar(const unsigned char *s, int len)
1433
1701
  {
1434
1702
  int l,
1435
1703
  mbl;
@@ -1449,7 +1717,36 @@ pg_uhc_verifier(const unsigned char *s, int len)
1449
1717
  }
1450
1718
 
1451
1719
  static int
1452
- pg_gb18030_verifier(const unsigned char *s, int len)
1720
+ pg_uhc_verifystr(const unsigned char *s, int len)
1721
+ {
1722
+ const unsigned char *start = s;
1723
+
1724
+ while (len > 0)
1725
+ {
1726
+ int l;
1727
+
1728
+ /* fast path for ASCII-subset characters */
1729
+ if (!IS_HIGHBIT_SET(*s))
1730
+ {
1731
+ if (*s == '\0')
1732
+ break;
1733
+ l = 1;
1734
+ }
1735
+ else
1736
+ {
1737
+ l = pg_uhc_verifychar(s, len);
1738
+ if (l == -1)
1739
+ break;
1740
+ }
1741
+ s += l;
1742
+ len -= l;
1743
+ }
1744
+
1745
+ return s - start;
1746
+ }
1747
+
1748
+ static int
1749
+ pg_gb18030_verifychar(const unsigned char *s, int len)
1453
1750
  {
1454
1751
  int l;
1455
1752
 
@@ -1480,11 +1777,55 @@ pg_gb18030_verifier(const unsigned char *s, int len)
1480
1777
  }
1481
1778
 
1482
1779
  static int
1483
- pg_utf8_verifier(const unsigned char *s, int len)
1780
+ pg_gb18030_verifystr(const unsigned char *s, int len)
1484
1781
  {
1485
- int l = pg_utf_mblen(s);
1782
+ const unsigned char *start = s;
1486
1783
 
1487
- if (len < l)
1784
+ while (len > 0)
1785
+ {
1786
+ int l;
1787
+
1788
+ /* fast path for ASCII-subset characters */
1789
+ if (!IS_HIGHBIT_SET(*s))
1790
+ {
1791
+ if (*s == '\0')
1792
+ break;
1793
+ l = 1;
1794
+ }
1795
+ else
1796
+ {
1797
+ l = pg_gb18030_verifychar(s, len);
1798
+ if (l == -1)
1799
+ break;
1800
+ }
1801
+ s += l;
1802
+ len -= l;
1803
+ }
1804
+
1805
+ return s - start;
1806
+ }
1807
+
1808
+ static int
1809
+ pg_utf8_verifychar(const unsigned char *s, int len)
1810
+ {
1811
+ int l;
1812
+
1813
+ if ((*s & 0x80) == 0)
1814
+ {
1815
+ if (*s == '\0')
1816
+ return -1;
1817
+ return 1;
1818
+ }
1819
+ else if ((*s & 0xe0) == 0xc0)
1820
+ l = 2;
1821
+ else if ((*s & 0xf0) == 0xe0)
1822
+ l = 3;
1823
+ else if ((*s & 0xf8) == 0xf0)
1824
+ l = 4;
1825
+ else
1826
+ l = 1;
1827
+
1828
+ if (l > len)
1488
1829
  return -1;
1489
1830
 
1490
1831
  if (!pg_utf8_islegal(s, l))
@@ -1493,6 +1834,250 @@ pg_utf8_verifier(const unsigned char *s, int len)
1493
1834
  return l;
1494
1835
  }
1495
1836
 
1837
+ /*
1838
+ * The fast path of the UTF-8 verifier uses a deterministic finite automaton
1839
+ * (DFA) for multibyte characters. In a traditional table-driven DFA, the
1840
+ * input byte and current state are used to compute an index into an array of
1841
+ * state transitions. Since the address of the next transition is dependent
1842
+ * on this computation, there is latency in executing the load instruction,
1843
+ * and the CPU is not kept busy.
1844
+ *
1845
+ * Instead, we use a "shift-based" DFA as described by Per Vognsen:
1846
+ *
1847
+ * https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725
1848
+ *
1849
+ * In a shift-based DFA, the input byte is an index into array of integers
1850
+ * whose bit pattern encodes the state transitions. To compute the next
1851
+ * state, we simply right-shift the integer by the current state and apply a
1852
+ * mask. In this scheme, the address of the transition only depends on the
1853
+ * input byte, so there is better pipelining.
1854
+ *
1855
+ * The naming convention for states and transitions was adopted from a UTF-8
1856
+ * to UTF-16/32 transcoder, whose table is reproduced below:
1857
+ *
1858
+ * https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp
1859
+ *
1860
+ * ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS / STATE
1861
+ * ==========================================================================
1862
+ * err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, | BGN/END
1863
+ * err, err, err, err, err, err, err, err, err, err, err, err, | ERR
1864
+ * |
1865
+ * err, err, END, END, END, err, err, err, err, err, err, err, | CS1
1866
+ * err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, | CS2
1867
+ * err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, | CS3
1868
+ * |
1869
+ * err, err, err, err, CS1, err, err, err, err, err, err, err, | P3A
1870
+ * err, err, CS1, CS1, err, err, err, err, err, err, err, err, | P3B
1871
+ * |
1872
+ * err, err, err, CS2, CS2, err, err, err, err, err, err, err, | P4A
1873
+ * err, err, CS2, err, err, err, err, err, err, err, err, err, | P4B
1874
+ *
1875
+ * In the most straightforward implementation, a shift-based DFA for UTF-8
1876
+ * requires 64-bit integers to encode the transitions, but with an SMT solver
1877
+ * it's possible to find state numbers such that the transitions fit within
1878
+ * 32-bit integers, as Dougall Johnson demonstrated:
1879
+ *
1880
+ * https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f
1881
+ *
1882
+ * This packed representation is the reason for the seemingly odd choice of
1883
+ * state values below.
1884
+ */
1885
+
1886
+ /* Error */
1887
+ #define ERR 0
1888
+ /* Begin */
1889
+ #define BGN 11
1890
+ /* Continuation states, expect 1/2/3 continuation bytes */
1891
+ #define CS1 16
1892
+ #define CS2 1
1893
+ #define CS3 5
1894
+ /* Partial states, where the first continuation byte has a restricted range */
1895
+ #define P3A 6 /* Lead was E0, check for 3-byte overlong */
1896
+ #define P3B 20 /* Lead was ED, check for surrogate */
1897
+ #define P4A 25 /* Lead was F0, check for 4-byte overlong */
1898
+ #define P4B 30 /* Lead was F4, check for too-large */
1899
+ /* Begin and End are the same state */
1900
+ #define END BGN
1901
+
1902
+ /* the encoded state transitions for the lookup table */
1903
+
1904
+ /* ASCII */
1905
+ #define ASC (END << BGN)
1906
+ /* 2-byte lead */
1907
+ #define L2A (CS1 << BGN)
1908
+ /* 3-byte lead */
1909
+ #define L3A (P3A << BGN)
1910
+ #define L3B (CS2 << BGN)
1911
+ #define L3C (P3B << BGN)
1912
+ /* 4-byte lead */
1913
+ #define L4A (P4A << BGN)
1914
+ #define L4B (CS3 << BGN)
1915
+ #define L4C (P4B << BGN)
1916
+ /* continuation byte */
1917
+ #define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1918
+ #define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1919
+ #define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1920
+ /* invalid byte */
1921
+ #define ILL ERR
1922
+
1923
+ static const uint32 Utf8Transition[256] =
1924
+ {
1925
+ /* ASCII */
1926
+
1927
+ ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1928
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1929
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1930
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1931
+
1932
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1933
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1934
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1935
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1936
+
1937
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1938
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1939
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1940
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1941
+
1942
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1943
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1944
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1945
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1946
+
1947
+ /* continuation bytes */
1948
+
1949
+ /* 80..8F */
1950
+ CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1951
+ CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1952
+
1953
+ /* 90..9F */
1954
+ CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1955
+ CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1956
+
1957
+ /* A0..BF */
1958
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1959
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1960
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1961
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1962
+
1963
+ /* leading bytes */
1964
+
1965
+ /* C0..DF */
1966
+ ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
1967
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1968
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1969
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1970
+
1971
+ /* E0..EF */
1972
+ L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
1973
+ L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
1974
+
1975
+ /* F0..FF */
1976
+ L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
1977
+ ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
1978
+ };
1979
+
1980
+ static void
1981
+ utf8_advance(const unsigned char *s, uint32 *state, int len)
1982
+ {
1983
+ /* Note: We deliberately don't check the state's value here. */
1984
+ while (len > 0)
1985
+ {
1986
+ /*
1987
+ * It's important that the mask value is 31: In most instruction sets,
1988
+ * a shift by a 32-bit operand is understood to be a shift by its mod
1989
+ * 32, so the compiler should elide the mask operation.
1990
+ */
1991
+ *state = Utf8Transition[*s++] >> (*state & 31);
1992
+ len--;
1993
+ }
1994
+
1995
+ *state &= 31;
1996
+ }
1997
+
1998
+ static int
1999
+ pg_utf8_verifystr(const unsigned char *s, int len)
2000
+ {
2001
+ const unsigned char *start = s;
2002
+ const int orig_len = len;
2003
+ uint32 state = BGN;
2004
+
2005
+ /*
2006
+ * Sixteen seems to give the best balance of performance across different
2007
+ * byte distributions.
2008
+ */
2009
+ #define STRIDE_LENGTH 16
2010
+
2011
+ if (len >= STRIDE_LENGTH)
2012
+ {
2013
+ while (len >= STRIDE_LENGTH)
2014
+ {
2015
+ /*
2016
+ * If the chunk is all ASCII, we can skip the full UTF-8 check,
2017
+ * but we must first check for a non-END state, which means the
2018
+ * previous chunk ended in the middle of a multibyte sequence.
2019
+ */
2020
+ if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
2021
+ utf8_advance(s, &state, STRIDE_LENGTH);
2022
+
2023
+ s += STRIDE_LENGTH;
2024
+ len -= STRIDE_LENGTH;
2025
+ }
2026
+
2027
+ /* The error state persists, so we only need to check for it here. */
2028
+ if (state == ERR)
2029
+ {
2030
+ /*
2031
+ * Start over from the beginning with the slow path so we can
2032
+ * count the valid bytes.
2033
+ */
2034
+ len = orig_len;
2035
+ s = start;
2036
+ }
2037
+ else if (state != END)
2038
+ {
2039
+ /*
2040
+ * The fast path exited in the middle of a multibyte sequence.
2041
+ * Walk backwards to find the leading byte so that the slow path
2042
+ * can resume checking from there. We must always backtrack at
2043
+ * least one byte, since the current byte could be e.g. an ASCII
2044
+ * byte after a 2-byte lead, which is invalid.
2045
+ */
2046
+ do
2047
+ {
2048
+ Assert(s > start);
2049
+ s--;
2050
+ len++;
2051
+ Assert(IS_HIGHBIT_SET(*s));
2052
+ } while (pg_utf_mblen(s) <= 1);
2053
+ }
2054
+ }
2055
+
2056
+ /* check remaining bytes */
2057
+ while (len > 0)
2058
+ {
2059
+ int l;
2060
+
2061
+ /* fast path for ASCII-subset characters */
2062
+ if (!IS_HIGHBIT_SET(*s))
2063
+ {
2064
+ if (*s == '\0')
2065
+ break;
2066
+ l = 1;
2067
+ }
2068
+ else
2069
+ {
2070
+ l = pg_utf8_verifychar(s, len);
2071
+ if (l == -1)
2072
+ break;
2073
+ }
2074
+ s += l;
2075
+ len -= l;
2076
+ }
2077
+
2078
+ return s - start;
2079
+ }
2080
+
1496
2081
  /*
1497
2082
  * Check for validity of a single UTF-8 encoded character
1498
2083
  *
@@ -1572,48 +2157,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
1572
2157
  *-------------------------------------------------------------------
1573
2158
  */
1574
2159
  const pg_wchar_tbl pg_wchar_table[] = {
1575
- {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
1576
- {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
1577
- {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
1578
- {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
1579
- {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
1580
- {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
1581
- {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
1582
- {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
1583
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
1584
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
1585
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
1586
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
1587
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
1588
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
1589
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
1590
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
1591
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
1592
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
1593
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
1594
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
1595
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
1596
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
1597
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
1598
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
1599
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
1600
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
1601
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
1602
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
1603
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
1604
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
1605
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
1606
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
1607
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
1608
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
1609
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
1610
- {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
1611
- {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
1612
- {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
1613
- {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
1614
- {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
1615
- {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
1616
- {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
2160
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */
2161
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */
2162
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */
2163
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */
2164
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */
2165
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */
2166
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */
2167
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */
2168
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */
2169
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */
2170
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */
2171
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */
2172
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */
2173
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */
2174
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */
2175
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */
2176
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */
2177
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */
2178
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */
2179
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */
2180
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */
2181
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */
2182
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */
2183
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */
2184
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */
2185
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */
2186
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */
2187
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */
2188
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */
2189
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */
2190
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */
2191
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */
2192
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */
2193
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */
2194
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */
2195
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS */
2196
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */
2197
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */
2198
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */
2199
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */
2200
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */
2201
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */
1617
2202
  };
1618
2203
 
1619
2204
  /*
@@ -1646,7 +2231,14 @@ pg_encoding_mblen(int encoding, const char *mbstr)
1646
2231
  /*
1647
2232
  * Verify the first multibyte character of the given string.
1648
2233
  * Return its byte length if good, -1 if bad. (See comments above for
1649
- * full details of the mbverify API.)
2234
+ * full details of the mbverifychar API.)
2235
+ */
2236
+
2237
+
2238
+ /*
2239
+ * Verify that a string is valid for the given encoding.
2240
+ * Returns the number of input bytes (<= len) that form a valid string.
2241
+ * (See comments above for full details of the mbverifystr API.)
1650
2242
  */
1651
2243
 
1652
2244