pg_query 2.2.1 → 4.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (466)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -1
  3. data/README.md +29 -33
  4. data/Rakefile +2 -2
  5. data/ext/pg_query/include/access/amapi.h +45 -1
  6. data/ext/pg_query/include/access/attmap.h +1 -1
  7. data/ext/pg_query/include/access/attnum.h +2 -2
  8. data/ext/pg_query/include/access/clog.h +4 -2
  9. data/ext/pg_query/include/access/commit_ts.h +6 -9
  10. data/ext/pg_query/include/access/detoast.h +1 -11
  11. data/ext/pg_query/include/access/genam.h +15 -12
  12. data/ext/pg_query/include/access/gin.h +2 -2
  13. data/ext/pg_query/include/access/htup.h +1 -1
  14. data/ext/pg_query/include/access/htup_details.h +75 -87
  15. data/ext/pg_query/include/access/itup.h +7 -1
  16. data/ext/pg_query/include/access/parallel.h +2 -2
  17. data/ext/pg_query/include/access/printtup.h +1 -1
  18. data/ext/pg_query/include/access/relation.h +1 -1
  19. data/ext/pg_query/include/access/relscan.h +17 -2
  20. data/ext/pg_query/include/access/rmgr.h +30 -3
  21. data/ext/pg_query/include/access/rmgrlist.h +23 -23
  22. data/ext/pg_query/include/access/sdir.h +1 -1
  23. data/ext/pg_query/include/access/skey.h +1 -1
  24. data/ext/pg_query/include/access/stratnum.h +4 -2
  25. data/ext/pg_query/include/access/sysattr.h +1 -1
  26. data/ext/pg_query/include/access/table.h +2 -1
  27. data/ext/pg_query/include/access/tableam.h +272 -20
  28. data/ext/pg_query/include/access/toast_compression.h +73 -0
  29. data/ext/pg_query/include/access/transam.h +123 -13
  30. data/ext/pg_query/include/access/tupconvert.h +1 -1
  31. data/ext/pg_query/include/access/tupdesc.h +1 -1
  32. data/ext/pg_query/include/access/tupmacs.h +3 -3
  33. data/ext/pg_query/include/access/twophase.h +3 -1
  34. data/ext/pg_query/include/access/xact.h +73 -19
  35. data/ext/pg_query/include/access/xlog.h +60 -155
  36. data/ext/pg_query/include/access/xlog_internal.h +40 -13
  37. data/ext/pg_query/include/access/xlogdefs.h +8 -16
  38. data/ext/pg_query/include/access/xlogprefetcher.h +55 -0
  39. data/ext/pg_query/include/access/xlogreader.h +145 -39
  40. data/ext/pg_query/include/access/xlogrecord.h +18 -9
  41. data/ext/pg_query/include/access/xlogrecovery.h +157 -0
  42. data/ext/pg_query/include/c.h +101 -44
  43. data/ext/pg_query/include/catalog/catalog.h +3 -1
  44. data/ext/pg_query/include/catalog/catversion.h +2 -2
  45. data/ext/pg_query/include/catalog/dependency.h +8 -16
  46. data/ext/pg_query/include/catalog/genbki.h +83 -5
  47. data/ext/pg_query/include/catalog/index.h +18 -3
  48. data/ext/pg_query/include/catalog/indexing.h +12 -324
  49. data/ext/pg_query/include/catalog/namespace.h +4 -2
  50. data/ext/pg_query/include/catalog/objectaccess.h +70 -2
  51. data/ext/pg_query/include/catalog/objectaddress.h +11 -6
  52. data/ext/pg_query/include/catalog/pg_aggregate.h +14 -10
  53. data/ext/pg_query/include/catalog/pg_aggregate_d.h +2 -1
  54. data/ext/pg_query/include/catalog/pg_am.h +4 -1
  55. data/ext/pg_query/include/catalog/pg_am_d.h +3 -1
  56. data/ext/pg_query/include/catalog/pg_attribute.h +27 -10
  57. data/ext/pg_query/include/catalog/pg_attribute_d.h +21 -18
  58. data/ext/pg_query/include/catalog/pg_authid.h +7 -2
  59. data/ext/pg_query/include/catalog/pg_authid_d.h +17 -9
  60. data/ext/pg_query/include/catalog/pg_class.h +44 -14
  61. data/ext/pg_query/include/catalog/pg_class_d.h +30 -1
  62. data/ext/pg_query/include/catalog/pg_collation.h +33 -8
  63. data/ext/pg_query/include/catalog/pg_collation_d.h +20 -3
  64. data/ext/pg_query/include/catalog/pg_constraint.h +38 -12
  65. data/ext/pg_query/include/catalog/pg_constraint_d.h +10 -4
  66. data/ext/pg_query/include/catalog/pg_control.h +3 -5
  67. data/ext/pg_query/include/catalog/pg_conversion.h +7 -4
  68. data/ext/pg_query/include/catalog/pg_conversion_d.h +4 -1
  69. data/ext/pg_query/include/catalog/pg_depend.h +11 -7
  70. data/ext/pg_query/include/catalog/pg_depend_d.h +3 -1
  71. data/ext/pg_query/include/catalog/pg_event_trigger.h +9 -3
  72. data/ext/pg_query/include/catalog/pg_event_trigger_d.h +3 -1
  73. data/ext/pg_query/include/catalog/pg_index.h +17 -7
  74. data/ext/pg_query/include/catalog/pg_index_d.h +20 -17
  75. data/ext/pg_query/include/catalog/pg_language.h +10 -5
  76. data/ext/pg_query/include/catalog/pg_language_d.h +3 -1
  77. data/ext/pg_query/include/catalog/pg_namespace.h +7 -2
  78. data/ext/pg_query/include/catalog/pg_namespace_d.h +3 -1
  79. data/ext/pg_query/include/catalog/pg_opclass.h +8 -5
  80. data/ext/pg_query/include/catalog/pg_opclass_d.h +3 -1
  81. data/ext/pg_query/include/catalog/pg_operator.h +18 -15
  82. data/ext/pg_query/include/catalog/pg_operator_d.h +37 -1
  83. data/ext/pg_query/include/catalog/pg_opfamily.h +6 -3
  84. data/ext/pg_query/include/catalog/pg_opfamily_d.h +3 -1
  85. data/ext/pg_query/include/catalog/pg_parameter_acl.h +60 -0
  86. data/ext/pg_query/include/catalog/pg_parameter_acl_d.h +34 -0
  87. data/ext/pg_query/include/catalog/pg_partitioned_table.h +20 -9
  88. data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +2 -1
  89. data/ext/pg_query/include/catalog/pg_proc.h +20 -11
  90. data/ext/pg_query/include/catalog/pg_proc_d.h +10 -8
  91. data/ext/pg_query/include/catalog/pg_publication.h +50 -7
  92. data/ext/pg_query/include/catalog/pg_publication_d.h +3 -1
  93. data/ext/pg_query/include/catalog/pg_replication_origin.h +6 -1
  94. data/ext/pg_query/include/catalog/pg_replication_origin_d.h +5 -1
  95. data/ext/pg_query/include/catalog/pg_statistic.h +19 -12
  96. data/ext/pg_query/include/catalog/pg_statistic_d.h +2 -1
  97. data/ext/pg_query/include/catalog/pg_statistic_ext.h +19 -5
  98. data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +7 -2
  99. data/ext/pg_query/include/catalog/pg_transform.h +8 -5
  100. data/ext/pg_query/include/catalog/pg_transform_d.h +3 -1
  101. data/ext/pg_query/include/catalog/pg_trigger.h +24 -8
  102. data/ext/pg_query/include/catalog/pg_trigger_d.h +4 -1
  103. data/ext/pg_query/include/catalog/pg_ts_config.h +6 -3
  104. data/ext/pg_query/include/catalog/pg_ts_config_d.h +3 -1
  105. data/ext/pg_query/include/catalog/pg_ts_dict.h +8 -3
  106. data/ext/pg_query/include/catalog/pg_ts_dict_d.h +3 -1
  107. data/ext/pg_query/include/catalog/pg_ts_parser.h +6 -3
  108. data/ext/pg_query/include/catalog/pg_ts_parser_d.h +3 -1
  109. data/ext/pg_query/include/catalog/pg_ts_template.h +6 -3
  110. data/ext/pg_query/include/catalog/pg_ts_template_d.h +3 -1
  111. data/ext/pg_query/include/catalog/pg_type.h +55 -24
  112. data/ext/pg_query/include/catalog/pg_type_d.h +70 -31
  113. data/ext/pg_query/include/catalog/storage.h +5 -3
  114. data/ext/pg_query/include/commands/async.h +3 -4
  115. data/ext/pg_query/include/commands/dbcommands.h +2 -1
  116. data/ext/pg_query/include/commands/defrem.h +11 -24
  117. data/ext/pg_query/include/commands/event_trigger.h +2 -2
  118. data/ext/pg_query/include/commands/explain.h +1 -1
  119. data/ext/pg_query/include/commands/prepare.h +1 -1
  120. data/ext/pg_query/include/commands/tablespace.h +2 -2
  121. data/ext/pg_query/include/commands/trigger.h +18 -16
  122. data/ext/pg_query/include/commands/user.h +2 -2
  123. data/ext/pg_query/include/commands/vacuum.h +88 -41
  124. data/ext/pg_query/include/commands/variable.h +1 -1
  125. data/ext/pg_query/include/common/file_perm.h +4 -4
  126. data/ext/pg_query/include/common/hashfn.h +1 -1
  127. data/ext/pg_query/include/common/ip.h +1 -7
  128. data/ext/pg_query/include/common/keywords.h +2 -6
  129. data/ext/pg_query/include/common/kwlookup.h +1 -1
  130. data/ext/pg_query/include/common/pg_prng.h +60 -0
  131. data/ext/pg_query/include/common/relpath.h +2 -2
  132. data/ext/pg_query/include/common/string.h +24 -1
  133. data/ext/pg_query/include/common/unicode_combining_table.h +114 -2
  134. data/ext/pg_query/include/common/unicode_east_asian_fw_table.h +125 -0
  135. data/ext/pg_query/include/datatype/timestamp.h +40 -1
  136. data/ext/pg_query/include/executor/execdesc.h +1 -1
  137. data/ext/pg_query/include/executor/executor.h +65 -22
  138. data/ext/pg_query/include/executor/functions.h +17 -3
  139. data/ext/pg_query/include/executor/instrument.h +33 -16
  140. data/ext/pg_query/include/executor/spi.h +41 -3
  141. data/ext/pg_query/include/executor/tablefunc.h +1 -1
  142. data/ext/pg_query/include/executor/tuptable.h +1 -1
  143. data/ext/pg_query/include/fmgr.h +13 -7
  144. data/ext/pg_query/include/funcapi.h +16 -4
  145. data/ext/pg_query/include/getaddrinfo.h +1 -1
  146. data/ext/pg_query/include/jit/jit.h +11 -11
  147. data/ext/pg_query/include/kwlist_d.h +517 -494
  148. data/ext/pg_query/include/lib/dshash.h +112 -0
  149. data/ext/pg_query/include/lib/ilist.h +20 -1
  150. data/ext/pg_query/include/lib/pairingheap.h +1 -1
  151. data/ext/pg_query/include/lib/simplehash.h +140 -15
  152. data/ext/pg_query/include/lib/sort_template.h +432 -0
  153. data/ext/pg_query/include/lib/stringinfo.h +1 -1
  154. data/ext/pg_query/include/libpq/auth.h +6 -4
  155. data/ext/pg_query/include/libpq/crypt.h +5 -4
  156. data/ext/pg_query/include/libpq/hba.h +43 -4
  157. data/ext/pg_query/include/libpq/libpq-be.h +23 -6
  158. data/ext/pg_query/include/libpq/libpq.h +30 -20
  159. data/ext/pg_query/include/libpq/pqcomm.h +17 -31
  160. data/ext/pg_query/include/libpq/pqformat.h +1 -1
  161. data/ext/pg_query/include/libpq/pqsignal.h +4 -4
  162. data/ext/pg_query/include/mb/pg_wchar.h +105 -23
  163. data/ext/pg_query/include/mb/stringinfo_mb.h +1 -1
  164. data/ext/pg_query/include/miscadmin.h +47 -41
  165. data/ext/pg_query/include/nodes/bitmapset.h +1 -1
  166. data/ext/pg_query/include/nodes/execnodes.h +270 -78
  167. data/ext/pg_query/include/nodes/extensible.h +4 -2
  168. data/ext/pg_query/include/nodes/lockoptions.h +1 -1
  169. data/ext/pg_query/include/nodes/makefuncs.h +7 -6
  170. data/ext/pg_query/include/nodes/memnodes.h +5 -3
  171. data/ext/pg_query/include/nodes/nodeFuncs.h +1 -1
  172. data/ext/pg_query/include/nodes/nodes.h +30 -11
  173. data/ext/pg_query/include/nodes/params.h +1 -1
  174. data/ext/pg_query/include/nodes/parsenodes.h +322 -90
  175. data/ext/pg_query/include/nodes/pathnodes.h +243 -66
  176. data/ext/pg_query/include/nodes/pg_list.h +75 -69
  177. data/ext/pg_query/include/nodes/plannodes.h +111 -28
  178. data/ext/pg_query/include/nodes/primnodes.h +99 -47
  179. data/ext/pg_query/include/nodes/print.h +1 -1
  180. data/ext/pg_query/include/nodes/tidbitmap.h +1 -1
  181. data/ext/pg_query/include/nodes/value.h +58 -39
  182. data/ext/pg_query/include/optimizer/cost.h +9 -2
  183. data/ext/pg_query/include/optimizer/geqo.h +9 -7
  184. data/ext/pg_query/include/optimizer/geqo_gene.h +1 -1
  185. data/ext/pg_query/include/optimizer/optimizer.h +25 -17
  186. data/ext/pg_query/include/optimizer/paths.h +6 -6
  187. data/ext/pg_query/include/optimizer/planmain.h +15 -14
  188. data/ext/pg_query/include/parser/analyze.h +19 -5
  189. data/ext/pg_query/include/parser/gram.h +947 -913
  190. data/ext/pg_query/include/parser/gramparse.h +1 -1
  191. data/ext/pg_query/include/parser/kwlist.h +463 -453
  192. data/ext/pg_query/include/parser/parse_agg.h +2 -7
  193. data/ext/pg_query/include/parser/parse_coerce.h +3 -1
  194. data/ext/pg_query/include/parser/parse_expr.h +2 -3
  195. data/ext/pg_query/include/parser/parse_func.h +2 -1
  196. data/ext/pg_query/include/parser/parse_node.h +21 -9
  197. data/ext/pg_query/include/parser/parse_oper.h +1 -3
  198. data/ext/pg_query/include/parser/parse_relation.h +5 -4
  199. data/ext/pg_query/include/parser/parse_type.h +1 -1
  200. data/ext/pg_query/include/parser/parser.h +31 -4
  201. data/ext/pg_query/include/parser/parsetree.h +1 -1
  202. data/ext/pg_query/include/parser/scanner.h +1 -1
  203. data/ext/pg_query/include/parser/scansup.h +2 -5
  204. data/ext/pg_query/include/partitioning/partdefs.h +1 -1
  205. data/ext/pg_query/include/pg_config.h +83 -41
  206. data/ext/pg_query/include/pg_config_manual.h +74 -21
  207. data/ext/pg_query/include/pg_getopt.h +6 -6
  208. data/ext/pg_query/include/pg_query.h +5 -4
  209. data/ext/pg_query/include/pg_query_enum_defs.c +358 -241
  210. data/ext/pg_query/include/pg_query_fingerprint_conds.c +44 -7
  211. data/ext/pg_query/include/pg_query_fingerprint_defs.c +939 -113
  212. data/ext/pg_query/include/pg_query_outfuncs_conds.c +43 -13
  213. data/ext/pg_query/include/pg_query_outfuncs_defs.c +151 -26
  214. data/ext/pg_query/include/pg_query_readfuncs_conds.c +11 -2
  215. data/ext/pg_query/include/pg_query_readfuncs_defs.c +173 -30
  216. data/ext/pg_query/include/pg_trace.h +1 -1
  217. data/ext/pg_query/include/pgstat.h +449 -1238
  218. data/ext/pg_query/include/pgtime.h +14 -4
  219. data/ext/pg_query/include/pl_gram.h +126 -128
  220. data/ext/pg_query/include/pl_reserved_kwlist.h +1 -1
  221. data/ext/pg_query/include/pl_reserved_kwlist_d.h +10 -10
  222. data/ext/pg_query/include/pl_unreserved_kwlist.h +2 -3
  223. data/ext/pg_query/include/pl_unreserved_kwlist_d.h +54 -56
  224. data/ext/pg_query/include/plerrcodes.h +9 -1
  225. data/ext/pg_query/include/plpgsql.h +52 -54
  226. data/ext/pg_query/include/port/atomics/arch-arm.h +7 -1
  227. data/ext/pg_query/include/port/atomics/arch-ppc.h +1 -1
  228. data/ext/pg_query/include/port/atomics/arch-x86.h +1 -1
  229. data/ext/pg_query/include/port/atomics/fallback.h +1 -1
  230. data/ext/pg_query/include/port/atomics/generic-gcc.h +3 -3
  231. data/ext/pg_query/include/port/atomics/generic.h +1 -1
  232. data/ext/pg_query/include/port/atomics.h +1 -1
  233. data/ext/pg_query/include/port/pg_bitutils.h +40 -10
  234. data/ext/pg_query/include/port/pg_bswap.h +1 -1
  235. data/ext/pg_query/include/port/pg_crc32c.h +1 -1
  236. data/ext/pg_query/include/port.h +71 -46
  237. data/ext/pg_query/include/portability/instr_time.h +1 -1
  238. data/ext/pg_query/include/postgres.h +60 -16
  239. data/ext/pg_query/include/postmaster/autovacuum.h +17 -17
  240. data/ext/pg_query/include/postmaster/auxprocess.h +20 -0
  241. data/ext/pg_query/include/postmaster/bgworker.h +2 -1
  242. data/ext/pg_query/include/postmaster/bgworker_internals.h +2 -2
  243. data/ext/pg_query/include/postmaster/bgwriter.h +5 -5
  244. data/ext/pg_query/include/postmaster/fork_process.h +1 -1
  245. data/ext/pg_query/include/postmaster/interrupt.h +1 -1
  246. data/ext/pg_query/include/postmaster/pgarch.h +42 -8
  247. data/ext/pg_query/include/postmaster/postmaster.h +18 -17
  248. data/ext/pg_query/include/postmaster/startup.h +39 -0
  249. data/ext/pg_query/include/postmaster/syslogger.h +15 -10
  250. data/ext/pg_query/include/postmaster/walwriter.h +3 -3
  251. data/ext/pg_query/include/protobuf/pg_query.pb-c.h +1419 -914
  252. data/ext/pg_query/include/protobuf/pg_query.pb.h +43678 -32769
  253. data/ext/pg_query/include/regex/regex.h +18 -16
  254. data/ext/pg_query/include/replication/logicallauncher.h +3 -5
  255. data/ext/pg_query/include/replication/logicalproto.h +161 -17
  256. data/ext/pg_query/include/replication/logicalworker.h +1 -1
  257. data/ext/pg_query/include/replication/origin.h +7 -7
  258. data/ext/pg_query/include/replication/reorderbuffer.h +259 -42
  259. data/ext/pg_query/include/replication/slot.h +22 -11
  260. data/ext/pg_query/include/replication/syncrep.h +5 -5
  261. data/ext/pg_query/include/replication/walreceiver.h +145 -13
  262. data/ext/pg_query/include/replication/walsender.h +8 -8
  263. data/ext/pg_query/include/rewrite/prs2lock.h +1 -1
  264. data/ext/pg_query/include/rewrite/rewriteHandler.h +1 -3
  265. data/ext/pg_query/include/rewrite/rewriteManip.h +1 -1
  266. data/ext/pg_query/include/rewrite/rewriteSupport.h +1 -1
  267. data/ext/pg_query/include/storage/backendid.h +3 -3
  268. data/ext/pg_query/include/storage/block.h +4 -10
  269. data/ext/pg_query/include/storage/buf.h +1 -1
  270. data/ext/pg_query/include/storage/bufmgr.h +19 -14
  271. data/ext/pg_query/include/storage/bufpage.h +6 -8
  272. data/ext/pg_query/include/storage/condition_variable.h +13 -2
  273. data/ext/pg_query/include/storage/dsm.h +4 -1
  274. data/ext/pg_query/include/storage/dsm_impl.h +3 -2
  275. data/ext/pg_query/include/storage/fd.h +33 -3
  276. data/ext/pg_query/include/storage/fileset.h +40 -0
  277. data/ext/pg_query/include/storage/ipc.h +4 -1
  278. data/ext/pg_query/include/storage/item.h +1 -1
  279. data/ext/pg_query/include/storage/itemid.h +1 -1
  280. data/ext/pg_query/include/storage/itemptr.h +3 -1
  281. data/ext/pg_query/include/storage/large_object.h +2 -2
  282. data/ext/pg_query/include/storage/latch.h +9 -13
  283. data/ext/pg_query/include/storage/lmgr.h +2 -1
  284. data/ext/pg_query/include/storage/lock.h +11 -8
  285. data/ext/pg_query/include/storage/lockdefs.h +2 -2
  286. data/ext/pg_query/include/storage/lwlock.h +5 -32
  287. data/ext/pg_query/include/storage/lwlocknames.h +0 -1
  288. data/ext/pg_query/include/storage/off.h +1 -1
  289. data/ext/pg_query/include/storage/pg_sema.h +1 -1
  290. data/ext/pg_query/include/storage/pg_shmem.h +9 -7
  291. data/ext/pg_query/include/storage/pmsignal.h +15 -4
  292. data/ext/pg_query/include/storage/predicate.h +4 -4
  293. data/ext/pg_query/include/storage/proc.h +173 -59
  294. data/ext/pg_query/include/storage/procarray.h +98 -0
  295. data/ext/pg_query/include/storage/proclist_types.h +1 -1
  296. data/ext/pg_query/include/storage/procsignal.h +3 -7
  297. data/ext/pg_query/include/storage/relfilenode.h +1 -1
  298. data/ext/pg_query/include/storage/s_lock.h +60 -21
  299. data/ext/pg_query/include/storage/sharedfileset.h +3 -11
  300. data/ext/pg_query/include/storage/shm_mq.h +5 -4
  301. data/ext/pg_query/include/storage/shm_toc.h +1 -1
  302. data/ext/pg_query/include/storage/shmem.h +1 -1
  303. data/ext/pg_query/include/storage/sinval.h +3 -3
  304. data/ext/pg_query/include/storage/sinvaladt.h +1 -1
  305. data/ext/pg_query/include/storage/smgr.h +10 -8
  306. data/ext/pg_query/include/storage/spin.h +2 -2
  307. data/ext/pg_query/include/storage/standby.h +13 -6
  308. data/ext/pg_query/include/storage/standbydefs.h +2 -2
  309. data/ext/pg_query/include/storage/sync.h +7 -3
  310. data/ext/pg_query/include/tcop/cmdtag.h +1 -1
  311. data/ext/pg_query/include/tcop/cmdtaglist.h +3 -2
  312. data/ext/pg_query/include/tcop/deparse_utility.h +1 -1
  313. data/ext/pg_query/include/tcop/dest.h +1 -1
  314. data/ext/pg_query/include/tcop/fastpath.h +1 -2
  315. data/ext/pg_query/include/tcop/pquery.h +1 -1
  316. data/ext/pg_query/include/tcop/tcopprot.h +19 -11
  317. data/ext/pg_query/include/tcop/utility.h +7 -3
  318. data/ext/pg_query/include/tsearch/ts_cache.h +2 -2
  319. data/ext/pg_query/include/utils/acl.h +24 -3
  320. data/ext/pg_query/include/utils/aclchk_internal.h +1 -1
  321. data/ext/pg_query/include/utils/array.h +7 -2
  322. data/ext/pg_query/include/utils/backend_progress.h +44 -0
  323. data/ext/pg_query/include/utils/backend_status.h +321 -0
  324. data/ext/pg_query/include/utils/builtins.h +10 -11
  325. data/ext/pg_query/include/utils/bytea.h +3 -2
  326. data/ext/pg_query/include/utils/catcache.h +1 -1
  327. data/ext/pg_query/include/utils/date.h +1 -1
  328. data/ext/pg_query/include/utils/datetime.h +8 -7
  329. data/ext/pg_query/include/utils/datum.h +9 -1
  330. data/ext/pg_query/include/utils/dsa.h +1 -1
  331. data/ext/pg_query/include/utils/dynahash.h +4 -3
  332. data/ext/pg_query/include/utils/elog.h +52 -21
  333. data/ext/pg_query/include/utils/errcodes.h +2 -0
  334. data/ext/pg_query/include/utils/expandeddatum.h +1 -1
  335. data/ext/pg_query/include/utils/expandedrecord.h +1 -1
  336. data/ext/pg_query/include/utils/float.h +7 -7
  337. data/ext/pg_query/include/utils/fmgroids.h +1300 -696
  338. data/ext/pg_query/include/utils/fmgrprotos.h +199 -16
  339. data/ext/pg_query/include/utils/fmgrtab.h +6 -5
  340. data/ext/pg_query/include/utils/guc.h +69 -43
  341. data/ext/pg_query/include/utils/guc_tables.h +23 -19
  342. data/ext/pg_query/include/utils/hsearch.h +15 -11
  343. data/ext/pg_query/include/utils/inval.h +4 -1
  344. data/ext/pg_query/include/utils/lsyscache.h +11 -1
  345. data/ext/pg_query/include/utils/memdebug.h +1 -1
  346. data/ext/pg_query/include/utils/memutils.h +8 -3
  347. data/ext/pg_query/include/utils/numeric.h +19 -5
  348. data/ext/pg_query/include/utils/palloc.h +25 -3
  349. data/ext/pg_query/include/utils/partcache.h +1 -1
  350. data/ext/pg_query/include/utils/pg_locale.h +17 -9
  351. data/ext/pg_query/include/utils/pg_lsn.h +1 -1
  352. data/ext/pg_query/include/utils/pgstat_internal.h +784 -0
  353. data/ext/pg_query/include/utils/pidfile.h +1 -1
  354. data/ext/pg_query/include/utils/plancache.h +6 -5
  355. data/ext/pg_query/include/utils/portal.h +10 -12
  356. data/ext/pg_query/include/utils/ps_status.h +1 -1
  357. data/ext/pg_query/include/utils/queryenvironment.h +1 -1
  358. data/ext/pg_query/include/utils/queryjumble.h +88 -0
  359. data/ext/pg_query/include/utils/regproc.h +14 -3
  360. data/ext/pg_query/include/utils/rel.h +71 -19
  361. data/ext/pg_query/include/utils/relcache.h +8 -5
  362. data/ext/pg_query/include/utils/reltrigger.h +1 -1
  363. data/ext/pg_query/include/utils/resowner.h +1 -1
  364. data/ext/pg_query/include/utils/rls.h +2 -2
  365. data/ext/pg_query/include/utils/ruleutils.h +4 -1
  366. data/ext/pg_query/include/utils/sharedtuplestore.h +1 -1
  367. data/ext/pg_query/include/utils/snapmgr.h +34 -14
  368. data/ext/pg_query/include/utils/snapshot.h +14 -1
  369. data/ext/pg_query/include/utils/sortsupport.h +117 -2
  370. data/ext/pg_query/include/utils/syscache.h +6 -1
  371. data/ext/pg_query/include/utils/timeout.h +11 -4
  372. data/ext/pg_query/include/utils/timestamp.h +6 -5
  373. data/ext/pg_query/include/utils/tuplesort.h +25 -11
  374. data/ext/pg_query/include/utils/tuplestore.h +2 -2
  375. data/ext/pg_query/include/utils/typcache.h +24 -17
  376. data/ext/pg_query/include/utils/tzparser.h +1 -1
  377. data/ext/pg_query/include/utils/varlena.h +5 -3
  378. data/ext/pg_query/include/utils/wait_event.h +289 -0
  379. data/ext/pg_query/include/utils/xml.h +4 -4
  380. data/ext/pg_query/pg_query.pb-c.c +4302 -2304
  381. data/ext/pg_query/pg_query_deparse.c +1106 -373
  382. data/ext/pg_query/pg_query_fingerprint.c +30 -10
  383. data/ext/pg_query/pg_query_json_plpgsql.c +0 -25
  384. data/ext/pg_query/pg_query_normalize.c +1 -1
  385. data/ext/pg_query/pg_query_outfuncs_json.c +54 -16
  386. data/ext/pg_query/pg_query_outfuncs_protobuf.c +70 -10
  387. data/ext/pg_query/pg_query_parse.c +1 -1
  388. data/ext/pg_query/pg_query_readfuncs_protobuf.c +42 -8
  389. data/ext/pg_query/pg_query_scan.c +2 -1
  390. data/ext/pg_query/pg_query_split.c +3 -2
  391. data/ext/pg_query/src_backend_catalog_namespace.c +20 -9
  392. data/ext/pg_query/src_backend_catalog_pg_proc.c +4 -1
  393. data/ext/pg_query/src_backend_commands_define.c +11 -1
  394. data/ext/pg_query/src_backend_nodes_bitmapset.c +3 -1
  395. data/ext/pg_query/src_backend_nodes_copyfuncs.c +401 -76
  396. data/ext/pg_query/src_backend_nodes_equalfuncs.c +290 -46
  397. data/ext/pg_query/src_backend_nodes_extensible.c +1 -1
  398. data/ext/pg_query/src_backend_nodes_list.c +74 -11
  399. data/ext/pg_query/src_backend_nodes_makefuncs.c +5 -4
  400. data/ext/pg_query/src_backend_nodes_nodeFuncs.c +55 -12
  401. data/ext/pg_query/src_backend_nodes_value.c +28 -19
  402. data/ext/pg_query/src_backend_parser_gram.c +33874 -31261
  403. data/ext/pg_query/src_backend_parser_parser.c +26 -7
  404. data/ext/pg_query/src_backend_parser_scan.c +172 -209
  405. data/ext/pg_query/src_backend_parser_scansup.c +4 -28
  406. data/ext/pg_query/src_backend_postmaster_postmaster.c +77 -106
  407. data/ext/pg_query/src_backend_storage_ipc_ipc.c +13 -4
  408. data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +5 -4
  409. data/ext/pg_query/src_backend_tcop_postgres.c +62 -23
  410. data/ext/pg_query/src_backend_utils_activity_pgstat_database.c +140 -0
  411. data/ext/pg_query/src_backend_utils_adt_datum.c +13 -1
  412. data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +1 -1
  413. data/ext/pg_query/src_backend_utils_adt_format_type.c +6 -2
  414. data/ext/pg_query/src_backend_utils_adt_ruleutils.c +71 -5
  415. data/ext/pg_query/src_backend_utils_error_assert.c +16 -14
  416. data/ext/pg_query/src_backend_utils_error_elog.c +172 -99
  417. data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +12 -17
  418. data/ext/pg_query/src_backend_utils_hash_dynahash.c +40 -10
  419. data/ext/pg_query/src_backend_utils_init_globals.c +5 -5
  420. data/ext/pg_query/src_backend_utils_mb_mbutils.c +55 -66
  421. data/ext/pg_query/src_backend_utils_misc_guc.c +206 -45
  422. data/ext/pg_query/src_backend_utils_mmgr_aset.c +7 -5
  423. data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +123 -35
  424. data/ext/pg_query/src_common_encnames.c +1 -1
  425. data/ext/pg_query/src_common_hashfn.c +3 -3
  426. data/ext/pg_query/src_common_keywords.c +15 -2
  427. data/ext/pg_query/src_common_kwlist_d.h +517 -494
  428. data/ext/pg_query/src_common_kwlookup.c +1 -1
  429. data/ext/pg_query/src_common_pg_prng.c +152 -0
  430. data/ext/pg_query/src_common_psprintf.c +1 -1
  431. data/ext/pg_query/src_common_string.c +7 -1
  432. data/ext/pg_query/src_common_stringinfo.c +1 -1
  433. data/ext/pg_query/src_common_wchar.c +701 -109
  434. data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +45 -20
  435. data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +1 -18
  436. data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +1233 -1259
  437. data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +1 -1
  438. data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +10 -10
  439. data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +2 -2
  440. data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +54 -56
  441. data/ext/pg_query/src_port_pg_bitutils.c +41 -31
  442. data/ext/pg_query/src_port_pgsleep.c +1 -1
  443. data/ext/pg_query/src_port_pgstrcasecmp.c +1 -1
  444. data/ext/pg_query/src_port_qsort.c +12 -224
  445. data/ext/pg_query/src_port_snprintf.c +37 -13
  446. data/ext/pg_query/src_port_strerror.c +9 -19
  447. data/ext/pg_query/src_port_strnlen.c +1 -1
  448. data/lib/pg_query/filter_columns.rb +1 -1
  449. data/lib/pg_query/fingerprint.rb +5 -1
  450. data/lib/pg_query/node.rb +2 -2
  451. data/lib/pg_query/param_refs.rb +1 -1
  452. data/lib/pg_query/parse.rb +18 -8
  453. data/lib/pg_query/pg_query_pb.rb +1108 -942
  454. data/lib/pg_query/truncate.rb +1 -1
  455. data/lib/pg_query/version.rb +1 -1
  456. metadata +28 -18
  457. data/ext/pg_query/include/access/xloginsert.h +0 -64
  458. data/ext/pg_query/include/bootstrap/bootstrap.h +0 -62
  459. data/ext/pg_query/include/parser/parse_clause.h +0 -54
  460. data/ext/pg_query/include/parser/parse_collate.h +0 -27
  461. data/ext/pg_query/include/parser/parse_target.h +0 -46
  462. data/ext/pg_query/pg_query_ruby_freebsd.sym +0 -2
  463. data/ext/pg_query/src_backend_libpq_pqcomm.c +0 -659
  464. data/ext/pg_query/src_backend_parser_parse_expr.c +0 -313
  465. data/ext/pg_query/src_port_erand48.c +0 -127
  466. data/ext/pg_query/src_port_random.c +0 -31
@@ -8,18 +8,21 @@
8
8
  * - pg_wchar2single_with_len
9
9
  * - pg_ascii_mblen
10
10
  * - pg_ascii_dsplen
11
- * - pg_ascii_verifier
11
+ * - pg_ascii_verifychar
12
+ * - pg_ascii_verifystr
12
13
  * - pg_eucjp2wchar_with_len
13
14
  * - pg_euc2wchar_with_len
14
15
  * - pg_wchar2euc_with_len
15
16
  * - pg_eucjp_mblen
16
17
  * - pg_euc_mblen
17
18
  * - pg_eucjp_dsplen
18
- * - pg_eucjp_verifier
19
+ * - pg_eucjp_verifychar
20
+ * - pg_eucjp_verifystr
19
21
  * - pg_euccn2wchar_with_len
20
22
  * - pg_euccn_mblen
21
23
  * - pg_euccn_dsplen
22
- * - pg_euckr_verifier
24
+ * - pg_euckr_verifychar
25
+ * - pg_euckr_verifystr
23
26
  * - pg_euckr2wchar_with_len
24
27
  * - pg_euckr_mblen
25
28
  * - pg_euckr_dsplen
@@ -27,7 +30,8 @@
27
30
  * - pg_euctw2wchar_with_len
28
31
  * - pg_euctw_mblen
29
32
  * - pg_euctw_dsplen
30
- * - pg_euctw_verifier
33
+ * - pg_euctw_verifychar
34
+ * - pg_euctw_verifystr
31
35
  * - pg_utf2wchar_with_len
32
36
  * - pg_wchar2utf_with_len
33
37
  * - unicode_to_utf8
@@ -35,34 +39,45 @@
35
39
  * - utf8_to_unicode
36
40
  * - ucs_wcwidth
37
41
  * - mbbisearch
38
- * - pg_utf8_verifier
42
+ * - pg_utf8_verifychar
39
43
  * - pg_utf8_islegal
44
+ * - pg_utf8_verifystr
45
+ * - utf8_advance
46
+ * - Utf8Transition
40
47
  * - pg_mule2wchar_with_len
41
48
  * - pg_wchar2mule_with_len
42
49
  * - pg_mule_dsplen
43
- * - pg_mule_verifier
50
+ * - pg_mule_verifychar
51
+ * - pg_mule_verifystr
44
52
  * - pg_latin12wchar_with_len
45
53
  * - pg_latin1_mblen
46
54
  * - pg_latin1_dsplen
47
- * - pg_latin1_verifier
55
+ * - pg_latin1_verifychar
56
+ * - pg_latin1_verifystr
48
57
  * - pg_sjis_mblen
49
58
  * - pg_sjis_dsplen
50
- * - pg_sjis_verifier
59
+ * - pg_sjis_verifychar
60
+ * - pg_sjis_verifystr
51
61
  * - pg_big5_mblen
52
62
  * - pg_big5_dsplen
53
- * - pg_big5_verifier
63
+ * - pg_big5_verifychar
64
+ * - pg_big5_verifystr
54
65
  * - pg_gbk_mblen
55
66
  * - pg_gbk_dsplen
56
- * - pg_gbk_verifier
67
+ * - pg_gbk_verifychar
68
+ * - pg_gbk_verifystr
57
69
  * - pg_uhc_mblen
58
70
  * - pg_uhc_dsplen
59
- * - pg_uhc_verifier
71
+ * - pg_uhc_verifychar
72
+ * - pg_uhc_verifystr
60
73
  * - pg_gb18030_mblen
61
74
  * - pg_gb18030_dsplen
62
- * - pg_gb18030_verifier
75
+ * - pg_gb18030_verifychar
76
+ * - pg_gb18030_verifystr
63
77
  * - pg_johab_mblen
64
78
  * - pg_johab_dsplen
65
- * - pg_johab_verifier
79
+ * - pg_johab_verifychar
80
+ * - pg_johab_verifystr
66
81
  * - pg_encoding_mblen
67
82
  *--------------------------------------------------------------------
68
83
  */
@@ -72,7 +87,7 @@
72
87
  * wchar.c
73
88
  * Functions for working with multibyte characters in various encodings.
74
89
  *
75
- * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
90
+ * Portions Copyright (c) 1998-2022, PostgreSQL Global Development Group
76
91
  *
77
92
  * IDENTIFICATION
78
93
  * src/common/wchar.c
@@ -88,9 +103,9 @@
88
103
  * Operations on multi-byte encodings are driven by a table of helper
89
104
  * functions.
90
105
  *
91
- * To add an encoding support, define mblen(), dsplen() and verifier() for
92
- * the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
93
- * conversion functions.
106
+ * To add an encoding support, define mblen(), dsplen(), verifychar() and
107
+ * verifystr() for the encoding. For server-encodings, also define mb2wchar()
108
+ * and wchar2mb() conversion functions.
94
109
  *
95
110
  * These functions generally assume that their input is validly formed.
96
111
  * The "verifier" functions, further down in the file, have to be more
@@ -652,8 +667,8 @@ pg_utf_mblen(const unsigned char *s)
652
667
 
653
668
  struct mbinterval
654
669
  {
655
- unsigned short first;
656
- unsigned short last;
670
+ unsigned int first;
671
+ unsigned int last;
657
672
  };
658
673
 
659
674
  /* auxiliary function for binary search in interval table */
@@ -692,12 +707,6 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
692
707
  * category code Mn or Me in the Unicode database) have a
693
708
  * column width of 0.
694
709
  *
695
- * - Other format characters (general category code Cf in the Unicode
696
- * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
697
- *
698
- * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
699
- * have a column width of 0.
700
- *
701
710
  * - Spacing characters in the East Asian Wide (W) or East Asian
702
711
  * FullWidth (F) category as defined in Unicode Technical
703
712
  * Report #11 have a column width of 2.
@@ -714,6 +723,7 @@ static int
714
723
  ucs_wcwidth(pg_wchar ucs)
715
724
  {
716
725
  #include "common/unicode_combining_table.h"
726
+ #include "common/unicode_east_asian_fw_table.h"
717
727
 
718
728
  /* test for 8-bit control characters */
719
729
  if (ucs == 0)
@@ -722,27 +732,25 @@ ucs_wcwidth(pg_wchar ucs)
722
732
  if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
723
733
  return -1;
724
734
 
725
- /* binary search in table of non-spacing characters */
735
+ /*
736
+ * binary search in table of non-spacing characters
737
+ *
738
+ * XXX: In the official Unicode sources, it is possible for a character to
739
+ * be described as both non-spacing and wide at the same time. As of
740
+ * Unicode 13.0, treating the non-spacing property as the determining
741
+ * factor for display width leads to the correct behavior, so do that
742
+ * search first.
743
+ */
726
744
  if (mbbisearch(ucs, combining,
727
745
  sizeof(combining) / sizeof(struct mbinterval) - 1))
728
746
  return 0;
729
747
 
730
- /*
731
- * if we arrive here, ucs is not a combining or C0/C1 control character
732
- */
748
+ /* binary search in table of wide characters */
749
+ if (mbbisearch(ucs, east_asian_fw,
750
+ sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
751
+ return 2;
733
752
 
734
- return 1 +
735
- (ucs >= 0x1100 &&
736
- (ucs <= 0x115f || /* Hangul Jamo init. consonants */
737
- (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
738
- ucs != 0x303f) || /* CJK ... Yi */
739
- (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
740
- (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
741
- * Ideographs */
742
- (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
743
- (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
744
- (ucs >= 0xffe0 && ucs <= 0xffe6) ||
745
- (ucs >= 0x20000 && ucs <= 0x2ffff)));
753
+ return 1;
746
754
  }
747
755
 
748
756
  /*
@@ -1156,29 +1164,45 @@ pg_gb18030_dsplen(const unsigned char *s)
1156
1164
  *-------------------------------------------------------------------
1157
1165
  * multibyte sequence validators
1158
1166
  *
1159
- * These functions accept "s", a pointer to the first byte of a string,
1160
- * and "len", the remaining length of the string. If there is a validly
1161
- * encoded character beginning at *s, return its length in bytes; else
1162
- * return -1.
1167
+ * The verifychar functions accept "s", a pointer to the first byte of a
1168
+ * string, and "len", the remaining length of the string. If there is a
1169
+ * validly encoded character beginning at *s, return its length in bytes;
1170
+ * else return -1.
1163
1171
  *
1164
- * The functions can assume that len > 0 and that *s != '\0', but they must
1165
- * test for and reject zeroes in any additional bytes of a multibyte character.
1172
+ * The verifystr functions also accept "s", a pointer to a string and "len",
1173
+ * the length of the string. They verify the whole string, and return the
1174
+ * number of input bytes (<= len) that are valid. In other words, if the
1175
+ * whole string is valid, verifystr returns "len", otherwise it returns the
1176
+ * byte offset of the first invalid character. The verifystr functions must
1177
+ * test for and reject zeroes in the input.
1166
1178
  *
1167
- * Note that this definition allows the function for a single-byte
1168
- * encoding to be just "return 1".
1179
+ * The verifychar functions can assume that len > 0 and that *s != '\0', but
1180
+ * they must test for and reject zeroes in any additional bytes of a
1181
+ * multibyte character. Note that this definition allows the function for a
1182
+ * single-byte encoding to be just "return 1".
1169
1183
  *-------------------------------------------------------------------
1170
1184
  */
1171
-
1172
1185
  static int
1173
- pg_ascii_verifier(const unsigned char *s, int len)
1186
+ pg_ascii_verifychar(const unsigned char *s, int len)
1174
1187
  {
1175
1188
  return 1;
1176
1189
  }
1177
1190
 
1191
+ static int
1192
+ pg_ascii_verifystr(const unsigned char *s, int len)
1193
+ {
1194
+ const unsigned char *nullpos = memchr(s, 0, len);
1195
+
1196
+ if (nullpos == NULL)
1197
+ return len;
1198
+ else
1199
+ return nullpos - s;
1200
+ }
1201
+
1178
1202
  #define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1179
1203
 
1180
1204
  static int
1181
- pg_eucjp_verifier(const unsigned char *s, int len)
1205
+ pg_eucjp_verifychar(const unsigned char *s, int len)
1182
1206
  {
1183
1207
  int l;
1184
1208
  unsigned char c1,
@@ -1233,7 +1257,36 @@ pg_eucjp_verifier(const unsigned char *s, int len)
1233
1257
  }
1234
1258
 
1235
1259
  static int
1236
- pg_euckr_verifier(const unsigned char *s, int len)
1260
+ pg_eucjp_verifystr(const unsigned char *s, int len)
1261
+ {
1262
+ const unsigned char *start = s;
1263
+
1264
+ while (len > 0)
1265
+ {
1266
+ int l;
1267
+
1268
+ /* fast path for ASCII-subset characters */
1269
+ if (!IS_HIGHBIT_SET(*s))
1270
+ {
1271
+ if (*s == '\0')
1272
+ break;
1273
+ l = 1;
1274
+ }
1275
+ else
1276
+ {
1277
+ l = pg_eucjp_verifychar(s, len);
1278
+ if (l == -1)
1279
+ break;
1280
+ }
1281
+ s += l;
1282
+ len -= l;
1283
+ }
1284
+
1285
+ return s - start;
1286
+ }
1287
+
1288
+ static int
1289
+ pg_euckr_verifychar(const unsigned char *s, int len)
1237
1290
  {
1238
1291
  int l;
1239
1292
  unsigned char c1,
@@ -1261,11 +1314,41 @@ pg_euckr_verifier(const unsigned char *s, int len)
1261
1314
  return l;
1262
1315
  }
1263
1316
 
1317
+ static int
1318
+ pg_euckr_verifystr(const unsigned char *s, int len)
1319
+ {
1320
+ const unsigned char *start = s;
1321
+
1322
+ while (len > 0)
1323
+ {
1324
+ int l;
1325
+
1326
+ /* fast path for ASCII-subset characters */
1327
+ if (!IS_HIGHBIT_SET(*s))
1328
+ {
1329
+ if (*s == '\0')
1330
+ break;
1331
+ l = 1;
1332
+ }
1333
+ else
1334
+ {
1335
+ l = pg_euckr_verifychar(s, len);
1336
+ if (l == -1)
1337
+ break;
1338
+ }
1339
+ s += l;
1340
+ len -= l;
1341
+ }
1342
+
1343
+ return s - start;
1344
+ }
1345
+
1264
1346
  /* EUC-CN byte sequences are exactly same as EUC-KR */
1265
- #define pg_euccn_verifier pg_euckr_verifier
1347
+ #define pg_euccn_verifychar pg_euckr_verifychar
1348
+ #define pg_euccn_verifystr pg_euckr_verifystr
1266
1349
 
1267
1350
  static int
1268
- pg_euctw_verifier(const unsigned char *s, int len)
1351
+ pg_euctw_verifychar(const unsigned char *s, int len)
1269
1352
  {
1270
1353
  int l;
1271
1354
  unsigned char c1,
@@ -1315,7 +1398,36 @@ pg_euctw_verifier(const unsigned char *s, int len)
1315
1398
  }
1316
1399
 
1317
1400
  static int
1318
- pg_johab_verifier(const unsigned char *s, int len)
1401
+ pg_euctw_verifystr(const unsigned char *s, int len)
1402
+ {
1403
+ const unsigned char *start = s;
1404
+
1405
+ while (len > 0)
1406
+ {
1407
+ int l;
1408
+
1409
+ /* fast path for ASCII-subset characters */
1410
+ if (!IS_HIGHBIT_SET(*s))
1411
+ {
1412
+ if (*s == '\0')
1413
+ break;
1414
+ l = 1;
1415
+ }
1416
+ else
1417
+ {
1418
+ l = pg_euctw_verifychar(s, len);
1419
+ if (l == -1)
1420
+ break;
1421
+ }
1422
+ s += l;
1423
+ len -= l;
1424
+ }
1425
+
1426
+ return s - start;
1427
+ }
1428
+
1429
+ static int
1430
+ pg_johab_verifychar(const unsigned char *s, int len)
1319
1431
  {
1320
1432
  int l,
1321
1433
  mbl;
@@ -1339,7 +1451,36 @@ pg_johab_verifier(const unsigned char *s, int len)
1339
1451
  }
1340
1452
 
1341
1453
  static int
1342
- pg_mule_verifier(const unsigned char *s, int len)
1454
+ pg_johab_verifystr(const unsigned char *s, int len)
1455
+ {
1456
+ const unsigned char *start = s;
1457
+
1458
+ while (len > 0)
1459
+ {
1460
+ int l;
1461
+
1462
+ /* fast path for ASCII-subset characters */
1463
+ if (!IS_HIGHBIT_SET(*s))
1464
+ {
1465
+ if (*s == '\0')
1466
+ break;
1467
+ l = 1;
1468
+ }
1469
+ else
1470
+ {
1471
+ l = pg_johab_verifychar(s, len);
1472
+ if (l == -1)
1473
+ break;
1474
+ }
1475
+ s += l;
1476
+ len -= l;
1477
+ }
1478
+
1479
+ return s - start;
1480
+ }
1481
+
1482
+ static int
1483
+ pg_mule_verifychar(const unsigned char *s, int len)
1343
1484
  {
1344
1485
  int l,
1345
1486
  mbl;
@@ -1360,13 +1501,53 @@ pg_mule_verifier(const unsigned char *s, int len)
1360
1501
  }
1361
1502
 
1362
1503
  static int
1363
- pg_latin1_verifier(const unsigned char *s, int len)
1504
+ pg_mule_verifystr(const unsigned char *s, int len)
1505
+ {
1506
+ const unsigned char *start = s;
1507
+
1508
+ while (len > 0)
1509
+ {
1510
+ int l;
1511
+
1512
+ /* fast path for ASCII-subset characters */
1513
+ if (!IS_HIGHBIT_SET(*s))
1514
+ {
1515
+ if (*s == '\0')
1516
+ break;
1517
+ l = 1;
1518
+ }
1519
+ else
1520
+ {
1521
+ l = pg_mule_verifychar(s, len);
1522
+ if (l == -1)
1523
+ break;
1524
+ }
1525
+ s += l;
1526
+ len -= l;
1527
+ }
1528
+
1529
+ return s - start;
1530
+ }
1531
+
1532
+ static int
1533
+ pg_latin1_verifychar(const unsigned char *s, int len)
1364
1534
  {
1365
1535
  return 1;
1366
1536
  }
1367
1537
 
1368
1538
  static int
1369
- pg_sjis_verifier(const unsigned char *s, int len)
1539
+ pg_latin1_verifystr(const unsigned char *s, int len)
1540
+ {
1541
+ const unsigned char *nullpos = memchr(s, 0, len);
1542
+
1543
+ if (nullpos == NULL)
1544
+ return len;
1545
+ else
1546
+ return nullpos - s;
1547
+ }
1548
+
1549
+ static int
1550
+ pg_sjis_verifychar(const unsigned char *s, int len)
1370
1551
  {
1371
1552
  int l,
1372
1553
  mbl;
@@ -1389,7 +1570,36 @@ pg_sjis_verifier(const unsigned char *s, int len)
1389
1570
  }
1390
1571
 
1391
1572
  static int
1392
- pg_big5_verifier(const unsigned char *s, int len)
1573
+ pg_sjis_verifystr(const unsigned char *s, int len)
1574
+ {
1575
+ const unsigned char *start = s;
1576
+
1577
+ while (len > 0)
1578
+ {
1579
+ int l;
1580
+
1581
+ /* fast path for ASCII-subset characters */
1582
+ if (!IS_HIGHBIT_SET(*s))
1583
+ {
1584
+ if (*s == '\0')
1585
+ break;
1586
+ l = 1;
1587
+ }
1588
+ else
1589
+ {
1590
+ l = pg_sjis_verifychar(s, len);
1591
+ if (l == -1)
1592
+ break;
1593
+ }
1594
+ s += l;
1595
+ len -= l;
1596
+ }
1597
+
1598
+ return s - start;
1599
+ }
1600
+
1601
+ static int
1602
+ pg_big5_verifychar(const unsigned char *s, int len)
1393
1603
  {
1394
1604
  int l,
1395
1605
  mbl;
@@ -1409,7 +1619,36 @@ pg_big5_verifier(const unsigned char *s, int len)
1409
1619
  }
1410
1620
 
1411
1621
  static int
1412
- pg_gbk_verifier(const unsigned char *s, int len)
1622
+ pg_big5_verifystr(const unsigned char *s, int len)
1623
+ {
1624
+ const unsigned char *start = s;
1625
+
1626
+ while (len > 0)
1627
+ {
1628
+ int l;
1629
+
1630
+ /* fast path for ASCII-subset characters */
1631
+ if (!IS_HIGHBIT_SET(*s))
1632
+ {
1633
+ if (*s == '\0')
1634
+ break;
1635
+ l = 1;
1636
+ }
1637
+ else
1638
+ {
1639
+ l = pg_big5_verifychar(s, len);
1640
+ if (l == -1)
1641
+ break;
1642
+ }
1643
+ s += l;
1644
+ len -= l;
1645
+ }
1646
+
1647
+ return s - start;
1648
+ }
1649
+
1650
+ static int
1651
+ pg_gbk_verifychar(const unsigned char *s, int len)
1413
1652
  {
1414
1653
  int l,
1415
1654
  mbl;
@@ -1429,7 +1668,36 @@ pg_gbk_verifier(const unsigned char *s, int len)
1429
1668
  }
1430
1669
 
1431
1670
  static int
1432
- pg_uhc_verifier(const unsigned char *s, int len)
1671
+ pg_gbk_verifystr(const unsigned char *s, int len)
1672
+ {
1673
+ const unsigned char *start = s;
1674
+
1675
+ while (len > 0)
1676
+ {
1677
+ int l;
1678
+
1679
+ /* fast path for ASCII-subset characters */
1680
+ if (!IS_HIGHBIT_SET(*s))
1681
+ {
1682
+ if (*s == '\0')
1683
+ break;
1684
+ l = 1;
1685
+ }
1686
+ else
1687
+ {
1688
+ l = pg_gbk_verifychar(s, len);
1689
+ if (l == -1)
1690
+ break;
1691
+ }
1692
+ s += l;
1693
+ len -= l;
1694
+ }
1695
+
1696
+ return s - start;
1697
+ }
1698
+
1699
+ static int
1700
+ pg_uhc_verifychar(const unsigned char *s, int len)
1433
1701
  {
1434
1702
  int l,
1435
1703
  mbl;
@@ -1449,7 +1717,36 @@ pg_uhc_verifier(const unsigned char *s, int len)
1449
1717
  }
1450
1718
 
1451
1719
  static int
1452
- pg_gb18030_verifier(const unsigned char *s, int len)
1720
+ pg_uhc_verifystr(const unsigned char *s, int len)
1721
+ {
1722
+ const unsigned char *start = s;
1723
+
1724
+ while (len > 0)
1725
+ {
1726
+ int l;
1727
+
1728
+ /* fast path for ASCII-subset characters */
1729
+ if (!IS_HIGHBIT_SET(*s))
1730
+ {
1731
+ if (*s == '\0')
1732
+ break;
1733
+ l = 1;
1734
+ }
1735
+ else
1736
+ {
1737
+ l = pg_uhc_verifychar(s, len);
1738
+ if (l == -1)
1739
+ break;
1740
+ }
1741
+ s += l;
1742
+ len -= l;
1743
+ }
1744
+
1745
+ return s - start;
1746
+ }
1747
+
1748
+ static int
1749
+ pg_gb18030_verifychar(const unsigned char *s, int len)
1453
1750
  {
1454
1751
  int l;
1455
1752
 
@@ -1480,11 +1777,55 @@ pg_gb18030_verifier(const unsigned char *s, int len)
1480
1777
  }
1481
1778
 
1482
1779
  static int
1483
- pg_utf8_verifier(const unsigned char *s, int len)
1780
+ pg_gb18030_verifystr(const unsigned char *s, int len)
1484
1781
  {
1485
- int l = pg_utf_mblen(s);
1782
+ const unsigned char *start = s;
1486
1783
 
1487
- if (len < l)
1784
+ while (len > 0)
1785
+ {
1786
+ int l;
1787
+
1788
+ /* fast path for ASCII-subset characters */
1789
+ if (!IS_HIGHBIT_SET(*s))
1790
+ {
1791
+ if (*s == '\0')
1792
+ break;
1793
+ l = 1;
1794
+ }
1795
+ else
1796
+ {
1797
+ l = pg_gb18030_verifychar(s, len);
1798
+ if (l == -1)
1799
+ break;
1800
+ }
1801
+ s += l;
1802
+ len -= l;
1803
+ }
1804
+
1805
+ return s - start;
1806
+ }
1807
+
1808
+ static int
1809
+ pg_utf8_verifychar(const unsigned char *s, int len)
1810
+ {
1811
+ int l;
1812
+
1813
+ if ((*s & 0x80) == 0)
1814
+ {
1815
+ if (*s == '\0')
1816
+ return -1;
1817
+ return 1;
1818
+ }
1819
+ else if ((*s & 0xe0) == 0xc0)
1820
+ l = 2;
1821
+ else if ((*s & 0xf0) == 0xe0)
1822
+ l = 3;
1823
+ else if ((*s & 0xf8) == 0xf0)
1824
+ l = 4;
1825
+ else
1826
+ l = 1;
1827
+
1828
+ if (l > len)
1488
1829
  return -1;
1489
1830
 
1490
1831
  if (!pg_utf8_islegal(s, l))
@@ -1493,6 +1834,250 @@ pg_utf8_verifier(const unsigned char *s, int len)
1493
1834
  return l;
1494
1835
  }
1495
1836
 
1837
+ /*
1838
+ * The fast path of the UTF-8 verifier uses a deterministic finite automaton
1839
+ * (DFA) for multibyte characters. In a traditional table-driven DFA, the
1840
+ * input byte and current state are used to compute an index into an array of
1841
+ * state transitions. Since the address of the next transition is dependent
1842
+ * on this computation, there is latency in executing the load instruction,
1843
+ * and the CPU is not kept busy.
1844
+ *
1845
+ * Instead, we use a "shift-based" DFA as described by Per Vognsen:
1846
+ *
1847
+ * https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725
1848
+ *
1849
+ * In a shift-based DFA, the input byte is an index into an array of integers
1850
+ * whose bit pattern encodes the state transitions. To compute the next
1851
+ * state, we simply right-shift the integer by the current state and apply a
1852
+ * mask. In this scheme, the address of the transition only depends on the
1853
+ * input byte, so there is better pipelining.
1854
+ *
1855
+ * The naming convention for states and transitions was adopted from a UTF-8
1856
+ * to UTF-16/32 transcoder, whose table is reproduced below:
1857
+ *
1858
+ * https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp
1859
+ *
1860
+ * ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS / STATE
1861
+ * ==========================================================================
1862
+ * err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, | BGN/END
1863
+ * err, err, err, err, err, err, err, err, err, err, err, err, | ERR
1864
+ * |
1865
+ * err, err, END, END, END, err, err, err, err, err, err, err, | CS1
1866
+ * err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, | CS2
1867
+ * err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, | CS3
1868
+ * |
1869
+ * err, err, err, err, CS1, err, err, err, err, err, err, err, | P3A
1870
+ * err, err, CS1, CS1, err, err, err, err, err, err, err, err, | P3B
1871
+ * |
1872
+ * err, err, err, CS2, CS2, err, err, err, err, err, err, err, | P4A
1873
+ * err, err, CS2, err, err, err, err, err, err, err, err, err, | P4B
1874
+ *
1875
+ * In the most straightforward implementation, a shift-based DFA for UTF-8
1876
+ * requires 64-bit integers to encode the transitions, but with an SMT solver
1877
+ * it's possible to find state numbers such that the transitions fit within
1878
+ * 32-bit integers, as Dougall Johnson demonstrated:
1879
+ *
1880
+ * https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f
1881
+ *
1882
+ * This packed representation is the reason for the seemingly odd choice of
1883
+ * state values below.
1884
+ */
1885
+
1886
+ /* Error */
1887
+ #define ERR 0
1888
+ /* Begin */
1889
+ #define BGN 11
1890
+ /* Continuation states, expect 1/2/3 continuation bytes */
1891
+ #define CS1 16
1892
+ #define CS2 1
1893
+ #define CS3 5
1894
+ /* Partial states, where the first continuation byte has a restricted range */
1895
+ #define P3A 6 /* Lead was E0, check for 3-byte overlong */
1896
+ #define P3B 20 /* Lead was ED, check for surrogate */
1897
+ #define P4A 25 /* Lead was F0, check for 4-byte overlong */
1898
+ #define P4B 30 /* Lead was F4, check for too-large */
1899
+ /* Begin and End are the same state */
1900
+ #define END BGN
1901
+
1902
+ /* the encoded state transitions for the lookup table */
1903
+
1904
+ /* ASCII */
1905
+ #define ASC (END << BGN)
1906
+ /* 2-byte lead */
1907
+ #define L2A (CS1 << BGN)
1908
+ /* 3-byte lead */
1909
+ #define L3A (P3A << BGN)
1910
+ #define L3B (CS2 << BGN)
1911
+ #define L3C (P3B << BGN)
1912
+ /* 4-byte lead */
1913
+ #define L4A (P4A << BGN)
1914
+ #define L4B (CS3 << BGN)
1915
+ #define L4C (P4B << BGN)
1916
+ /* continuation byte */
1917
+ #define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1918
+ #define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1919
+ #define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1920
+ /* invalid byte */
1921
+ #define ILL ERR
1922
+
1923
+ static const uint32 Utf8Transition[256] =
1924
+ {
1925
+ /* ASCII */
1926
+
1927
+ ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1928
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1929
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1930
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1931
+
1932
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1933
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1934
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1935
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1936
+
1937
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1938
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1939
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1940
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1941
+
1942
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1943
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1944
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1945
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
1946
+
1947
+ /* continuation bytes */
1948
+
1949
+ /* 80..8F */
1950
+ CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1951
+ CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
1952
+
1953
+ /* 90..9F */
1954
+ CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1955
+ CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
1956
+
1957
+ /* A0..BF */
1958
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1959
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1960
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1961
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
1962
+
1963
+ /* leading bytes */
1964
+
1965
+ /* C0..DF */
1966
+ ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
1967
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1968
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1969
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
1970
+
1971
+ /* E0..EF */
1972
+ L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
1973
+ L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
1974
+
1975
+ /* F0..FF */
1976
+ L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
1977
+ ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
1978
+ };
1979
+
1980
+ static void
1981
+ utf8_advance(const unsigned char *s, uint32 *state, int len)
1982
+ {
1983
+ /* Note: We deliberately don't check the state's value here. */
1984
+ while (len > 0)
1985
+ {
1986
+ /*
1987
+ * It's important that the mask value is 31: In most instruction sets,
1988
+ * a shift by a 32-bit operand is understood to be a shift by its mod
1989
+ * 32, so the compiler should elide the mask operation.
1990
+ */
1991
+ *state = Utf8Transition[*s++] >> (*state & 31);
1992
+ len--;
1993
+ }
1994
+
1995
+ *state &= 31;
1996
+ }
1997
+
1998
+ static int
1999
+ pg_utf8_verifystr(const unsigned char *s, int len)
2000
+ {
2001
+ const unsigned char *start = s;
2002
+ const int orig_len = len;
2003
+ uint32 state = BGN;
2004
+
2005
+ /*
2006
+ * Sixteen seems to give the best balance of performance across different
2007
+ * byte distributions.
2008
+ */
2009
+ #define STRIDE_LENGTH 16
2010
+
2011
+ if (len >= STRIDE_LENGTH)
2012
+ {
2013
+ while (len >= STRIDE_LENGTH)
2014
+ {
2015
+ /*
2016
+ * If the chunk is all ASCII, we can skip the full UTF-8 check,
2017
+ * but we must first check for a non-END state, which means the
2018
+ * previous chunk ended in the middle of a multibyte sequence.
2019
+ */
2020
+ if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
2021
+ utf8_advance(s, &state, STRIDE_LENGTH);
2022
+
2023
+ s += STRIDE_LENGTH;
2024
+ len -= STRIDE_LENGTH;
2025
+ }
2026
+
2027
+ /* The error state persists, so we only need to check for it here. */
2028
+ if (state == ERR)
2029
+ {
2030
+ /*
2031
+ * Start over from the beginning with the slow path so we can
2032
+ * count the valid bytes.
2033
+ */
2034
+ len = orig_len;
2035
+ s = start;
2036
+ }
2037
+ else if (state != END)
2038
+ {
2039
+ /*
2040
+ * The fast path exited in the middle of a multibyte sequence.
2041
+ * Walk backwards to find the leading byte so that the slow path
2042
+ * can resume checking from there. We must always backtrack at
2043
+ * least one byte, since the current byte could be e.g. an ASCII
2044
+ * byte after a 2-byte lead, which is invalid.
2045
+ */
2046
+ do
2047
+ {
2048
+ Assert(s > start);
2049
+ s--;
2050
+ len++;
2051
+ Assert(IS_HIGHBIT_SET(*s));
2052
+ } while (pg_utf_mblen(s) <= 1);
2053
+ }
2054
+ }
2055
+
2056
+ /* check remaining bytes */
2057
+ while (len > 0)
2058
+ {
2059
+ int l;
2060
+
2061
+ /* fast path for ASCII-subset characters */
2062
+ if (!IS_HIGHBIT_SET(*s))
2063
+ {
2064
+ if (*s == '\0')
2065
+ break;
2066
+ l = 1;
2067
+ }
2068
+ else
2069
+ {
2070
+ l = pg_utf8_verifychar(s, len);
2071
+ if (l == -1)
2072
+ break;
2073
+ }
2074
+ s += l;
2075
+ len -= l;
2076
+ }
2077
+
2078
+ return s - start;
2079
+ }
2080
+
1496
2081
  /*
1497
2082
  * Check for validity of a single UTF-8 encoded character
1498
2083
  *
@@ -1572,48 +2157,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
1572
2157
  *-------------------------------------------------------------------
1573
2158
  */
1574
2159
  const pg_wchar_tbl pg_wchar_table[] = {
1575
- {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
1576
- {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
1577
- {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
1578
- {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
1579
- {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
1580
- {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
1581
- {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
1582
- {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
1583
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
1584
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
1585
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
1586
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
1587
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
1588
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
1589
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
1590
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
1591
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
1592
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
1593
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
1594
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
1595
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
1596
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
1597
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
1598
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
1599
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
1600
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
1601
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
1602
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
1603
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
1604
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
1605
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
1606
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
1607
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
1608
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
1609
- {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
1610
- {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
1611
- {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
1612
- {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
1613
- {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
1614
- {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
1615
- {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
1616
- {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
2160
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */
2161
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */
2162
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */
2163
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */
2164
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */
2165
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */
2166
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */
2167
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */
2168
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */
2169
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */
2170
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */
2171
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */
2172
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */
2173
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */
2174
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */
2175
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */
2176
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */
2177
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */
2178
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */
2179
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */
2180
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */
2181
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */
2182
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */
2183
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */
2184
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */
2185
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */
2186
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */
2187
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */
2188
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */
2189
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */
2190
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */
2191
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */
2192
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */
2193
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */
2194
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */
2195
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS */
2196
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */
2197
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */
2198
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */
2199
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */
2200
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */
2201
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */
1617
2202
  };
1618
2203
 
1619
2204
  /*
@@ -1646,7 +2231,14 @@ pg_encoding_mblen(int encoding, const char *mbstr)
1646
2231
  /*
1647
2232
  * Verify the first multibyte character of the given string.
1648
2233
  * Return its byte length if good, -1 if bad. (See comments above for
1649
- * full details of the mbverify API.)
2234
+ * full details of the mbverifychar API.)
2235
+ */
2236
+
2237
+
2238
+ /*
2239
+ * Verify that a string is valid for the given encoding.
2240
+ * Returns the number of input bytes (<= len) that form a valid string.
2241
+ * (See comments above for full details of the mbverifystr API.)
1650
2242
  */
1651
2243
 
1652
2244