gitlab-pg_query 1.3.1 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +217 -99
  3. data/README.md +92 -69
  4. data/Rakefile +85 -5
  5. data/ext/pg_query/extconf.rb +3 -40
  6. data/ext/pg_query/guc-file.c +0 -0
  7. data/ext/pg_query/include/access/amapi.h +246 -0
  8. data/ext/pg_query/include/access/attmap.h +52 -0
  9. data/ext/pg_query/include/access/attnum.h +64 -0
  10. data/ext/pg_query/include/access/clog.h +61 -0
  11. data/ext/pg_query/include/access/commit_ts.h +77 -0
  12. data/ext/pg_query/include/access/detoast.h +92 -0
  13. data/ext/pg_query/include/access/genam.h +228 -0
  14. data/ext/pg_query/include/access/gin.h +78 -0
  15. data/ext/pg_query/include/access/htup.h +89 -0
  16. data/ext/pg_query/include/access/htup_details.h +819 -0
  17. data/ext/pg_query/include/access/itup.h +161 -0
  18. data/ext/pg_query/include/access/parallel.h +82 -0
  19. data/ext/pg_query/include/access/printtup.h +35 -0
  20. data/ext/pg_query/include/access/relation.h +28 -0
  21. data/ext/pg_query/include/access/relscan.h +176 -0
  22. data/ext/pg_query/include/access/rmgr.h +35 -0
  23. data/ext/pg_query/include/access/rmgrlist.h +49 -0
  24. data/ext/pg_query/include/access/sdir.h +58 -0
  25. data/ext/pg_query/include/access/skey.h +151 -0
  26. data/ext/pg_query/include/access/stratnum.h +83 -0
  27. data/ext/pg_query/include/access/sysattr.h +29 -0
  28. data/ext/pg_query/include/access/table.h +27 -0
  29. data/ext/pg_query/include/access/tableam.h +1825 -0
  30. data/ext/pg_query/include/access/transam.h +265 -0
  31. data/ext/pg_query/include/access/tupconvert.h +51 -0
  32. data/ext/pg_query/include/access/tupdesc.h +154 -0
  33. data/ext/pg_query/include/access/tupmacs.h +247 -0
  34. data/ext/pg_query/include/access/twophase.h +61 -0
  35. data/ext/pg_query/include/access/xact.h +463 -0
  36. data/ext/pg_query/include/access/xlog.h +398 -0
  37. data/ext/pg_query/include/access/xlog_internal.h +330 -0
  38. data/ext/pg_query/include/access/xlogdefs.h +109 -0
  39. data/ext/pg_query/include/access/xloginsert.h +64 -0
  40. data/ext/pg_query/include/access/xlogreader.h +327 -0
  41. data/ext/pg_query/include/access/xlogrecord.h +227 -0
  42. data/ext/pg_query/include/bootstrap/bootstrap.h +62 -0
  43. data/ext/pg_query/include/c.h +1322 -0
  44. data/ext/pg_query/include/catalog/catalog.h +42 -0
  45. data/ext/pg_query/include/catalog/catversion.h +58 -0
  46. data/ext/pg_query/include/catalog/dependency.h +275 -0
  47. data/ext/pg_query/include/catalog/genbki.h +64 -0
  48. data/ext/pg_query/include/catalog/index.h +199 -0
  49. data/ext/pg_query/include/catalog/indexing.h +366 -0
  50. data/ext/pg_query/include/catalog/namespace.h +188 -0
  51. data/ext/pg_query/include/catalog/objectaccess.h +197 -0
  52. data/ext/pg_query/include/catalog/objectaddress.h +84 -0
  53. data/ext/pg_query/include/catalog/pg_aggregate.h +176 -0
  54. data/ext/pg_query/include/catalog/pg_aggregate_d.h +77 -0
  55. data/ext/pg_query/include/catalog/pg_am.h +60 -0
  56. data/ext/pg_query/include/catalog/pg_am_d.h +45 -0
  57. data/ext/pg_query/include/catalog/pg_attribute.h +204 -0
  58. data/ext/pg_query/include/catalog/pg_attribute_d.h +59 -0
  59. data/ext/pg_query/include/catalog/pg_authid.h +58 -0
  60. data/ext/pg_query/include/catalog/pg_authid_d.h +49 -0
  61. data/ext/pg_query/include/catalog/pg_class.h +200 -0
  62. data/ext/pg_query/include/catalog/pg_class_d.h +103 -0
  63. data/ext/pg_query/include/catalog/pg_collation.h +73 -0
  64. data/ext/pg_query/include/catalog/pg_collation_d.h +45 -0
  65. data/ext/pg_query/include/catalog/pg_constraint.h +247 -0
  66. data/ext/pg_query/include/catalog/pg_constraint_d.h +67 -0
  67. data/ext/pg_query/include/catalog/pg_control.h +250 -0
  68. data/ext/pg_query/include/catalog/pg_conversion.h +72 -0
  69. data/ext/pg_query/include/catalog/pg_conversion_d.h +35 -0
  70. data/ext/pg_query/include/catalog/pg_depend.h +73 -0
  71. data/ext/pg_query/include/catalog/pg_depend_d.h +34 -0
  72. data/ext/pg_query/include/catalog/pg_event_trigger.h +51 -0
  73. data/ext/pg_query/include/catalog/pg_event_trigger_d.h +34 -0
  74. data/ext/pg_query/include/catalog/pg_index.h +80 -0
  75. data/ext/pg_query/include/catalog/pg_index_d.h +56 -0
  76. data/ext/pg_query/include/catalog/pg_language.h +67 -0
  77. data/ext/pg_query/include/catalog/pg_language_d.h +39 -0
  78. data/ext/pg_query/include/catalog/pg_namespace.h +59 -0
  79. data/ext/pg_query/include/catalog/pg_namespace_d.h +34 -0
  80. data/ext/pg_query/include/catalog/pg_opclass.h +85 -0
  81. data/ext/pg_query/include/catalog/pg_opclass_d.h +49 -0
  82. data/ext/pg_query/include/catalog/pg_operator.h +102 -0
  83. data/ext/pg_query/include/catalog/pg_operator_d.h +106 -0
  84. data/ext/pg_query/include/catalog/pg_opfamily.h +60 -0
  85. data/ext/pg_query/include/catalog/pg_opfamily_d.h +47 -0
  86. data/ext/pg_query/include/catalog/pg_partitioned_table.h +63 -0
  87. data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +35 -0
  88. data/ext/pg_query/include/catalog/pg_proc.h +211 -0
  89. data/ext/pg_query/include/catalog/pg_proc_d.h +99 -0
  90. data/ext/pg_query/include/catalog/pg_publication.h +115 -0
  91. data/ext/pg_query/include/catalog/pg_publication_d.h +36 -0
  92. data/ext/pg_query/include/catalog/pg_replication_origin.h +57 -0
  93. data/ext/pg_query/include/catalog/pg_replication_origin_d.h +29 -0
  94. data/ext/pg_query/include/catalog/pg_statistic.h +275 -0
  95. data/ext/pg_query/include/catalog/pg_statistic_d.h +194 -0
  96. data/ext/pg_query/include/catalog/pg_statistic_ext.h +74 -0
  97. data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +40 -0
  98. data/ext/pg_query/include/catalog/pg_transform.h +45 -0
  99. data/ext/pg_query/include/catalog/pg_transform_d.h +32 -0
  100. data/ext/pg_query/include/catalog/pg_trigger.h +137 -0
  101. data/ext/pg_query/include/catalog/pg_trigger_d.h +106 -0
  102. data/ext/pg_query/include/catalog/pg_ts_config.h +50 -0
  103. data/ext/pg_query/include/catalog/pg_ts_config_d.h +32 -0
  104. data/ext/pg_query/include/catalog/pg_ts_dict.h +54 -0
  105. data/ext/pg_query/include/catalog/pg_ts_dict_d.h +33 -0
  106. data/ext/pg_query/include/catalog/pg_ts_parser.h +57 -0
  107. data/ext/pg_query/include/catalog/pg_ts_parser_d.h +35 -0
  108. data/ext/pg_query/include/catalog/pg_ts_template.h +48 -0
  109. data/ext/pg_query/include/catalog/pg_ts_template_d.h +32 -0
  110. data/ext/pg_query/include/catalog/pg_type.h +372 -0
  111. data/ext/pg_query/include/catalog/pg_type_d.h +285 -0
  112. data/ext/pg_query/include/catalog/storage.h +48 -0
  113. data/ext/pg_query/include/commands/async.h +54 -0
  114. data/ext/pg_query/include/commands/dbcommands.h +35 -0
  115. data/ext/pg_query/include/commands/defrem.h +173 -0
  116. data/ext/pg_query/include/commands/event_trigger.h +88 -0
  117. data/ext/pg_query/include/commands/explain.h +127 -0
  118. data/ext/pg_query/include/commands/prepare.h +61 -0
  119. data/ext/pg_query/include/commands/tablespace.h +67 -0
  120. data/ext/pg_query/include/commands/trigger.h +277 -0
  121. data/ext/pg_query/include/commands/user.h +37 -0
  122. data/ext/pg_query/include/commands/vacuum.h +293 -0
  123. data/ext/pg_query/include/commands/variable.h +38 -0
  124. data/ext/pg_query/include/common/file_perm.h +56 -0
  125. data/ext/pg_query/include/common/hashfn.h +104 -0
  126. data/ext/pg_query/include/common/ip.h +37 -0
  127. data/ext/pg_query/include/common/keywords.h +33 -0
  128. data/ext/pg_query/include/common/kwlookup.h +44 -0
  129. data/ext/pg_query/include/common/relpath.h +90 -0
  130. data/ext/pg_query/include/common/string.h +19 -0
  131. data/ext/pg_query/include/common/unicode_combining_table.h +196 -0
  132. data/ext/pg_query/include/datatype/timestamp.h +197 -0
  133. data/ext/pg_query/include/executor/execdesc.h +70 -0
  134. data/ext/pg_query/include/executor/executor.h +614 -0
  135. data/ext/pg_query/include/executor/functions.h +41 -0
  136. data/ext/pg_query/include/executor/instrument.h +101 -0
  137. data/ext/pg_query/include/executor/spi.h +175 -0
  138. data/ext/pg_query/include/executor/tablefunc.h +67 -0
  139. data/ext/pg_query/include/executor/tuptable.h +487 -0
  140. data/ext/pg_query/include/fmgr.h +775 -0
  141. data/ext/pg_query/include/funcapi.h +348 -0
  142. data/ext/pg_query/include/getaddrinfo.h +162 -0
  143. data/ext/pg_query/include/jit/jit.h +105 -0
  144. data/ext/pg_query/include/kwlist_d.h +1072 -0
  145. data/ext/pg_query/include/lib/ilist.h +727 -0
  146. data/ext/pg_query/include/lib/pairingheap.h +102 -0
  147. data/ext/pg_query/include/lib/simplehash.h +1059 -0
  148. data/ext/pg_query/include/lib/stringinfo.h +161 -0
  149. data/ext/pg_query/include/libpq/auth.h +29 -0
  150. data/ext/pg_query/include/libpq/crypt.h +46 -0
  151. data/ext/pg_query/include/libpq/hba.h +140 -0
  152. data/ext/pg_query/include/libpq/libpq-be.h +326 -0
  153. data/ext/pg_query/include/libpq/libpq.h +133 -0
  154. data/ext/pg_query/include/libpq/pqcomm.h +208 -0
  155. data/ext/pg_query/include/libpq/pqformat.h +210 -0
  156. data/ext/pg_query/include/libpq/pqsignal.h +42 -0
  157. data/ext/pg_query/include/mb/pg_wchar.h +672 -0
  158. data/ext/pg_query/include/mb/stringinfo_mb.h +24 -0
  159. data/ext/pg_query/include/miscadmin.h +476 -0
  160. data/ext/pg_query/include/nodes/bitmapset.h +122 -0
  161. data/ext/pg_query/include/nodes/execnodes.h +2520 -0
  162. data/ext/pg_query/include/nodes/extensible.h +160 -0
  163. data/ext/pg_query/include/nodes/lockoptions.h +61 -0
  164. data/ext/pg_query/include/nodes/makefuncs.h +108 -0
  165. data/ext/pg_query/include/nodes/memnodes.h +108 -0
  166. data/ext/pg_query/include/nodes/nodeFuncs.h +162 -0
  167. data/ext/pg_query/include/nodes/nodes.h +842 -0
  168. data/ext/pg_query/include/nodes/params.h +170 -0
  169. data/ext/pg_query/include/nodes/parsenodes.h +3579 -0
  170. data/ext/pg_query/include/nodes/pathnodes.h +2556 -0
  171. data/ext/pg_query/include/nodes/pg_list.h +605 -0
  172. data/ext/pg_query/include/nodes/plannodes.h +1251 -0
  173. data/ext/pg_query/include/nodes/primnodes.h +1541 -0
  174. data/ext/pg_query/include/nodes/print.h +34 -0
  175. data/ext/pg_query/include/nodes/tidbitmap.h +75 -0
  176. data/ext/pg_query/include/nodes/value.h +61 -0
  177. data/ext/pg_query/include/optimizer/cost.h +206 -0
  178. data/ext/pg_query/include/optimizer/geqo.h +88 -0
  179. data/ext/pg_query/include/optimizer/geqo_gene.h +45 -0
  180. data/ext/pg_query/include/optimizer/optimizer.h +199 -0
  181. data/ext/pg_query/include/optimizer/paths.h +249 -0
  182. data/ext/pg_query/include/optimizer/planmain.h +119 -0
  183. data/ext/pg_query/include/parser/analyze.h +49 -0
  184. data/ext/pg_query/include/parser/gram.h +1067 -0
  185. data/ext/pg_query/include/parser/gramparse.h +75 -0
  186. data/ext/pg_query/include/parser/kwlist.h +477 -0
  187. data/ext/pg_query/include/parser/parse_agg.h +68 -0
  188. data/ext/pg_query/include/parser/parse_clause.h +54 -0
  189. data/ext/pg_query/include/parser/parse_coerce.h +97 -0
  190. data/ext/pg_query/include/parser/parse_collate.h +27 -0
  191. data/ext/pg_query/include/parser/parse_expr.h +26 -0
  192. data/ext/pg_query/include/parser/parse_func.h +73 -0
  193. data/ext/pg_query/include/parser/parse_node.h +327 -0
  194. data/ext/pg_query/include/parser/parse_oper.h +67 -0
  195. data/ext/pg_query/include/parser/parse_relation.h +123 -0
  196. data/ext/pg_query/include/parser/parse_target.h +46 -0
  197. data/ext/pg_query/include/parser/parse_type.h +60 -0
  198. data/ext/pg_query/include/parser/parser.h +41 -0
  199. data/ext/pg_query/include/parser/parsetree.h +61 -0
  200. data/ext/pg_query/include/parser/scanner.h +152 -0
  201. data/ext/pg_query/include/parser/scansup.h +30 -0
  202. data/ext/pg_query/include/partitioning/partdefs.h +26 -0
  203. data/ext/pg_query/include/pg_config.h +989 -0
  204. data/ext/pg_query/include/pg_config_ext.h +8 -0
  205. data/ext/pg_query/include/pg_config_manual.h +350 -0
  206. data/ext/pg_query/include/pg_config_os.h +8 -0
  207. data/ext/pg_query/include/pg_getopt.h +56 -0
  208. data/ext/pg_query/include/pg_query.h +121 -0
  209. data/ext/pg_query/include/pg_query_enum_defs.c +2454 -0
  210. data/ext/pg_query/include/pg_query_fingerprint_conds.c +875 -0
  211. data/ext/pg_query/include/pg_query_fingerprint_defs.c +12413 -0
  212. data/ext/pg_query/include/pg_query_json_helper.c +61 -0
  213. data/ext/pg_query/include/pg_query_outfuncs_conds.c +686 -0
  214. data/ext/pg_query/include/pg_query_outfuncs_defs.c +2437 -0
  215. data/ext/pg_query/include/pg_query_readfuncs_conds.c +222 -0
  216. data/ext/pg_query/include/pg_query_readfuncs_defs.c +2878 -0
  217. data/ext/pg_query/include/pg_trace.h +17 -0
  218. data/ext/pg_query/include/pgstat.h +1487 -0
  219. data/ext/pg_query/include/pgtime.h +84 -0
  220. data/ext/pg_query/include/pl_gram.h +385 -0
  221. data/ext/pg_query/include/pl_reserved_kwlist.h +52 -0
  222. data/ext/pg_query/include/pl_reserved_kwlist_d.h +114 -0
  223. data/ext/pg_query/include/pl_unreserved_kwlist.h +112 -0
  224. data/ext/pg_query/include/pl_unreserved_kwlist_d.h +246 -0
  225. data/ext/pg_query/include/plerrcodes.h +990 -0
  226. data/ext/pg_query/include/plpgsql.h +1347 -0
  227. data/ext/pg_query/include/port.h +524 -0
  228. data/ext/pg_query/include/port/atomics.h +524 -0
  229. data/ext/pg_query/include/port/atomics/arch-arm.h +26 -0
  230. data/ext/pg_query/include/port/atomics/arch-ppc.h +254 -0
  231. data/ext/pg_query/include/port/atomics/arch-x86.h +252 -0
  232. data/ext/pg_query/include/port/atomics/fallback.h +170 -0
  233. data/ext/pg_query/include/port/atomics/generic-gcc.h +286 -0
  234. data/ext/pg_query/include/port/atomics/generic.h +401 -0
  235. data/ext/pg_query/include/port/pg_bitutils.h +226 -0
  236. data/ext/pg_query/include/port/pg_bswap.h +161 -0
  237. data/ext/pg_query/include/port/pg_crc32c.h +101 -0
  238. data/ext/pg_query/include/portability/instr_time.h +256 -0
  239. data/ext/pg_query/include/postgres.h +764 -0
  240. data/ext/pg_query/include/postgres_ext.h +74 -0
  241. data/ext/pg_query/include/postmaster/autovacuum.h +83 -0
  242. data/ext/pg_query/include/postmaster/bgworker.h +161 -0
  243. data/ext/pg_query/include/postmaster/bgworker_internals.h +64 -0
  244. data/ext/pg_query/include/postmaster/bgwriter.h +45 -0
  245. data/ext/pg_query/include/postmaster/fork_process.h +17 -0
  246. data/ext/pg_query/include/postmaster/interrupt.h +32 -0
  247. data/ext/pg_query/include/postmaster/pgarch.h +39 -0
  248. data/ext/pg_query/include/postmaster/postmaster.h +77 -0
  249. data/ext/pg_query/include/postmaster/syslogger.h +98 -0
  250. data/ext/pg_query/include/postmaster/walwriter.h +21 -0
  251. data/ext/pg_query/include/protobuf-c.h +1106 -0
  252. data/ext/pg_query/include/protobuf-c/protobuf-c.h +1106 -0
  253. data/ext/pg_query/include/protobuf/pg_query.pb-c.h +10846 -0
  254. data/ext/pg_query/include/protobuf/pg_query.pb.h +124718 -0
  255. data/ext/pg_query/include/regex/regex.h +184 -0
  256. data/ext/pg_query/include/replication/logicallauncher.h +31 -0
  257. data/ext/pg_query/include/replication/logicalproto.h +110 -0
  258. data/ext/pg_query/include/replication/logicalworker.h +19 -0
  259. data/ext/pg_query/include/replication/origin.h +73 -0
  260. data/ext/pg_query/include/replication/reorderbuffer.h +467 -0
  261. data/ext/pg_query/include/replication/slot.h +219 -0
  262. data/ext/pg_query/include/replication/syncrep.h +115 -0
  263. data/ext/pg_query/include/replication/walreceiver.h +340 -0
  264. data/ext/pg_query/include/replication/walsender.h +74 -0
  265. data/ext/pg_query/include/rewrite/prs2lock.h +46 -0
  266. data/ext/pg_query/include/rewrite/rewriteHandler.h +40 -0
  267. data/ext/pg_query/include/rewrite/rewriteManip.h +87 -0
  268. data/ext/pg_query/include/rewrite/rewriteSupport.h +26 -0
  269. data/ext/pg_query/include/storage/backendid.h +37 -0
  270. data/ext/pg_query/include/storage/block.h +121 -0
  271. data/ext/pg_query/include/storage/buf.h +46 -0
  272. data/ext/pg_query/include/storage/bufmgr.h +292 -0
  273. data/ext/pg_query/include/storage/bufpage.h +459 -0
  274. data/ext/pg_query/include/storage/condition_variable.h +62 -0
  275. data/ext/pg_query/include/storage/dsm.h +61 -0
  276. data/ext/pg_query/include/storage/dsm_impl.h +75 -0
  277. data/ext/pg_query/include/storage/fd.h +168 -0
  278. data/ext/pg_query/include/storage/ipc.h +81 -0
  279. data/ext/pg_query/include/storage/item.h +19 -0
  280. data/ext/pg_query/include/storage/itemid.h +184 -0
  281. data/ext/pg_query/include/storage/itemptr.h +206 -0
  282. data/ext/pg_query/include/storage/large_object.h +100 -0
  283. data/ext/pg_query/include/storage/latch.h +190 -0
  284. data/ext/pg_query/include/storage/lmgr.h +114 -0
  285. data/ext/pg_query/include/storage/lock.h +612 -0
  286. data/ext/pg_query/include/storage/lockdefs.h +59 -0
  287. data/ext/pg_query/include/storage/lwlock.h +232 -0
  288. data/ext/pg_query/include/storage/lwlocknames.h +51 -0
  289. data/ext/pg_query/include/storage/off.h +57 -0
  290. data/ext/pg_query/include/storage/pg_sema.h +61 -0
  291. data/ext/pg_query/include/storage/pg_shmem.h +90 -0
  292. data/ext/pg_query/include/storage/pmsignal.h +94 -0
  293. data/ext/pg_query/include/storage/predicate.h +87 -0
  294. data/ext/pg_query/include/storage/proc.h +333 -0
  295. data/ext/pg_query/include/storage/proclist_types.h +51 -0
  296. data/ext/pg_query/include/storage/procsignal.h +75 -0
  297. data/ext/pg_query/include/storage/relfilenode.h +99 -0
  298. data/ext/pg_query/include/storage/s_lock.h +1047 -0
  299. data/ext/pg_query/include/storage/sharedfileset.h +45 -0
  300. data/ext/pg_query/include/storage/shm_mq.h +85 -0
  301. data/ext/pg_query/include/storage/shm_toc.h +58 -0
  302. data/ext/pg_query/include/storage/shmem.h +81 -0
  303. data/ext/pg_query/include/storage/sinval.h +153 -0
  304. data/ext/pg_query/include/storage/sinvaladt.h +43 -0
  305. data/ext/pg_query/include/storage/smgr.h +109 -0
  306. data/ext/pg_query/include/storage/spin.h +77 -0
  307. data/ext/pg_query/include/storage/standby.h +91 -0
  308. data/ext/pg_query/include/storage/standbydefs.h +74 -0
  309. data/ext/pg_query/include/storage/sync.h +62 -0
  310. data/ext/pg_query/include/tcop/cmdtag.h +58 -0
  311. data/ext/pg_query/include/tcop/cmdtaglist.h +217 -0
  312. data/ext/pg_query/include/tcop/deparse_utility.h +108 -0
  313. data/ext/pg_query/include/tcop/dest.h +149 -0
  314. data/ext/pg_query/include/tcop/fastpath.h +21 -0
  315. data/ext/pg_query/include/tcop/pquery.h +45 -0
  316. data/ext/pg_query/include/tcop/tcopprot.h +89 -0
  317. data/ext/pg_query/include/tcop/utility.h +108 -0
  318. data/ext/pg_query/include/tsearch/ts_cache.h +98 -0
  319. data/ext/pg_query/include/utils/acl.h +312 -0
  320. data/ext/pg_query/include/utils/aclchk_internal.h +45 -0
  321. data/ext/pg_query/include/utils/array.h +458 -0
  322. data/ext/pg_query/include/utils/builtins.h +127 -0
  323. data/ext/pg_query/include/utils/bytea.h +27 -0
  324. data/ext/pg_query/include/utils/catcache.h +231 -0
  325. data/ext/pg_query/include/utils/date.h +90 -0
  326. data/ext/pg_query/include/utils/datetime.h +343 -0
  327. data/ext/pg_query/include/utils/datum.h +68 -0
  328. data/ext/pg_query/include/utils/dsa.h +123 -0
  329. data/ext/pg_query/include/utils/dynahash.h +19 -0
  330. data/ext/pg_query/include/utils/elog.h +439 -0
  331. data/ext/pg_query/include/utils/errcodes.h +352 -0
  332. data/ext/pg_query/include/utils/expandeddatum.h +159 -0
  333. data/ext/pg_query/include/utils/expandedrecord.h +231 -0
  334. data/ext/pg_query/include/utils/float.h +356 -0
  335. data/ext/pg_query/include/utils/fmgroids.h +2657 -0
  336. data/ext/pg_query/include/utils/fmgrprotos.h +2646 -0
  337. data/ext/pg_query/include/utils/fmgrtab.h +48 -0
  338. data/ext/pg_query/include/utils/guc.h +443 -0
  339. data/ext/pg_query/include/utils/guc_tables.h +272 -0
  340. data/ext/pg_query/include/utils/hsearch.h +149 -0
  341. data/ext/pg_query/include/utils/inval.h +64 -0
  342. data/ext/pg_query/include/utils/lsyscache.h +197 -0
  343. data/ext/pg_query/include/utils/memdebug.h +82 -0
  344. data/ext/pg_query/include/utils/memutils.h +225 -0
  345. data/ext/pg_query/include/utils/numeric.h +76 -0
  346. data/ext/pg_query/include/utils/palloc.h +136 -0
  347. data/ext/pg_query/include/utils/partcache.h +102 -0
  348. data/ext/pg_query/include/utils/pg_locale.h +119 -0
  349. data/ext/pg_query/include/utils/pg_lsn.h +29 -0
  350. data/ext/pg_query/include/utils/pidfile.h +56 -0
  351. data/ext/pg_query/include/utils/plancache.h +235 -0
  352. data/ext/pg_query/include/utils/portal.h +241 -0
  353. data/ext/pg_query/include/utils/probes.h +114 -0
  354. data/ext/pg_query/include/utils/ps_status.h +25 -0
  355. data/ext/pg_query/include/utils/queryenvironment.h +74 -0
  356. data/ext/pg_query/include/utils/regproc.h +28 -0
  357. data/ext/pg_query/include/utils/rel.h +644 -0
  358. data/ext/pg_query/include/utils/relcache.h +151 -0
  359. data/ext/pg_query/include/utils/reltrigger.h +81 -0
  360. data/ext/pg_query/include/utils/resowner.h +86 -0
  361. data/ext/pg_query/include/utils/rls.h +50 -0
  362. data/ext/pg_query/include/utils/ruleutils.h +44 -0
  363. data/ext/pg_query/include/utils/sharedtuplestore.h +61 -0
  364. data/ext/pg_query/include/utils/snapmgr.h +158 -0
  365. data/ext/pg_query/include/utils/snapshot.h +206 -0
  366. data/ext/pg_query/include/utils/sortsupport.h +276 -0
  367. data/ext/pg_query/include/utils/syscache.h +219 -0
  368. data/ext/pg_query/include/utils/timeout.h +88 -0
  369. data/ext/pg_query/include/utils/timestamp.h +116 -0
  370. data/ext/pg_query/include/utils/tuplesort.h +277 -0
  371. data/ext/pg_query/include/utils/tuplestore.h +91 -0
  372. data/ext/pg_query/include/utils/typcache.h +202 -0
  373. data/ext/pg_query/include/utils/tzparser.h +39 -0
  374. data/ext/pg_query/include/utils/varlena.h +39 -0
  375. data/ext/pg_query/include/utils/xml.h +84 -0
  376. data/ext/pg_query/include/xxhash.h +5445 -0
  377. data/ext/pg_query/include/xxhash/xxhash.h +5445 -0
  378. data/ext/pg_query/pg_query.c +104 -0
  379. data/ext/pg_query/pg_query.pb-c.c +37628 -0
  380. data/ext/pg_query/pg_query_deparse.c +9959 -0
  381. data/ext/pg_query/pg_query_fingerprint.c +295 -0
  382. data/ext/pg_query/pg_query_fingerprint.h +8 -0
  383. data/ext/pg_query/pg_query_internal.h +24 -0
  384. data/ext/pg_query/pg_query_json_plpgsql.c +738 -0
  385. data/ext/pg_query/pg_query_json_plpgsql.h +9 -0
  386. data/ext/pg_query/pg_query_normalize.c +439 -0
  387. data/ext/pg_query/pg_query_outfuncs.h +10 -0
  388. data/ext/pg_query/pg_query_outfuncs_json.c +297 -0
  389. data/ext/pg_query/pg_query_outfuncs_protobuf.c +237 -0
  390. data/ext/pg_query/pg_query_parse.c +148 -0
  391. data/ext/pg_query/pg_query_parse_plpgsql.c +460 -0
  392. data/ext/pg_query/pg_query_readfuncs.h +11 -0
  393. data/ext/pg_query/pg_query_readfuncs_protobuf.c +142 -0
  394. data/ext/pg_query/pg_query_ruby.c +108 -12
  395. data/ext/pg_query/pg_query_scan.c +173 -0
  396. data/ext/pg_query/pg_query_split.c +221 -0
  397. data/ext/pg_query/protobuf-c.c +3660 -0
  398. data/ext/pg_query/src_backend_catalog_namespace.c +1051 -0
  399. data/ext/pg_query/src_backend_catalog_pg_proc.c +142 -0
  400. data/ext/pg_query/src_backend_commands_define.c +117 -0
  401. data/ext/pg_query/src_backend_libpq_pqcomm.c +651 -0
  402. data/ext/pg_query/src_backend_nodes_bitmapset.c +513 -0
  403. data/ext/pg_query/src_backend_nodes_copyfuncs.c +6013 -0
  404. data/ext/pg_query/src_backend_nodes_equalfuncs.c +4003 -0
  405. data/ext/pg_query/src_backend_nodes_extensible.c +99 -0
  406. data/ext/pg_query/src_backend_nodes_list.c +922 -0
  407. data/ext/pg_query/src_backend_nodes_makefuncs.c +417 -0
  408. data/ext/pg_query/src_backend_nodes_nodeFuncs.c +1363 -0
  409. data/ext/pg_query/src_backend_nodes_value.c +84 -0
  410. data/ext/pg_query/src_backend_parser_gram.c +47456 -0
  411. data/ext/pg_query/src_backend_parser_parse_expr.c +313 -0
  412. data/ext/pg_query/src_backend_parser_parser.c +497 -0
  413. data/ext/pg_query/src_backend_parser_scan.c +7091 -0
  414. data/ext/pg_query/src_backend_parser_scansup.c +160 -0
  415. data/ext/pg_query/src_backend_postmaster_postmaster.c +2230 -0
  416. data/ext/pg_query/src_backend_storage_ipc_ipc.c +192 -0
  417. data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +370 -0
  418. data/ext/pg_query/src_backend_tcop_postgres.c +776 -0
  419. data/ext/pg_query/src_backend_utils_adt_datum.c +326 -0
  420. data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +98 -0
  421. data/ext/pg_query/src_backend_utils_adt_format_type.c +136 -0
  422. data/ext/pg_query/src_backend_utils_adt_ruleutils.c +1683 -0
  423. data/ext/pg_query/src_backend_utils_error_assert.c +74 -0
  424. data/ext/pg_query/src_backend_utils_error_elog.c +1748 -0
  425. data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +570 -0
  426. data/ext/pg_query/src_backend_utils_hash_dynahash.c +1086 -0
  427. data/ext/pg_query/src_backend_utils_init_globals.c +168 -0
  428. data/ext/pg_query/src_backend_utils_mb_mbutils.c +839 -0
  429. data/ext/pg_query/src_backend_utils_misc_guc.c +1831 -0
  430. data/ext/pg_query/src_backend_utils_mmgr_aset.c +1560 -0
  431. data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +1006 -0
  432. data/ext/pg_query/src_common_encnames.c +158 -0
  433. data/ext/pg_query/src_common_keywords.c +39 -0
  434. data/ext/pg_query/src_common_kwlist_d.h +1081 -0
  435. data/ext/pg_query/src_common_kwlookup.c +91 -0
  436. data/ext/pg_query/src_common_psprintf.c +158 -0
  437. data/ext/pg_query/src_common_string.c +86 -0
  438. data/ext/pg_query/src_common_stringinfo.c +336 -0
  439. data/ext/pg_query/src_common_wchar.c +1651 -0
  440. data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +1133 -0
  441. data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +877 -0
  442. data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +6533 -0
  443. data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +107 -0
  444. data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +123 -0
  445. data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +671 -0
  446. data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +255 -0
  447. data/ext/pg_query/src_port_erand48.c +127 -0
  448. data/ext/pg_query/src_port_pg_bitutils.c +246 -0
  449. data/ext/pg_query/src_port_pgsleep.c +69 -0
  450. data/ext/pg_query/src_port_pgstrcasecmp.c +83 -0
  451. data/ext/pg_query/src_port_qsort.c +240 -0
  452. data/ext/pg_query/src_port_random.c +31 -0
  453. data/ext/pg_query/src_port_snprintf.c +1449 -0
  454. data/ext/pg_query/src_port_strerror.c +324 -0
  455. data/ext/pg_query/src_port_strnlen.c +39 -0
  456. data/ext/pg_query/xxhash.c +43 -0
  457. data/lib/pg_query.rb +7 -4
  458. data/lib/pg_query/constants.rb +21 -0
  459. data/lib/pg_query/deparse.rb +15 -1581
  460. data/lib/pg_query/filter_columns.rb +88 -85
  461. data/lib/pg_query/fingerprint.rb +122 -87
  462. data/lib/pg_query/json_field_names.rb +1402 -0
  463. data/lib/pg_query/node.rb +31 -0
  464. data/lib/pg_query/param_refs.rb +42 -37
  465. data/lib/pg_query/parse.rb +220 -203
  466. data/lib/pg_query/parse_error.rb +1 -1
  467. data/lib/pg_query/pg_query_pb.rb +3211 -0
  468. data/lib/pg_query/scan.rb +23 -0
  469. data/lib/pg_query/treewalker.rb +24 -40
  470. data/lib/pg_query/truncate.rb +71 -42
  471. data/lib/pg_query/version.rb +2 -2
  472. metadata +472 -11
  473. data/ext/pg_query/pg_query_ruby.h +0 -10
  474. data/lib/pg_query/deep_dup.rb +0 -16
  475. data/lib/pg_query/deparse/alter_table.rb +0 -42
  476. data/lib/pg_query/deparse/interval.rb +0 -105
  477. data/lib/pg_query/deparse/keywords.rb +0 -159
  478. data/lib/pg_query/deparse/rename.rb +0 -41
  479. data/lib/pg_query/legacy_parsetree.rb +0 -109
  480. data/lib/pg_query/node_types.rb +0 -296
@@ -0,0 +1,1651 @@
1
+ /*--------------------------------------------------------------------
2
+ * Symbols referenced in this file:
3
+ * - pg_encoding_max_length
4
+ * - pg_wchar_table
5
+ * - pg_utf_mblen
6
+ * - pg_mule_mblen
7
+ * - pg_ascii2wchar_with_len
8
+ * - pg_wchar2single_with_len
9
+ * - pg_ascii_mblen
10
+ * - pg_ascii_dsplen
11
+ * - pg_ascii_verifier
12
+ * - pg_eucjp2wchar_with_len
13
+ * - pg_euc2wchar_with_len
14
+ * - pg_wchar2euc_with_len
15
+ * - pg_eucjp_mblen
16
+ * - pg_euc_mblen
17
+ * - pg_eucjp_dsplen
18
+ * - pg_eucjp_verifier
19
+ * - pg_euccn2wchar_with_len
20
+ * - pg_euccn_mblen
21
+ * - pg_euccn_dsplen
22
+ * - pg_euckr_verifier
23
+ * - pg_euckr2wchar_with_len
24
+ * - pg_euckr_mblen
25
+ * - pg_euckr_dsplen
26
+ * - pg_euc_dsplen
27
+ * - pg_euctw2wchar_with_len
28
+ * - pg_euctw_mblen
29
+ * - pg_euctw_dsplen
30
+ * - pg_euctw_verifier
31
+ * - pg_utf2wchar_with_len
32
+ * - pg_wchar2utf_with_len
33
+ * - unicode_to_utf8
34
+ * - pg_utf_dsplen
35
+ * - utf8_to_unicode
36
+ * - ucs_wcwidth
37
+ * - mbbisearch
38
+ * - pg_utf8_verifier
39
+ * - pg_utf8_islegal
40
+ * - pg_mule2wchar_with_len
41
+ * - pg_wchar2mule_with_len
42
+ * - pg_mule_dsplen
43
+ * - pg_mule_verifier
44
+ * - pg_latin12wchar_with_len
45
+ * - pg_latin1_mblen
46
+ * - pg_latin1_dsplen
47
+ * - pg_latin1_verifier
48
+ * - pg_sjis_mblen
49
+ * - pg_sjis_dsplen
50
+ * - pg_sjis_verifier
51
+ * - pg_big5_mblen
52
+ * - pg_big5_dsplen
53
+ * - pg_big5_verifier
54
+ * - pg_gbk_mblen
55
+ * - pg_gbk_dsplen
56
+ * - pg_gbk_verifier
57
+ * - pg_uhc_mblen
58
+ * - pg_uhc_dsplen
59
+ * - pg_uhc_verifier
60
+ * - pg_gb18030_mblen
61
+ * - pg_gb18030_dsplen
62
+ * - pg_gb18030_verifier
63
+ * - pg_johab_mblen
64
+ * - pg_johab_dsplen
65
+ * - pg_johab_verifier
66
+ * - pg_encoding_mblen
67
+ *--------------------------------------------------------------------
68
+ */
69
+
70
+ /*-------------------------------------------------------------------------
71
+ *
72
+ * wchar.c
73
+ * Functions for working with multibyte characters in various encodings.
74
+ *
75
+ * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
76
+ *
77
+ * IDENTIFICATION
78
+ * src/common/wchar.c
79
+ *
80
+ *-------------------------------------------------------------------------
81
+ */
82
+ #include "c.h"
83
+
84
+ #include "mb/pg_wchar.h"
85
+
86
+
87
+ /*
88
+ * Operations on multi-byte encodings are driven by a table of helper
89
+ * functions.
90
+ *
91
+ * To add support for an encoding, define mblen(), dsplen() and verifier() for
92
+ * the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
93
+ * conversion functions.
94
+ *
95
+ * These functions generally assume that their input is validly formed.
96
+ * The "verifier" functions, further down in the file, have to be more
97
+ * paranoid.
98
+ *
99
+ * We expect that mblen() does not need to examine more than the first byte
100
+ * of the character to discover the correct length. GB18030 is an exception
101
+ * to that rule, though, as it also looks at second byte. But even that
102
+ * behaves in a predictable way, if you only pass the first byte: it will
103
+ * treat 4-byte encoded characters as two 2-byte encoded characters, which is
104
+ * good enough for all current uses.
105
+ *
106
+ * Note: for the display output of psql to work properly, the return values
107
+ * of the dsplen functions must conform to the Unicode standard. In particular
108
+ * the NUL character is zero width and control characters are generally
109
+ * width -1. It is recommended that non-ASCII encodings refer their ASCII
110
+ * subset to the ASCII routines to ensure consistency.
111
+ */
112
+
113
+ /*
114
+ * SQL/ASCII
115
+ */
116
+ static int
117
+ pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
118
+ {
119
+ int cnt = 0;
120
+
121
+ while (len > 0 && *from)
122
+ {
123
+ *to++ = *from++;
124
+ len--;
125
+ cnt++;
126
+ }
127
+ *to = 0;
128
+ return cnt;
129
+ }
130
+
131
/* Every SQL/ASCII character occupies exactly one byte; "s" is not examined. */
static int
pg_ascii_mblen(const unsigned char *s)
{
	const int	ascii_char_bytes = 1;

	return ascii_char_bytes;
}
136
+
137
/*
 * Display width of the single byte at *s: 0 for NUL, -1 for bytes below
 * 0x20 and for 0x7f, and 1 for everything else.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
	const unsigned char ch = *s;

	if (ch == '\0')
		return 0;

	return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}
147
+
148
+ /*
149
+ * EUC
150
+ */
151
+ static int
152
+ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
153
+ {
154
+ int cnt = 0;
155
+
156
+ while (len > 0 && *from)
157
+ {
158
+ if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
159
+ * KANA") */
160
+ {
161
+ from++;
162
+ *to = (SS2 << 8) | *from++;
163
+ len -= 2;
164
+ }
165
+ else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
166
+ {
167
+ from++;
168
+ *to = (SS3 << 16) | (*from++ << 8);
169
+ *to |= *from++;
170
+ len -= 3;
171
+ }
172
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
173
+ {
174
+ *to = *from++ << 8;
175
+ *to |= *from++;
176
+ len -= 2;
177
+ }
178
+ else /* must be ASCII */
179
+ {
180
+ *to = *from++;
181
+ len--;
182
+ }
183
+ to++;
184
+ cnt++;
185
+ }
186
+ *to = 0;
187
+ return cnt;
188
+ }
189
+
190
+ static inline int
191
+ pg_euc_mblen(const unsigned char *s)
192
+ {
193
+ int len;
194
+
195
+ if (*s == SS2)
196
+ len = 2;
197
+ else if (*s == SS3)
198
+ len = 3;
199
+ else if (IS_HIGHBIT_SET(*s))
200
+ len = 2;
201
+ else
202
+ len = 1;
203
+ return len;
204
+ }
205
+
206
+ static inline int
207
+ pg_euc_dsplen(const unsigned char *s)
208
+ {
209
+ int len;
210
+
211
+ if (*s == SS2)
212
+ len = 2;
213
+ else if (*s == SS3)
214
+ len = 2;
215
+ else if (IS_HIGHBIT_SET(*s))
216
+ len = 2;
217
+ else
218
+ len = pg_ascii_dsplen(s);
219
+ return len;
220
+ }
221
+
222
+ /*
223
+ * EUC_JP
224
+ */
225
+ static int
226
+ pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
227
+ {
228
+ return pg_euc2wchar_with_len(from, to, len);
229
+ }
230
+
231
/* EUC_JP character byte length: same rules as generic EUC. */
static int
pg_eucjp_mblen(const unsigned char *s)
{
	const int	nbytes = pg_euc_mblen(s);

	return nbytes;
}
236
+
237
+ static int
238
+ pg_eucjp_dsplen(const unsigned char *s)
239
+ {
240
+ int len;
241
+
242
+ if (*s == SS2)
243
+ len = 1;
244
+ else if (*s == SS3)
245
+ len = 2;
246
+ else if (IS_HIGHBIT_SET(*s))
247
+ len = 2;
248
+ else
249
+ len = pg_ascii_dsplen(s);
250
+ return len;
251
+ }
252
+
253
+ /*
254
+ * EUC_KR
255
+ */
256
+ static int
257
+ pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
258
+ {
259
+ return pg_euc2wchar_with_len(from, to, len);
260
+ }
261
+
262
/* EUC_KR character byte length: same rules as generic EUC. */
static int
pg_euckr_mblen(const unsigned char *s)
{
	const int	nbytes = pg_euc_mblen(s);

	return nbytes;
}
267
+
268
/* EUC_KR display width: same rules as generic EUC. */
static int
pg_euckr_dsplen(const unsigned char *s)
{
	const int	columns = pg_euc_dsplen(s);

	return columns;
}
273
+
274
+ /*
275
+ * EUC_CN
276
+ *
277
+ */
278
+ static int
279
+ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
280
+ {
281
+ int cnt = 0;
282
+
283
+ while (len > 0 && *from)
284
+ {
285
+ if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
286
+ {
287
+ from++;
288
+ *to = (SS2 << 16) | (*from++ << 8);
289
+ *to |= *from++;
290
+ len -= 3;
291
+ }
292
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
293
+ {
294
+ from++;
295
+ *to = (SS3 << 16) | (*from++ << 8);
296
+ *to |= *from++;
297
+ len -= 3;
298
+ }
299
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
300
+ {
301
+ *to = *from++ << 8;
302
+ *to |= *from++;
303
+ len -= 2;
304
+ }
305
+ else
306
+ {
307
+ *to = *from++;
308
+ len--;
309
+ }
310
+ to++;
311
+ cnt++;
312
+ }
313
+ *to = 0;
314
+ return cnt;
315
+ }
316
+
317
/* EUC_CN character byte length: 2 when the lead byte has its high bit set, else 1. */
static int
pg_euccn_mblen(const unsigned char *s)
{
	return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
328
+
329
/* EUC_CN display width: 2 columns for a high-bit lead byte, ASCII rules otherwise. */
static int
pg_euccn_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return pg_ascii_dsplen(s);
}
340
+
341
+ /*
342
+ * EUC_TW
343
+ *
344
+ */
345
+ static int
346
+ pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
347
+ {
348
+ int cnt = 0;
349
+
350
+ while (len > 0 && *from)
351
+ {
352
+ if (*from == SS2 && len >= 4) /* code set 2 */
353
+ {
354
+ from++;
355
+ *to = (((uint32) SS2) << 24) | (*from++ << 16);
356
+ *to |= *from++ << 8;
357
+ *to |= *from++;
358
+ len -= 4;
359
+ }
360
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
361
+ {
362
+ from++;
363
+ *to = (SS3 << 16) | (*from++ << 8);
364
+ *to |= *from++;
365
+ len -= 3;
366
+ }
367
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
368
+ {
369
+ *to = *from++ << 8;
370
+ *to |= *from++;
371
+ len -= 2;
372
+ }
373
+ else
374
+ {
375
+ *to = *from++;
376
+ len--;
377
+ }
378
+ to++;
379
+ cnt++;
380
+ }
381
+ *to = 0;
382
+ return cnt;
383
+ }
384
+
385
+ static int
386
+ pg_euctw_mblen(const unsigned char *s)
387
+ {
388
+ int len;
389
+
390
+ if (*s == SS2)
391
+ len = 4;
392
+ else if (*s == SS3)
393
+ len = 3;
394
+ else if (IS_HIGHBIT_SET(*s))
395
+ len = 2;
396
+ else
397
+ len = 1;
398
+ return len;
399
+ }
400
+
401
+ static int
402
+ pg_euctw_dsplen(const unsigned char *s)
403
+ {
404
+ int len;
405
+
406
+ if (*s == SS2)
407
+ len = 2;
408
+ else if (*s == SS3)
409
+ len = 2;
410
+ else if (IS_HIGHBIT_SET(*s))
411
+ len = 2;
412
+ else
413
+ len = pg_ascii_dsplen(s);
414
+ return len;
415
+ }
416
+
417
+ /*
418
+ * Convert pg_wchar to EUC_* encoding.
419
+ * caller must allocate enough space for "to", including a trailing zero!
420
+ * len: length of from.
421
+ * "from" not necessarily null terminated.
422
+ */
423
+ static int
424
+ pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
425
+ {
426
+ int cnt = 0;
427
+
428
+ while (len > 0 && *from)
429
+ {
430
+ unsigned char c;
431
+
432
+ if ((c = (*from >> 24)))
433
+ {
434
+ *to++ = c;
435
+ *to++ = (*from >> 16) & 0xff;
436
+ *to++ = (*from >> 8) & 0xff;
437
+ *to++ = *from & 0xff;
438
+ cnt += 4;
439
+ }
440
+ else if ((c = (*from >> 16)))
441
+ {
442
+ *to++ = c;
443
+ *to++ = (*from >> 8) & 0xff;
444
+ *to++ = *from & 0xff;
445
+ cnt += 3;
446
+ }
447
+ else if ((c = (*from >> 8)))
448
+ {
449
+ *to++ = c;
450
+ *to++ = *from & 0xff;
451
+ cnt += 2;
452
+ }
453
+ else
454
+ {
455
+ *to++ = *from;
456
+ cnt++;
457
+ }
458
+ from++;
459
+ len--;
460
+ }
461
+ *to = 0;
462
+ return cnt;
463
+ }
464
+
465
+
466
+ /*
467
+ * JOHAB
468
+ */
469
/* JOHAB character byte length: computed with the generic EUC rules. */
static int
pg_johab_mblen(const unsigned char *s)
{
	const int	nbytes = pg_euc_mblen(s);

	return nbytes;
}
474
+
475
/* JOHAB display width: computed with the generic EUC rules. */
static int
pg_johab_dsplen(const unsigned char *s)
{
	const int	columns = pg_euc_dsplen(s);

	return columns;
}
480
+
481
+ /*
482
+ * convert UTF8 string to pg_wchar (UCS-4)
483
+ * caller must allocate enough space for "to", including a trailing zero!
484
+ * len: length of from.
485
+ * "from" not necessarily null terminated.
486
+ */
487
+ static int
488
+ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
489
+ {
490
+ int cnt = 0;
491
+ uint32 c1,
492
+ c2,
493
+ c3,
494
+ c4;
495
+
496
+ while (len > 0 && *from)
497
+ {
498
+ if ((*from & 0x80) == 0)
499
+ {
500
+ *to = *from++;
501
+ len--;
502
+ }
503
+ else if ((*from & 0xe0) == 0xc0)
504
+ {
505
+ if (len < 2)
506
+ break; /* drop trailing incomplete char */
507
+ c1 = *from++ & 0x1f;
508
+ c2 = *from++ & 0x3f;
509
+ *to = (c1 << 6) | c2;
510
+ len -= 2;
511
+ }
512
+ else if ((*from & 0xf0) == 0xe0)
513
+ {
514
+ if (len < 3)
515
+ break; /* drop trailing incomplete char */
516
+ c1 = *from++ & 0x0f;
517
+ c2 = *from++ & 0x3f;
518
+ c3 = *from++ & 0x3f;
519
+ *to = (c1 << 12) | (c2 << 6) | c3;
520
+ len -= 3;
521
+ }
522
+ else if ((*from & 0xf8) == 0xf0)
523
+ {
524
+ if (len < 4)
525
+ break; /* drop trailing incomplete char */
526
+ c1 = *from++ & 0x07;
527
+ c2 = *from++ & 0x3f;
528
+ c3 = *from++ & 0x3f;
529
+ c4 = *from++ & 0x3f;
530
+ *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
531
+ len -= 4;
532
+ }
533
+ else
534
+ {
535
+ /* treat a bogus char as length 1; not ours to raise error */
536
+ *to = *from++;
537
+ len--;
538
+ }
539
+ to++;
540
+ cnt++;
541
+ }
542
+ *to = 0;
543
+ return cnt;
544
+ }
545
+
546
+
547
+ /*
548
+ * Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
549
+ * space allocated.
550
+ */
551
+ unsigned char *
552
+ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
553
+ {
554
+ if (c <= 0x7F)
555
+ {
556
+ utf8string[0] = c;
557
+ }
558
+ else if (c <= 0x7FF)
559
+ {
560
+ utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
561
+ utf8string[1] = 0x80 | (c & 0x3F);
562
+ }
563
+ else if (c <= 0xFFFF)
564
+ {
565
+ utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
566
+ utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
567
+ utf8string[2] = 0x80 | (c & 0x3F);
568
+ }
569
+ else
570
+ {
571
+ utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
572
+ utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
573
+ utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
574
+ utf8string[3] = 0x80 | (c & 0x3F);
575
+ }
576
+
577
+ return utf8string;
578
+ }
579
+
580
+ /*
581
+ * Trivial conversion from pg_wchar to UTF-8.
582
+ * caller should allocate enough space for "to"
583
+ * len: length of from.
584
+ * "from" not necessarily null terminated.
585
+ */
586
+ static int
587
+ pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
588
+ {
589
+ int cnt = 0;
590
+
591
+ while (len > 0 && *from)
592
+ {
593
+ int char_len;
594
+
595
+ unicode_to_utf8(*from, to);
596
+ char_len = pg_utf_mblen(to);
597
+ cnt += char_len;
598
+ to += char_len;
599
+ from++;
600
+ len--;
601
+ }
602
+ *to = 0;
603
+ return cnt;
604
+ }
605
+
606
+ /*
607
+ * Return the byte length of a UTF8 character pointed to by s
608
+ *
609
+ * Note: in the current implementation we do not support UTF8 sequences
610
+ * of more than 4 bytes; hence do NOT return a value larger than 4.
611
+ * We return "1" for any leading byte that is either flat-out illegal or
612
+ * indicates a length larger than we support.
613
+ *
614
+ * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
615
+ * other places would need to be fixed to change this.
616
+ */
617
int
pg_utf_mblen(const unsigned char *s)
{
	const unsigned char lead = *s;

	/* Classify by the high-order bit pattern of the lead byte. */
	if ((lead & 0x80) == 0)
		return 1;
	if ((lead & 0xe0) == 0xc0)
		return 2;
	if ((lead & 0xf0) == 0xe0)
		return 3;
	if ((lead & 0xf8) == 0xf0)
		return 4;
#ifdef NOT_USED
	if ((lead & 0xfc) == 0xf8)
		return 5;
	if ((lead & 0xfe) == 0xfc)
		return 6;
#endif

	/* any other lead byte is reported as a one-byte character */
	return 1;
}
640
+
641
+ /*
642
+ * This is an implementation of wcwidth() and wcswidth() as defined in
643
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
644
+ * <http://www.unix.org/online.html>
645
+ *
646
+ * Markus Kuhn -- 2001-09-08 -- public domain
647
+ *
648
+ * customised for PostgreSQL
649
+ *
650
+ * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
651
+ */
652
+
653
+ struct mbinterval
654
+ {
655
+ unsigned short first;
656
+ unsigned short last;
657
+ };
658
+
659
+ /* auxiliary function for binary search in interval table */
660
+ static int
661
+ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
662
+ {
663
+ int min = 0;
664
+ int mid;
665
+
666
+ if (ucs < table[0].first || ucs > table[max].last)
667
+ return 0;
668
+ while (max >= min)
669
+ {
670
+ mid = (min + max) / 2;
671
+ if (ucs > table[mid].last)
672
+ min = mid + 1;
673
+ else if (ucs < table[mid].first)
674
+ max = mid - 1;
675
+ else
676
+ return 1;
677
+ }
678
+
679
+ return 0;
680
+ }
681
+
682
+
683
+ /* The following functions define the column width of an ISO 10646
684
+ * character as follows:
685
+ *
686
+ * - The null character (U+0000) has a column width of 0.
687
+ *
688
+ * - Other C0/C1 control characters and DEL will lead to a return
689
+ * value of -1.
690
+ *
691
+ * - Non-spacing and enclosing combining characters (general
692
+ * category code Mn or Me in the Unicode database) have a
693
+ * column width of 0.
694
+ *
695
+ * - Other format characters (general category code Cf in the Unicode
696
+ * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
697
+ *
698
+ * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
699
+ * have a column width of 0.
700
+ *
701
+ * - Spacing characters in the East Asian Wide (W) or East Asian
702
+ * FullWidth (F) category as defined in Unicode Technical
703
+ * Report #11 have a column width of 2.
704
+ *
705
+ * - All remaining characters (including all printable
706
+ * ISO 8859-1 and WGL4 characters, Unicode control characters,
707
+ * etc.) have a column width of 1.
708
+ *
709
+ * This implementation assumes that wchar_t characters are encoded
710
+ * in ISO 10646.
711
+ */
712
+
713
+ static int
714
+ ucs_wcwidth(pg_wchar ucs)
715
+ {
716
+ #include "common/unicode_combining_table.h"
717
+
718
+ /* test for 8-bit control characters */
719
+ if (ucs == 0)
720
+ return 0;
721
+
722
+ if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
723
+ return -1;
724
+
725
+ /* binary search in table of non-spacing characters */
726
+ if (mbbisearch(ucs, combining,
727
+ sizeof(combining) / sizeof(struct mbinterval) - 1))
728
+ return 0;
729
+
730
+ /*
731
+ * if we arrive here, ucs is not a combining or C0/C1 control character
732
+ */
733
+
734
+ return 1 +
735
+ (ucs >= 0x1100 &&
736
+ (ucs <= 0x115f || /* Hangul Jamo init. consonants */
737
+ (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
738
+ ucs != 0x303f) || /* CJK ... Yi */
739
+ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
740
+ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
741
+ * Ideographs */
742
+ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
743
+ (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
744
+ (ucs >= 0xffe0 && ucs <= 0xffe6) ||
745
+ (ucs >= 0x20000 && ucs <= 0x2ffff)));
746
+ }
747
+
748
+ /*
749
+ * Convert a UTF-8 character to a Unicode code point.
750
+ * This is a one-character version of pg_utf2wchar_with_len.
751
+ *
752
+ * No error checks here, c must point to a long-enough string.
753
+ */
754
+ pg_wchar
755
+ utf8_to_unicode(const unsigned char *c)
756
+ {
757
+ if ((*c & 0x80) == 0)
758
+ return (pg_wchar) c[0];
759
+ else if ((*c & 0xe0) == 0xc0)
760
+ return (pg_wchar) (((c[0] & 0x1f) << 6) |
761
+ (c[1] & 0x3f));
762
+ else if ((*c & 0xf0) == 0xe0)
763
+ return (pg_wchar) (((c[0] & 0x0f) << 12) |
764
+ ((c[1] & 0x3f) << 6) |
765
+ (c[2] & 0x3f));
766
+ else if ((*c & 0xf8) == 0xf0)
767
+ return (pg_wchar) (((c[0] & 0x07) << 18) |
768
+ ((c[1] & 0x3f) << 12) |
769
+ ((c[2] & 0x3f) << 6) |
770
+ (c[3] & 0x3f));
771
+ else
772
+ /* that is an invalid code on purpose */
773
+ return 0xffffffff;
774
+ }
775
+
776
+ static int
777
+ pg_utf_dsplen(const unsigned char *s)
778
+ {
779
+ return ucs_wcwidth(utf8_to_unicode(s));
780
+ }
781
+
782
+ /*
783
+ * convert mule internal code to pg_wchar
784
+ * caller should allocate enough space for "to"
785
+ * len: length of from.
786
+ * "from" not necessarily null terminated.
787
+ */
788
+ static int
789
+ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
790
+ {
791
+ int cnt = 0;
792
+
793
+ while (len > 0 && *from)
794
+ {
795
+ if (IS_LC1(*from) && len >= 2)
796
+ {
797
+ *to = *from++ << 16;
798
+ *to |= *from++;
799
+ len -= 2;
800
+ }
801
+ else if (IS_LCPRV1(*from) && len >= 3)
802
+ {
803
+ from++;
804
+ *to = *from++ << 16;
805
+ *to |= *from++;
806
+ len -= 3;
807
+ }
808
+ else if (IS_LC2(*from) && len >= 3)
809
+ {
810
+ *to = *from++ << 16;
811
+ *to |= *from++ << 8;
812
+ *to |= *from++;
813
+ len -= 3;
814
+ }
815
+ else if (IS_LCPRV2(*from) && len >= 4)
816
+ {
817
+ from++;
818
+ *to = *from++ << 16;
819
+ *to |= *from++ << 8;
820
+ *to |= *from++;
821
+ len -= 4;
822
+ }
823
+ else
824
+ { /* assume ASCII */
825
+ *to = (unsigned char) *from++;
826
+ len--;
827
+ }
828
+ to++;
829
+ cnt++;
830
+ }
831
+ *to = 0;
832
+ return cnt;
833
+ }
834
+
835
+ /*
836
+ * convert pg_wchar to mule internal code
837
+ * caller should allocate enough space for "to"
838
+ * len: length of from.
839
+ * "from" not necessarily null terminated.
840
+ */
841
+ static int
842
+ pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
843
+ {
844
+ int cnt = 0;
845
+
846
+ while (len > 0 && *from)
847
+ {
848
+ unsigned char lb;
849
+
850
+ lb = (*from >> 16) & 0xff;
851
+ if (IS_LC1(lb))
852
+ {
853
+ *to++ = lb;
854
+ *to++ = *from & 0xff;
855
+ cnt += 2;
856
+ }
857
+ else if (IS_LC2(lb))
858
+ {
859
+ *to++ = lb;
860
+ *to++ = (*from >> 8) & 0xff;
861
+ *to++ = *from & 0xff;
862
+ cnt += 3;
863
+ }
864
+ else if (IS_LCPRV1_A_RANGE(lb))
865
+ {
866
+ *to++ = LCPRV1_A;
867
+ *to++ = lb;
868
+ *to++ = *from & 0xff;
869
+ cnt += 3;
870
+ }
871
+ else if (IS_LCPRV1_B_RANGE(lb))
872
+ {
873
+ *to++ = LCPRV1_B;
874
+ *to++ = lb;
875
+ *to++ = *from & 0xff;
876
+ cnt += 3;
877
+ }
878
+ else if (IS_LCPRV2_A_RANGE(lb))
879
+ {
880
+ *to++ = LCPRV2_A;
881
+ *to++ = lb;
882
+ *to++ = (*from >> 8) & 0xff;
883
+ *to++ = *from & 0xff;
884
+ cnt += 4;
885
+ }
886
+ else if (IS_LCPRV2_B_RANGE(lb))
887
+ {
888
+ *to++ = LCPRV2_B;
889
+ *to++ = lb;
890
+ *to++ = (*from >> 8) & 0xff;
891
+ *to++ = *from & 0xff;
892
+ cnt += 4;
893
+ }
894
+ else
895
+ {
896
+ *to++ = *from & 0xff;
897
+ cnt += 1;
898
+ }
899
+ from++;
900
+ len--;
901
+ }
902
+ *to = 0;
903
+ return cnt;
904
+ }
905
+
906
+ /* exported for direct use by conv.c */
907
int
pg_mule_mblen(const unsigned char *s)
{
	/* byte length is decided by the class of the lead byte */
	if (IS_LC1(*s))
		return 2;
	if (IS_LCPRV1(*s) || IS_LC2(*s))
		return 3;
	if (IS_LCPRV2(*s))
		return 4;

	return 1;					/* assume ASCII */
}
924
+
925
static int
pg_mule_dsplen(const unsigned char *s)
{
	/*
	 * Note: it's not really appropriate to assume that all multibyte charsets
	 * are double-wide on screen.  But this seems an okay approximation for
	 * the MULE charsets we currently support.
	 */
	if (IS_LC1(*s) || IS_LCPRV1(*s))
		return 1;
	if (IS_LC2(*s) || IS_LCPRV2(*s))
		return 2;

	return 1;					/* assume ASCII */
}
949
+
950
+ /*
951
+ * ISO8859-1
952
+ */
953
+ static int
954
+ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
955
+ {
956
+ int cnt = 0;
957
+
958
+ while (len > 0 && *from)
959
+ {
960
+ *to++ = *from++;
961
+ len--;
962
+ cnt++;
963
+ }
964
+ *to = 0;
965
+ return cnt;
966
+ }
967
+
968
+ /*
969
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
970
+ * high bits.
971
+ * caller should allocate enough space for "to"
972
+ * len: length of from.
973
+ * "from" not necessarily null terminated.
974
+ */
975
+ static int
976
+ pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
977
+ {
978
+ int cnt = 0;
979
+
980
+ while (len > 0 && *from)
981
+ {
982
+ *to++ = *from++;
983
+ len--;
984
+ cnt++;
985
+ }
986
+ *to = 0;
987
+ return cnt;
988
+ }
989
+
990
/* Every ISO8859-1 character occupies exactly one byte; "s" is not examined. */
static int
pg_latin1_mblen(const unsigned char *s)
{
	const int	latin1_char_bytes = 1;

	return latin1_char_bytes;
}
995
+
996
/* ISO8859-1 display width: the ASCII rules are applied to every byte. */
static int
pg_latin1_dsplen(const unsigned char *s)
{
	const int	columns = pg_ascii_dsplen(s);

	return columns;
}
1001
+
1002
+ /*
1003
+ * SJIS
1004
+ */
1005
/*
 * SJIS character byte length: bytes 0xa1..0xdf (1 byte kana?) and bytes
 * without the high bit are one byte long; any other high-bit byte starts a
 * two-byte character.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
	const unsigned char lead = *s;
	const int	is_single_kana = (lead >= 0xa1 && lead <= 0xdf);

	if (!is_single_kana && IS_HIGHBIT_SET(lead))
		return 2;				/* kanji? */

	return 1;
}
1018
+
1019
/*
 * SJIS display width: 1 column for bytes 0xa1..0xdf, 2 columns for any
 * other high-bit lead byte, ASCII rules otherwise.
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
	if (*s >= 0xa1 && *s <= 0xdf)
		return 1;				/* 1 byte kana? */
	if (IS_HIGHBIT_SET(*s))
		return 2;				/* kanji? */

	return pg_ascii_dsplen(s);	/* should be ASCII */
}
1032
+
1033
+ /*
1034
+ * Big5
1035
+ */
1036
/* Big5 character byte length: 2 when the lead byte has its high bit set, else 1. */
static int
pg_big5_mblen(const unsigned char *s)
{
	return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1047
+
1048
/* Big5 display width: 2 columns for a high-bit lead byte, ASCII rules otherwise. */
static int
pg_big5_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return pg_ascii_dsplen(s);
}
1059
+
1060
+ /*
1061
+ * GBK
1062
+ */
1063
/* GBK character byte length: 2 when the lead byte has its high bit set, else 1. */
static int
pg_gbk_mblen(const unsigned char *s)
{
	return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1074
+
1075
/* GBK display width: 2 columns for a high-bit lead byte, ASCII rules otherwise. */
static int
pg_gbk_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return pg_ascii_dsplen(s);
}
1086
+
1087
+ /*
1088
+ * UHC
1089
+ */
1090
/* UHC character byte length: 2 when the lead byte has its high bit set, else 1. */
static int
pg_uhc_mblen(const unsigned char *s)
{
	return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1101
+
1102
/* UHC display width: 2 columns for a high-bit lead byte, ASCII rules otherwise. */
static int
pg_uhc_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return pg_ascii_dsplen(s);
}
1113
+
1114
+ /*
1115
+ * GB18030
1116
+ * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
1117
+ */
1118
+
1119
+ /*
1120
+ * Unlike all other mblen() functions, this also looks at the second byte of
1121
+ * the input. However, if you only pass the first byte of a multi-byte
1122
+ * string, and \0 as the second byte, this still works in a predictable way:
1123
+ * a 4-byte character will be reported as two 2-byte characters. That's
1124
+ * enough for all current uses, as a client-only encoding. It works that
1125
+ * way, because in any valid 4-byte GB18030-encoded character, the third and
1126
+ * fourth byte look like a 2-byte encoded character, when looked at
1127
+ * separately.
1128
+ */
1129
static int
pg_gb18030_mblen(const unsigned char *s)
{
	/* ASCII: the second byte is not examined at all */
	if (!IS_HIGHBIT_SET(s[0]))
		return 1;

	/* a second byte in 0x30..0x39 marks a four-byte character */
	return (s[1] >= 0x30 && s[1] <= 0x39) ? 4 : 2;
}
1142
+
1143
/* GB18030 display width: 2 columns for a high-bit lead byte, ASCII rules otherwise. */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
	if (IS_HIGHBIT_SET(*s))
		return 2;

	return pg_ascii_dsplen(s);	/* ASCII */
}
1154
+
1155
+ /*
1156
+ *-------------------------------------------------------------------
1157
+ * multibyte sequence validators
1158
+ *
1159
+ * These functions accept "s", a pointer to the first byte of a string,
1160
+ * and "len", the remaining length of the string. If there is a validly
1161
+ * encoded character beginning at *s, return its length in bytes; else
1162
+ * return -1.
1163
+ *
1164
+ * The functions can assume that len > 0 and that *s != '\0', but they must
1165
+ * test for and reject zeroes in any additional bytes of a multibyte character.
1166
+ *
1167
+ * Note that this definition allows the function for a single-byte
1168
+ * encoding to be just "return 1".
1169
+ *-------------------------------------------------------------------
1170
+ */
1171
+
1172
/* SQL/ASCII verifier: every byte is accepted as a one-byte character. */
static int
pg_ascii_verifier(const unsigned char *s, int len)
{
	const int	accepted_bytes = 1;

	return accepted_bytes;
}
1177
+
1178
+ #define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1179
+
1180
+ static int
1181
+ pg_eucjp_verifier(const unsigned char *s, int len)
1182
+ {
1183
+ int l;
1184
+ unsigned char c1,
1185
+ c2;
1186
+
1187
+ c1 = *s++;
1188
+
1189
+ switch (c1)
1190
+ {
1191
+ case SS2: /* JIS X 0201 */
1192
+ l = 2;
1193
+ if (l > len)
1194
+ return -1;
1195
+ c2 = *s++;
1196
+ if (c2 < 0xa1 || c2 > 0xdf)
1197
+ return -1;
1198
+ break;
1199
+
1200
+ case SS3: /* JIS X 0212 */
1201
+ l = 3;
1202
+ if (l > len)
1203
+ return -1;
1204
+ c2 = *s++;
1205
+ if (!IS_EUC_RANGE_VALID(c2))
1206
+ return -1;
1207
+ c2 = *s++;
1208
+ if (!IS_EUC_RANGE_VALID(c2))
1209
+ return -1;
1210
+ break;
1211
+
1212
+ default:
1213
+ if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1214
+ {
1215
+ l = 2;
1216
+ if (l > len)
1217
+ return -1;
1218
+ if (!IS_EUC_RANGE_VALID(c1))
1219
+ return -1;
1220
+ c2 = *s++;
1221
+ if (!IS_EUC_RANGE_VALID(c2))
1222
+ return -1;
1223
+ }
1224
+ else
1225
+ /* must be ASCII */
1226
+ {
1227
+ l = 1;
1228
+ }
1229
+ break;
1230
+ }
1231
+
1232
+ return l;
1233
+ }
1234
+
1235
/*
 * Verify one EUC_KR character starting at s with "len" bytes available.
 * Returns 1 for a byte without the high bit, 2 for a pair of bytes that are
 * both in the EUC range, and -1 otherwise.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
	const unsigned char c1 = s[0];

	/* must be ASCII */
	if (!IS_HIGHBIT_SET(c1))
		return 1;

	if (len < 2)
		return -1;
	if (!IS_EUC_RANGE_VALID(c1) || !IS_EUC_RANGE_VALID(s[1]))
		return -1;
	return 2;
}
1263
+
1264
+ /* EUC-CN byte sequences are exactly same as EUC-KR */
1265
+ #define pg_euccn_verifier pg_euckr_verifier
1266
+
1267
+ static int
1268
+ pg_euctw_verifier(const unsigned char *s, int len)
1269
+ {
1270
+ int l;
1271
+ unsigned char c1,
1272
+ c2;
1273
+
1274
+ c1 = *s++;
1275
+
1276
+ switch (c1)
1277
+ {
1278
+ case SS2: /* CNS 11643 Plane 1-7 */
1279
+ l = 4;
1280
+ if (l > len)
1281
+ return -1;
1282
+ c2 = *s++;
1283
+ if (c2 < 0xa1 || c2 > 0xa7)
1284
+ return -1;
1285
+ c2 = *s++;
1286
+ if (!IS_EUC_RANGE_VALID(c2))
1287
+ return -1;
1288
+ c2 = *s++;
1289
+ if (!IS_EUC_RANGE_VALID(c2))
1290
+ return -1;
1291
+ break;
1292
+
1293
+ case SS3: /* unused */
1294
+ return -1;
1295
+
1296
+ default:
1297
+ if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1298
+ {
1299
+ l = 2;
1300
+ if (l > len)
1301
+ return -1;
1302
+ /* no further range check on c1? */
1303
+ c2 = *s++;
1304
+ if (!IS_EUC_RANGE_VALID(c2))
1305
+ return -1;
1306
+ }
1307
+ else
1308
+ /* must be ASCII */
1309
+ {
1310
+ l = 1;
1311
+ }
1312
+ break;
1313
+ }
1314
+ return l;
1315
+ }
1316
+
1317
/*
 * Verify one JOHAB character starting at s with "len" bytes available.
 * Returns its byte length as reported by pg_johab_mblen(), or -1 if it is
 * truncated or a trailing byte is outside the EUC range.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_johab_mblen(s);
	int			i;

	if (len < mbl)
		return -1;

	/* a lead byte without the high bit needs no further checks */
	if (!IS_HIGHBIT_SET(s[0]))
		return mbl;

	for (i = 1; i < mbl; i++)
	{
		if (!IS_EUC_RANGE_VALID(s[i]))
			return -1;
	}
	return mbl;
}
1340
+
1341
/*
 * Verify one MULE character starting at s with "len" bytes available.
 * Returns its byte length as reported by pg_mule_mblen(), or -1 if it is
 * truncated or a trailing byte lacks the high bit.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_mule_mblen(s);
	int			i;

	if (len < mbl)
		return -1;

	for (i = 1; i < mbl; i++)
	{
		if (!IS_HIGHBIT_SET(s[i]))
			return -1;
	}
	return mbl;
}
1361
+
1362
/* ISO8859-1 verifier: every byte is accepted as a one-byte character. */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
	const int	accepted_bytes = 1;

	return accepted_bytes;
}
1367
+
1368
/*
 * Verify one SJIS character starting at s with "len" bytes available.
 * Returns its byte length as reported by pg_sjis_mblen(), or -1 if it is
 * truncated or its two bytes fail the head/tail checks.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
	const int	mbl = pg_sjis_mblen(s);

	if (len < mbl)
		return -1;

	/* pg_sjis_mblen already verified it */
	if (mbl == 1)
		return mbl;

	if (ISSJISHEAD(s[0]) && ISSJISTAIL(s[1]))
		return mbl;

	return -1;
}
1390
+
1391
/*
 * Verify the first BIG5 character of the string at "s".
 *
 * The character length comes from pg_big5_mblen().  The character is
 * rejected if "len" is smaller than that length or if any byte after the
 * first is a NUL byte.
 *
 * Returns the character's byte length when valid, -1 otherwise.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_big5_mblen(s);
	int			pos;

	if (charlen > len)
		return -1;

	/* A zero byte may not appear inside a multibyte character */
	for (pos = 1; pos < charlen; pos++)
	{
		if (s[pos] == '\0')
			return -1;
	}

	return charlen;
}
1410
+
1411
/*
 * Verify the first GBK character of the string at "s".
 *
 * The character length comes from pg_gbk_mblen().  The character is
 * rejected if "len" is smaller than that length or if any byte after the
 * first is a NUL byte.
 *
 * Returns the character's byte length when valid, -1 otherwise.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_gbk_mblen(s);
	int			pos;

	if (charlen > len)
		return -1;

	/* A zero byte may not appear inside a multibyte character */
	for (pos = 1; pos < charlen; pos++)
	{
		if (s[pos] == '\0')
			return -1;
	}

	return charlen;
}
1430
+
1431
/*
 * Verify the first UHC character of the string at "s".
 *
 * The character length comes from pg_uhc_mblen().  The character is
 * rejected if "len" is smaller than that length or if any byte after the
 * first is a NUL byte.
 *
 * Returns the character's byte length when valid, -1 otherwise.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_uhc_mblen(s);
	int			pos;

	if (charlen > len)
		return -1;

	/* A zero byte may not appear inside a multibyte character */
	for (pos = 1; pos < charlen; pos++)
	{
		if (s[pos] == '\0')
			return -1;
	}

	return charlen;
}
1450
+
1451
/*
 * Verify the first GB18030 character of the string at "s".
 *
 * Three shapes are accepted:
 *   - a single byte without the high bit (ASCII);
 *   - four bytes, recognised by a second byte in 0x30..0x39 when at least
 *     four bytes are available; bytes 1 and 3 must be in 0x81..0xfe and
 *     byte 4 in 0x30..0x39;
 *   - two bytes, with the first in 0x81..0xfe and the second in 0x40..0x7e
 *     or 0x80..0xfe.
 *
 * Returns the character's byte length (1, 2 or 4) when valid, -1 otherwise.
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
	/* ASCII */
	if (!IS_HIGHBIT_SET(*s))
		return 1;

	if (len >= 4 && s[1] >= 0x30 && s[1] <= 0x39)
	{
		/* Should be 4-byte, validate remaining bytes */
		if (s[0] >= 0x81 && s[0] <= 0xfe &&
			s[2] >= 0x81 && s[2] <= 0xfe &&
			s[3] >= 0x30 && s[3] <= 0x39)
			return 4;

		return -1;
	}

	if (len >= 2 && s[0] >= 0x81 && s[0] <= 0xfe)
	{
		/* Should be 2-byte, validate */
		if ((s[1] >= 0x40 && s[1] <= 0x7e) ||
			(s[1] >= 0x80 && s[1] <= 0xfe))
			return 2;

		return -1;
	}

	return -1;
}
1481
+
1482
/*
 * Verify the first UTF-8 character of the string at "s".
 *
 * The character length comes from pg_utf_mblen(); the character is
 * rejected if "len" is smaller than that length or if pg_utf8_islegal()
 * reports the byte sequence as illegal.
 *
 * Returns the character's byte length when valid, -1 otherwise.
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_utf_mblen(s);

	/* The length check must come first so islegal never reads past "len" */
	if (charlen > len || !pg_utf8_islegal(s, charlen))
		return -1;

	return charlen;
}
1495
+
1496
/*
 * Decide whether "length" bytes starting at "source" form one legal UTF-8
 * encoded character.
 *
 * The rules follow RFC3629.  The second byte is restricted more tightly
 * for certain lead bytes so that no character can be written with a
 * longer-than-necessary sequence padded with high-order zero bits; allowing
 * that would be a security hazard (e.g. an apparent non-ASCII character
 * that decodes to plain ASCII).
 *
 * "length" is assumed to have been obtained by pg_utf_mblen(), and the
 * caller must have checked that that many bytes are present in the buffer.
 * Any length outside 1..4 is rejected (lengths 5 and 6 are rejected for
 * now).
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
	unsigned char lead;
	int			pos;

	if (length < 1 || length > 4)
		return false;

	lead = source[0];

	/* Third and fourth bytes, when present, are plain continuation bytes */
	for (pos = length - 1; pos >= 2; pos--)
	{
		if (source[pos] < 0x80 || source[pos] > 0xBF)
			return false;
	}

	/* The allowed range of the second byte depends on the lead byte */
	if (length >= 2)
	{
		unsigned char second = source[1];
		unsigned char lowest = 0x80;
		unsigned char highest = 0xBF;

		if (lead == 0xE0)
			lowest = 0xA0;
		else if (lead == 0xED)
			highest = 0x9F;
		else if (lead == 0xF0)
			lowest = 0x90;
		else if (lead == 0xF4)
			highest = 0x8F;

		if (second < lowest || second > highest)
			return false;
	}

	/* Finally the lead byte itself: 0x80..0xC1 and anything above 0xF4 are out */
	if (lead >= 0x80 && lead < 0xC2)
		return false;
	if (lead > 0xF4)
		return false;

	return true;
}
1566
+
1567
+
1568
+ /*
1569
+ *-------------------------------------------------------------------
1570
+ * encoding info table
1571
+ * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
1572
+ *-------------------------------------------------------------------
1573
+ */
1574
+ const pg_wchar_tbl pg_wchar_table[] = {
1575
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
1576
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
1577
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
1578
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
1579
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
1580
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
1581
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
1582
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
1583
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
1584
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
1585
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
1586
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
1587
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
1588
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
1589
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
1590
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
1591
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
1592
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
1593
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
1594
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
1595
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
1596
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
1597
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
1598
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
1599
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
1600
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
1601
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
1602
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
1603
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
1604
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
1605
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
1606
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
1607
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
1608
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
1609
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
1610
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
1611
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
1612
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
1613
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
1614
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
1615
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
1616
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
1617
+ };
1618
+
1619
+ /*
1620
+ * Returns the byte length of a multibyte character.
1621
+ */
1622
+ int
1623
+ pg_encoding_mblen(int encoding, const char *mbstr)
1624
+ {
1625
+ return (PG_VALID_ENCODING(encoding) ?
1626
+ pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1627
+ pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1628
+ }
1629
+
1630
+ /*
1631
+ * Returns the display length of a multibyte character.
1632
+ */
1633
+
1634
+
1635
+ /*
1636
+ * Verify the first multibyte character of the given string.
1637
+ * Return its byte length if good, -1 if bad. (See comments above for
1638
+ * full details of the mbverify API.)
1639
+ */
1640
+
1641
+
1642
+ /*
1643
+ * fetch maximum length of a given encoding
1644
+ */
1645
+ int
1646
+ pg_encoding_max_length(int encoding)
1647
+ {
1648
+ Assert(PG_VALID_ENCODING(encoding));
1649
+
1650
+ return pg_wchar_table[encoding].maxmblen;
1651
+ }