pg_query 1.1.0 → 2.0.1

Sign up for free protection for your applications and access to all the features.
Files changed (478) [hide/show]
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +163 -52
  3. data/README.md +80 -69
  4. data/Rakefile +82 -1
  5. data/ext/pg_query/extconf.rb +3 -31
  6. data/ext/pg_query/guc-file.c +0 -0
  7. data/ext/pg_query/include/access/amapi.h +246 -0
  8. data/ext/pg_query/include/access/attmap.h +52 -0
  9. data/ext/pg_query/include/access/attnum.h +64 -0
  10. data/ext/pg_query/include/access/clog.h +61 -0
  11. data/ext/pg_query/include/access/commit_ts.h +77 -0
  12. data/ext/pg_query/include/access/detoast.h +92 -0
  13. data/ext/pg_query/include/access/genam.h +228 -0
  14. data/ext/pg_query/include/access/gin.h +78 -0
  15. data/ext/pg_query/include/access/htup.h +89 -0
  16. data/ext/pg_query/include/access/htup_details.h +819 -0
  17. data/ext/pg_query/include/access/itup.h +161 -0
  18. data/ext/pg_query/include/access/parallel.h +82 -0
  19. data/ext/pg_query/include/access/printtup.h +35 -0
  20. data/ext/pg_query/include/access/relation.h +28 -0
  21. data/ext/pg_query/include/access/relscan.h +176 -0
  22. data/ext/pg_query/include/access/rmgr.h +35 -0
  23. data/ext/pg_query/include/access/rmgrlist.h +49 -0
  24. data/ext/pg_query/include/access/sdir.h +58 -0
  25. data/ext/pg_query/include/access/skey.h +151 -0
  26. data/ext/pg_query/include/access/stratnum.h +83 -0
  27. data/ext/pg_query/include/access/sysattr.h +29 -0
  28. data/ext/pg_query/include/access/table.h +27 -0
  29. data/ext/pg_query/include/access/tableam.h +1825 -0
  30. data/ext/pg_query/include/access/transam.h +265 -0
  31. data/ext/pg_query/include/access/tupconvert.h +51 -0
  32. data/ext/pg_query/include/access/tupdesc.h +154 -0
  33. data/ext/pg_query/include/access/tupmacs.h +247 -0
  34. data/ext/pg_query/include/access/twophase.h +61 -0
  35. data/ext/pg_query/include/access/xact.h +463 -0
  36. data/ext/pg_query/include/access/xlog.h +398 -0
  37. data/ext/pg_query/include/access/xlog_internal.h +330 -0
  38. data/ext/pg_query/include/access/xlogdefs.h +109 -0
  39. data/ext/pg_query/include/access/xloginsert.h +64 -0
  40. data/ext/pg_query/include/access/xlogreader.h +327 -0
  41. data/ext/pg_query/include/access/xlogrecord.h +227 -0
  42. data/ext/pg_query/include/bootstrap/bootstrap.h +62 -0
  43. data/ext/pg_query/include/c.h +1322 -0
  44. data/ext/pg_query/include/catalog/catalog.h +42 -0
  45. data/ext/pg_query/include/catalog/catversion.h +58 -0
  46. data/ext/pg_query/include/catalog/dependency.h +275 -0
  47. data/ext/pg_query/include/catalog/genbki.h +64 -0
  48. data/ext/pg_query/include/catalog/index.h +199 -0
  49. data/ext/pg_query/include/catalog/indexing.h +366 -0
  50. data/ext/pg_query/include/catalog/namespace.h +188 -0
  51. data/ext/pg_query/include/catalog/objectaccess.h +197 -0
  52. data/ext/pg_query/include/catalog/objectaddress.h +84 -0
  53. data/ext/pg_query/include/catalog/pg_aggregate.h +176 -0
  54. data/ext/pg_query/include/catalog/pg_aggregate_d.h +77 -0
  55. data/ext/pg_query/include/catalog/pg_am.h +60 -0
  56. data/ext/pg_query/include/catalog/pg_am_d.h +45 -0
  57. data/ext/pg_query/include/catalog/pg_attribute.h +204 -0
  58. data/ext/pg_query/include/catalog/pg_attribute_d.h +59 -0
  59. data/ext/pg_query/include/catalog/pg_authid.h +58 -0
  60. data/ext/pg_query/include/catalog/pg_authid_d.h +49 -0
  61. data/ext/pg_query/include/catalog/pg_class.h +200 -0
  62. data/ext/pg_query/include/catalog/pg_class_d.h +103 -0
  63. data/ext/pg_query/include/catalog/pg_collation.h +73 -0
  64. data/ext/pg_query/include/catalog/pg_collation_d.h +45 -0
  65. data/ext/pg_query/include/catalog/pg_constraint.h +247 -0
  66. data/ext/pg_query/include/catalog/pg_constraint_d.h +67 -0
  67. data/ext/pg_query/include/catalog/pg_control.h +250 -0
  68. data/ext/pg_query/include/catalog/pg_conversion.h +72 -0
  69. data/ext/pg_query/include/catalog/pg_conversion_d.h +35 -0
  70. data/ext/pg_query/include/catalog/pg_depend.h +73 -0
  71. data/ext/pg_query/include/catalog/pg_depend_d.h +34 -0
  72. data/ext/pg_query/include/catalog/pg_event_trigger.h +51 -0
  73. data/ext/pg_query/include/catalog/pg_event_trigger_d.h +34 -0
  74. data/ext/pg_query/include/catalog/pg_index.h +80 -0
  75. data/ext/pg_query/include/catalog/pg_index_d.h +56 -0
  76. data/ext/pg_query/include/catalog/pg_language.h +67 -0
  77. data/ext/pg_query/include/catalog/pg_language_d.h +39 -0
  78. data/ext/pg_query/include/catalog/pg_namespace.h +59 -0
  79. data/ext/pg_query/include/catalog/pg_namespace_d.h +34 -0
  80. data/ext/pg_query/include/catalog/pg_opclass.h +85 -0
  81. data/ext/pg_query/include/catalog/pg_opclass_d.h +49 -0
  82. data/ext/pg_query/include/catalog/pg_operator.h +102 -0
  83. data/ext/pg_query/include/catalog/pg_operator_d.h +106 -0
  84. data/ext/pg_query/include/catalog/pg_opfamily.h +60 -0
  85. data/ext/pg_query/include/catalog/pg_opfamily_d.h +47 -0
  86. data/ext/pg_query/include/catalog/pg_partitioned_table.h +63 -0
  87. data/ext/pg_query/include/catalog/pg_partitioned_table_d.h +35 -0
  88. data/ext/pg_query/include/catalog/pg_proc.h +211 -0
  89. data/ext/pg_query/include/catalog/pg_proc_d.h +99 -0
  90. data/ext/pg_query/include/catalog/pg_publication.h +115 -0
  91. data/ext/pg_query/include/catalog/pg_publication_d.h +36 -0
  92. data/ext/pg_query/include/catalog/pg_replication_origin.h +57 -0
  93. data/ext/pg_query/include/catalog/pg_replication_origin_d.h +29 -0
  94. data/ext/pg_query/include/catalog/pg_statistic.h +275 -0
  95. data/ext/pg_query/include/catalog/pg_statistic_d.h +194 -0
  96. data/ext/pg_query/include/catalog/pg_statistic_ext.h +74 -0
  97. data/ext/pg_query/include/catalog/pg_statistic_ext_d.h +40 -0
  98. data/ext/pg_query/include/catalog/pg_transform.h +45 -0
  99. data/ext/pg_query/include/catalog/pg_transform_d.h +32 -0
  100. data/ext/pg_query/include/catalog/pg_trigger.h +137 -0
  101. data/ext/pg_query/include/catalog/pg_trigger_d.h +106 -0
  102. data/ext/pg_query/include/catalog/pg_ts_config.h +50 -0
  103. data/ext/pg_query/include/catalog/pg_ts_config_d.h +32 -0
  104. data/ext/pg_query/include/catalog/pg_ts_dict.h +54 -0
  105. data/ext/pg_query/include/catalog/pg_ts_dict_d.h +33 -0
  106. data/ext/pg_query/include/catalog/pg_ts_parser.h +57 -0
  107. data/ext/pg_query/include/catalog/pg_ts_parser_d.h +35 -0
  108. data/ext/pg_query/include/catalog/pg_ts_template.h +48 -0
  109. data/ext/pg_query/include/catalog/pg_ts_template_d.h +32 -0
  110. data/ext/pg_query/include/catalog/pg_type.h +372 -0
  111. data/ext/pg_query/include/catalog/pg_type_d.h +285 -0
  112. data/ext/pg_query/include/catalog/storage.h +48 -0
  113. data/ext/pg_query/include/commands/async.h +54 -0
  114. data/ext/pg_query/include/commands/dbcommands.h +35 -0
  115. data/ext/pg_query/include/commands/defrem.h +173 -0
  116. data/ext/pg_query/include/commands/event_trigger.h +88 -0
  117. data/ext/pg_query/include/commands/explain.h +127 -0
  118. data/ext/pg_query/include/commands/prepare.h +61 -0
  119. data/ext/pg_query/include/commands/tablespace.h +67 -0
  120. data/ext/pg_query/include/commands/trigger.h +277 -0
  121. data/ext/pg_query/include/commands/user.h +37 -0
  122. data/ext/pg_query/include/commands/vacuum.h +293 -0
  123. data/ext/pg_query/include/commands/variable.h +38 -0
  124. data/ext/pg_query/include/common/file_perm.h +56 -0
  125. data/ext/pg_query/include/common/hashfn.h +104 -0
  126. data/ext/pg_query/include/common/ip.h +37 -0
  127. data/ext/pg_query/include/common/keywords.h +33 -0
  128. data/ext/pg_query/include/common/kwlookup.h +44 -0
  129. data/ext/pg_query/include/common/relpath.h +90 -0
  130. data/ext/pg_query/include/common/string.h +19 -0
  131. data/ext/pg_query/include/common/unicode_combining_table.h +196 -0
  132. data/ext/pg_query/include/datatype/timestamp.h +197 -0
  133. data/ext/pg_query/include/executor/execdesc.h +70 -0
  134. data/ext/pg_query/include/executor/executor.h +614 -0
  135. data/ext/pg_query/include/executor/functions.h +41 -0
  136. data/ext/pg_query/include/executor/instrument.h +101 -0
  137. data/ext/pg_query/include/executor/spi.h +175 -0
  138. data/ext/pg_query/include/executor/tablefunc.h +67 -0
  139. data/ext/pg_query/include/executor/tuptable.h +487 -0
  140. data/ext/pg_query/include/fmgr.h +775 -0
  141. data/ext/pg_query/include/funcapi.h +348 -0
  142. data/ext/pg_query/include/getaddrinfo.h +162 -0
  143. data/ext/pg_query/include/jit/jit.h +105 -0
  144. data/ext/pg_query/include/kwlist_d.h +1072 -0
  145. data/ext/pg_query/include/lib/ilist.h +727 -0
  146. data/ext/pg_query/include/lib/pairingheap.h +102 -0
  147. data/ext/pg_query/include/lib/simplehash.h +1059 -0
  148. data/ext/pg_query/include/lib/stringinfo.h +161 -0
  149. data/ext/pg_query/include/libpq/auth.h +29 -0
  150. data/ext/pg_query/include/libpq/crypt.h +46 -0
  151. data/ext/pg_query/include/libpq/hba.h +140 -0
  152. data/ext/pg_query/include/libpq/libpq-be.h +326 -0
  153. data/ext/pg_query/include/libpq/libpq.h +133 -0
  154. data/ext/pg_query/include/libpq/pqcomm.h +208 -0
  155. data/ext/pg_query/include/libpq/pqformat.h +210 -0
  156. data/ext/pg_query/include/libpq/pqsignal.h +42 -0
  157. data/ext/pg_query/include/mb/pg_wchar.h +672 -0
  158. data/ext/pg_query/include/mb/stringinfo_mb.h +24 -0
  159. data/ext/pg_query/include/miscadmin.h +476 -0
  160. data/ext/pg_query/include/nodes/bitmapset.h +122 -0
  161. data/ext/pg_query/include/nodes/execnodes.h +2520 -0
  162. data/ext/pg_query/include/nodes/extensible.h +160 -0
  163. data/ext/pg_query/include/nodes/lockoptions.h +61 -0
  164. data/ext/pg_query/include/nodes/makefuncs.h +108 -0
  165. data/ext/pg_query/include/nodes/memnodes.h +108 -0
  166. data/ext/pg_query/include/nodes/nodeFuncs.h +162 -0
  167. data/ext/pg_query/include/nodes/nodes.h +842 -0
  168. data/ext/pg_query/include/nodes/params.h +170 -0
  169. data/ext/pg_query/include/nodes/parsenodes.h +3579 -0
  170. data/ext/pg_query/include/nodes/pathnodes.h +2556 -0
  171. data/ext/pg_query/include/nodes/pg_list.h +605 -0
  172. data/ext/pg_query/include/nodes/plannodes.h +1251 -0
  173. data/ext/pg_query/include/nodes/primnodes.h +1541 -0
  174. data/ext/pg_query/include/nodes/print.h +34 -0
  175. data/ext/pg_query/include/nodes/tidbitmap.h +75 -0
  176. data/ext/pg_query/include/nodes/value.h +61 -0
  177. data/ext/pg_query/include/optimizer/cost.h +206 -0
  178. data/ext/pg_query/include/optimizer/geqo.h +88 -0
  179. data/ext/pg_query/include/optimizer/geqo_gene.h +45 -0
  180. data/ext/pg_query/include/optimizer/optimizer.h +199 -0
  181. data/ext/pg_query/include/optimizer/paths.h +249 -0
  182. data/ext/pg_query/include/optimizer/planmain.h +119 -0
  183. data/ext/pg_query/include/parser/analyze.h +49 -0
  184. data/ext/pg_query/include/parser/gram.h +1067 -0
  185. data/ext/pg_query/include/parser/gramparse.h +75 -0
  186. data/ext/pg_query/include/parser/kwlist.h +477 -0
  187. data/ext/pg_query/include/parser/parse_agg.h +68 -0
  188. data/ext/pg_query/include/parser/parse_clause.h +54 -0
  189. data/ext/pg_query/include/parser/parse_coerce.h +97 -0
  190. data/ext/pg_query/include/parser/parse_collate.h +27 -0
  191. data/ext/pg_query/include/parser/parse_expr.h +26 -0
  192. data/ext/pg_query/include/parser/parse_func.h +73 -0
  193. data/ext/pg_query/include/parser/parse_node.h +327 -0
  194. data/ext/pg_query/include/parser/parse_oper.h +67 -0
  195. data/ext/pg_query/include/parser/parse_relation.h +123 -0
  196. data/ext/pg_query/include/parser/parse_target.h +46 -0
  197. data/ext/pg_query/include/parser/parse_type.h +60 -0
  198. data/ext/pg_query/include/parser/parser.h +41 -0
  199. data/ext/pg_query/include/parser/parsetree.h +61 -0
  200. data/ext/pg_query/include/parser/scanner.h +152 -0
  201. data/ext/pg_query/include/parser/scansup.h +30 -0
  202. data/ext/pg_query/include/partitioning/partdefs.h +26 -0
  203. data/ext/pg_query/include/pg_config.h +988 -0
  204. data/ext/pg_query/include/pg_config_ext.h +8 -0
  205. data/ext/pg_query/include/pg_config_manual.h +350 -0
  206. data/ext/pg_query/include/pg_config_os.h +8 -0
  207. data/ext/pg_query/include/pg_getopt.h +56 -0
  208. data/ext/pg_query/include/pg_query.h +121 -0
  209. data/ext/pg_query/include/pg_query_enum_defs.c +2454 -0
  210. data/ext/pg_query/include/pg_query_fingerprint_conds.c +875 -0
  211. data/ext/pg_query/include/pg_query_fingerprint_defs.c +12413 -0
  212. data/ext/pg_query/include/pg_query_json_helper.c +61 -0
  213. data/ext/pg_query/include/pg_query_outfuncs_conds.c +686 -0
  214. data/ext/pg_query/include/pg_query_outfuncs_defs.c +2437 -0
  215. data/ext/pg_query/include/pg_query_readfuncs_conds.c +222 -0
  216. data/ext/pg_query/include/pg_query_readfuncs_defs.c +2878 -0
  217. data/ext/pg_query/include/pg_trace.h +17 -0
  218. data/ext/pg_query/include/pgstat.h +1487 -0
  219. data/ext/pg_query/include/pgtime.h +84 -0
  220. data/ext/pg_query/include/pl_gram.h +385 -0
  221. data/ext/pg_query/include/pl_reserved_kwlist.h +52 -0
  222. data/ext/pg_query/include/pl_reserved_kwlist_d.h +114 -0
  223. data/ext/pg_query/include/pl_unreserved_kwlist.h +112 -0
  224. data/ext/pg_query/include/pl_unreserved_kwlist_d.h +246 -0
  225. data/ext/pg_query/include/plerrcodes.h +990 -0
  226. data/ext/pg_query/include/plpgsql.h +1347 -0
  227. data/ext/pg_query/include/port.h +524 -0
  228. data/ext/pg_query/include/port/atomics.h +524 -0
  229. data/ext/pg_query/include/port/atomics/arch-arm.h +26 -0
  230. data/ext/pg_query/include/port/atomics/arch-ppc.h +254 -0
  231. data/ext/pg_query/include/port/atomics/arch-x86.h +252 -0
  232. data/ext/pg_query/include/port/atomics/fallback.h +170 -0
  233. data/ext/pg_query/include/port/atomics/generic-gcc.h +286 -0
  234. data/ext/pg_query/include/port/atomics/generic.h +401 -0
  235. data/ext/pg_query/include/port/pg_bitutils.h +226 -0
  236. data/ext/pg_query/include/port/pg_bswap.h +161 -0
  237. data/ext/pg_query/include/port/pg_crc32c.h +101 -0
  238. data/ext/pg_query/include/portability/instr_time.h +256 -0
  239. data/ext/pg_query/include/postgres.h +764 -0
  240. data/ext/pg_query/include/postgres_ext.h +74 -0
  241. data/ext/pg_query/include/postmaster/autovacuum.h +83 -0
  242. data/ext/pg_query/include/postmaster/bgworker.h +161 -0
  243. data/ext/pg_query/include/postmaster/bgworker_internals.h +64 -0
  244. data/ext/pg_query/include/postmaster/bgwriter.h +45 -0
  245. data/ext/pg_query/include/postmaster/fork_process.h +17 -0
  246. data/ext/pg_query/include/postmaster/interrupt.h +32 -0
  247. data/ext/pg_query/include/postmaster/pgarch.h +39 -0
  248. data/ext/pg_query/include/postmaster/postmaster.h +77 -0
  249. data/ext/pg_query/include/postmaster/syslogger.h +98 -0
  250. data/ext/pg_query/include/postmaster/walwriter.h +21 -0
  251. data/ext/pg_query/include/protobuf-c.h +1106 -0
  252. data/ext/pg_query/include/protobuf-c/protobuf-c.h +1106 -0
  253. data/ext/pg_query/include/protobuf/pg_query.pb-c.h +10846 -0
  254. data/ext/pg_query/include/protobuf/pg_query.pb.h +124718 -0
  255. data/ext/pg_query/include/regex/regex.h +184 -0
  256. data/ext/pg_query/include/replication/logicallauncher.h +31 -0
  257. data/ext/pg_query/include/replication/logicalproto.h +110 -0
  258. data/ext/pg_query/include/replication/logicalworker.h +19 -0
  259. data/ext/pg_query/include/replication/origin.h +73 -0
  260. data/ext/pg_query/include/replication/reorderbuffer.h +467 -0
  261. data/ext/pg_query/include/replication/slot.h +219 -0
  262. data/ext/pg_query/include/replication/syncrep.h +115 -0
  263. data/ext/pg_query/include/replication/walreceiver.h +340 -0
  264. data/ext/pg_query/include/replication/walsender.h +74 -0
  265. data/ext/pg_query/include/rewrite/prs2lock.h +46 -0
  266. data/ext/pg_query/include/rewrite/rewriteHandler.h +40 -0
  267. data/ext/pg_query/include/rewrite/rewriteManip.h +87 -0
  268. data/ext/pg_query/include/rewrite/rewriteSupport.h +26 -0
  269. data/ext/pg_query/include/storage/backendid.h +37 -0
  270. data/ext/pg_query/include/storage/block.h +121 -0
  271. data/ext/pg_query/include/storage/buf.h +46 -0
  272. data/ext/pg_query/include/storage/bufmgr.h +292 -0
  273. data/ext/pg_query/include/storage/bufpage.h +459 -0
  274. data/ext/pg_query/include/storage/condition_variable.h +62 -0
  275. data/ext/pg_query/include/storage/dsm.h +61 -0
  276. data/ext/pg_query/include/storage/dsm_impl.h +75 -0
  277. data/ext/pg_query/include/storage/fd.h +168 -0
  278. data/ext/pg_query/include/storage/ipc.h +81 -0
  279. data/ext/pg_query/include/storage/item.h +19 -0
  280. data/ext/pg_query/include/storage/itemid.h +184 -0
  281. data/ext/pg_query/include/storage/itemptr.h +206 -0
  282. data/ext/pg_query/include/storage/large_object.h +100 -0
  283. data/ext/pg_query/include/storage/latch.h +190 -0
  284. data/ext/pg_query/include/storage/lmgr.h +114 -0
  285. data/ext/pg_query/include/storage/lock.h +612 -0
  286. data/ext/pg_query/include/storage/lockdefs.h +59 -0
  287. data/ext/pg_query/include/storage/lwlock.h +232 -0
  288. data/ext/pg_query/include/storage/lwlocknames.h +51 -0
  289. data/ext/pg_query/include/storage/off.h +57 -0
  290. data/ext/pg_query/include/storage/pg_sema.h +61 -0
  291. data/ext/pg_query/include/storage/pg_shmem.h +90 -0
  292. data/ext/pg_query/include/storage/pmsignal.h +94 -0
  293. data/ext/pg_query/include/storage/predicate.h +87 -0
  294. data/ext/pg_query/include/storage/proc.h +333 -0
  295. data/ext/pg_query/include/storage/proclist_types.h +51 -0
  296. data/ext/pg_query/include/storage/procsignal.h +75 -0
  297. data/ext/pg_query/include/storage/relfilenode.h +99 -0
  298. data/ext/pg_query/include/storage/s_lock.h +1047 -0
  299. data/ext/pg_query/include/storage/sharedfileset.h +45 -0
  300. data/ext/pg_query/include/storage/shm_mq.h +85 -0
  301. data/ext/pg_query/include/storage/shm_toc.h +58 -0
  302. data/ext/pg_query/include/storage/shmem.h +81 -0
  303. data/ext/pg_query/include/storage/sinval.h +153 -0
  304. data/ext/pg_query/include/storage/sinvaladt.h +43 -0
  305. data/ext/pg_query/include/storage/smgr.h +109 -0
  306. data/ext/pg_query/include/storage/spin.h +77 -0
  307. data/ext/pg_query/include/storage/standby.h +91 -0
  308. data/ext/pg_query/include/storage/standbydefs.h +74 -0
  309. data/ext/pg_query/include/storage/sync.h +62 -0
  310. data/ext/pg_query/include/tcop/cmdtag.h +58 -0
  311. data/ext/pg_query/include/tcop/cmdtaglist.h +217 -0
  312. data/ext/pg_query/include/tcop/deparse_utility.h +108 -0
  313. data/ext/pg_query/include/tcop/dest.h +149 -0
  314. data/ext/pg_query/include/tcop/fastpath.h +21 -0
  315. data/ext/pg_query/include/tcop/pquery.h +45 -0
  316. data/ext/pg_query/include/tcop/tcopprot.h +89 -0
  317. data/ext/pg_query/include/tcop/utility.h +108 -0
  318. data/ext/pg_query/include/tsearch/ts_cache.h +98 -0
  319. data/ext/pg_query/include/utils/acl.h +312 -0
  320. data/ext/pg_query/include/utils/aclchk_internal.h +45 -0
  321. data/ext/pg_query/include/utils/array.h +458 -0
  322. data/ext/pg_query/include/utils/builtins.h +127 -0
  323. data/ext/pg_query/include/utils/bytea.h +27 -0
  324. data/ext/pg_query/include/utils/catcache.h +231 -0
  325. data/ext/pg_query/include/utils/date.h +90 -0
  326. data/ext/pg_query/include/utils/datetime.h +343 -0
  327. data/ext/pg_query/include/utils/datum.h +68 -0
  328. data/ext/pg_query/include/utils/dsa.h +123 -0
  329. data/ext/pg_query/include/utils/dynahash.h +19 -0
  330. data/ext/pg_query/include/utils/elog.h +439 -0
  331. data/ext/pg_query/include/utils/errcodes.h +352 -0
  332. data/ext/pg_query/include/utils/expandeddatum.h +159 -0
  333. data/ext/pg_query/include/utils/expandedrecord.h +231 -0
  334. data/ext/pg_query/include/utils/float.h +356 -0
  335. data/ext/pg_query/include/utils/fmgroids.h +2657 -0
  336. data/ext/pg_query/include/utils/fmgrprotos.h +2646 -0
  337. data/ext/pg_query/include/utils/fmgrtab.h +48 -0
  338. data/ext/pg_query/include/utils/guc.h +443 -0
  339. data/ext/pg_query/include/utils/guc_tables.h +272 -0
  340. data/ext/pg_query/include/utils/hsearch.h +149 -0
  341. data/ext/pg_query/include/utils/inval.h +64 -0
  342. data/ext/pg_query/include/utils/lsyscache.h +197 -0
  343. data/ext/pg_query/include/utils/memdebug.h +82 -0
  344. data/ext/pg_query/include/utils/memutils.h +225 -0
  345. data/ext/pg_query/include/utils/numeric.h +76 -0
  346. data/ext/pg_query/include/utils/palloc.h +136 -0
  347. data/ext/pg_query/include/utils/partcache.h +102 -0
  348. data/ext/pg_query/include/utils/pg_locale.h +119 -0
  349. data/ext/pg_query/include/utils/pg_lsn.h +29 -0
  350. data/ext/pg_query/include/utils/pidfile.h +56 -0
  351. data/ext/pg_query/include/utils/plancache.h +235 -0
  352. data/ext/pg_query/include/utils/portal.h +241 -0
  353. data/ext/pg_query/include/utils/probes.h +114 -0
  354. data/ext/pg_query/include/utils/ps_status.h +25 -0
  355. data/ext/pg_query/include/utils/queryenvironment.h +74 -0
  356. data/ext/pg_query/include/utils/regproc.h +28 -0
  357. data/ext/pg_query/include/utils/rel.h +644 -0
  358. data/ext/pg_query/include/utils/relcache.h +151 -0
  359. data/ext/pg_query/include/utils/reltrigger.h +81 -0
  360. data/ext/pg_query/include/utils/resowner.h +86 -0
  361. data/ext/pg_query/include/utils/rls.h +50 -0
  362. data/ext/pg_query/include/utils/ruleutils.h +44 -0
  363. data/ext/pg_query/include/utils/sharedtuplestore.h +61 -0
  364. data/ext/pg_query/include/utils/snapmgr.h +158 -0
  365. data/ext/pg_query/include/utils/snapshot.h +206 -0
  366. data/ext/pg_query/include/utils/sortsupport.h +276 -0
  367. data/ext/pg_query/include/utils/syscache.h +219 -0
  368. data/ext/pg_query/include/utils/timeout.h +88 -0
  369. data/ext/pg_query/include/utils/timestamp.h +116 -0
  370. data/ext/pg_query/include/utils/tuplesort.h +277 -0
  371. data/ext/pg_query/include/utils/tuplestore.h +91 -0
  372. data/ext/pg_query/include/utils/typcache.h +202 -0
  373. data/ext/pg_query/include/utils/tzparser.h +39 -0
  374. data/ext/pg_query/include/utils/varlena.h +39 -0
  375. data/ext/pg_query/include/utils/xml.h +84 -0
  376. data/ext/pg_query/include/xxhash.h +5445 -0
  377. data/ext/pg_query/include/xxhash/xxhash.h +5445 -0
  378. data/ext/pg_query/pg_query.c +104 -0
  379. data/ext/pg_query/pg_query.pb-c.c +37628 -0
  380. data/ext/pg_query/pg_query_deparse.c +9953 -0
  381. data/ext/pg_query/pg_query_fingerprint.c +292 -0
  382. data/ext/pg_query/pg_query_fingerprint.h +8 -0
  383. data/ext/pg_query/pg_query_internal.h +24 -0
  384. data/ext/pg_query/pg_query_json_plpgsql.c +738 -0
  385. data/ext/pg_query/pg_query_json_plpgsql.h +9 -0
  386. data/ext/pg_query/pg_query_normalize.c +437 -0
  387. data/ext/pg_query/pg_query_outfuncs.h +10 -0
  388. data/ext/pg_query/pg_query_outfuncs_json.c +297 -0
  389. data/ext/pg_query/pg_query_outfuncs_protobuf.c +237 -0
  390. data/ext/pg_query/pg_query_parse.c +148 -0
  391. data/ext/pg_query/pg_query_parse_plpgsql.c +460 -0
  392. data/ext/pg_query/pg_query_readfuncs.h +11 -0
  393. data/ext/pg_query/pg_query_readfuncs_protobuf.c +142 -0
  394. data/ext/pg_query/pg_query_ruby.c +108 -12
  395. data/ext/pg_query/pg_query_scan.c +173 -0
  396. data/ext/pg_query/pg_query_split.c +221 -0
  397. data/ext/pg_query/protobuf-c.c +3660 -0
  398. data/ext/pg_query/src_backend_catalog_namespace.c +1051 -0
  399. data/ext/pg_query/src_backend_catalog_pg_proc.c +142 -0
  400. data/ext/pg_query/src_backend_commands_define.c +117 -0
  401. data/ext/pg_query/src_backend_libpq_pqcomm.c +651 -0
  402. data/ext/pg_query/src_backend_nodes_bitmapset.c +513 -0
  403. data/ext/pg_query/src_backend_nodes_copyfuncs.c +6013 -0
  404. data/ext/pg_query/src_backend_nodes_equalfuncs.c +4003 -0
  405. data/ext/pg_query/src_backend_nodes_extensible.c +99 -0
  406. data/ext/pg_query/src_backend_nodes_list.c +922 -0
  407. data/ext/pg_query/src_backend_nodes_makefuncs.c +417 -0
  408. data/ext/pg_query/src_backend_nodes_nodeFuncs.c +1363 -0
  409. data/ext/pg_query/src_backend_nodes_value.c +84 -0
  410. data/ext/pg_query/src_backend_parser_gram.c +47456 -0
  411. data/ext/pg_query/src_backend_parser_parse_expr.c +313 -0
  412. data/ext/pg_query/src_backend_parser_parser.c +497 -0
  413. data/ext/pg_query/src_backend_parser_scan.c +7091 -0
  414. data/ext/pg_query/src_backend_parser_scansup.c +160 -0
  415. data/ext/pg_query/src_backend_postmaster_postmaster.c +2230 -0
  416. data/ext/pg_query/src_backend_storage_ipc_ipc.c +192 -0
  417. data/ext/pg_query/src_backend_storage_lmgr_s_lock.c +370 -0
  418. data/ext/pg_query/src_backend_tcop_postgres.c +776 -0
  419. data/ext/pg_query/src_backend_utils_adt_datum.c +326 -0
  420. data/ext/pg_query/src_backend_utils_adt_expandeddatum.c +98 -0
  421. data/ext/pg_query/src_backend_utils_adt_format_type.c +136 -0
  422. data/ext/pg_query/src_backend_utils_adt_ruleutils.c +1683 -0
  423. data/ext/pg_query/src_backend_utils_error_assert.c +74 -0
  424. data/ext/pg_query/src_backend_utils_error_elog.c +1748 -0
  425. data/ext/pg_query/src_backend_utils_fmgr_fmgr.c +570 -0
  426. data/ext/pg_query/src_backend_utils_hash_dynahash.c +1086 -0
  427. data/ext/pg_query/src_backend_utils_init_globals.c +168 -0
  428. data/ext/pg_query/src_backend_utils_mb_mbutils.c +839 -0
  429. data/ext/pg_query/src_backend_utils_misc_guc.c +1831 -0
  430. data/ext/pg_query/src_backend_utils_mmgr_aset.c +1560 -0
  431. data/ext/pg_query/src_backend_utils_mmgr_mcxt.c +1006 -0
  432. data/ext/pg_query/src_common_encnames.c +158 -0
  433. data/ext/pg_query/src_common_keywords.c +39 -0
  434. data/ext/pg_query/src_common_kwlist_d.h +1081 -0
  435. data/ext/pg_query/src_common_kwlookup.c +91 -0
  436. data/ext/pg_query/src_common_psprintf.c +158 -0
  437. data/ext/pg_query/src_common_string.c +86 -0
  438. data/ext/pg_query/src_common_stringinfo.c +336 -0
  439. data/ext/pg_query/src_common_wchar.c +1651 -0
  440. data/ext/pg_query/src_pl_plpgsql_src_pl_comp.c +1133 -0
  441. data/ext/pg_query/src_pl_plpgsql_src_pl_funcs.c +877 -0
  442. data/ext/pg_query/src_pl_plpgsql_src_pl_gram.c +6533 -0
  443. data/ext/pg_query/src_pl_plpgsql_src_pl_handler.c +107 -0
  444. data/ext/pg_query/src_pl_plpgsql_src_pl_reserved_kwlist_d.h +123 -0
  445. data/ext/pg_query/src_pl_plpgsql_src_pl_scanner.c +671 -0
  446. data/ext/pg_query/src_pl_plpgsql_src_pl_unreserved_kwlist_d.h +255 -0
  447. data/ext/pg_query/src_port_erand48.c +127 -0
  448. data/ext/pg_query/src_port_pg_bitutils.c +246 -0
  449. data/ext/pg_query/src_port_pgsleep.c +69 -0
  450. data/ext/pg_query/src_port_pgstrcasecmp.c +83 -0
  451. data/ext/pg_query/src_port_qsort.c +240 -0
  452. data/ext/pg_query/src_port_random.c +31 -0
  453. data/ext/pg_query/src_port_snprintf.c +1449 -0
  454. data/ext/pg_query/src_port_strerror.c +324 -0
  455. data/ext/pg_query/src_port_strnlen.c +39 -0
  456. data/ext/pg_query/xxhash.c +43 -0
  457. data/lib/pg_query.rb +7 -4
  458. data/lib/pg_query/constants.rb +21 -0
  459. data/lib/pg_query/deparse.rb +16 -1117
  460. data/lib/pg_query/filter_columns.rb +86 -85
  461. data/lib/pg_query/fingerprint.rb +122 -87
  462. data/lib/pg_query/json_field_names.rb +1402 -0
  463. data/lib/pg_query/node.rb +31 -0
  464. data/lib/pg_query/param_refs.rb +42 -37
  465. data/lib/pg_query/parse.rb +220 -200
  466. data/lib/pg_query/parse_error.rb +1 -1
  467. data/lib/pg_query/pg_query_pb.rb +3211 -0
  468. data/lib/pg_query/scan.rb +23 -0
  469. data/lib/pg_query/treewalker.rb +24 -40
  470. data/lib/pg_query/truncate.rb +64 -43
  471. data/lib/pg_query/version.rb +2 -2
  472. metadata +473 -11
  473. data/ext/pg_query/pg_query_ruby.h +0 -10
  474. data/lib/pg_query/deep_dup.rb +0 -16
  475. data/lib/pg_query/deparse/alter_table.rb +0 -42
  476. data/lib/pg_query/deparse/interval.rb +0 -105
  477. data/lib/pg_query/legacy_parsetree.rb +0 -109
  478. data/lib/pg_query/node_types.rb +0 -284
@@ -0,0 +1,1651 @@
1
+ /*--------------------------------------------------------------------
2
+ * Symbols referenced in this file:
3
+ * - pg_encoding_max_length
4
+ * - pg_wchar_table
5
+ * - pg_utf_mblen
6
+ * - pg_mule_mblen
7
+ * - pg_ascii2wchar_with_len
8
+ * - pg_wchar2single_with_len
9
+ * - pg_ascii_mblen
10
+ * - pg_ascii_dsplen
11
+ * - pg_ascii_verifier
12
+ * - pg_eucjp2wchar_with_len
13
+ * - pg_euc2wchar_with_len
14
+ * - pg_wchar2euc_with_len
15
+ * - pg_eucjp_mblen
16
+ * - pg_euc_mblen
17
+ * - pg_eucjp_dsplen
18
+ * - pg_eucjp_verifier
19
+ * - pg_euccn2wchar_with_len
20
+ * - pg_euccn_mblen
21
+ * - pg_euccn_dsplen
22
+ * - pg_euckr_verifier
23
+ * - pg_euckr2wchar_with_len
24
+ * - pg_euckr_mblen
25
+ * - pg_euckr_dsplen
26
+ * - pg_euc_dsplen
27
+ * - pg_euctw2wchar_with_len
28
+ * - pg_euctw_mblen
29
+ * - pg_euctw_dsplen
30
+ * - pg_euctw_verifier
31
+ * - pg_utf2wchar_with_len
32
+ * - pg_wchar2utf_with_len
33
+ * - unicode_to_utf8
34
+ * - pg_utf_dsplen
35
+ * - utf8_to_unicode
36
+ * - ucs_wcwidth
37
+ * - mbbisearch
38
+ * - pg_utf8_verifier
39
+ * - pg_utf8_islegal
40
+ * - pg_mule2wchar_with_len
41
+ * - pg_wchar2mule_with_len
42
+ * - pg_mule_dsplen
43
+ * - pg_mule_verifier
44
+ * - pg_latin12wchar_with_len
45
+ * - pg_latin1_mblen
46
+ * - pg_latin1_dsplen
47
+ * - pg_latin1_verifier
48
+ * - pg_sjis_mblen
49
+ * - pg_sjis_dsplen
50
+ * - pg_sjis_verifier
51
+ * - pg_big5_mblen
52
+ * - pg_big5_dsplen
53
+ * - pg_big5_verifier
54
+ * - pg_gbk_mblen
55
+ * - pg_gbk_dsplen
56
+ * - pg_gbk_verifier
57
+ * - pg_uhc_mblen
58
+ * - pg_uhc_dsplen
59
+ * - pg_uhc_verifier
60
+ * - pg_gb18030_mblen
61
+ * - pg_gb18030_dsplen
62
+ * - pg_gb18030_verifier
63
+ * - pg_johab_mblen
64
+ * - pg_johab_dsplen
65
+ * - pg_johab_verifier
66
+ * - pg_encoding_mblen
67
+ *--------------------------------------------------------------------
68
+ */
69
+
70
+ /*-------------------------------------------------------------------------
71
+ *
72
+ * wchar.c
73
+ * Functions for working with multibyte characters in various encodings.
74
+ *
75
+ * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
76
+ *
77
+ * IDENTIFICATION
78
+ * src/common/wchar.c
79
+ *
80
+ *-------------------------------------------------------------------------
81
+ */
82
+ #include "c.h"
83
+
84
+ #include "mb/pg_wchar.h"
85
+
86
+
87
+ /*
88
+ * Operations on multi-byte encodings are driven by a table of helper
89
+ * functions.
90
+ *
91
+ * To add support for an encoding, define mblen(), dsplen() and verifier() for
92
+ * the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
93
+ * conversion functions.
94
+ *
95
+ * These functions generally assume that their input is validly formed.
96
+ * The "verifier" functions, further down in the file, have to be more
97
+ * paranoid.
98
+ *
99
+ * We expect that mblen() does not need to examine more than the first byte
100
+ * of the character to discover the correct length. GB18030 is an exception
101
+ * to that rule, though, as it also looks at second byte. But even that
102
+ * behaves in a predictable way, if you only pass the first byte: it will
103
+ * treat 4-byte encoded characters as two 2-byte encoded characters, which is
104
+ * good enough for all current uses.
105
+ *
106
+ * Note: for the display output of psql to work properly, the return values
107
+ * of the dsplen functions must conform to the Unicode standard. In particular
108
+ * the NUL character is zero width and control characters are generally
109
+ * width -1. It is recommended that non-ASCII encodings refer their ASCII
110
+ * subset to the ASCII routines to ensure consistency.
111
+ */
112
+
113
+ /*
114
+ * SQL/ASCII
115
+ */
116
+ static int
117
+ pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
118
+ {
119
+ int cnt = 0;
120
+
121
+ while (len > 0 && *from)
122
+ {
123
+ *to++ = *from++;
124
+ len--;
125
+ cnt++;
126
+ }
127
+ *to = 0;
128
+ return cnt;
129
+ }
130
+
131
/*
 * Byte length of the character at "s": always one byte for this encoding,
 * so the argument is not inspected.
 */
static int
pg_ascii_mblen(const unsigned char *s)
{
    (void) s;
    return 1;
}
136
+
137
/*
 * Display width of the single-byte character at "s":
 *   0  for NUL,
 *  -1  for bytes below 0x20 and for 0x7f (DEL),
 *   1  for everything else.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
    const unsigned char ch = *s;

    if (ch == '\0')
        return 0;

    return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}
147
+
148
+ /*
149
+ * EUC
150
+ */
151
+ static int
152
+ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
153
+ {
154
+ int cnt = 0;
155
+
156
+ while (len > 0 && *from)
157
+ {
158
+ if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
159
+ * KANA") */
160
+ {
161
+ from++;
162
+ *to = (SS2 << 8) | *from++;
163
+ len -= 2;
164
+ }
165
+ else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
166
+ {
167
+ from++;
168
+ *to = (SS3 << 16) | (*from++ << 8);
169
+ *to |= *from++;
170
+ len -= 3;
171
+ }
172
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
173
+ {
174
+ *to = *from++ << 8;
175
+ *to |= *from++;
176
+ len -= 2;
177
+ }
178
+ else /* must be ASCII */
179
+ {
180
+ *to = *from++;
181
+ len--;
182
+ }
183
+ to++;
184
+ cnt++;
185
+ }
186
+ *to = 0;
187
+ return cnt;
188
+ }
189
+
190
+ static inline int
191
+ pg_euc_mblen(const unsigned char *s)
192
+ {
193
+ int len;
194
+
195
+ if (*s == SS2)
196
+ len = 2;
197
+ else if (*s == SS3)
198
+ len = 3;
199
+ else if (IS_HIGHBIT_SET(*s))
200
+ len = 2;
201
+ else
202
+ len = 1;
203
+ return len;
204
+ }
205
+
206
+ static inline int
207
+ pg_euc_dsplen(const unsigned char *s)
208
+ {
209
+ int len;
210
+
211
+ if (*s == SS2)
212
+ len = 2;
213
+ else if (*s == SS3)
214
+ len = 2;
215
+ else if (IS_HIGHBIT_SET(*s))
216
+ len = 2;
217
+ else
218
+ len = pg_ascii_dsplen(s);
219
+ return len;
220
+ }
221
+
222
+ /*
223
+ * EUC_JP
224
+ */
225
+ static int
226
+ pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
227
+ {
228
+ return pg_euc2wchar_with_len(from, to, len);
229
+ }
230
+
231
/* EUC_JP character byte length: same rule as generic EUC. */
static int
pg_eucjp_mblen(const unsigned char *s)
{
    const int   nbytes = pg_euc_mblen(s);

    return nbytes;
}
236
+
237
+ static int
238
+ pg_eucjp_dsplen(const unsigned char *s)
239
+ {
240
+ int len;
241
+
242
+ if (*s == SS2)
243
+ len = 1;
244
+ else if (*s == SS3)
245
+ len = 2;
246
+ else if (IS_HIGHBIT_SET(*s))
247
+ len = 2;
248
+ else
249
+ len = pg_ascii_dsplen(s);
250
+ return len;
251
+ }
252
+
253
+ /*
254
+ * EUC_KR
255
+ */
256
+ static int
257
+ pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
258
+ {
259
+ return pg_euc2wchar_with_len(from, to, len);
260
+ }
261
+
262
/* EUC_KR character byte length: same rule as generic EUC. */
static int
pg_euckr_mblen(const unsigned char *s)
{
    const int   nbytes = pg_euc_mblen(s);

    return nbytes;
}
267
+
268
/* EUC_KR display width: same rule as generic EUC. */
static int
pg_euckr_dsplen(const unsigned char *s)
{
    const int   width = pg_euc_dsplen(s);

    return width;
}
273
+
274
+ /*
275
+ * EUC_CN
276
+ *
277
+ */
278
+ static int
279
+ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
280
+ {
281
+ int cnt = 0;
282
+
283
+ while (len > 0 && *from)
284
+ {
285
+ if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
286
+ {
287
+ from++;
288
+ *to = (SS2 << 16) | (*from++ << 8);
289
+ *to |= *from++;
290
+ len -= 3;
291
+ }
292
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
293
+ {
294
+ from++;
295
+ *to = (SS3 << 16) | (*from++ << 8);
296
+ *to |= *from++;
297
+ len -= 3;
298
+ }
299
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
300
+ {
301
+ *to = *from++ << 8;
302
+ *to |= *from++;
303
+ len -= 2;
304
+ }
305
+ else
306
+ {
307
+ *to = *from++;
308
+ len--;
309
+ }
310
+ to++;
311
+ cnt++;
312
+ }
313
+ *to = 0;
314
+ return cnt;
315
+ }
316
+
317
/* EUC_CN character byte length: 2 when the lead byte has its high bit set, else 1. */
static int
pg_euccn_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
328
+
329
/* EUC_CN display width: 2 for a high-bit lead byte, otherwise the ASCII width. */
static int
pg_euccn_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
340
+
341
+ /*
342
+ * EUC_TW
343
+ *
344
+ */
345
+ static int
346
+ pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
347
+ {
348
+ int cnt = 0;
349
+
350
+ while (len > 0 && *from)
351
+ {
352
+ if (*from == SS2 && len >= 4) /* code set 2 */
353
+ {
354
+ from++;
355
+ *to = (((uint32) SS2) << 24) | (*from++ << 16);
356
+ *to |= *from++ << 8;
357
+ *to |= *from++;
358
+ len -= 4;
359
+ }
360
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
361
+ {
362
+ from++;
363
+ *to = (SS3 << 16) | (*from++ << 8);
364
+ *to |= *from++;
365
+ len -= 3;
366
+ }
367
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
368
+ {
369
+ *to = *from++ << 8;
370
+ *to |= *from++;
371
+ len -= 2;
372
+ }
373
+ else
374
+ {
375
+ *to = *from++;
376
+ len--;
377
+ }
378
+ to++;
379
+ cnt++;
380
+ }
381
+ *to = 0;
382
+ return cnt;
383
+ }
384
+
385
+ static int
386
+ pg_euctw_mblen(const unsigned char *s)
387
+ {
388
+ int len;
389
+
390
+ if (*s == SS2)
391
+ len = 4;
392
+ else if (*s == SS3)
393
+ len = 3;
394
+ else if (IS_HIGHBIT_SET(*s))
395
+ len = 2;
396
+ else
397
+ len = 1;
398
+ return len;
399
+ }
400
+
401
+ static int
402
+ pg_euctw_dsplen(const unsigned char *s)
403
+ {
404
+ int len;
405
+
406
+ if (*s == SS2)
407
+ len = 2;
408
+ else if (*s == SS3)
409
+ len = 2;
410
+ else if (IS_HIGHBIT_SET(*s))
411
+ len = 2;
412
+ else
413
+ len = pg_ascii_dsplen(s);
414
+ return len;
415
+ }
416
+
417
+ /*
418
+ * Convert pg_wchar to EUC_* encoding.
419
+ * caller must allocate enough space for "to", including a trailing zero!
420
+ * len: length of from.
421
+ * "from" not necessarily null terminated.
422
+ */
423
+ static int
424
+ pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
425
+ {
426
+ int cnt = 0;
427
+
428
+ while (len > 0 && *from)
429
+ {
430
+ unsigned char c;
431
+
432
+ if ((c = (*from >> 24)))
433
+ {
434
+ *to++ = c;
435
+ *to++ = (*from >> 16) & 0xff;
436
+ *to++ = (*from >> 8) & 0xff;
437
+ *to++ = *from & 0xff;
438
+ cnt += 4;
439
+ }
440
+ else if ((c = (*from >> 16)))
441
+ {
442
+ *to++ = c;
443
+ *to++ = (*from >> 8) & 0xff;
444
+ *to++ = *from & 0xff;
445
+ cnt += 3;
446
+ }
447
+ else if ((c = (*from >> 8)))
448
+ {
449
+ *to++ = c;
450
+ *to++ = *from & 0xff;
451
+ cnt += 2;
452
+ }
453
+ else
454
+ {
455
+ *to++ = *from;
456
+ cnt++;
457
+ }
458
+ from++;
459
+ len--;
460
+ }
461
+ *to = 0;
462
+ return cnt;
463
+ }
464
+
465
+
466
+ /*
467
+ * JOHAB
468
+ */
469
/* JOHAB character byte length: uses the generic EUC rule. */
static int
pg_johab_mblen(const unsigned char *s)
{
    const int   nbytes = pg_euc_mblen(s);

    return nbytes;
}
474
+
475
/* JOHAB display width: uses the generic EUC rule. */
static int
pg_johab_dsplen(const unsigned char *s)
{
    const int   width = pg_euc_dsplen(s);

    return width;
}
480
+
481
+ /*
482
+ * convert UTF8 string to pg_wchar (UCS-4)
483
+ * caller must allocate enough space for "to", including a trailing zero!
484
+ * len: length of from.
485
+ * "from" not necessarily null terminated.
486
+ */
487
+ static int
488
+ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
489
+ {
490
+ int cnt = 0;
491
+ uint32 c1,
492
+ c2,
493
+ c3,
494
+ c4;
495
+
496
+ while (len > 0 && *from)
497
+ {
498
+ if ((*from & 0x80) == 0)
499
+ {
500
+ *to = *from++;
501
+ len--;
502
+ }
503
+ else if ((*from & 0xe0) == 0xc0)
504
+ {
505
+ if (len < 2)
506
+ break; /* drop trailing incomplete char */
507
+ c1 = *from++ & 0x1f;
508
+ c2 = *from++ & 0x3f;
509
+ *to = (c1 << 6) | c2;
510
+ len -= 2;
511
+ }
512
+ else if ((*from & 0xf0) == 0xe0)
513
+ {
514
+ if (len < 3)
515
+ break; /* drop trailing incomplete char */
516
+ c1 = *from++ & 0x0f;
517
+ c2 = *from++ & 0x3f;
518
+ c3 = *from++ & 0x3f;
519
+ *to = (c1 << 12) | (c2 << 6) | c3;
520
+ len -= 3;
521
+ }
522
+ else if ((*from & 0xf8) == 0xf0)
523
+ {
524
+ if (len < 4)
525
+ break; /* drop trailing incomplete char */
526
+ c1 = *from++ & 0x07;
527
+ c2 = *from++ & 0x3f;
528
+ c3 = *from++ & 0x3f;
529
+ c4 = *from++ & 0x3f;
530
+ *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
531
+ len -= 4;
532
+ }
533
+ else
534
+ {
535
+ /* treat a bogus char as length 1; not ours to raise error */
536
+ *to = *from++;
537
+ len--;
538
+ }
539
+ to++;
540
+ cnt++;
541
+ }
542
+ *to = 0;
543
+ return cnt;
544
+ }
545
+
546
+
547
+ /*
548
+ * Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
549
+ * space allocated.
550
+ */
551
+ unsigned char *
552
+ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
553
+ {
554
+ if (c <= 0x7F)
555
+ {
556
+ utf8string[0] = c;
557
+ }
558
+ else if (c <= 0x7FF)
559
+ {
560
+ utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
561
+ utf8string[1] = 0x80 | (c & 0x3F);
562
+ }
563
+ else if (c <= 0xFFFF)
564
+ {
565
+ utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
566
+ utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
567
+ utf8string[2] = 0x80 | (c & 0x3F);
568
+ }
569
+ else
570
+ {
571
+ utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
572
+ utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
573
+ utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
574
+ utf8string[3] = 0x80 | (c & 0x3F);
575
+ }
576
+
577
+ return utf8string;
578
+ }
579
+
580
+ /*
581
+ * Trivial conversion from pg_wchar to UTF-8.
582
+ * caller should allocate enough space for "to"
583
+ * len: length of from.
584
+ * "from" not necessarily null terminated.
585
+ */
586
+ static int
587
+ pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
588
+ {
589
+ int cnt = 0;
590
+
591
+ while (len > 0 && *from)
592
+ {
593
+ int char_len;
594
+
595
+ unicode_to_utf8(*from, to);
596
+ char_len = pg_utf_mblen(to);
597
+ cnt += char_len;
598
+ to += char_len;
599
+ from++;
600
+ len--;
601
+ }
602
+ *to = 0;
603
+ return cnt;
604
+ }
605
+
606
+ /*
607
+ * Return the byte length of a UTF8 character pointed to by s
608
+ *
609
+ * Note: in the current implementation we do not support UTF8 sequences
610
+ * of more than 4 bytes; hence do NOT return a value larger than 4.
611
+ * We return "1" for any leading byte that is either flat-out illegal or
612
+ * indicates a length larger than we support.
613
+ *
614
+ * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
615
+ * other places would need to be fixed to change this.
616
+ */
617
/*
 * Byte length of the UTF-8 character whose first byte is *s.
 *
 * Only the lead byte is examined.  Sequences longer than 4 bytes are not
 * supported, so any lead byte that is illegal or announces a longer
 * sequence yields 1.
 */
int
pg_utf_mblen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if ((lead & 0x80) == 0)
        return 1;
    if ((lead & 0xe0) == 0xc0)
        return 2;
    if ((lead & 0xf0) == 0xe0)
        return 3;
    if ((lead & 0xf8) == 0xf0)
        return 4;
#ifdef NOT_USED
    if ((lead & 0xfc) == 0xf8)
        return 5;
    if ((lead & 0xfe) == 0xfc)
        return 6;
#endif
    return 1;
}
640
+
641
+ /*
642
+ * This is an implementation of wcwidth() and wcswidth() as defined in
643
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
644
+ * <http://www.unix.org/online.html>
645
+ *
646
+ * Markus Kuhn -- 2001-09-08 -- public domain
647
+ *
648
+ * customised for PostgreSQL
649
+ *
650
+ * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
651
+ */
652
+
653
/*
 * One closed range [first, last] of code points in an interval table.
 *
 * The bounds are unsigned int rather than unsigned short so that ranges
 * above U+FFFF can be stored without truncation; tables whose entries all
 * fit in 16 bits initialize this struct unchanged.
 */
struct mbinterval
{
    unsigned int first;
    unsigned int last;
};
658
+
659
+ /* auxiliary function for binary search in interval table */
660
+ static int
661
+ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
662
+ {
663
+ int min = 0;
664
+ int mid;
665
+
666
+ if (ucs < table[0].first || ucs > table[max].last)
667
+ return 0;
668
+ while (max >= min)
669
+ {
670
+ mid = (min + max) / 2;
671
+ if (ucs > table[mid].last)
672
+ min = mid + 1;
673
+ else if (ucs < table[mid].first)
674
+ max = mid - 1;
675
+ else
676
+ return 1;
677
+ }
678
+
679
+ return 0;
680
+ }
681
+
682
+
683
+ /* The following functions define the column width of an ISO 10646
684
+ * character as follows:
685
+ *
686
+ * - The null character (U+0000) has a column width of 0.
687
+ *
688
+ * - Other C0/C1 control characters and DEL will lead to a return
689
+ * value of -1.
690
+ *
691
+ * - Non-spacing and enclosing combining characters (general
692
+ * category code Mn or Me in the Unicode database) have a
693
+ * column width of 0.
694
+ *
695
+ * - Other format characters (general category code Cf in the Unicode
696
+ * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
697
+ *
698
+ * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
699
+ * have a column width of 0.
700
+ *
701
+ * - Spacing characters in the East Asian Wide (W) or East Asian
702
+ * FullWidth (F) category as defined in Unicode Technical
703
+ * Report #11 have a column width of 2.
704
+ *
705
+ * - All remaining characters (including all printable
706
+ * ISO 8859-1 and WGL4 characters, Unicode control characters,
707
+ * etc.) have a column width of 1.
708
+ *
709
+ * This implementation assumes that wchar_t characters are encoded
710
+ * in ISO 10646.
711
+ */
712
+
713
+ static int
714
+ ucs_wcwidth(pg_wchar ucs)
715
+ {
716
+ #include "common/unicode_combining_table.h"
717
+
718
+ /* test for 8-bit control characters */
719
+ if (ucs == 0)
720
+ return 0;
721
+
722
+ if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
723
+ return -1;
724
+
725
+ /* binary search in table of non-spacing characters */
726
+ if (mbbisearch(ucs, combining,
727
+ sizeof(combining) / sizeof(struct mbinterval) - 1))
728
+ return 0;
729
+
730
+ /*
731
+ * if we arrive here, ucs is not a combining or C0/C1 control character
732
+ */
733
+
734
+ return 1 +
735
+ (ucs >= 0x1100 &&
736
+ (ucs <= 0x115f || /* Hangul Jamo init. consonants */
737
+ (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
738
+ ucs != 0x303f) || /* CJK ... Yi */
739
+ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
740
+ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
741
+ * Ideographs */
742
+ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
743
+ (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
744
+ (ucs >= 0xffe0 && ucs <= 0xffe6) ||
745
+ (ucs >= 0x20000 && ucs <= 0x2ffff)));
746
+ }
747
+
748
+ /*
749
+ * Convert a UTF-8 character to a Unicode code point.
750
+ * This is a one-character version of pg_utf2wchar_with_len.
751
+ *
752
+ * No error checks here, c must point to a long-enough string.
753
+ */
754
+ pg_wchar
755
+ utf8_to_unicode(const unsigned char *c)
756
+ {
757
+ if ((*c & 0x80) == 0)
758
+ return (pg_wchar) c[0];
759
+ else if ((*c & 0xe0) == 0xc0)
760
+ return (pg_wchar) (((c[0] & 0x1f) << 6) |
761
+ (c[1] & 0x3f));
762
+ else if ((*c & 0xf0) == 0xe0)
763
+ return (pg_wchar) (((c[0] & 0x0f) << 12) |
764
+ ((c[1] & 0x3f) << 6) |
765
+ (c[2] & 0x3f));
766
+ else if ((*c & 0xf8) == 0xf0)
767
+ return (pg_wchar) (((c[0] & 0x07) << 18) |
768
+ ((c[1] & 0x3f) << 12) |
769
+ ((c[2] & 0x3f) << 6) |
770
+ (c[3] & 0x3f));
771
+ else
772
+ /* that is an invalid code on purpose */
773
+ return 0xffffffff;
774
+ }
775
+
776
+ static int
777
+ pg_utf_dsplen(const unsigned char *s)
778
+ {
779
+ return ucs_wcwidth(utf8_to_unicode(s));
780
+ }
781
+
782
+ /*
783
+ * convert mule internal code to pg_wchar
784
+ * caller should allocate enough space for "to"
785
+ * len: length of from.
786
+ * "from" not necessarily null terminated.
787
+ */
788
+ static int
789
+ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
790
+ {
791
+ int cnt = 0;
792
+
793
+ while (len > 0 && *from)
794
+ {
795
+ if (IS_LC1(*from) && len >= 2)
796
+ {
797
+ *to = *from++ << 16;
798
+ *to |= *from++;
799
+ len -= 2;
800
+ }
801
+ else if (IS_LCPRV1(*from) && len >= 3)
802
+ {
803
+ from++;
804
+ *to = *from++ << 16;
805
+ *to |= *from++;
806
+ len -= 3;
807
+ }
808
+ else if (IS_LC2(*from) && len >= 3)
809
+ {
810
+ *to = *from++ << 16;
811
+ *to |= *from++ << 8;
812
+ *to |= *from++;
813
+ len -= 3;
814
+ }
815
+ else if (IS_LCPRV2(*from) && len >= 4)
816
+ {
817
+ from++;
818
+ *to = *from++ << 16;
819
+ *to |= *from++ << 8;
820
+ *to |= *from++;
821
+ len -= 4;
822
+ }
823
+ else
824
+ { /* assume ASCII */
825
+ *to = (unsigned char) *from++;
826
+ len--;
827
+ }
828
+ to++;
829
+ cnt++;
830
+ }
831
+ *to = 0;
832
+ return cnt;
833
+ }
834
+
835
+ /*
836
+ * convert pg_wchar to mule internal code
837
+ * caller should allocate enough space for "to"
838
+ * len: length of from.
839
+ * "from" not necessarily null terminated.
840
+ */
841
+ static int
842
+ pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
843
+ {
844
+ int cnt = 0;
845
+
846
+ while (len > 0 && *from)
847
+ {
848
+ unsigned char lb;
849
+
850
+ lb = (*from >> 16) & 0xff;
851
+ if (IS_LC1(lb))
852
+ {
853
+ *to++ = lb;
854
+ *to++ = *from & 0xff;
855
+ cnt += 2;
856
+ }
857
+ else if (IS_LC2(lb))
858
+ {
859
+ *to++ = lb;
860
+ *to++ = (*from >> 8) & 0xff;
861
+ *to++ = *from & 0xff;
862
+ cnt += 3;
863
+ }
864
+ else if (IS_LCPRV1_A_RANGE(lb))
865
+ {
866
+ *to++ = LCPRV1_A;
867
+ *to++ = lb;
868
+ *to++ = *from & 0xff;
869
+ cnt += 3;
870
+ }
871
+ else if (IS_LCPRV1_B_RANGE(lb))
872
+ {
873
+ *to++ = LCPRV1_B;
874
+ *to++ = lb;
875
+ *to++ = *from & 0xff;
876
+ cnt += 3;
877
+ }
878
+ else if (IS_LCPRV2_A_RANGE(lb))
879
+ {
880
+ *to++ = LCPRV2_A;
881
+ *to++ = lb;
882
+ *to++ = (*from >> 8) & 0xff;
883
+ *to++ = *from & 0xff;
884
+ cnt += 4;
885
+ }
886
+ else if (IS_LCPRV2_B_RANGE(lb))
887
+ {
888
+ *to++ = LCPRV2_B;
889
+ *to++ = lb;
890
+ *to++ = (*from >> 8) & 0xff;
891
+ *to++ = *from & 0xff;
892
+ cnt += 4;
893
+ }
894
+ else
895
+ {
896
+ *to++ = *from & 0xff;
897
+ cnt += 1;
898
+ }
899
+ from++;
900
+ len--;
901
+ }
902
+ *to = 0;
903
+ return cnt;
904
+ }
905
+
906
+ /* exported for direct use by conv.c */
907
/*
 * Byte length of the MULE character starting at "s", from its first byte:
 * LC1 -> 2, LCPRV1 -> 3, LC2 -> 3, LCPRV2 -> 4, anything else -> 1.
 */
int
pg_mule_mblen(const unsigned char *s)
{
    if (IS_LC1(*s))
        return 2;
    if (IS_LCPRV1(*s))
        return 3;
    if (IS_LC2(*s))
        return 3;
    if (IS_LCPRV2(*s))
        return 4;

    return 1;                   /* assume ASCII */
}
924
+
925
/*
 * Display width of the MULE character starting at "s": 1 for LC1/LCPRV1,
 * 2 for LC2/LCPRV2, 1 otherwise.
 *
 * Note: it's not really appropriate to assume that all multibyte charsets
 * are double-wide on screen.  But this seems an okay approximation for
 * the MULE charsets we currently support.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
    if (IS_LC1(*s) || IS_LCPRV1(*s))
        return 1;
    if (IS_LC2(*s) || IS_LCPRV2(*s))
        return 2;

    return 1;                   /* assume ASCII */
}
949
+
950
+ /*
951
+ * ISO8859-1
952
+ */
953
+ static int
954
+ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
955
+ {
956
+ int cnt = 0;
957
+
958
+ while (len > 0 && *from)
959
+ {
960
+ *to++ = *from++;
961
+ len--;
962
+ cnt++;
963
+ }
964
+ *to = 0;
965
+ return cnt;
966
+ }
967
+
968
+ /*
969
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
970
+ * high bits.
971
+ * caller should allocate enough space for "to"
972
+ * len: length of from.
973
+ * "from" not necessarily null terminated.
974
+ */
975
+ static int
976
+ pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
977
+ {
978
+ int cnt = 0;
979
+
980
+ while (len > 0 && *from)
981
+ {
982
+ *to++ = *from++;
983
+ len--;
984
+ cnt++;
985
+ }
986
+ *to = 0;
987
+ return cnt;
988
+ }
989
+
990
/* Byte length of an ISO8859-1 character: always 1; the argument is not inspected. */
static int
pg_latin1_mblen(const unsigned char *s)
{
    (void) s;
    return 1;
}
995
+
996
/* Display width of an ISO8859-1 character: same rule as the ASCII routine. */
static int
pg_latin1_dsplen(const unsigned char *s)
{
    const int   width = pg_ascii_dsplen(s);

    return width;
}
1001
+
1002
+ /*
1003
+ * SJIS
1004
+ */
1005
/*
 * Byte length of the SJIS character starting at "s": 1 for a lead byte in
 * 0xa1..0xdf (1 byte kana?), 2 for any other high-bit byte (kanji?),
 * 1 otherwise (should be ASCII).
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;

    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1018
+
1019
/*
 * Display width of the SJIS character starting at "s": 1 for a lead byte in
 * 0xa1..0xdf (1 byte kana?), 2 for any other high-bit byte (kanji?),
 * otherwise the ASCII width.
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
1032
+
1033
+ /*
1034
+ * Big5
1035
+ */
1036
/* Big5 character byte length: 2 for a high-bit lead byte, else 1 (should be ASCII). */
static int
pg_big5_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1047
+
1048
/* Big5 display width: 2 for a high-bit lead byte, otherwise the ASCII width. */
static int
pg_big5_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
1059
+
1060
+ /*
1061
+ * GBK
1062
+ */
1063
/* GBK character byte length: 2 for a high-bit lead byte, else 1 (should be ASCII). */
static int
pg_gbk_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1074
+
1075
/* GBK display width: 2 for a high-bit lead byte, otherwise the ASCII width. */
static int
pg_gbk_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
1086
+
1087
+ /*
1088
+ * UHC
1089
+ */
1090
/* UHC character byte length: 2 for a high-bit lead byte, else 1 (should be ASCII). */
static int
pg_uhc_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
1101
+
1102
/* UHC display width: 2 for a high-bit lead byte, otherwise the ASCII width. */
static int
pg_uhc_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
1113
+
1114
+ /*
1115
+ * GB18030
1116
+ * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
1117
+ */
1118
+
1119
+ /*
1120
+ * Unlike all other mblen() functions, this also looks at the second byte of
1121
+ * the input. However, if you only pass the first byte of a multi-byte
1122
+ * string, and \0 as the second byte, this still works in a predictable way:
1123
+ * a 4-byte character will be reported as two 2-byte characters. That's
1124
+ * enough for all current uses, as a client-only encoding. It works that
1125
+ * way, because in any valid 4-byte GB18030-encoded character, the third and
1126
+ * fourth byte look like a 2-byte encoded character, when looked at
1127
+ * separately.
1128
+ */
1129
/*
 * Byte length of the GB18030 character starting at "s".
 *
 * Returns 1 when the first byte has no high bit.  Otherwise the second
 * byte is read as well: 0x30..0x39 there means a 4-byte character, anything
 * else a 2-byte one.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
    if (!IS_HIGHBIT_SET(*s))
        return 1;               /* ASCII */

    return (s[1] >= 0x30 && s[1] <= 0x39) ? 4 : 2;
}
1142
+
1143
/* GB18030 display width: 2 for a high-bit lead byte, otherwise the ASCII width. */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);  /* ASCII */
}
1154
+
1155
+ /*
1156
+ *-------------------------------------------------------------------
1157
+ * multibyte sequence validators
1158
+ *
1159
+ * These functions accept "s", a pointer to the first byte of a string,
1160
+ * and "len", the remaining length of the string. If there is a validly
1161
+ * encoded character beginning at *s, return its length in bytes; else
1162
+ * return -1.
1163
+ *
1164
+ * The functions can assume that len > 0 and that *s != '\0', but they must
1165
+ * test for and reject zeroes in any additional bytes of a multibyte character.
1166
+ *
1167
+ * Note that this definition allows the function for a single-byte
1168
+ * encoding to be just "return 1".
1169
+ *-------------------------------------------------------------------
1170
+ */
1171
+
1172
/*
 * Validator for the single-byte SQL/ASCII encoding: every byte is accepted
 * as a one-byte character, so neither argument is inspected.
 */
static int
pg_ascii_verifier(const unsigned char *s, int len)
{
    (void) s;
    (void) len;
    return 1;
}
1177
+
1178
/* True when byte value c lies in 0xa1..0xfe inclusive; c is evaluated up to twice. */
#define IS_EUC_RANGE_VALID(c)	(0xa1 <= (c) && (c) <= 0xfe)
1179
+
1180
+ static int
1181
+ pg_eucjp_verifier(const unsigned char *s, int len)
1182
+ {
1183
+ int l;
1184
+ unsigned char c1,
1185
+ c2;
1186
+
1187
+ c1 = *s++;
1188
+
1189
+ switch (c1)
1190
+ {
1191
+ case SS2: /* JIS X 0201 */
1192
+ l = 2;
1193
+ if (l > len)
1194
+ return -1;
1195
+ c2 = *s++;
1196
+ if (c2 < 0xa1 || c2 > 0xdf)
1197
+ return -1;
1198
+ break;
1199
+
1200
+ case SS3: /* JIS X 0212 */
1201
+ l = 3;
1202
+ if (l > len)
1203
+ return -1;
1204
+ c2 = *s++;
1205
+ if (!IS_EUC_RANGE_VALID(c2))
1206
+ return -1;
1207
+ c2 = *s++;
1208
+ if (!IS_EUC_RANGE_VALID(c2))
1209
+ return -1;
1210
+ break;
1211
+
1212
+ default:
1213
+ if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1214
+ {
1215
+ l = 2;
1216
+ if (l > len)
1217
+ return -1;
1218
+ if (!IS_EUC_RANGE_VALID(c1))
1219
+ return -1;
1220
+ c2 = *s++;
1221
+ if (!IS_EUC_RANGE_VALID(c2))
1222
+ return -1;
1223
+ }
1224
+ else
1225
+ /* must be ASCII */
1226
+ {
1227
+ l = 1;
1228
+ }
1229
+ break;
1230
+ }
1231
+
1232
+ return l;
1233
+ }
1234
+
1235
/*
 * Validate the EUC_KR character starting at "s" with "len" bytes available.
 * A byte without the high bit is a 1-byte character; otherwise both bytes
 * of a 2-byte character must lie in the EUC range.  Returns the byte
 * length, or -1 on a truncated or out-of-range sequence.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
    const unsigned char lead = s[0];

    /* must be ASCII */
    if (!IS_HIGHBIT_SET(lead))
        return 1;

    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(lead) || !IS_EUC_RANGE_VALID(s[1]))
        return -1;
    return 2;
}
1263
+
1264
+ /* EUC-CN byte sequences are exactly same as EUC-KR */
1265
+ #define pg_euccn_verifier pg_euckr_verifier
1266
+
1267
+ static int
1268
+ pg_euctw_verifier(const unsigned char *s, int len)
1269
+ {
1270
+ int l;
1271
+ unsigned char c1,
1272
+ c2;
1273
+
1274
+ c1 = *s++;
1275
+
1276
+ switch (c1)
1277
+ {
1278
+ case SS2: /* CNS 11643 Plane 1-7 */
1279
+ l = 4;
1280
+ if (l > len)
1281
+ return -1;
1282
+ c2 = *s++;
1283
+ if (c2 < 0xa1 || c2 > 0xa7)
1284
+ return -1;
1285
+ c2 = *s++;
1286
+ if (!IS_EUC_RANGE_VALID(c2))
1287
+ return -1;
1288
+ c2 = *s++;
1289
+ if (!IS_EUC_RANGE_VALID(c2))
1290
+ return -1;
1291
+ break;
1292
+
1293
+ case SS3: /* unused */
1294
+ return -1;
1295
+
1296
+ default:
1297
+ if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
1298
+ {
1299
+ l = 2;
1300
+ if (l > len)
1301
+ return -1;
1302
+ /* no further range check on c1? */
1303
+ c2 = *s++;
1304
+ if (!IS_EUC_RANGE_VALID(c2))
1305
+ return -1;
1306
+ }
1307
+ else
1308
+ /* must be ASCII */
1309
+ {
1310
+ l = 1;
1311
+ }
1312
+ break;
1313
+ }
1314
+ return l;
1315
+ }
1316
+
1317
/*
 * Validate the JOHAB character starting at "s" with "len" bytes available.
 * The length comes from pg_johab_mblen(); for a high-bit lead byte every
 * trailing byte must lie in the EUC range.  Returns the byte length, or -1.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_johab_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    if (!IS_HIGHBIT_SET(*s))
        return mbl;

    for (i = 1; i < mbl; i++)
    {
        if (!IS_EUC_RANGE_VALID(s[i]))
            return -1;
    }
    return mbl;
}
1340
+
1341
/*
 * Validate the MULE character starting at "s" with "len" bytes available.
 * The length comes from pg_mule_mblen(); every trailing byte must have its
 * high bit set.  Returns the byte length, or -1.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_mule_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (!IS_HIGHBIT_SET(s[i]))
            return -1;
    }
    return mbl;
}
1361
+
1362
/*
 * Validator for ISO8859-1: every byte is accepted as a one-byte character,
 * so neither argument is inspected.
 */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
    (void) s;
    (void) len;
    return 1;
}
1367
+
1368
/*
 * Validate the SJIS character starting at "s" with "len" bytes available.
 * One-byte characters are accepted as classified by pg_sjis_mblen(); a
 * two-byte character must pass ISSJISHEAD on its first byte and ISSJISTAIL
 * on its second.  Returns the byte length, or -1.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_sjis_mblen(s);
    unsigned char head;
    unsigned char tail;

    if (len < mbl)
        return -1;

    if (mbl == 1)               /* pg_sjis_mblen already verified it */
        return mbl;

    head = s[0];
    tail = s[1];
    if (!ISSJISHEAD(head) || !ISSJISTAIL(tail))
        return -1;
    return mbl;
}
1390
+
1391
/*
 * Verify the first BIG5 character of s, where len bytes are available.
 *
 * Returns the character's byte length as reported by pg_big5_mblen(),
 * or -1 if the character is truncated or has a zero byte after the
 * lead byte.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_big5_mblen(s);
	int			i;

	/* Not enough input left to hold the whole character. */
	if (charlen > len)
		return -1;

	/* A NUL may not appear among the trailing bytes. */
	for (i = 1; i < charlen; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return charlen;
}
1410
+
1411
/*
 * Verify the first GBK character of s, where len bytes are available.
 *
 * Returns the character's byte length as reported by pg_gbk_mblen(),
 * or -1 if the character is truncated or has a zero byte after the
 * lead byte.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_gbk_mblen(s);
	int			i;

	/* Not enough input left to hold the whole character. */
	if (charlen > len)
		return -1;

	/* A NUL may not appear among the trailing bytes. */
	for (i = 1; i < charlen; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return charlen;
}
1430
+
1431
/*
 * Verify the first UHC character of s, where len bytes are available.
 *
 * Returns the character's byte length as reported by pg_uhc_mblen(),
 * or -1 if the character is truncated or has a zero byte after the
 * lead byte.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_uhc_mblen(s);
	int			i;

	/* Not enough input left to hold the whole character. */
	if (charlen > len)
		return -1;

	/* A NUL may not appear among the trailing bytes. */
	for (i = 1; i < charlen; i++)
	{
		if (s[i] == '\0')
			return -1;
	}

	return charlen;
}
1450
+
1451
/*
 * Verify the first GB18030 character of s, where len bytes are available.
 *
 * Returns 1, 2 or 4 (the character's byte length) when the sequence is
 * acceptable, or -1 otherwise.  The form is chosen as follows:
 *	- lead byte without the high bit: 1 byte;
 *	- at least 4 bytes available and second byte in 0x30..0x39: 4 bytes;
 *	- at least 2 bytes available and lead byte in 0x81..0xfe: 2 bytes.
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
	/* One-byte form (ASCII). */
	if (!IS_HIGHBIT_SET(*s))
		return 1;

	/* A digit-range second byte selects the four-byte form. */
	if (len >= 4 && s[1] >= 0x30 && s[1] <= 0x39)
	{
		if (s[0] < 0x81 || s[0] > 0xfe)
			return -1;
		if (s[2] < 0x81 || s[2] > 0xfe)
			return -1;
		if (s[3] < 0x30 || s[3] > 0x39)
			return -1;
		return 4;
	}

	/* Otherwise only the two-byte form remains possible. */
	if (len >= 2 && s[0] >= 0x81 && s[0] <= 0xfe)
	{
		const unsigned char second = s[1];

		if (second >= 0x40 && second <= 0x7e)
			return 2;
		if (second >= 0x80 && second <= 0xfe)
			return 2;
		return -1;
	}

	return -1;
}
1481
+
1482
/*
 * Verify the first UTF-8 character of s, where len bytes are available.
 *
 * Returns the character's byte length as reported by pg_utf_mblen(),
 * or -1 if the character is truncated or pg_utf8_islegal() rejects it.
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
	const int	charlen = pg_utf_mblen(s);

	/*
	 * The legality check is only reached when all charlen bytes are
	 * known to be present in the buffer.
	 */
	if (charlen <= len && pg_utf8_islegal(s, charlen))
		return charlen;

	return -1;
}
1495
+
1496
/*
 * Decide whether `length` bytes at `source` form one legal UTF-8 character.
 *
 * The rules are those of RFC 3629.  The second byte is held to a tighter
 * range for certain lead bytes so that each code point has exactly one
 * encoding: a longer-than-necessary sequence padded with high-order zero
 * bits is refused, since accepting it would be a security hazard (e.g. a
 * seemingly non-ASCII character that decodes to plain ASCII).
 *
 * length is expected to come from pg_utf_mblen(), and the caller must
 * already have checked that that many bytes are present in the buffer.
 *
 * Returns true if the sequence is legal, false otherwise.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
	unsigned char lead;
	int			i;

	/* Only lengths 1..4 are accepted; 5 and 6 are rejected for now. */
	if (length < 1 || length > 4)
		return false;

	lead = source[0];

	/* Third and fourth bytes, when present, are plain continuation bytes. */
	for (i = length - 1; i >= 2; i--)
	{
		if (source[i] < 0x80 || source[i] > 0xBF)
			return false;
	}

	/* The second byte's permitted range depends on the lead byte. */
	if (length >= 2)
	{
		const unsigned char second = source[1];
		unsigned char lowest = 0x80;
		unsigned char highest = 0xBF;

		if (lead == 0xE0)
			lowest = 0xA0;
		else if (lead == 0xED)
			highest = 0x9F;
		else if (lead == 0xF0)
			lowest = 0x90;
		else if (lead == 0xF4)
			highest = 0x8F;

		if (second < lowest || second > highest)
			return false;
	}

	/* Finally the lead byte itself: 0x80..0xC1 and above 0xF4 are refused. */
	if (lead >= 0x80 && lead < 0xC2)
		return false;
	if (lead > 0xF4)
		return false;

	return true;
}
1566
+
1567
+
1568
+ /*
1569
+ *-------------------------------------------------------------------
1570
+ * encoding info table
1571
+ * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
1572
+ *-------------------------------------------------------------------
1573
+ */
1574
+ const pg_wchar_tbl pg_wchar_table[] = {
1575
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
1576
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
1577
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
1578
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
1579
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
1580
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
1581
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
1582
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
1583
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
1584
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
1585
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
1586
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
1587
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
1588
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
1589
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
1590
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
1591
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
1592
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
1593
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
1594
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
1595
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
1596
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
1597
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
1598
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
1599
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
1600
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
1601
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
1602
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
1603
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
1604
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
1605
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
1606
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
1607
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
1608
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
1609
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
1610
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
1611
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
1612
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
1613
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
1614
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
1615
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
1616
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
1617
+ };
1618
+
1619
+ /*
1620
+ * Returns the byte length of a multibyte character.
1621
+ */
1622
+ int
1623
+ pg_encoding_mblen(int encoding, const char *mbstr)
1624
+ {
1625
+ return (PG_VALID_ENCODING(encoding) ?
1626
+ pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1627
+ pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1628
+ }
1629
+
1630
+ /*
1631
+ * Returns the display length of a multibyte character.
1632
+ */
1633
+
1634
+
1635
+ /*
1636
+ * Verify the first multibyte character of the given string.
1637
+ * Return its byte length if good, -1 if bad. (See comments above for
1638
+ * full details of the mbverify API.)
1639
+ */
1640
+
1641
+
1642
+ /*
1643
+ * fetch maximum length of a given encoding
1644
+ */
1645
+ int
1646
+ pg_encoding_max_length(int encoding)
1647
+ {
1648
+ Assert(PG_VALID_ENCODING(encoding));
1649
+
1650
+ return pg_wchar_table[encoding].maxmblen;
1651
+ }