rugged 0.28.3.1 → 0.28.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (350) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rugged/version.rb +1 -1
  3. data/vendor/libgit2/AUTHORS +1 -0
  4. data/vendor/libgit2/CMakeLists.txt +36 -16
  5. data/vendor/libgit2/COPYING +28 -0
  6. data/vendor/libgit2/cmake/Modules/EnableWarnings.cmake +5 -1
  7. data/vendor/libgit2/cmake/Modules/FindCoreFoundation.cmake +2 -2
  8. data/vendor/libgit2/cmake/Modules/FindGSSAPI.cmake +1 -1
  9. data/vendor/libgit2/cmake/Modules/FindGSSFramework.cmake +28 -0
  10. data/vendor/libgit2/cmake/Modules/FindPCRE.cmake +38 -0
  11. data/vendor/libgit2/cmake/Modules/FindPCRE2.cmake +37 -0
  12. data/vendor/libgit2/cmake/Modules/FindSecurity.cmake +2 -2
  13. data/vendor/libgit2/cmake/Modules/FindStatNsec.cmake +6 -0
  14. data/vendor/libgit2/cmake/Modules/PkgBuildConfig.cmake +110 -0
  15. data/vendor/libgit2/cmake/Modules/SelectGSSAPI.cmake +53 -0
  16. data/vendor/libgit2/cmake/Modules/SelectHTTPSBackend.cmake +124 -0
  17. data/vendor/libgit2/cmake/Modules/SelectHashes.cmake +66 -0
  18. data/vendor/libgit2/deps/http-parser/http_parser.c +11 -6
  19. data/vendor/libgit2/deps/ntlmclient/CMakeLists.txt +21 -0
  20. data/vendor/libgit2/deps/ntlmclient/compat.h +33 -0
  21. data/vendor/libgit2/deps/ntlmclient/crypt.h +64 -0
  22. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.c +120 -0
  23. data/vendor/libgit2/deps/ntlmclient/crypt_commoncrypto.h +18 -0
  24. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.c +145 -0
  25. data/vendor/libgit2/deps/ntlmclient/crypt_mbedtls.h +18 -0
  26. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.c +130 -0
  27. data/vendor/libgit2/deps/ntlmclient/crypt_openssl.h +21 -0
  28. data/vendor/libgit2/deps/ntlmclient/ntlm.c +1420 -0
  29. data/vendor/libgit2/deps/ntlmclient/ntlm.h +174 -0
  30. data/vendor/libgit2/deps/ntlmclient/ntlmclient.h +320 -0
  31. data/vendor/libgit2/deps/ntlmclient/unicode.h +36 -0
  32. data/vendor/libgit2/deps/ntlmclient/unicode_builtin.c +445 -0
  33. data/vendor/libgit2/deps/ntlmclient/unicode_iconv.c +201 -0
  34. data/vendor/libgit2/deps/ntlmclient/utf8.h +1257 -0
  35. data/vendor/libgit2/deps/ntlmclient/util.c +21 -0
  36. data/vendor/libgit2/deps/ntlmclient/util.h +14 -0
  37. data/vendor/libgit2/deps/pcre/CMakeLists.txt +140 -0
  38. data/vendor/libgit2/deps/pcre/COPYING +5 -0
  39. data/vendor/libgit2/deps/pcre/cmake/COPYING-CMAKE-SCRIPTS +22 -0
  40. data/vendor/libgit2/deps/pcre/cmake/FindEditline.cmake +17 -0
  41. data/vendor/libgit2/deps/pcre/cmake/FindPackageHandleStandardArgs.cmake +58 -0
  42. data/vendor/libgit2/deps/pcre/cmake/FindReadline.cmake +29 -0
  43. data/vendor/libgit2/deps/pcre/config.h.in +57 -0
  44. data/vendor/libgit2/deps/pcre/pcre.h +641 -0
  45. data/vendor/libgit2/deps/pcre/pcre_byte_order.c +319 -0
  46. data/vendor/libgit2/deps/pcre/pcre_chartables.c +198 -0
  47. data/vendor/libgit2/deps/pcre/pcre_compile.c +9800 -0
  48. data/vendor/libgit2/deps/pcre/pcre_config.c +190 -0
  49. data/vendor/libgit2/deps/pcre/pcre_dfa_exec.c +3676 -0
  50. data/vendor/libgit2/deps/pcre/pcre_exec.c +7173 -0
  51. data/vendor/libgit2/deps/pcre/pcre_fullinfo.c +245 -0
  52. data/vendor/libgit2/deps/pcre/pcre_get.c +669 -0
  53. data/vendor/libgit2/deps/pcre/pcre_globals.c +86 -0
  54. data/vendor/libgit2/deps/pcre/pcre_internal.h +2787 -0
  55. data/vendor/libgit2/deps/pcre/pcre_jit_compile.c +11913 -0
  56. data/vendor/libgit2/deps/pcre/pcre_maketables.c +156 -0
  57. data/vendor/libgit2/deps/pcre/pcre_newline.c +210 -0
  58. data/vendor/libgit2/deps/pcre/pcre_ord2utf8.c +94 -0
  59. data/vendor/libgit2/deps/pcre/pcre_printint.c +834 -0
  60. data/vendor/libgit2/deps/pcre/pcre_refcount.c +92 -0
  61. data/vendor/libgit2/deps/pcre/pcre_string_utils.c +211 -0
  62. data/vendor/libgit2/deps/pcre/pcre_study.c +1686 -0
  63. data/vendor/libgit2/deps/pcre/pcre_tables.c +727 -0
  64. data/vendor/libgit2/deps/pcre/pcre_ucd.c +3644 -0
  65. data/vendor/libgit2/deps/pcre/pcre_valid_utf8.c +301 -0
  66. data/vendor/libgit2/deps/pcre/pcre_version.c +98 -0
  67. data/vendor/libgit2/deps/pcre/pcre_xclass.c +268 -0
  68. data/vendor/libgit2/deps/pcre/pcreposix.c +421 -0
  69. data/vendor/libgit2/deps/pcre/pcreposix.h +117 -0
  70. data/vendor/libgit2/deps/pcre/ucp.h +224 -0
  71. data/vendor/libgit2/deps/zlib/adler32.c +0 -7
  72. data/vendor/libgit2/deps/zlib/crc32.c +0 -7
  73. data/vendor/libgit2/include/git2.h +2 -0
  74. data/vendor/libgit2/include/git2/apply.h +22 -2
  75. data/vendor/libgit2/include/git2/attr.h +19 -12
  76. data/vendor/libgit2/include/git2/blame.h +2 -2
  77. data/vendor/libgit2/include/git2/blob.h +44 -12
  78. data/vendor/libgit2/include/git2/buffer.h +20 -14
  79. data/vendor/libgit2/include/git2/cert.h +135 -0
  80. data/vendor/libgit2/include/git2/checkout.h +46 -14
  81. data/vendor/libgit2/include/git2/cherrypick.h +3 -3
  82. data/vendor/libgit2/include/git2/clone.h +2 -2
  83. data/vendor/libgit2/include/git2/commit.h +23 -1
  84. data/vendor/libgit2/include/git2/common.h +7 -5
  85. data/vendor/libgit2/include/git2/config.h +12 -12
  86. data/vendor/libgit2/include/git2/cred.h +308 -0
  87. data/vendor/libgit2/include/git2/deprecated.h +243 -3
  88. data/vendor/libgit2/include/git2/describe.h +4 -4
  89. data/vendor/libgit2/include/git2/diff.h +16 -14
  90. data/vendor/libgit2/include/git2/filter.h +8 -0
  91. data/vendor/libgit2/include/git2/index.h +2 -1
  92. data/vendor/libgit2/include/git2/indexer.h +48 -4
  93. data/vendor/libgit2/include/git2/merge.h +6 -10
  94. data/vendor/libgit2/include/git2/net.h +0 -5
  95. data/vendor/libgit2/include/git2/object.h +2 -14
  96. data/vendor/libgit2/include/git2/odb.h +3 -2
  97. data/vendor/libgit2/include/git2/odb_backend.h +5 -4
  98. data/vendor/libgit2/include/git2/oid.h +1 -1
  99. data/vendor/libgit2/include/git2/pack.h +12 -1
  100. data/vendor/libgit2/include/git2/proxy.h +5 -3
  101. data/vendor/libgit2/include/git2/rebase.h +46 -2
  102. data/vendor/libgit2/include/git2/refs.h +19 -0
  103. data/vendor/libgit2/include/git2/remote.h +35 -12
  104. data/vendor/libgit2/include/git2/repository.h +24 -2
  105. data/vendor/libgit2/include/git2/revert.h +1 -1
  106. data/vendor/libgit2/include/git2/stash.h +3 -3
  107. data/vendor/libgit2/include/git2/status.h +25 -16
  108. data/vendor/libgit2/include/git2/submodule.h +20 -3
  109. data/vendor/libgit2/include/git2/sys/alloc.h +9 -9
  110. data/vendor/libgit2/include/git2/sys/cred.h +90 -0
  111. data/vendor/libgit2/include/git2/sys/odb_backend.h +48 -4
  112. data/vendor/libgit2/include/git2/sys/refdb_backend.h +57 -21
  113. data/vendor/libgit2/include/git2/sys/repository.h +5 -1
  114. data/vendor/libgit2/include/git2/sys/transport.h +2 -2
  115. data/vendor/libgit2/include/git2/tag.h +11 -2
  116. data/vendor/libgit2/include/git2/trace.h +2 -2
  117. data/vendor/libgit2/include/git2/transport.h +11 -340
  118. data/vendor/libgit2/include/git2/tree.h +1 -1
  119. data/vendor/libgit2/include/git2/types.h +4 -89
  120. data/vendor/libgit2/include/git2/version.h +2 -2
  121. data/vendor/libgit2/include/git2/worktree.h +5 -5
  122. data/vendor/libgit2/src/CMakeLists.txt +88 -222
  123. data/vendor/libgit2/src/alloc.c +2 -14
  124. data/vendor/libgit2/src/{stdalloc.c → allocators/stdalloc.c} +3 -4
  125. data/vendor/libgit2/src/{stdalloc.h → allocators/stdalloc.h} +4 -4
  126. data/vendor/libgit2/src/allocators/win32_crtdbg.c +118 -0
  127. data/vendor/libgit2/src/{transports/cred.h → allocators/win32_crtdbg.h} +5 -4
  128. data/vendor/libgit2/src/apply.c +60 -30
  129. data/vendor/libgit2/src/attr.c +70 -64
  130. data/vendor/libgit2/src/attr_file.c +189 -96
  131. data/vendor/libgit2/src/attr_file.h +9 -9
  132. data/vendor/libgit2/src/attrcache.c +44 -46
  133. data/vendor/libgit2/src/attrcache.h +2 -1
  134. data/vendor/libgit2/src/blame.c +17 -5
  135. data/vendor/libgit2/src/blame.h +1 -1
  136. data/vendor/libgit2/src/blame_git.c +21 -7
  137. data/vendor/libgit2/src/blob.c +81 -17
  138. data/vendor/libgit2/src/blob.h +2 -2
  139. data/vendor/libgit2/src/branch.c +29 -5
  140. data/vendor/libgit2/src/buffer.c +14 -7
  141. data/vendor/libgit2/src/cache.c +26 -33
  142. data/vendor/libgit2/src/cache.h +1 -1
  143. data/vendor/libgit2/src/cc-compat.h +5 -0
  144. data/vendor/libgit2/src/checkout.c +26 -16
  145. data/vendor/libgit2/src/cherrypick.c +9 -3
  146. data/vendor/libgit2/src/clone.c +29 -7
  147. data/vendor/libgit2/src/clone.h +4 -0
  148. data/vendor/libgit2/src/commit.c +69 -21
  149. data/vendor/libgit2/src/commit.h +6 -0
  150. data/vendor/libgit2/src/commit_list.c +28 -76
  151. data/vendor/libgit2/src/commit_list.h +2 -2
  152. data/vendor/libgit2/src/common.h +3 -75
  153. data/vendor/libgit2/src/config.c +31 -40
  154. data/vendor/libgit2/src/config.h +7 -6
  155. data/vendor/libgit2/src/config_backend.h +12 -0
  156. data/vendor/libgit2/src/config_cache.c +39 -39
  157. data/vendor/libgit2/src/config_entries.c +69 -99
  158. data/vendor/libgit2/src/config_entries.h +1 -0
  159. data/vendor/libgit2/src/config_file.c +337 -380
  160. data/vendor/libgit2/src/config_mem.c +12 -16
  161. data/vendor/libgit2/src/config_parse.c +49 -29
  162. data/vendor/libgit2/src/config_parse.h +13 -12
  163. data/vendor/libgit2/src/config_snapshot.c +206 -0
  164. data/vendor/libgit2/src/crlf.c +14 -14
  165. data/vendor/libgit2/src/describe.c +21 -20
  166. data/vendor/libgit2/src/diff.c +43 -58
  167. data/vendor/libgit2/src/diff.h +2 -1
  168. data/vendor/libgit2/src/diff_driver.c +37 -38
  169. data/vendor/libgit2/src/diff_file.c +9 -7
  170. data/vendor/libgit2/src/diff_file.h +1 -1
  171. data/vendor/libgit2/src/diff_generate.c +135 -85
  172. data/vendor/libgit2/src/diff_generate.h +2 -2
  173. data/vendor/libgit2/src/diff_parse.c +1 -1
  174. data/vendor/libgit2/src/diff_print.c +25 -13
  175. data/vendor/libgit2/src/diff_stats.c +1 -1
  176. data/vendor/libgit2/src/diff_tform.c +4 -4
  177. data/vendor/libgit2/src/errors.c +12 -22
  178. data/vendor/libgit2/src/errors.h +81 -0
  179. data/vendor/libgit2/src/features.h.in +9 -2
  180. data/vendor/libgit2/src/fetch.c +7 -2
  181. data/vendor/libgit2/src/fetchhead.c +1 -1
  182. data/vendor/libgit2/src/filebuf.c +6 -10
  183. data/vendor/libgit2/src/filebuf.h +2 -2
  184. data/vendor/libgit2/src/filter.c +16 -8
  185. data/vendor/libgit2/src/{fileops.c → futils.c} +21 -17
  186. data/vendor/libgit2/src/{fileops.h → futils.h} +5 -5
  187. data/vendor/libgit2/src/global.c +12 -40
  188. data/vendor/libgit2/src/global.h +0 -2
  189. data/vendor/libgit2/src/hash.c +61 -0
  190. data/vendor/libgit2/src/hash.h +19 -21
  191. data/vendor/libgit2/src/hash/sha1.h +38 -0
  192. data/vendor/libgit2/src/hash/{hash_collisiondetect.h → sha1/collisiondetect.c} +14 -17
  193. data/vendor/libgit2/src/hash/sha1/collisiondetect.h +19 -0
  194. data/vendor/libgit2/src/hash/{hash_common_crypto.h → sha1/common_crypto.c} +15 -19
  195. data/vendor/libgit2/src/hash/sha1/common_crypto.h +19 -0
  196. data/vendor/libgit2/src/hash/{hash_generic.c → sha1/generic.c} +22 -10
  197. data/vendor/libgit2/src/hash/{hash_generic.h → sha1/generic.h} +4 -14
  198. data/vendor/libgit2/src/hash/{hash_mbedtls.c → sha1/mbedtls.c} +15 -7
  199. data/vendor/libgit2/src/hash/{hash_mbedtls.h → sha1/mbedtls.h} +6 -11
  200. data/vendor/libgit2/src/hash/{hash_openssl.h → sha1/openssl.c} +14 -18
  201. data/vendor/libgit2/src/hash/sha1/openssl.h +19 -0
  202. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.c +14 -3
  203. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/sha1.h +0 -0
  204. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.c +0 -0
  205. data/vendor/libgit2/src/hash/{sha1dc → sha1/sha1dc}/ubc_check.h +0 -0
  206. data/vendor/libgit2/src/hash/{hash_win32.c → sha1/win32.c} +34 -24
  207. data/vendor/libgit2/src/hash/{hash_win32.h → sha1/win32.h} +6 -19
  208. data/vendor/libgit2/src/hashsig.c +1 -1
  209. data/vendor/libgit2/src/idxmap.c +91 -65
  210. data/vendor/libgit2/src/idxmap.h +151 -15
  211. data/vendor/libgit2/src/ignore.c +32 -38
  212. data/vendor/libgit2/src/index.c +66 -43
  213. data/vendor/libgit2/src/index.h +1 -1
  214. data/vendor/libgit2/src/indexer.c +69 -70
  215. data/vendor/libgit2/src/integer.h +39 -4
  216. data/vendor/libgit2/src/iterator.c +27 -22
  217. data/vendor/libgit2/src/map.h +1 -1
  218. data/vendor/libgit2/src/merge.c +58 -44
  219. data/vendor/libgit2/src/merge_driver.c +4 -4
  220. data/vendor/libgit2/src/merge_file.c +1 -1
  221. data/vendor/libgit2/src/mwindow.c +18 -23
  222. data/vendor/libgit2/src/mwindow.h +4 -4
  223. data/vendor/libgit2/src/net.c +184 -0
  224. data/vendor/libgit2/src/net.h +36 -0
  225. data/vendor/libgit2/src/netops.c +55 -165
  226. data/vendor/libgit2/src/netops.h +3 -25
  227. data/vendor/libgit2/src/notes.c +2 -2
  228. data/vendor/libgit2/src/object.c +2 -2
  229. data/vendor/libgit2/src/object.h +2 -0
  230. data/vendor/libgit2/src/odb.c +41 -23
  231. data/vendor/libgit2/src/odb.h +3 -2
  232. data/vendor/libgit2/src/odb_loose.c +17 -10
  233. data/vendor/libgit2/src/odb_mempack.c +10 -23
  234. data/vendor/libgit2/src/odb_pack.c +4 -4
  235. data/vendor/libgit2/src/offmap.c +43 -55
  236. data/vendor/libgit2/src/offmap.h +102 -24
  237. data/vendor/libgit2/src/oid.c +6 -1
  238. data/vendor/libgit2/src/oidmap.c +39 -57
  239. data/vendor/libgit2/src/oidmap.h +99 -19
  240. data/vendor/libgit2/src/pack-objects.c +25 -32
  241. data/vendor/libgit2/src/pack-objects.h +1 -1
  242. data/vendor/libgit2/src/pack.c +45 -47
  243. data/vendor/libgit2/src/pack.h +12 -14
  244. data/vendor/libgit2/src/parse.c +10 -0
  245. data/vendor/libgit2/src/parse.h +3 -3
  246. data/vendor/libgit2/src/patch.c +1 -1
  247. data/vendor/libgit2/src/patch_generate.c +2 -2
  248. data/vendor/libgit2/src/patch_parse.c +124 -31
  249. data/vendor/libgit2/src/path.c +95 -27
  250. data/vendor/libgit2/src/path.h +2 -0
  251. data/vendor/libgit2/src/pathspec.c +13 -13
  252. data/vendor/libgit2/src/pool.c +26 -22
  253. data/vendor/libgit2/src/pool.h +7 -7
  254. data/vendor/libgit2/src/posix.c +7 -7
  255. data/vendor/libgit2/src/posix.h +12 -1
  256. data/vendor/libgit2/src/proxy.c +7 -2
  257. data/vendor/libgit2/src/push.c +10 -5
  258. data/vendor/libgit2/src/reader.c +2 -2
  259. data/vendor/libgit2/src/rebase.c +66 -7
  260. data/vendor/libgit2/src/refdb.c +12 -0
  261. data/vendor/libgit2/src/refdb_fs.c +214 -165
  262. data/vendor/libgit2/src/reflog.c +11 -13
  263. data/vendor/libgit2/src/refs.c +24 -18
  264. data/vendor/libgit2/src/refspec.c +9 -16
  265. data/vendor/libgit2/src/regexp.c +221 -0
  266. data/vendor/libgit2/src/regexp.h +97 -0
  267. data/vendor/libgit2/src/remote.c +50 -52
  268. data/vendor/libgit2/src/remote.h +2 -2
  269. data/vendor/libgit2/src/repository.c +115 -100
  270. data/vendor/libgit2/src/repository.h +49 -40
  271. data/vendor/libgit2/src/revert.c +8 -3
  272. data/vendor/libgit2/src/revparse.c +18 -19
  273. data/vendor/libgit2/src/revwalk.c +63 -30
  274. data/vendor/libgit2/src/revwalk.h +20 -0
  275. data/vendor/libgit2/src/settings.c +5 -0
  276. data/vendor/libgit2/src/sortedcache.c +12 -26
  277. data/vendor/libgit2/src/sortedcache.h +1 -1
  278. data/vendor/libgit2/src/stash.c +45 -65
  279. data/vendor/libgit2/src/status.c +15 -9
  280. data/vendor/libgit2/src/streams/openssl.c +20 -0
  281. data/vendor/libgit2/src/streams/socket.c +2 -2
  282. data/vendor/libgit2/src/strmap.c +37 -84
  283. data/vendor/libgit2/src/strmap.h +105 -33
  284. data/vendor/libgit2/src/submodule.c +102 -70
  285. data/vendor/libgit2/src/submodule.h +1 -1
  286. data/vendor/libgit2/src/sysdir.c +11 -1
  287. data/vendor/libgit2/src/tag.c +10 -2
  288. data/vendor/libgit2/src/trace.c +1 -1
  289. data/vendor/libgit2/src/trace.h +2 -2
  290. data/vendor/libgit2/src/trailer.c +46 -32
  291. data/vendor/libgit2/src/transaction.c +10 -9
  292. data/vendor/libgit2/src/transports/auth.c +10 -9
  293. data/vendor/libgit2/src/transports/auth.h +11 -4
  294. data/vendor/libgit2/src/transports/auth_negotiate.c +23 -9
  295. data/vendor/libgit2/src/transports/auth_negotiate.h +2 -2
  296. data/vendor/libgit2/src/transports/auth_ntlm.c +223 -0
  297. data/vendor/libgit2/src/transports/auth_ntlm.h +35 -0
  298. data/vendor/libgit2/src/transports/cred.c +6 -6
  299. data/vendor/libgit2/src/transports/git.c +11 -16
  300. data/vendor/libgit2/src/transports/http.c +419 -276
  301. data/vendor/libgit2/src/transports/http.h +1 -1
  302. data/vendor/libgit2/src/transports/local.c +9 -9
  303. data/vendor/libgit2/src/transports/smart.c +17 -17
  304. data/vendor/libgit2/src/transports/smart.h +2 -2
  305. data/vendor/libgit2/src/transports/smart_protocol.c +36 -60
  306. data/vendor/libgit2/src/transports/ssh.c +46 -36
  307. data/vendor/libgit2/src/transports/winhttp.c +231 -207
  308. data/vendor/libgit2/src/tree-cache.c +14 -7
  309. data/vendor/libgit2/src/tree.c +10 -24
  310. data/vendor/libgit2/src/unix/map.c +1 -1
  311. data/vendor/libgit2/src/unix/posix.h +1 -11
  312. data/vendor/libgit2/src/userdiff.h +3 -1
  313. data/vendor/libgit2/src/util.c +51 -53
  314. data/vendor/libgit2/src/util.h +16 -21
  315. data/vendor/libgit2/src/wildmatch.c +320 -0
  316. data/vendor/libgit2/src/wildmatch.h +23 -0
  317. data/vendor/libgit2/src/win32/map.c +3 -5
  318. data/vendor/libgit2/src/win32/path_w32.c +12 -2
  319. data/vendor/libgit2/src/win32/path_w32.h +0 -29
  320. data/vendor/libgit2/src/win32/posix.h +1 -4
  321. data/vendor/libgit2/src/win32/posix_w32.c +40 -5
  322. data/vendor/libgit2/src/win32/precompiled.h +0 -2
  323. data/vendor/libgit2/src/win32/thread.c +5 -10
  324. data/vendor/libgit2/src/win32/w32_buffer.c +7 -3
  325. data/vendor/libgit2/src/win32/w32_common.h +39 -0
  326. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.c +0 -93
  327. data/vendor/libgit2/src/win32/w32_crtdbg_stacktrace.h +0 -2
  328. data/vendor/libgit2/src/win32/w32_stack.c +4 -9
  329. data/vendor/libgit2/src/win32/w32_stack.h +3 -3
  330. data/vendor/libgit2/src/win32/w32_util.c +31 -0
  331. data/vendor/libgit2/src/win32/w32_util.h +6 -32
  332. data/vendor/libgit2/src/worktree.c +36 -22
  333. data/vendor/libgit2/src/xdiff/xdiffi.c +1 -1
  334. data/vendor/libgit2/src/xdiff/xmerge.c +12 -0
  335. data/vendor/libgit2/src/xdiff/xpatience.c +3 -0
  336. metadata +98 -34
  337. data/vendor/libgit2/deps/regex/CMakeLists.txt +0 -2
  338. data/vendor/libgit2/deps/regex/COPYING +0 -502
  339. data/vendor/libgit2/deps/regex/config.h +0 -7
  340. data/vendor/libgit2/deps/regex/regcomp.c +0 -3857
  341. data/vendor/libgit2/deps/regex/regex.c +0 -92
  342. data/vendor/libgit2/deps/regex/regex.h +0 -582
  343. data/vendor/libgit2/deps/regex/regex_internal.c +0 -1744
  344. data/vendor/libgit2/deps/regex/regex_internal.h +0 -819
  345. data/vendor/libgit2/deps/regex/regexec.c +0 -4369
  346. data/vendor/libgit2/include/git2/inttypes.h +0 -309
  347. data/vendor/libgit2/include/git2/sys/time.h +0 -31
  348. data/vendor/libgit2/libgit2.pc.in +0 -13
  349. data/vendor/libgit2/src/fnmatch.c +0 -248
  350. data/vendor/libgit2/src/fnmatch.h +0 -48
@@ -1,7 +0,0 @@
1
- #ifndef _REGEX_CONFIG_H_
2
- #define _REGEX_CONFIG_H_
3
-
4
- # define GAWK
5
- # define NO_MBSUPPORT
6
-
7
- #endif
@@ -1,3857 +0,0 @@
1
- /* Extended regular expression matching and search library.
2
- Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
3
- This file is part of the GNU C Library.
4
- Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
-
6
- The GNU C Library is free software; you can redistribute it and/or
7
- modify it under the terms of the GNU Lesser General Public
8
- License as published by the Free Software Foundation; either
9
- version 2.1 of the License, or (at your option) any later version.
10
-
11
- The GNU C Library is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- Lesser General Public License for more details.
15
-
16
- You should have received a copy of the GNU Lesser General Public
17
- License along with the GNU C Library; if not, write to the Free
18
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
- 02110-1301 USA. */
20
-
21
- static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
22
- size_t length, reg_syntax_t syntax);
23
- static void re_compile_fastmap_iter (regex_t *bufp,
24
- const re_dfastate_t *init_state,
25
- char *fastmap);
26
- static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
27
- #ifdef RE_ENABLE_I18N
28
- static void free_charset (re_charset_t *cset);
29
- #endif /* RE_ENABLE_I18N */
30
- static void free_workarea_compile (regex_t *preg);
31
- static reg_errcode_t create_initial_state (re_dfa_t *dfa);
32
- #ifdef RE_ENABLE_I18N
33
- static void optimize_utf8 (re_dfa_t *dfa);
34
- #endif
35
- static reg_errcode_t analyze (regex_t *preg);
36
- static reg_errcode_t preorder (bin_tree_t *root,
37
- reg_errcode_t (fn (void *, bin_tree_t *)),
38
- void *extra);
39
- static reg_errcode_t postorder (bin_tree_t *root,
40
- reg_errcode_t (fn (void *, bin_tree_t *)),
41
- void *extra);
42
- static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
43
- static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
44
- static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
45
- bin_tree_t *node);
46
- static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
47
- static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
48
- static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
49
- static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
50
- static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
51
- unsigned int constraint);
52
- static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
53
- static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
54
- int node, int root);
55
- static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
56
- static int fetch_number (re_string_t *input, re_token_t *token,
57
- reg_syntax_t syntax);
58
- static int peek_token (re_token_t *token, re_string_t *input,
59
- reg_syntax_t syntax) internal_function;
60
- static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
61
- reg_syntax_t syntax, reg_errcode_t *err);
62
- static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
63
- re_token_t *token, reg_syntax_t syntax,
64
- int nest, reg_errcode_t *err);
65
- static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
66
- re_token_t *token, reg_syntax_t syntax,
67
- int nest, reg_errcode_t *err);
68
- static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
69
- re_token_t *token, reg_syntax_t syntax,
70
- int nest, reg_errcode_t *err);
71
- static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
72
- re_token_t *token, reg_syntax_t syntax,
73
- int nest, reg_errcode_t *err);
74
- static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
75
- re_dfa_t *dfa, re_token_t *token,
76
- reg_syntax_t syntax, reg_errcode_t *err);
77
- static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
78
- re_token_t *token, reg_syntax_t syntax,
79
- reg_errcode_t *err);
80
- static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
81
- re_string_t *regexp,
82
- re_token_t *token, int token_len,
83
- re_dfa_t *dfa,
84
- reg_syntax_t syntax,
85
- int accept_hyphen);
86
- static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
87
- re_string_t *regexp,
88
- re_token_t *token);
89
- #ifdef RE_ENABLE_I18N
90
- static reg_errcode_t build_equiv_class (bitset_t sbcset,
91
- re_charset_t *mbcset,
92
- int *equiv_class_alloc,
93
- const unsigned char *name);
94
- static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
95
- bitset_t sbcset,
96
- re_charset_t *mbcset,
97
- int *char_class_alloc,
98
- const char *class_name,
99
- reg_syntax_t syntax);
100
- #else /* not RE_ENABLE_I18N */
101
- static reg_errcode_t build_equiv_class (bitset_t sbcset,
102
- const unsigned char *name);
103
- static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
104
- bitset_t sbcset,
105
- const char *class_name,
106
- reg_syntax_t syntax);
107
- #endif /* not RE_ENABLE_I18N */
108
- static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
109
- RE_TRANSLATE_TYPE trans,
110
- const char *class_name,
111
- const char *extra,
112
- int non_match, reg_errcode_t *err);
113
- static bin_tree_t *create_tree (re_dfa_t *dfa,
114
- bin_tree_t *left, bin_tree_t *right,
115
- re_token_type_t type);
116
- static bin_tree_t *create_token_tree (re_dfa_t *dfa,
117
- bin_tree_t *left, bin_tree_t *right,
118
- const re_token_t *token);
119
- static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
120
- static void free_token (re_token_t *node);
121
- static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
122
- static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
123
-
124
- /* This table gives an error message for each of the error codes listed
125
- in regex.h. Obviously the order here has to be same as there.
126
- POSIX doesn't require that we do anything for REG_NOERROR,
127
- but why not be nice? */
128
-
129
- const char __re_error_msgid[] attribute_hidden =
130
- {
131
- #define REG_NOERROR_IDX 0
132
- gettext_noop ("Success") /* REG_NOERROR */
133
- "\0"
134
- #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
135
- gettext_noop ("No match") /* REG_NOMATCH */
136
- "\0"
137
- #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
138
- gettext_noop ("Invalid regular expression") /* REG_BADPAT */
139
- "\0"
140
- #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
141
- gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
142
- "\0"
143
- #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
144
- gettext_noop ("Invalid character class name") /* REG_ECTYPE */
145
- "\0"
146
- #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
147
- gettext_noop ("Trailing backslash") /* REG_EESCAPE */
148
- "\0"
149
- #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
150
- gettext_noop ("Invalid back reference") /* REG_ESUBREG */
151
- "\0"
152
- #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
153
- gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
154
- "\0"
155
- #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
156
- gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
157
- "\0"
158
- #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
159
- gettext_noop ("Unmatched \\{") /* REG_EBRACE */
160
- "\0"
161
- #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
162
- gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
163
- "\0"
164
- #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
165
- gettext_noop ("Invalid range end") /* REG_ERANGE */
166
- "\0"
167
- #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
168
- gettext_noop ("Memory exhausted") /* REG_ESPACE */
169
- "\0"
170
- #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
171
- gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
172
- "\0"
173
- #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
174
- gettext_noop ("Premature end of regular expression") /* REG_EEND */
175
- "\0"
176
- #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
177
- gettext_noop ("Regular expression too big") /* REG_ESIZE */
178
- "\0"
179
- #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
180
- gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
181
- };
182
-
183
- const size_t __re_error_msgid_idx[] attribute_hidden =
184
- {
185
- REG_NOERROR_IDX,
186
- REG_NOMATCH_IDX,
187
- REG_BADPAT_IDX,
188
- REG_ECOLLATE_IDX,
189
- REG_ECTYPE_IDX,
190
- REG_EESCAPE_IDX,
191
- REG_ESUBREG_IDX,
192
- REG_EBRACK_IDX,
193
- REG_EPAREN_IDX,
194
- REG_EBRACE_IDX,
195
- REG_BADBR_IDX,
196
- REG_ERANGE_IDX,
197
- REG_ESPACE_IDX,
198
- REG_BADRPT_IDX,
199
- REG_EEND_IDX,
200
- REG_ESIZE_IDX,
201
- REG_ERPAREN_IDX
202
- };
203
-
204
- /* Entry points for GNU code. */
205
-
206
-
207
- #ifdef ZOS_USS
208
-
209
- /* For ZOS USS we must define btowc */
210
-
211
- wchar_t
212
- btowc (int c)
213
- {
214
- wchar_t wtmp[2];
215
- char tmp[2];
216
-
217
- tmp[0] = c;
218
- tmp[1] = 0;
219
-
220
- mbtowc (wtmp, tmp, 1);
221
- return wtmp[0];
222
- }
223
- #endif
224
-
225
- /* re_compile_pattern is the GNU regular expression compiler: it
226
- compiles PATTERN (of length LENGTH) and puts the result in BUFP.
227
- Returns 0 if the pattern was valid, otherwise an error string.
228
-
229
- Assumes the `allocated' (and perhaps `buffer') and `translate' fields
230
- are set in BUFP on entry. */
231
-
232
- const char *
233
- re_compile_pattern (const char *pattern,
234
- size_t length,
235
- struct re_pattern_buffer *bufp)
236
- {
237
- reg_errcode_t ret;
238
-
239
- /* And GNU code determines whether or not to get register information
240
- by passing null for the REGS argument to re_match, etc., not by
241
- setting no_sub, unless RE_NO_SUB is set. */
242
- bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
243
-
244
- /* Match anchors at newline. */
245
- bufp->newline_anchor = 1;
246
-
247
- ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
248
-
249
- if (!ret)
250
- return NULL;
251
- return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
252
- }
253
- #ifdef _LIBC
254
- weak_alias (__re_compile_pattern, re_compile_pattern)
255
- #endif
256
-
257
- /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
258
- also be assigned to arbitrarily: each pattern buffer stores its own
259
- syntax, so it can be changed between regex compilations. */
260
- /* This has no initializer because initialized variables in Emacs
261
- become read-only after dumping. */
262
- reg_syntax_t re_syntax_options;
263
-
264
-
265
- /* Specify the precise syntax of regexps for compilation. This provides
266
- for compatibility for various utilities which historically have
267
- different, incompatible syntaxes.
268
-
269
- The argument SYNTAX is a bit mask comprised of the various bits
270
- defined in regex.h. We return the old syntax. */
271
-
272
- reg_syntax_t
273
- re_set_syntax (reg_syntax_t syntax)
274
- {
275
- reg_syntax_t ret = re_syntax_options;
276
-
277
- re_syntax_options = syntax;
278
- return ret;
279
- }
280
- #ifdef _LIBC
281
- weak_alias (__re_set_syntax, re_set_syntax)
282
- #endif
283
-
284
- int
285
- re_compile_fastmap (struct re_pattern_buffer *bufp)
286
- {
287
- re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
288
- char *fastmap = bufp->fastmap;
289
-
290
- memset (fastmap, '\0', sizeof (char) * SBC_MAX);
291
- re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
292
- if (dfa->init_state != dfa->init_state_word)
293
- re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
294
- if (dfa->init_state != dfa->init_state_nl)
295
- re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
296
- if (dfa->init_state != dfa->init_state_begbuf)
297
- re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
298
- bufp->fastmap_accurate = 1;
299
- return 0;
300
- }
301
- #ifdef _LIBC
302
- weak_alias (__re_compile_fastmap, re_compile_fastmap)
303
- #endif
304
-
305
- static inline void
306
- __attribute ((always_inline))
307
- re_set_fastmap (char *fastmap, int icase, int ch)
308
- {
309
- fastmap[ch] = 1;
310
- if (icase)
311
- fastmap[tolower (ch)] = 1;
312
- }
313
-
314
- /* Helper function for re_compile_fastmap.
315
- Compile fastmap for the initial_state INIT_STATE. */
316
-
317
- static void
318
- re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
319
- char *fastmap)
320
- {
321
- volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
322
- int node_cnt;
323
- int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
324
- for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
325
- {
326
- int node = init_state->nodes.elems[node_cnt];
327
- re_token_type_t type = dfa->nodes[node].type;
328
-
329
- if (type == CHARACTER)
330
- {
331
- re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
332
- #ifdef RE_ENABLE_I18N
333
- if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
334
- {
335
- unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
336
- wchar_t wc;
337
- mbstate_t state;
338
-
339
- p = buf;
340
- *p++ = dfa->nodes[node].opr.c;
341
- while (++node < dfa->nodes_len
342
- && dfa->nodes[node].type == CHARACTER
343
- && dfa->nodes[node].mb_partial)
344
- *p++ = dfa->nodes[node].opr.c;
345
- memset (&state, '\0', sizeof (state));
346
- if (__mbrtowc (&wc, (const char *) buf, p - buf,
347
- &state) == p - buf
348
- && (__wcrtomb ((char *) buf, towlower (wc), &state)
349
- != (size_t) -1))
350
- re_set_fastmap (fastmap, 0, buf[0]);
351
- re_free (buf);
352
- }
353
- #endif
354
- }
355
- else if (type == SIMPLE_BRACKET)
356
- {
357
- int i, ch;
358
- for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
359
- {
360
- int j;
361
- bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
362
- for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
363
- if (w & ((bitset_word_t) 1 << j))
364
- re_set_fastmap (fastmap, icase, ch);
365
- }
366
- }
367
- #ifdef RE_ENABLE_I18N
368
- else if (type == COMPLEX_BRACKET)
369
- {
370
- re_charset_t *cset = dfa->nodes[node].opr.mbcset;
371
- int i;
372
-
373
- # ifdef _LIBC
374
- /* See if we have to try all bytes which start multiple collation
375
- elements.
376
- e.g. In da_DK, we want to catch 'a' since "aa" is a valid
377
- collation element, and don't catch 'b' since 'b' is
378
- the only collation element which starts from 'b' (and
379
- it is caught by SIMPLE_BRACKET). */
380
- if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
381
- && (cset->ncoll_syms || cset->nranges))
382
- {
383
- const int32_t *table = (const int32_t *)
384
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
385
- for (i = 0; i < SBC_MAX; ++i)
386
- if (table[i] < 0)
387
- re_set_fastmap (fastmap, icase, i);
388
- }
389
- # endif /* _LIBC */
390
-
391
- /* See if we have to start the match at all multibyte characters,
392
- i.e. where we would not find an invalid sequence. This only
393
- applies to multibyte character sets; for single byte character
394
- sets, the SIMPLE_BRACKET again suffices. */
395
- if (dfa->mb_cur_max > 1
396
- && (cset->nchar_classes || cset->non_match || cset->nranges
397
- # ifdef _LIBC
398
- || cset->nequiv_classes
399
- # endif /* _LIBC */
400
- ))
401
- {
402
- unsigned char c = 0;
403
- do
404
- {
405
- mbstate_t mbs;
406
- memset (&mbs, 0, sizeof (mbs));
407
- if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
408
- re_set_fastmap (fastmap, false, (int) c);
409
- }
410
- while (++c != 0);
411
- }
412
-
413
- else
414
- {
415
- /* ... Else catch all bytes which can start the mbchars. */
416
- for (i = 0; i < cset->nmbchars; ++i)
417
- {
418
- char buf[256];
419
- mbstate_t state;
420
- memset (&state, '\0', sizeof (state));
421
- if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
422
- re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
423
- if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
424
- {
425
- if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
426
- != (size_t) -1)
427
- re_set_fastmap (fastmap, false, *(unsigned char *) buf);
428
- }
429
- }
430
- }
431
- }
432
- #endif /* RE_ENABLE_I18N */
433
- else if (type == OP_PERIOD
434
- #ifdef RE_ENABLE_I18N
435
- || type == OP_UTF8_PERIOD
436
- #endif /* RE_ENABLE_I18N */
437
- || type == END_OF_RE)
438
- {
439
- memset (fastmap, '\1', sizeof (char) * SBC_MAX);
440
- if (type == END_OF_RE)
441
- bufp->can_be_null = 1;
442
- return;
443
- }
444
- }
445
- }
446
-
447
- /* Entry point for POSIX code. */
448
- /* regcomp takes a regular expression as a string and compiles it.
449
-
450
- PREG is a regex_t *. We do not expect any fields to be initialized,
451
- since POSIX says we shouldn't. Thus, we set
452
-
453
- `buffer' to the compiled pattern;
454
- `used' to the length of the compiled pattern;
455
- `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
456
- REG_EXTENDED bit in CFLAGS is set; otherwise, to
457
- RE_SYNTAX_POSIX_BASIC;
458
- `newline_anchor' to REG_NEWLINE being set in CFLAGS;
459
- `fastmap' to an allocated space for the fastmap;
460
- `fastmap_accurate' to zero;
461
- `re_nsub' to the number of subexpressions in PATTERN.
462
-
463
- PATTERN is the address of the pattern string.
464
-
465
- CFLAGS is a series of bits which affect compilation.
466
-
467
- If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
468
- use POSIX basic syntax.
469
-
470
- If REG_NEWLINE is set, then . and [^...] don't match newline.
471
- Also, regexec will try a match beginning after every newline.
472
-
473
- If REG_ICASE is set, then we considers upper- and lowercase
474
- versions of letters to be equivalent when matching.
475
-
476
- If REG_NOSUB is set, then when PREG is passed to regexec, that
477
- routine will report only success or failure, and nothing about the
478
- registers.
479
-
480
- It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
481
- the return codes and their meanings.) */
482
-
483
- int
484
- regcomp (regex_t *__restrict preg,
485
- const char *__restrict pattern,
486
- int cflags)
487
- {
488
- reg_errcode_t ret;
489
- reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
490
- : RE_SYNTAX_POSIX_BASIC);
491
-
492
- preg->buffer = NULL;
493
- preg->allocated = 0;
494
- preg->used = 0;
495
-
496
- /* Try to allocate space for the fastmap. */
497
- preg->fastmap = re_malloc (char, SBC_MAX);
498
- if (BE (preg->fastmap == NULL, 0))
499
- return REG_ESPACE;
500
-
501
- syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
502
-
503
- /* If REG_NEWLINE is set, newlines are treated differently. */
504
- if (cflags & REG_NEWLINE)
505
- { /* REG_NEWLINE implies neither . nor [^...] match newline. */
506
- syntax &= ~RE_DOT_NEWLINE;
507
- syntax |= RE_HAT_LISTS_NOT_NEWLINE;
508
- /* It also changes the matching behavior. */
509
- preg->newline_anchor = 1;
510
- }
511
- else
512
- preg->newline_anchor = 0;
513
- preg->no_sub = !!(cflags & REG_NOSUB);
514
- preg->translate = NULL;
515
-
516
- ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
517
-
518
- /* POSIX doesn't distinguish between an unmatched open-group and an
519
- unmatched close-group: both are REG_EPAREN. */
520
- if (ret == REG_ERPAREN)
521
- ret = REG_EPAREN;
522
-
523
- /* We have already checked preg->fastmap != NULL. */
524
- if (BE (ret == REG_NOERROR, 1))
525
- /* Compute the fastmap now, since regexec cannot modify the pattern
526
- buffer. This function never fails in this implementation. */
527
- (void) re_compile_fastmap (preg);
528
- else
529
- {
530
- /* Some error occurred while compiling the expression. */
531
- re_free (preg->fastmap);
532
- preg->fastmap = NULL;
533
- }
534
-
535
- return (int) ret;
536
- }
537
- #ifdef _LIBC
538
- weak_alias (__regcomp, regcomp)
539
- #endif
540
-
541
- /* Returns a message corresponding to an error code, ERRCODE, returned
542
- from either regcomp or regexec. We don't use PREG here. */
543
-
544
- size_t
545
- regerror(int errcode, UNUSED const regex_t *__restrict preg,
546
- char *__restrict errbuf, size_t errbuf_size)
547
- {
548
- const char *msg;
549
- size_t msg_size;
550
-
551
- if (BE (errcode < 0
552
- || errcode >= (int) (sizeof (__re_error_msgid_idx)
553
- / sizeof (__re_error_msgid_idx[0])), 0))
554
- /* Only error codes returned by the rest of the code should be passed
555
- to this routine. If we are given anything else, or if other regex
556
- code generates an invalid error code, then the program has a bug.
557
- Dump core so we can fix it. */
558
- abort ();
559
-
560
- msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
561
-
562
- msg_size = strlen (msg) + 1; /* Includes the null. */
563
-
564
- if (BE (errbuf_size != 0, 1))
565
- {
566
- if (BE (msg_size > errbuf_size, 0))
567
- {
568
- memcpy (errbuf, msg, errbuf_size - 1);
569
- errbuf[errbuf_size - 1] = 0;
570
- }
571
- else
572
- memcpy (errbuf, msg, msg_size);
573
- }
574
-
575
- return msg_size;
576
- }
577
- #ifdef _LIBC
578
- weak_alias (__regerror, regerror)
579
- #endif
580
-
581
-
582
- #ifdef RE_ENABLE_I18N
583
- /* This static array is used for the map to single-byte characters when
584
- UTF-8 is used. Otherwise we would allocate memory just to initialize
585
- it the same all the time. UTF-8 is the preferred encoding so this is
586
- a worthwhile optimization. */
587
- #if __GNUC__ >= 3
588
- static const bitset_t utf8_sb_map = {
589
- /* Set the first 128 bits. */
590
- [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
591
- };
592
- #else /* ! (__GNUC__ >= 3) */
593
- static bitset_t utf8_sb_map;
594
- #endif /* __GNUC__ >= 3 */
595
- #endif /* RE_ENABLE_I18N */
596
-
597
-
598
- static void
599
- free_dfa_content (re_dfa_t *dfa)
600
- {
601
- unsigned int i;
602
- int j;
603
-
604
- if (dfa->nodes)
605
- for (i = 0; i < dfa->nodes_len; ++i)
606
- free_token (dfa->nodes + i);
607
- re_free (dfa->nexts);
608
- for (i = 0; i < dfa->nodes_len; ++i)
609
- {
610
- if (dfa->eclosures != NULL)
611
- re_node_set_free (dfa->eclosures + i);
612
- if (dfa->inveclosures != NULL)
613
- re_node_set_free (dfa->inveclosures + i);
614
- if (dfa->edests != NULL)
615
- re_node_set_free (dfa->edests + i);
616
- }
617
- re_free (dfa->edests);
618
- re_free (dfa->eclosures);
619
- re_free (dfa->inveclosures);
620
- re_free (dfa->nodes);
621
-
622
- if (dfa->state_table)
623
- for (i = 0; i <= dfa->state_hash_mask; ++i)
624
- {
625
- struct re_state_table_entry *entry = dfa->state_table + i;
626
- for (j = 0; j < entry->num; ++j)
627
- {
628
- re_dfastate_t *state = entry->array[j];
629
- free_state (state);
630
- }
631
- re_free (entry->array);
632
- }
633
- re_free (dfa->state_table);
634
- #ifdef RE_ENABLE_I18N
635
- if (dfa->sb_char != utf8_sb_map)
636
- re_free (dfa->sb_char);
637
- #endif
638
- re_free (dfa->subexp_map);
639
- #ifdef DEBUG
640
- re_free (dfa->re_str);
641
- #endif
642
-
643
- re_free (dfa);
644
- }
645
-
646
-
647
- /* Free dynamically allocated space used by PREG. */
648
-
649
- void
650
- regfree (regex_t *preg)
651
- {
652
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
653
- if (BE (dfa != NULL, 1))
654
- free_dfa_content (dfa);
655
- preg->buffer = NULL;
656
- preg->allocated = 0;
657
-
658
- re_free (preg->fastmap);
659
- preg->fastmap = NULL;
660
-
661
- re_free (preg->translate);
662
- preg->translate = NULL;
663
- }
664
- #ifdef _LIBC
665
- weak_alias (__regfree, regfree)
666
- #endif
667
-
668
- /* Entry points compatible with 4.2 BSD regex library. We don't define
669
- them unless specifically requested. */
670
-
671
- #if defined _REGEX_RE_COMP || defined _LIBC
672
-
673
- /* BSD has one and only one pattern buffer. */
674
- static struct re_pattern_buffer re_comp_buf;
675
-
676
- char *
677
- # ifdef _LIBC
678
- /* Make these definitions weak in libc, so POSIX programs can redefine
679
- these names if they don't use our functions, and still use
680
- regcomp/regexec above without link errors. */
681
- weak_function
682
- # endif
683
- re_comp (s)
684
- const char *s;
685
- {
686
- reg_errcode_t ret;
687
- char *fastmap;
688
-
689
- if (!s)
690
- {
691
- if (!re_comp_buf.buffer)
692
- return gettext ("No previous regular expression");
693
- return 0;
694
- }
695
-
696
- if (re_comp_buf.buffer)
697
- {
698
- fastmap = re_comp_buf.fastmap;
699
- re_comp_buf.fastmap = NULL;
700
- __regfree (&re_comp_buf);
701
- memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
702
- re_comp_buf.fastmap = fastmap;
703
- }
704
-
705
- if (re_comp_buf.fastmap == NULL)
706
- {
707
- re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
708
- if (re_comp_buf.fastmap == NULL)
709
- return (char *) gettext (__re_error_msgid
710
- + __re_error_msgid_idx[(int) REG_ESPACE]);
711
- }
712
-
713
- /* Since `re_exec' always passes NULL for the `regs' argument, we
714
- don't need to initialize the pattern buffer fields which affect it. */
715
-
716
- /* Match anchors at newlines. */
717
- re_comp_buf.newline_anchor = 1;
718
-
719
- ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
720
-
721
- if (!ret)
722
- return NULL;
723
-
724
- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
725
- return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
726
- }
727
-
728
- #ifdef _LIBC
729
- libc_freeres_fn (free_mem)
730
- {
731
- __regfree (&re_comp_buf);
732
- }
733
- #endif
734
-
735
- #endif /* _REGEX_RE_COMP */
736
-
737
- /* Internal entry point.
738
- Compile the regular expression PATTERN, whose length is LENGTH.
739
- SYNTAX indicate regular expression's syntax. */
740
-
741
- static reg_errcode_t
742
- re_compile_internal (regex_t *preg, const char * pattern, size_t length,
743
- reg_syntax_t syntax)
744
- {
745
- reg_errcode_t err = REG_NOERROR;
746
- re_dfa_t *dfa;
747
- re_string_t regexp;
748
-
749
- /* Initialize the pattern buffer. */
750
- preg->fastmap_accurate = 0;
751
- preg->syntax = syntax;
752
- preg->not_bol = preg->not_eol = 0;
753
- preg->used = 0;
754
- preg->re_nsub = 0;
755
- preg->can_be_null = 0;
756
- preg->regs_allocated = REGS_UNALLOCATED;
757
-
758
- /* Initialize the dfa. */
759
- dfa = (re_dfa_t *) preg->buffer;
760
- if (BE (preg->allocated < sizeof (re_dfa_t), 0))
761
- {
762
- /* If zero allocated, but buffer is non-null, try to realloc
763
- enough space. This loses if buffer's address is bogus, but
764
- that is the user's responsibility. If ->buffer is NULL this
765
- is a simple allocation. */
766
- dfa = re_realloc (preg->buffer, re_dfa_t, 1);
767
- if (dfa == NULL)
768
- return REG_ESPACE;
769
- preg->allocated = sizeof (re_dfa_t);
770
- preg->buffer = (unsigned char *) dfa;
771
- }
772
- preg->used = sizeof (re_dfa_t);
773
-
774
- err = init_dfa (dfa, length);
775
- if (BE (err != REG_NOERROR, 0))
776
- {
777
- free_dfa_content (dfa);
778
- preg->buffer = NULL;
779
- preg->allocated = 0;
780
- return err;
781
- }
782
- #ifdef DEBUG
783
- /* Note: length+1 will not overflow since it is checked in init_dfa. */
784
- dfa->re_str = re_malloc (char, length + 1);
785
- strncpy (dfa->re_str, pattern, length + 1);
786
- #endif
787
-
788
- __libc_lock_init (dfa->lock);
789
-
790
- err = re_string_construct (&regexp, pattern, length, preg->translate,
791
- syntax & RE_ICASE, dfa);
792
- if (BE (err != REG_NOERROR, 0))
793
- {
794
- re_compile_internal_free_return:
795
- free_workarea_compile (preg);
796
- re_string_destruct (&regexp);
797
- free_dfa_content (dfa);
798
- preg->buffer = NULL;
799
- preg->allocated = 0;
800
- return err;
801
- }
802
-
803
- /* Parse the regular expression, and build a structure tree. */
804
- preg->re_nsub = 0;
805
- dfa->str_tree = parse (&regexp, preg, syntax, &err);
806
- if (BE (dfa->str_tree == NULL, 0))
807
- goto re_compile_internal_free_return;
808
-
809
- /* Analyze the tree and create the nfa. */
810
- err = analyze (preg);
811
- if (BE (err != REG_NOERROR, 0))
812
- goto re_compile_internal_free_return;
813
-
814
- #ifdef RE_ENABLE_I18N
815
- /* If possible, do searching in single byte encoding to speed things up. */
816
- if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
817
- optimize_utf8 (dfa);
818
- #endif
819
-
820
- /* Then create the initial state of the dfa. */
821
- err = create_initial_state (dfa);
822
-
823
- /* Release work areas. */
824
- free_workarea_compile (preg);
825
- re_string_destruct (&regexp);
826
-
827
- if (BE (err != REG_NOERROR, 0))
828
- {
829
- free_dfa_content (dfa);
830
- preg->buffer = NULL;
831
- preg->allocated = 0;
832
- }
833
-
834
- return err;
835
- }
836
-
837
- /* Initialize DFA. We use the length of the regular expression PAT_LEN
838
- as the initial length of some arrays. */
839
-
840
- static reg_errcode_t
841
- init_dfa (re_dfa_t *dfa, size_t pat_len)
842
- {
843
- unsigned int table_size;
844
-
845
- memset (dfa, '\0', sizeof (re_dfa_t));
846
-
847
- /* Force allocation of str_tree_storage the first time. */
848
- dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
849
-
850
- /* Avoid overflows. */
851
- if (pat_len == SIZE_MAX)
852
- return REG_ESPACE;
853
-
854
- dfa->nodes_alloc = pat_len + 1;
855
- dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
856
-
857
- /* table_size = 2 ^ ceil(log pat_len) */
858
- for (table_size = 1; ; table_size <<= 1)
859
- if (table_size > pat_len)
860
- break;
861
-
862
- dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
863
- dfa->state_hash_mask = table_size - 1;
864
-
865
- dfa->mb_cur_max = MB_CUR_MAX;
866
- #ifdef _LIBC
867
- if (dfa->mb_cur_max == 6
868
- && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
869
- dfa->is_utf8 = 1;
870
- dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
871
- != 0);
872
- #else
873
- dfa->is_utf8 = 1;
874
- /* We check exhaustively in the loop below if this charset is a
875
- superset of ASCII. */
876
- dfa->map_notascii = 0;
877
- #endif
878
-
879
- #ifdef RE_ENABLE_I18N
880
- if (dfa->mb_cur_max > 1)
881
- {
882
- if (dfa->is_utf8)
883
- {
884
- #if !defined(__GNUC__) || __GNUC__ < 3
885
- static short utf8_sb_map_inited = 0;
886
-
887
- if (! utf8_sb_map_inited)
888
- {
889
- int i;
890
-
891
- utf8_sb_map_inited = 0;
892
- for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++)
893
- utf8_sb_map[i] = BITSET_WORD_MAX;
894
- }
895
- #endif
896
- dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
897
- }
898
- else
899
- {
900
- int i, j, ch;
901
-
902
- dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
903
- if (BE (dfa->sb_char == NULL, 0))
904
- return REG_ESPACE;
905
-
906
- /* Set the bits corresponding to single byte chars. */
907
- for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
908
- for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
909
- {
910
- wint_t wch = __btowc (ch);
911
- if (wch != WEOF)
912
- dfa->sb_char[i] |= (bitset_word_t) 1 << j;
913
- # ifndef _LIBC
914
- if (isascii (ch) && wch != ch)
915
- dfa->map_notascii = 1;
916
- # endif
917
- }
918
- }
919
- }
920
- #endif
921
-
922
- if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
923
- return REG_ESPACE;
924
- return REG_NOERROR;
925
- }
926
-
927
- /* Initialize WORD_CHAR table, which indicate which character is
928
- "word". In this case "word" means that it is the word construction
929
- character used by some operators like "\<", "\>", etc. */
930
-
931
- static void
932
- internal_function
933
- init_word_char (re_dfa_t *dfa)
934
- {
935
- int i, j, ch;
936
- dfa->word_ops_used = 1;
937
- for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
938
- for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
939
- if (isalnum (ch) || ch == '_')
940
- dfa->word_char[i] |= (bitset_word_t) 1 << j;
941
- }
942
-
943
- /* Free the work area which are only used while compiling. */
944
-
945
- static void
946
- free_workarea_compile (regex_t *preg)
947
- {
948
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
949
- bin_tree_storage_t *storage, *next;
950
- for (storage = dfa->str_tree_storage; storage; storage = next)
951
- {
952
- next = storage->next;
953
- re_free (storage);
954
- }
955
- dfa->str_tree_storage = NULL;
956
- dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
957
- dfa->str_tree = NULL;
958
- re_free (dfa->org_indices);
959
- dfa->org_indices = NULL;
960
- }
961
-
962
- /* Create initial states for all contexts. */
963
-
964
- static reg_errcode_t
965
- create_initial_state (re_dfa_t *dfa)
966
- {
967
- int first, i;
968
- reg_errcode_t err;
969
- re_node_set init_nodes;
970
-
971
- /* Initial states have the epsilon closure of the node which is
972
- the first node of the regular expression. */
973
- first = dfa->str_tree->first->node_idx;
974
- dfa->init_node = first;
975
- err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
976
- if (BE (err != REG_NOERROR, 0))
977
- return err;
978
-
979
- /* The back-references which are in initial states can epsilon transit,
980
- since in this case all of the subexpressions can be null.
981
- Then we add epsilon closures of the nodes which are the next nodes of
982
- the back-references. */
983
- if (dfa->nbackref > 0)
984
- for (i = 0; i < init_nodes.nelem; ++i)
985
- {
986
- int node_idx = init_nodes.elems[i];
987
- re_token_type_t type = dfa->nodes[node_idx].type;
988
-
989
- int clexp_idx;
990
- if (type != OP_BACK_REF)
991
- continue;
992
- for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
993
- {
994
- re_token_t *clexp_node;
995
- clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
996
- if (clexp_node->type == OP_CLOSE_SUBEXP
997
- && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
998
- break;
999
- }
1000
- if (clexp_idx == init_nodes.nelem)
1001
- continue;
1002
-
1003
- if (type == OP_BACK_REF)
1004
- {
1005
- int dest_idx = dfa->edests[node_idx].elems[0];
1006
- if (!re_node_set_contains (&init_nodes, dest_idx))
1007
- {
1008
- reg_errcode_t err = re_node_set_merge (&init_nodes,
1009
- dfa->eclosures
1010
- + dest_idx);
1011
- if (err != REG_NOERROR)
1012
- return err;
1013
- i = 0;
1014
- }
1015
- }
1016
- }
1017
-
1018
- /* It must be the first time to invoke acquire_state. */
1019
- dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1020
- /* We don't check ERR here, since the initial state must not be NULL. */
1021
- if (BE (dfa->init_state == NULL, 0))
1022
- return err;
1023
- if (dfa->init_state->has_constraint)
1024
- {
1025
- dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1026
- CONTEXT_WORD);
1027
- dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1028
- CONTEXT_NEWLINE);
1029
- dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1030
- &init_nodes,
1031
- CONTEXT_NEWLINE
1032
- | CONTEXT_BEGBUF);
1033
- if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
1034
- || dfa->init_state_begbuf == NULL, 0))
1035
- return err;
1036
- }
1037
- else
1038
- dfa->init_state_word = dfa->init_state_nl
1039
- = dfa->init_state_begbuf = dfa->init_state;
1040
-
1041
- re_node_set_free (&init_nodes);
1042
- return REG_NOERROR;
1043
- }
1044
-
1045
- #ifdef RE_ENABLE_I18N
1046
- /* If it is possible to do searching in single byte encoding instead of UTF-8
1047
- to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1048
- DFA nodes where needed. */
1049
-
1050
- static void
1051
- optimize_utf8 (re_dfa_t *dfa)
1052
- {
1053
- int node, i, mb_chars = 0, has_period = 0;
1054
-
1055
- for (node = 0; node < dfa->nodes_len; ++node)
1056
- switch (dfa->nodes[node].type)
1057
- {
1058
- case CHARACTER:
1059
- if (dfa->nodes[node].opr.c >= 0x80)
1060
- mb_chars = 1;
1061
- break;
1062
- case ANCHOR:
1063
- switch (dfa->nodes[node].opr.ctx_type)
1064
- {
1065
- case LINE_FIRST:
1066
- case LINE_LAST:
1067
- case BUF_FIRST:
1068
- case BUF_LAST:
1069
- break;
1070
- default:
1071
- /* Word anchors etc. cannot be handled. It's okay to test
1072
- opr.ctx_type since constraints (for all DFA nodes) are
1073
- created by ORing one or more opr.ctx_type values. */
1074
- return;
1075
- }
1076
- break;
1077
- case OP_PERIOD:
1078
- has_period = 1;
1079
- break;
1080
- case OP_BACK_REF:
1081
- case OP_ALT:
1082
- case END_OF_RE:
1083
- case OP_DUP_ASTERISK:
1084
- case OP_OPEN_SUBEXP:
1085
- case OP_CLOSE_SUBEXP:
1086
- break;
1087
- case COMPLEX_BRACKET:
1088
- return;
1089
- case SIMPLE_BRACKET:
1090
- /* Just double check. The non-ASCII range starts at 0x80. */
1091
- assert (0x80 % BITSET_WORD_BITS == 0);
1092
- for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1093
- if (dfa->nodes[node].opr.sbcset[i])
1094
- return;
1095
- break;
1096
- default:
1097
- abort ();
1098
- }
1099
-
1100
- if (mb_chars || has_period)
1101
- for (node = 0; node < dfa->nodes_len; ++node)
1102
- {
1103
- if (dfa->nodes[node].type == CHARACTER
1104
- && dfa->nodes[node].opr.c >= 0x80)
1105
- dfa->nodes[node].mb_partial = 0;
1106
- else if (dfa->nodes[node].type == OP_PERIOD)
1107
- dfa->nodes[node].type = OP_UTF8_PERIOD;
1108
- }
1109
-
1110
- /* The search can be in single byte locale. */
1111
- dfa->mb_cur_max = 1;
1112
- dfa->is_utf8 = 0;
1113
- dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1114
- }
1115
- #endif
1116
-
1117
- /* Analyze the structure tree, and calculate "first", "next", "edest",
1118
- "eclosure", and "inveclosure". */
1119
-
1120
- static reg_errcode_t
1121
- analyze (regex_t *preg)
1122
- {
1123
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1124
- reg_errcode_t ret;
1125
-
1126
- /* Allocate arrays. */
1127
- dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1128
- dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1129
- dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1130
- dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1131
- if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
1132
- || dfa->eclosures == NULL, 0))
1133
- return REG_ESPACE;
1134
-
1135
- dfa->subexp_map = re_malloc (int, preg->re_nsub);
1136
- if (dfa->subexp_map != NULL)
1137
- {
1138
- unsigned int i;
1139
- for (i = 0; i < preg->re_nsub; i++)
1140
- dfa->subexp_map[i] = i;
1141
- preorder (dfa->str_tree, optimize_subexps, dfa);
1142
- for (i = 0; i < preg->re_nsub; i++)
1143
- if (dfa->subexp_map[i] != (int)i)
1144
- break;
1145
- if (i == preg->re_nsub)
1146
- {
1147
- free (dfa->subexp_map);
1148
- dfa->subexp_map = NULL;
1149
- }
1150
- }
1151
-
1152
- ret = postorder (dfa->str_tree, lower_subexps, preg);
1153
- if (BE (ret != REG_NOERROR, 0))
1154
- return ret;
1155
- ret = postorder (dfa->str_tree, calc_first, dfa);
1156
- if (BE (ret != REG_NOERROR, 0))
1157
- return ret;
1158
- preorder (dfa->str_tree, calc_next, dfa);
1159
- ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1160
- if (BE (ret != REG_NOERROR, 0))
1161
- return ret;
1162
- ret = calc_eclosure (dfa);
1163
- if (BE (ret != REG_NOERROR, 0))
1164
- return ret;
1165
-
1166
- /* We only need this during the prune_impossible_nodes pass in regexec.c;
1167
- skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1168
- if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1169
- || dfa->nbackref)
1170
- {
1171
- dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1172
- if (BE (dfa->inveclosures == NULL, 0))
1173
- return REG_ESPACE;
1174
- ret = calc_inveclosure (dfa);
1175
- }
1176
-
1177
- return ret;
1178
- }
1179
-
1180
- /* Our parse trees are very unbalanced, so we cannot use a stack to
1181
- implement parse tree visits. Instead, we use parent pointers and
1182
- some hairy code in these two functions. */
1183
- static reg_errcode_t
1184
- postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1185
- void *extra)
1186
- {
1187
- bin_tree_t *node, *prev;
1188
-
1189
- for (node = root; ; )
1190
- {
1191
- /* Descend down the tree, preferably to the left (or to the right
1192
- if that's the only child). */
1193
- while (node->left || node->right)
1194
- if (node->left)
1195
- node = node->left;
1196
- else
1197
- node = node->right;
1198
-
1199
- do
1200
- {
1201
- reg_errcode_t err = fn (extra, node);
1202
- if (BE (err != REG_NOERROR, 0))
1203
- return err;
1204
- if (node->parent == NULL)
1205
- return REG_NOERROR;
1206
- prev = node;
1207
- node = node->parent;
1208
- }
1209
- /* Go up while we have a node that is reached from the right. */
1210
- while (node->right == prev || node->right == NULL);
1211
- node = node->right;
1212
- }
1213
- }
1214
-
1215
- static reg_errcode_t
1216
- preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1217
- void *extra)
1218
- {
1219
- bin_tree_t *node;
1220
-
1221
- for (node = root; ; )
1222
- {
1223
- reg_errcode_t err = fn (extra, node);
1224
- if (BE (err != REG_NOERROR, 0))
1225
- return err;
1226
-
1227
- /* Go to the left node, or up and to the right. */
1228
- if (node->left)
1229
- node = node->left;
1230
- else
1231
- {
1232
- bin_tree_t *prev = NULL;
1233
- while (node->right == prev || node->right == NULL)
1234
- {
1235
- prev = node;
1236
- node = node->parent;
1237
- if (!node)
1238
- return REG_NOERROR;
1239
- }
1240
- node = node->right;
1241
- }
1242
- }
1243
- }
1244
-
1245
- /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1246
- re_search_internal to map the inner one's opr.idx to this one's. Adjust
1247
- backreferences as well. Requires a preorder visit. */
1248
- static reg_errcode_t
1249
- optimize_subexps (void *extra, bin_tree_t *node)
1250
- {
1251
- re_dfa_t *dfa = (re_dfa_t *) extra;
1252
-
1253
- if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1254
- {
1255
- int idx = node->token.opr.idx;
1256
- node->token.opr.idx = dfa->subexp_map[idx];
1257
- dfa->used_bkref_map |= 1 << node->token.opr.idx;
1258
- }
1259
-
1260
- else if (node->token.type == SUBEXP
1261
- && node->left && node->left->token.type == SUBEXP)
1262
- {
1263
- int other_idx = node->left->token.opr.idx;
1264
-
1265
- node->left = node->left->left;
1266
- if (node->left)
1267
- node->left->parent = node;
1268
-
1269
- dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1270
- if (other_idx < BITSET_WORD_BITS)
1271
- dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1272
- }
1273
-
1274
- return REG_NOERROR;
1275
- }
1276
-
1277
- /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1278
- of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1279
- static reg_errcode_t
1280
- lower_subexps (void *extra, bin_tree_t *node)
1281
- {
1282
- regex_t *preg = (regex_t *) extra;
1283
- reg_errcode_t err = REG_NOERROR;
1284
-
1285
- if (node->left && node->left->token.type == SUBEXP)
1286
- {
1287
- node->left = lower_subexp (&err, preg, node->left);
1288
- if (node->left)
1289
- node->left->parent = node;
1290
- }
1291
- if (node->right && node->right->token.type == SUBEXP)
1292
- {
1293
- node->right = lower_subexp (&err, preg, node->right);
1294
- if (node->right)
1295
- node->right->parent = node;
1296
- }
1297
-
1298
- return err;
1299
- }
1300
-
1301
- static bin_tree_t *
1302
- lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1303
- {
1304
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1305
- bin_tree_t *body = node->left;
1306
- bin_tree_t *op, *cls, *tree1, *tree;
1307
-
1308
- if (preg->no_sub
1309
- /* We do not optimize empty subexpressions, because otherwise we may
1310
- have bad CONCAT nodes with NULL children. This is obviously not
1311
- very common, so we do not lose much. An example that triggers
1312
- this case is the sed "script" /\(\)/x. */
1313
- && node->left != NULL
1314
- && (node->token.opr.idx >= BITSET_WORD_BITS
1315
- || !(dfa->used_bkref_map
1316
- & ((bitset_word_t) 1 << node->token.opr.idx))))
1317
- return node->left;
1318
-
1319
- /* Convert the SUBEXP node to the concatenation of an
1320
- OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1321
- op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1322
- cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1323
- tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1324
- tree = create_tree (dfa, op, tree1, CONCAT);
1325
- if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
1326
- {
1327
- *err = REG_ESPACE;
1328
- return NULL;
1329
- }
1330
-
1331
- op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1332
- op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1333
- return tree;
1334
- }
1335
-
1336
- /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1337
- nodes. Requires a postorder visit. */
1338
- static reg_errcode_t
1339
- calc_first (void *extra, bin_tree_t *node)
1340
- {
1341
- re_dfa_t *dfa = (re_dfa_t *) extra;
1342
- if (node->token.type == CONCAT)
1343
- {
1344
- node->first = node->left->first;
1345
- node->node_idx = node->left->node_idx;
1346
- }
1347
- else
1348
- {
1349
- node->first = node;
1350
- node->node_idx = re_dfa_add_node (dfa, node->token);
1351
- if (BE (node->node_idx == -1, 0))
1352
- return REG_ESPACE;
1353
- if (node->token.type == ANCHOR)
1354
- dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1355
- }
1356
- return REG_NOERROR;
1357
- }
1358
-
1359
- /* Pass 2: compute NEXT on the tree. Preorder visit. */
1360
- static reg_errcode_t
1361
- calc_next (UNUSED void *extra, bin_tree_t *node)
1362
- {
1363
- switch (node->token.type)
1364
- {
1365
- case OP_DUP_ASTERISK:
1366
- node->left->next = node;
1367
- break;
1368
- case CONCAT:
1369
- node->left->next = node->right->first;
1370
- node->right->next = node->next;
1371
- break;
1372
- default:
1373
- if (node->left)
1374
- node->left->next = node->next;
1375
- if (node->right)
1376
- node->right->next = node->next;
1377
- break;
1378
- }
1379
- return REG_NOERROR;
1380
- }
1381
-
1382
- /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1383
- static reg_errcode_t
1384
- link_nfa_nodes (void *extra, bin_tree_t *node)
1385
- {
1386
- re_dfa_t *dfa = (re_dfa_t *) extra;
1387
- int idx = node->node_idx;
1388
- reg_errcode_t err = REG_NOERROR;
1389
-
1390
- switch (node->token.type)
1391
- {
1392
- case CONCAT:
1393
- break;
1394
-
1395
- case END_OF_RE:
1396
- assert (node->next == NULL);
1397
- break;
1398
-
1399
- case OP_DUP_ASTERISK:
1400
- case OP_ALT:
1401
- {
1402
- int left, right;
1403
- dfa->has_plural_match = 1;
1404
- if (node->left != NULL)
1405
- left = node->left->first->node_idx;
1406
- else
1407
- left = node->next->node_idx;
1408
- if (node->right != NULL)
1409
- right = node->right->first->node_idx;
1410
- else
1411
- right = node->next->node_idx;
1412
- assert (left > -1);
1413
- assert (right > -1);
1414
- err = re_node_set_init_2 (dfa->edests + idx, left, right);
1415
- }
1416
- break;
1417
-
1418
- case ANCHOR:
1419
- case OP_OPEN_SUBEXP:
1420
- case OP_CLOSE_SUBEXP:
1421
- err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1422
- break;
1423
-
1424
- case OP_BACK_REF:
1425
- dfa->nexts[idx] = node->next->node_idx;
1426
- if (node->token.type == OP_BACK_REF)
1427
- err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1428
- break;
1429
-
1430
- default:
1431
- assert (!IS_EPSILON_NODE (node->token.type));
1432
- dfa->nexts[idx] = node->next->node_idx;
1433
- break;
1434
- }
1435
-
1436
- return err;
1437
- }
1438
-
1439
- /* Duplicate the epsilon closure of the node ROOT_NODE.
1440
- Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1441
- to their own constraint. */
1442
-
1443
- static reg_errcode_t
1444
- internal_function
1445
- duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1446
- int root_node, unsigned int init_constraint)
1447
- {
1448
- int org_node, clone_node, ret;
1449
- unsigned int constraint = init_constraint;
1450
- for (org_node = top_org_node, clone_node = top_clone_node;;)
1451
- {
1452
- int org_dest, clone_dest;
1453
- if (dfa->nodes[org_node].type == OP_BACK_REF)
1454
- {
1455
- /* If the back reference epsilon-transit, its destination must
1456
- also have the constraint. Then duplicate the epsilon closure
1457
- of the destination of the back reference, and store it in
1458
- edests of the back reference. */
1459
- org_dest = dfa->nexts[org_node];
1460
- re_node_set_empty (dfa->edests + clone_node);
1461
- clone_dest = duplicate_node (dfa, org_dest, constraint);
1462
- if (BE (clone_dest == -1, 0))
1463
- return REG_ESPACE;
1464
- dfa->nexts[clone_node] = dfa->nexts[org_node];
1465
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1466
- if (BE (ret < 0, 0))
1467
- return REG_ESPACE;
1468
- }
1469
- else if (dfa->edests[org_node].nelem == 0)
1470
- {
1471
- /* In case of the node can't epsilon-transit, don't duplicate the
1472
- destination and store the original destination as the
1473
- destination of the node. */
1474
- dfa->nexts[clone_node] = dfa->nexts[org_node];
1475
- break;
1476
- }
1477
- else if (dfa->edests[org_node].nelem == 1)
1478
- {
1479
- /* In case of the node can epsilon-transit, and it has only one
1480
- destination. */
1481
- org_dest = dfa->edests[org_node].elems[0];
1482
- re_node_set_empty (dfa->edests + clone_node);
1483
- /* If the node is root_node itself, it means the epsilon clsoure
1484
- has a loop. Then tie it to the destination of the root_node. */
1485
- if (org_node == root_node && clone_node != org_node)
1486
- {
1487
- ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
1488
- if (BE (ret < 0, 0))
1489
- return REG_ESPACE;
1490
- break;
1491
- }
1492
- /* In case of the node has another constraint, add it. */
1493
- constraint |= dfa->nodes[org_node].constraint;
1494
- clone_dest = duplicate_node (dfa, org_dest, constraint);
1495
- if (BE (clone_dest == -1, 0))
1496
- return REG_ESPACE;
1497
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1498
- if (BE (ret < 0, 0))
1499
- return REG_ESPACE;
1500
- }
1501
- else /* dfa->edests[org_node].nelem == 2 */
1502
- {
1503
- /* In case of the node can epsilon-transit, and it has two
1504
- destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
1505
- org_dest = dfa->edests[org_node].elems[0];
1506
- re_node_set_empty (dfa->edests + clone_node);
1507
- /* Search for a duplicated node which satisfies the constraint. */
1508
- clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1509
- if (clone_dest == -1)
1510
- {
1511
- /* There is no such duplicated node, create a new one. */
1512
- reg_errcode_t err;
1513
- clone_dest = duplicate_node (dfa, org_dest, constraint);
1514
- if (BE (clone_dest == -1, 0))
1515
- return REG_ESPACE;
1516
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1517
- if (BE (ret < 0, 0))
1518
- return REG_ESPACE;
1519
- err = duplicate_node_closure (dfa, org_dest, clone_dest,
1520
- root_node, constraint);
1521
- if (BE (err != REG_NOERROR, 0))
1522
- return err;
1523
- }
1524
- else
1525
- {
1526
- /* There is a duplicated node which satisfies the constraint,
1527
- use it to avoid infinite loop. */
1528
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1529
- if (BE (ret < 0, 0))
1530
- return REG_ESPACE;
1531
- }
1532
-
1533
- org_dest = dfa->edests[org_node].elems[1];
1534
- clone_dest = duplicate_node (dfa, org_dest, constraint);
1535
- if (BE (clone_dest == -1, 0))
1536
- return REG_ESPACE;
1537
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1538
- if (BE (ret < 0, 0))
1539
- return REG_ESPACE;
1540
- }
1541
- org_node = org_dest;
1542
- clone_node = clone_dest;
1543
- }
1544
- return REG_NOERROR;
1545
- }
1546
-
1547
- /* Search for a node which is duplicated from the node ORG_NODE, and
1548
- satisfies the constraint CONSTRAINT. */
1549
-
1550
- static int
1551
- search_duplicated_node (const re_dfa_t *dfa, int org_node,
1552
- unsigned int constraint)
1553
- {
1554
- int idx;
1555
- for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1556
- {
1557
- if (org_node == dfa->org_indices[idx]
1558
- && constraint == dfa->nodes[idx].constraint)
1559
- return idx; /* Found. */
1560
- }
1561
- return -1; /* Not found. */
1562
- }
1563
-
1564
- /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1565
- Return the index of the new node, or -1 if insufficient storage is
1566
- available. */
1567
-
1568
- static int
1569
- duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1570
- {
1571
- int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1572
- if (BE (dup_idx != -1, 1))
1573
- {
1574
- dfa->nodes[dup_idx].constraint = constraint;
1575
- dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
1576
- dfa->nodes[dup_idx].duplicated = 1;
1577
-
1578
- /* Store the index of the original node. */
1579
- dfa->org_indices[dup_idx] = org_idx;
1580
- }
1581
- return dup_idx;
1582
- }
1583
-
1584
- static reg_errcode_t
1585
- calc_inveclosure (re_dfa_t *dfa)
1586
- {
1587
- int ret;
1588
- unsigned int src, idx;
1589
- for (idx = 0; idx < dfa->nodes_len; ++idx)
1590
- re_node_set_init_empty (dfa->inveclosures + idx);
1591
-
1592
- for (src = 0; src < dfa->nodes_len; ++src)
1593
- {
1594
- int *elems = dfa->eclosures[src].elems;
1595
- int idx;
1596
- for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1597
- {
1598
- ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1599
- if (BE (ret == -1, 0))
1600
- return REG_ESPACE;
1601
- }
1602
- }
1603
-
1604
- return REG_NOERROR;
1605
- }
1606
-
1607
- /* Calculate "eclosure" for all the node in DFA. */
1608
-
1609
- static reg_errcode_t
1610
- calc_eclosure (re_dfa_t *dfa)
1611
- {
1612
- size_t node_idx;
1613
- int incomplete;
1614
- #ifdef DEBUG
1615
- assert (dfa->nodes_len > 0);
1616
- #endif
1617
- incomplete = 0;
1618
- /* For each nodes, calculate epsilon closure. */
1619
- for (node_idx = 0; ; ++node_idx)
1620
- {
1621
- reg_errcode_t err;
1622
- re_node_set eclosure_elem;
1623
- if (node_idx == dfa->nodes_len)
1624
- {
1625
- if (!incomplete)
1626
- break;
1627
- incomplete = 0;
1628
- node_idx = 0;
1629
- }
1630
-
1631
- #ifdef DEBUG
1632
- assert (dfa->eclosures[node_idx].nelem != -1);
1633
- #endif
1634
-
1635
- /* If we have already calculated, skip it. */
1636
- if (dfa->eclosures[node_idx].nelem != 0)
1637
- continue;
1638
- /* Calculate epsilon closure of `node_idx'. */
1639
- err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1640
- if (BE (err != REG_NOERROR, 0))
1641
- return err;
1642
-
1643
- if (dfa->eclosures[node_idx].nelem == 0)
1644
- {
1645
- incomplete = 1;
1646
- re_node_set_free (&eclosure_elem);
1647
- }
1648
- }
1649
- return REG_NOERROR;
1650
- }
1651
-
1652
- /* Calculate epsilon closure of NODE. */
1653
-
1654
- static reg_errcode_t
1655
- calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
1656
- {
1657
- reg_errcode_t err;
1658
- int i;
1659
- re_node_set eclosure;
1660
- int ret;
1661
- int incomplete = 0;
1662
- err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1663
- if (BE (err != REG_NOERROR, 0))
1664
- return err;
1665
-
1666
- /* This indicates that we are calculating this node now.
1667
- We reference this value to avoid infinite loop. */
1668
- dfa->eclosures[node].nelem = -1;
1669
-
1670
- /* If the current node has constraints, duplicate all nodes
1671
- since they must inherit the constraints. */
1672
- if (dfa->nodes[node].constraint
1673
- && dfa->edests[node].nelem
1674
- && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1675
- {
1676
- err = duplicate_node_closure (dfa, node, node, node,
1677
- dfa->nodes[node].constraint);
1678
- if (BE (err != REG_NOERROR, 0))
1679
- return err;
1680
- }
1681
-
1682
- /* Expand each epsilon destination nodes. */
1683
- if (IS_EPSILON_NODE(dfa->nodes[node].type))
1684
- for (i = 0; i < dfa->edests[node].nelem; ++i)
1685
- {
1686
- re_node_set eclosure_elem;
1687
- int edest = dfa->edests[node].elems[i];
1688
- /* If calculating the epsilon closure of `edest' is in progress,
1689
- return intermediate result. */
1690
- if (dfa->eclosures[edest].nelem == -1)
1691
- {
1692
- incomplete = 1;
1693
- continue;
1694
- }
1695
- /* If we haven't calculated the epsilon closure of `edest' yet,
1696
- calculate now. Otherwise use calculated epsilon closure. */
1697
- if (dfa->eclosures[edest].nelem == 0)
1698
- {
1699
- err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1700
- if (BE (err != REG_NOERROR, 0))
1701
- return err;
1702
- }
1703
- else
1704
- eclosure_elem = dfa->eclosures[edest];
1705
- /* Merge the epsilon closure of `edest'. */
1706
- err = re_node_set_merge (&eclosure, &eclosure_elem);
1707
- if (BE (err != REG_NOERROR, 0))
1708
- return err;
1709
- /* If the epsilon closure of `edest' is incomplete,
1710
- the epsilon closure of this node is also incomplete. */
1711
- if (dfa->eclosures[edest].nelem == 0)
1712
- {
1713
- incomplete = 1;
1714
- re_node_set_free (&eclosure_elem);
1715
- }
1716
- }
1717
-
1718
- /* An epsilon closure includes itself. */
1719
- ret = re_node_set_insert (&eclosure, node);
1720
- if (BE (ret < 0, 0))
1721
- return REG_ESPACE;
1722
- if (incomplete && !root)
1723
- dfa->eclosures[node].nelem = 0;
1724
- else
1725
- dfa->eclosures[node] = eclosure;
1726
- *new_set = eclosure;
1727
- return REG_NOERROR;
1728
- }
1729
-
1730
- /* Functions for token which are used in the parser. */
1731
-
1732
- /* Fetch a token from INPUT.
1733
- We must not use this function inside bracket expressions. */
1734
-
1735
- static void
1736
- internal_function
1737
- fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1738
- {
1739
- re_string_skip_bytes (input, peek_token (result, input, syntax));
1740
- }
1741
-
1742
- /* Peek a token from INPUT, and return the length of the token.
1743
- We must not use this function inside bracket expressions. */
1744
-
1745
- static int
1746
- internal_function
1747
- peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1748
- {
1749
- unsigned char c;
1750
-
1751
- if (re_string_eoi (input))
1752
- {
1753
- token->type = END_OF_RE;
1754
- return 0;
1755
- }
1756
-
1757
- c = re_string_peek_byte (input, 0);
1758
- token->opr.c = c;
1759
-
1760
- token->word_char = 0;
1761
- #ifdef RE_ENABLE_I18N
1762
- token->mb_partial = 0;
1763
- if (input->mb_cur_max > 1 &&
1764
- !re_string_first_byte (input, re_string_cur_idx (input)))
1765
- {
1766
- token->type = CHARACTER;
1767
- token->mb_partial = 1;
1768
- return 1;
1769
- }
1770
- #endif
1771
- if (c == '\\')
1772
- {
1773
- unsigned char c2;
1774
- if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1775
- {
1776
- token->type = BACK_SLASH;
1777
- return 1;
1778
- }
1779
-
1780
- c2 = re_string_peek_byte_case (input, 1);
1781
- token->opr.c = c2;
1782
- token->type = CHARACTER;
1783
- #ifdef RE_ENABLE_I18N
1784
- if (input->mb_cur_max > 1)
1785
- {
1786
- wint_t wc = re_string_wchar_at (input,
1787
- re_string_cur_idx (input) + 1);
1788
- token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1789
- }
1790
- else
1791
- #endif
1792
- token->word_char = IS_WORD_CHAR (c2) != 0;
1793
-
1794
- switch (c2)
1795
- {
1796
- case '|':
1797
- if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1798
- token->type = OP_ALT;
1799
- break;
1800
- case '1': case '2': case '3': case '4': case '5':
1801
- case '6': case '7': case '8': case '9':
1802
- if (!(syntax & RE_NO_BK_REFS))
1803
- {
1804
- token->type = OP_BACK_REF;
1805
- token->opr.idx = c2 - '1';
1806
- }
1807
- break;
1808
- case '<':
1809
- if (!(syntax & RE_NO_GNU_OPS))
1810
- {
1811
- token->type = ANCHOR;
1812
- token->opr.ctx_type = WORD_FIRST;
1813
- }
1814
- break;
1815
- case '>':
1816
- if (!(syntax & RE_NO_GNU_OPS))
1817
- {
1818
- token->type = ANCHOR;
1819
- token->opr.ctx_type = WORD_LAST;
1820
- }
1821
- break;
1822
- case 'b':
1823
- if (!(syntax & RE_NO_GNU_OPS))
1824
- {
1825
- token->type = ANCHOR;
1826
- token->opr.ctx_type = WORD_DELIM;
1827
- }
1828
- break;
1829
- case 'B':
1830
- if (!(syntax & RE_NO_GNU_OPS))
1831
- {
1832
- token->type = ANCHOR;
1833
- token->opr.ctx_type = NOT_WORD_DELIM;
1834
- }
1835
- break;
1836
- case 'w':
1837
- if (!(syntax & RE_NO_GNU_OPS))
1838
- token->type = OP_WORD;
1839
- break;
1840
- case 'W':
1841
- if (!(syntax & RE_NO_GNU_OPS))
1842
- token->type = OP_NOTWORD;
1843
- break;
1844
- case 's':
1845
- if (!(syntax & RE_NO_GNU_OPS))
1846
- token->type = OP_SPACE;
1847
- break;
1848
- case 'S':
1849
- if (!(syntax & RE_NO_GNU_OPS))
1850
- token->type = OP_NOTSPACE;
1851
- break;
1852
- case '`':
1853
- if (!(syntax & RE_NO_GNU_OPS))
1854
- {
1855
- token->type = ANCHOR;
1856
- token->opr.ctx_type = BUF_FIRST;
1857
- }
1858
- break;
1859
- case '\'':
1860
- if (!(syntax & RE_NO_GNU_OPS))
1861
- {
1862
- token->type = ANCHOR;
1863
- token->opr.ctx_type = BUF_LAST;
1864
- }
1865
- break;
1866
- case '(':
1867
- if (!(syntax & RE_NO_BK_PARENS))
1868
- token->type = OP_OPEN_SUBEXP;
1869
- break;
1870
- case ')':
1871
- if (!(syntax & RE_NO_BK_PARENS))
1872
- token->type = OP_CLOSE_SUBEXP;
1873
- break;
1874
- case '+':
1875
- if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1876
- token->type = OP_DUP_PLUS;
1877
- break;
1878
- case '?':
1879
- if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1880
- token->type = OP_DUP_QUESTION;
1881
- break;
1882
- case '{':
1883
- if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1884
- token->type = OP_OPEN_DUP_NUM;
1885
- break;
1886
- case '}':
1887
- if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1888
- token->type = OP_CLOSE_DUP_NUM;
1889
- break;
1890
- default:
1891
- break;
1892
- }
1893
- return 2;
1894
- }
1895
-
1896
- token->type = CHARACTER;
1897
- #ifdef RE_ENABLE_I18N
1898
- if (input->mb_cur_max > 1)
1899
- {
1900
- wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1901
- token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1902
- }
1903
- else
1904
- #endif
1905
- token->word_char = IS_WORD_CHAR (token->opr.c);
1906
-
1907
- switch (c)
1908
- {
1909
- case '\n':
1910
- if (syntax & RE_NEWLINE_ALT)
1911
- token->type = OP_ALT;
1912
- break;
1913
- case '|':
1914
- if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1915
- token->type = OP_ALT;
1916
- break;
1917
- case '*':
1918
- token->type = OP_DUP_ASTERISK;
1919
- break;
1920
- case '+':
1921
- if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1922
- token->type = OP_DUP_PLUS;
1923
- break;
1924
- case '?':
1925
- if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1926
- token->type = OP_DUP_QUESTION;
1927
- break;
1928
- case '{':
1929
- if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1930
- token->type = OP_OPEN_DUP_NUM;
1931
- break;
1932
- case '}':
1933
- if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1934
- token->type = OP_CLOSE_DUP_NUM;
1935
- break;
1936
- case '(':
1937
- if (syntax & RE_NO_BK_PARENS)
1938
- token->type = OP_OPEN_SUBEXP;
1939
- break;
1940
- case ')':
1941
- if (syntax & RE_NO_BK_PARENS)
1942
- token->type = OP_CLOSE_SUBEXP;
1943
- break;
1944
- case '[':
1945
- token->type = OP_OPEN_BRACKET;
1946
- break;
1947
- case '.':
1948
- token->type = OP_PERIOD;
1949
- break;
1950
- case '^':
1951
- if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
1952
- re_string_cur_idx (input) != 0)
1953
- {
1954
- char prev = re_string_peek_byte (input, -1);
1955
- if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
1956
- break;
1957
- }
1958
- token->type = ANCHOR;
1959
- token->opr.ctx_type = LINE_FIRST;
1960
- break;
1961
- case '$':
1962
- if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1963
- re_string_cur_idx (input) + 1 != re_string_length (input))
1964
- {
1965
- re_token_t next;
1966
- re_string_skip_bytes (input, 1);
1967
- peek_token (&next, input, syntax);
1968
- re_string_skip_bytes (input, -1);
1969
- if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1970
- break;
1971
- }
1972
- token->type = ANCHOR;
1973
- token->opr.ctx_type = LINE_LAST;
1974
- break;
1975
- default:
1976
- break;
1977
- }
1978
- return 1;
1979
- }
1980
-
1981
- /* Peek a token from INPUT, and return the length of the token.
1982
- We must not use this function out of bracket expressions. */
1983
-
1984
- static int
1985
- internal_function
1986
- peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1987
- {
1988
- unsigned char c;
1989
- if (re_string_eoi (input))
1990
- {
1991
- token->type = END_OF_RE;
1992
- return 0;
1993
- }
1994
- c = re_string_peek_byte (input, 0);
1995
- token->opr.c = c;
1996
-
1997
- #ifdef RE_ENABLE_I18N
1998
- if (input->mb_cur_max > 1 &&
1999
- !re_string_first_byte (input, re_string_cur_idx (input)))
2000
- {
2001
- token->type = CHARACTER;
2002
- return 1;
2003
- }
2004
- #endif /* RE_ENABLE_I18N */
2005
-
2006
- if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2007
- && re_string_cur_idx (input) + 1 < re_string_length (input))
2008
- {
2009
- /* In this case, '\' escape a character. */
2010
- unsigned char c2;
2011
- re_string_skip_bytes (input, 1);
2012
- c2 = re_string_peek_byte (input, 0);
2013
- token->opr.c = c2;
2014
- token->type = CHARACTER;
2015
- return 1;
2016
- }
2017
- if (c == '[') /* '[' is a special char in a bracket exps. */
2018
- {
2019
- unsigned char c2;
2020
- int token_len;
2021
- if (re_string_cur_idx (input) + 1 < re_string_length (input))
2022
- c2 = re_string_peek_byte (input, 1);
2023
- else
2024
- c2 = 0;
2025
- token->opr.c = c2;
2026
- token_len = 2;
2027
- switch (c2)
2028
- {
2029
- case '.':
2030
- token->type = OP_OPEN_COLL_ELEM;
2031
- break;
2032
- case '=':
2033
- token->type = OP_OPEN_EQUIV_CLASS;
2034
- break;
2035
- case ':':
2036
- if (syntax & RE_CHAR_CLASSES)
2037
- {
2038
- token->type = OP_OPEN_CHAR_CLASS;
2039
- break;
2040
- }
2041
- /* else fall through. */
2042
- default:
2043
- token->type = CHARACTER;
2044
- token->opr.c = c;
2045
- token_len = 1;
2046
- break;
2047
- }
2048
- return token_len;
2049
- }
2050
- switch (c)
2051
- {
2052
- case '-':
2053
- token->type = OP_CHARSET_RANGE;
2054
- break;
2055
- case ']':
2056
- token->type = OP_CLOSE_BRACKET;
2057
- break;
2058
- case '^':
2059
- token->type = OP_NON_MATCH_LIST;
2060
- break;
2061
- default:
2062
- token->type = CHARACTER;
2063
- }
2064
- return 1;
2065
- }
2066
-
2067
- /* Functions for parser. */
2068
-
2069
- /* Entry point of the parser.
2070
- Parse the regular expression REGEXP and return the structure tree.
2071
- If an error is occured, ERR is set by error code, and return NULL.
2072
- This function build the following tree, from regular expression <reg_exp>:
2073
- CAT
2074
- / \
2075
- / \
2076
- <reg_exp> EOR
2077
-
2078
- CAT means concatenation.
2079
- EOR means end of regular expression. */
2080
-
2081
- static bin_tree_t *
2082
- parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2083
- reg_errcode_t *err)
2084
- {
2085
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2086
- bin_tree_t *tree, *eor, *root;
2087
- re_token_t current_token;
2088
- dfa->syntax = syntax;
2089
- fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2090
- tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2091
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2092
- return NULL;
2093
- eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2094
- if (tree != NULL)
2095
- root = create_tree (dfa, tree, eor, CONCAT);
2096
- else
2097
- root = eor;
2098
- if (BE (eor == NULL || root == NULL, 0))
2099
- {
2100
- *err = REG_ESPACE;
2101
- return NULL;
2102
- }
2103
- return root;
2104
- }
2105
-
2106
- /* This function build the following tree, from regular expression
2107
- <branch1>|<branch2>:
2108
- ALT
2109
- / \
2110
- / \
2111
- <branch1> <branch2>
2112
-
2113
- ALT means alternative, which represents the operator `|'. */
2114
-
2115
- static bin_tree_t *
2116
- parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2117
- reg_syntax_t syntax, int nest, reg_errcode_t *err)
2118
- {
2119
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2120
- bin_tree_t *tree, *branch = NULL;
2121
- tree = parse_branch (regexp, preg, token, syntax, nest, err);
2122
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2123
- return NULL;
2124
-
2125
- while (token->type == OP_ALT)
2126
- {
2127
- fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2128
- if (token->type != OP_ALT && token->type != END_OF_RE
2129
- && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2130
- {
2131
- branch = parse_branch (regexp, preg, token, syntax, nest, err);
2132
- if (BE (*err != REG_NOERROR && branch == NULL, 0))
2133
- return NULL;
2134
- }
2135
- else
2136
- branch = NULL;
2137
- tree = create_tree (dfa, tree, branch, OP_ALT);
2138
- if (BE (tree == NULL, 0))
2139
- {
2140
- *err = REG_ESPACE;
2141
- return NULL;
2142
- }
2143
- }
2144
- return tree;
2145
- }
2146
-
2147
- /* This function build the following tree, from regular expression
2148
- <exp1><exp2>:
2149
- CAT
2150
- / \
2151
- / \
2152
- <exp1> <exp2>
2153
-
2154
- CAT means concatenation. */
2155
-
2156
- static bin_tree_t *
2157
- parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2158
- reg_syntax_t syntax, int nest, reg_errcode_t *err)
2159
- {
2160
- bin_tree_t *tree, *exp;
2161
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2162
- tree = parse_expression (regexp, preg, token, syntax, nest, err);
2163
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2164
- return NULL;
2165
-
2166
- while (token->type != OP_ALT && token->type != END_OF_RE
2167
- && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2168
- {
2169
- exp = parse_expression (regexp, preg, token, syntax, nest, err);
2170
- if (BE (*err != REG_NOERROR && exp == NULL, 0))
2171
- {
2172
- return NULL;
2173
- }
2174
- if (tree != NULL && exp != NULL)
2175
- {
2176
- tree = create_tree (dfa, tree, exp, CONCAT);
2177
- if (tree == NULL)
2178
- {
2179
- *err = REG_ESPACE;
2180
- return NULL;
2181
- }
2182
- }
2183
- else if (tree == NULL)
2184
- tree = exp;
2185
- /* Otherwise exp == NULL, we don't need to create new tree. */
2186
- }
2187
- return tree;
2188
- }
2189
-
2190
- /* This function build the following tree, from regular expression a*:
2191
- *
2192
- |
2193
- a
2194
- */
2195
-
2196
- static bin_tree_t *
2197
- parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2198
- reg_syntax_t syntax, int nest, reg_errcode_t *err)
2199
- {
2200
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2201
- bin_tree_t *tree;
2202
- switch (token->type)
2203
- {
2204
- case CHARACTER:
2205
- tree = create_token_tree (dfa, NULL, NULL, token);
2206
- if (BE (tree == NULL, 0))
2207
- {
2208
- *err = REG_ESPACE;
2209
- return NULL;
2210
- }
2211
- #ifdef RE_ENABLE_I18N
2212
- if (dfa->mb_cur_max > 1)
2213
- {
2214
- while (!re_string_eoi (regexp)
2215
- && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2216
- {
2217
- bin_tree_t *mbc_remain;
2218
- fetch_token (token, regexp, syntax);
2219
- mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2220
- tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2221
- if (BE (mbc_remain == NULL || tree == NULL, 0))
2222
- {
2223
- *err = REG_ESPACE;
2224
- return NULL;
2225
- }
2226
- }
2227
- }
2228
- #endif
2229
- break;
2230
- case OP_OPEN_SUBEXP:
2231
- tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2232
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2233
- return NULL;
2234
- break;
2235
- case OP_OPEN_BRACKET:
2236
- tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2237
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2238
- return NULL;
2239
- break;
2240
- case OP_BACK_REF:
2241
- if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2242
- {
2243
- *err = REG_ESUBREG;
2244
- return NULL;
2245
- }
2246
- dfa->used_bkref_map |= 1 << token->opr.idx;
2247
- tree = create_token_tree (dfa, NULL, NULL, token);
2248
- if (BE (tree == NULL, 0))
2249
- {
2250
- *err = REG_ESPACE;
2251
- return NULL;
2252
- }
2253
- ++dfa->nbackref;
2254
- dfa->has_mb_node = 1;
2255
- break;
2256
- case OP_OPEN_DUP_NUM:
2257
- if (syntax & RE_CONTEXT_INVALID_DUP)
2258
- {
2259
- *err = REG_BADRPT;
2260
- return NULL;
2261
- }
2262
- /* FALLTHROUGH */
2263
- case OP_DUP_ASTERISK:
2264
- case OP_DUP_PLUS:
2265
- case OP_DUP_QUESTION:
2266
- if (syntax & RE_CONTEXT_INVALID_OPS)
2267
- {
2268
- *err = REG_BADRPT;
2269
- return NULL;
2270
- }
2271
- else if (syntax & RE_CONTEXT_INDEP_OPS)
2272
- {
2273
- fetch_token (token, regexp, syntax);
2274
- return parse_expression (regexp, preg, token, syntax, nest, err);
2275
- }
2276
- /* else fall through */
2277
- case OP_CLOSE_SUBEXP:
2278
- if ((token->type == OP_CLOSE_SUBEXP) &&
2279
- !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2280
- {
2281
- *err = REG_ERPAREN;
2282
- return NULL;
2283
- }
2284
- /* else fall through */
2285
- case OP_CLOSE_DUP_NUM:
2286
- /* We treat it as a normal character. */
2287
-
2288
- /* Then we can these characters as normal characters. */
2289
- token->type = CHARACTER;
2290
- /* mb_partial and word_char bits should be initialized already
2291
- by peek_token. */
2292
- tree = create_token_tree (dfa, NULL, NULL, token);
2293
- if (BE (tree == NULL, 0))
2294
- {
2295
- *err = REG_ESPACE;
2296
- return NULL;
2297
- }
2298
- break;
2299
- case ANCHOR:
2300
- if ((token->opr.ctx_type
2301
- & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
2302
- && dfa->word_ops_used == 0)
2303
- init_word_char (dfa);
2304
- if (token->opr.ctx_type == WORD_DELIM
2305
- || token->opr.ctx_type == NOT_WORD_DELIM)
2306
- {
2307
- bin_tree_t *tree_first, *tree_last;
2308
- if (token->opr.ctx_type == WORD_DELIM)
2309
- {
2310
- token->opr.ctx_type = WORD_FIRST;
2311
- tree_first = create_token_tree (dfa, NULL, NULL, token);
2312
- token->opr.ctx_type = WORD_LAST;
2313
- }
2314
- else
2315
- {
2316
- token->opr.ctx_type = INSIDE_WORD;
2317
- tree_first = create_token_tree (dfa, NULL, NULL, token);
2318
- token->opr.ctx_type = INSIDE_NOTWORD;
2319
- }
2320
- tree_last = create_token_tree (dfa, NULL, NULL, token);
2321
- tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2322
- if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
2323
- {
2324
- *err = REG_ESPACE;
2325
- return NULL;
2326
- }
2327
- }
2328
- else
2329
- {
2330
- tree = create_token_tree (dfa, NULL, NULL, token);
2331
- if (BE (tree == NULL, 0))
2332
- {
2333
- *err = REG_ESPACE;
2334
- return NULL;
2335
- }
2336
- }
2337
- /* We must return here, since ANCHORs can't be followed
2338
- by repetition operators.
2339
- eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2340
- it must not be "<ANCHOR(^)><REPEAT(*)>". */
2341
- fetch_token (token, regexp, syntax);
2342
- return tree;
2343
- case OP_PERIOD:
2344
- tree = create_token_tree (dfa, NULL, NULL, token);
2345
- if (BE (tree == NULL, 0))
2346
- {
2347
- *err = REG_ESPACE;
2348
- return NULL;
2349
- }
2350
- if (dfa->mb_cur_max > 1)
2351
- dfa->has_mb_node = 1;
2352
- break;
2353
- case OP_WORD:
2354
- case OP_NOTWORD:
2355
- tree = build_charclass_op (dfa, regexp->trans,
2356
- "alnum",
2357
- "_",
2358
- token->type == OP_NOTWORD, err);
2359
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2360
- return NULL;
2361
- break;
2362
- case OP_SPACE:
2363
- case OP_NOTSPACE:
2364
- tree = build_charclass_op (dfa, regexp->trans,
2365
- "space",
2366
- "",
2367
- token->type == OP_NOTSPACE, err);
2368
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2369
- return NULL;
2370
- break;
2371
- case OP_ALT:
2372
- case END_OF_RE:
2373
- return NULL;
2374
- case BACK_SLASH:
2375
- *err = REG_EESCAPE;
2376
- return NULL;
2377
- default:
2378
- /* Must not happen? */
2379
- #ifdef DEBUG
2380
- assert (0);
2381
- #endif
2382
- return NULL;
2383
- }
2384
- fetch_token (token, regexp, syntax);
2385
-
2386
- while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2387
- || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2388
- {
2389
- tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2390
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
2391
- return NULL;
2392
- /* In BRE consecutive duplications are not allowed. */
2393
- if ((syntax & RE_CONTEXT_INVALID_DUP)
2394
- && (token->type == OP_DUP_ASTERISK
2395
- || token->type == OP_OPEN_DUP_NUM))
2396
- {
2397
- *err = REG_BADRPT;
2398
- return NULL;
2399
- }
2400
- }
2401
-
2402
- return tree;
2403
- }
2404
-
2405
- /* This function build the following tree, from regular expression
2406
- (<reg_exp>):
2407
- SUBEXP
2408
- |
2409
- <reg_exp>
2410
- */
2411
-
2412
- static bin_tree_t *
2413
- parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2414
- reg_syntax_t syntax, int nest, reg_errcode_t *err)
2415
- {
2416
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2417
- bin_tree_t *tree;
2418
- size_t cur_nsub;
2419
- cur_nsub = preg->re_nsub++;
2420
-
2421
- fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2422
-
2423
- /* The subexpression may be a null string. */
2424
- if (token->type == OP_CLOSE_SUBEXP)
2425
- tree = NULL;
2426
- else
2427
- {
2428
- tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2429
- if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2430
- *err = REG_EPAREN;
2431
- if (BE (*err != REG_NOERROR, 0))
2432
- return NULL;
2433
- }
2434
-
2435
- if (cur_nsub <= '9' - '1')
2436
- dfa->completed_bkref_map |= 1 << cur_nsub;
2437
-
2438
- tree = create_tree (dfa, tree, NULL, SUBEXP);
2439
- if (BE (tree == NULL, 0))
2440
- {
2441
- *err = REG_ESPACE;
2442
- return NULL;
2443
- }
2444
- tree->token.opr.idx = cur_nsub;
2445
- return tree;
2446
- }
2447
-
2448
- /* This function parse repetition operators like "*", "+", "{1,3}" etc. */
2449
-
2450
- static bin_tree_t *
2451
- parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2452
- re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
2453
- {
2454
- bin_tree_t *tree = NULL, *old_tree = NULL;
2455
- int i, start, end, start_idx = re_string_cur_idx (regexp);
2456
- #ifndef RE_TOKEN_INIT_BUG
2457
- re_token_t start_token = *token;
2458
- #else
2459
- re_token_t start_token;
2460
-
2461
- memcpy ((void *) &start_token, (void *) token, sizeof start_token);
2462
- #endif
2463
-
2464
- if (token->type == OP_OPEN_DUP_NUM)
2465
- {
2466
- end = 0;
2467
- start = fetch_number (regexp, token, syntax);
2468
- if (start == -1)
2469
- {
2470
- if (token->type == CHARACTER && token->opr.c == ',')
2471
- start = 0; /* We treat "{,m}" as "{0,m}". */
2472
- else
2473
- {
2474
- *err = REG_BADBR; /* <re>{} is invalid. */
2475
- return NULL;
2476
- }
2477
- }
2478
- if (BE (start != -2, 1))
2479
- {
2480
- /* We treat "{n}" as "{n,n}". */
2481
- end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2482
- : ((token->type == CHARACTER && token->opr.c == ',')
2483
- ? fetch_number (regexp, token, syntax) : -2));
2484
- }
2485
- if (BE (start == -2 || end == -2, 0))
2486
- {
2487
- /* Invalid sequence. */
2488
- if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2489
- {
2490
- if (token->type == END_OF_RE)
2491
- *err = REG_EBRACE;
2492
- else
2493
- *err = REG_BADBR;
2494
-
2495
- return NULL;
2496
- }
2497
-
2498
- /* If the syntax bit is set, rollback. */
2499
- re_string_set_index (regexp, start_idx);
2500
- *token = start_token;
2501
- token->type = CHARACTER;
2502
- /* mb_partial and word_char bits should be already initialized by
2503
- peek_token. */
2504
- return elem;
2505
- }
2506
-
2507
- if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0))
2508
- {
2509
- /* First number greater than second. */
2510
- *err = REG_BADBR;
2511
- return NULL;
2512
- }
2513
- }
2514
- else
2515
- {
2516
- start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2517
- end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2518
- }
2519
-
2520
- fetch_token (token, regexp, syntax);
2521
-
2522
- if (BE (elem == NULL, 0))
2523
- return NULL;
2524
- if (BE (start == 0 && end == 0, 0))
2525
- {
2526
- postorder (elem, free_tree, NULL);
2527
- return NULL;
2528
- }
2529
-
2530
- /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2531
- if (BE (start > 0, 0))
2532
- {
2533
- tree = elem;
2534
- for (i = 2; i <= start; ++i)
2535
- {
2536
- elem = duplicate_tree (elem, dfa);
2537
- tree = create_tree (dfa, tree, elem, CONCAT);
2538
- if (BE (elem == NULL || tree == NULL, 0))
2539
- goto parse_dup_op_espace;
2540
- }
2541
-
2542
- if (start == end)
2543
- return tree;
2544
-
2545
- /* Duplicate ELEM before it is marked optional. */
2546
- elem = duplicate_tree (elem, dfa);
2547
- old_tree = tree;
2548
- }
2549
- else
2550
- old_tree = NULL;
2551
-
2552
- if (elem->token.type == SUBEXP)
2553
- postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
2554
-
2555
- tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2556
- if (BE (tree == NULL, 0))
2557
- goto parse_dup_op_espace;
2558
-
2559
- /* This loop is actually executed only when end != -1,
2560
- to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2561
- already created the start+1-th copy. */
2562
- for (i = start + 2; i <= end; ++i)
2563
- {
2564
- elem = duplicate_tree (elem, dfa);
2565
- tree = create_tree (dfa, tree, elem, CONCAT);
2566
- if (BE (elem == NULL || tree == NULL, 0))
2567
- goto parse_dup_op_espace;
2568
-
2569
- tree = create_tree (dfa, tree, NULL, OP_ALT);
2570
- if (BE (tree == NULL, 0))
2571
- goto parse_dup_op_espace;
2572
- }
2573
-
2574
- if (old_tree)
2575
- tree = create_tree (dfa, old_tree, tree, CONCAT);
2576
-
2577
- return tree;
2578
-
2579
- parse_dup_op_espace:
2580
- *err = REG_ESPACE;
2581
- return NULL;
2582
- }
2583
-
2584
- /* Size of the names for collating symbol/equivalence_class/character_class.
2585
- I'm not sure, but maybe enough. */
2586
- #define BRACKET_NAME_BUF_SIZE 32
2587
-
2588
- #ifndef _LIBC
2589
- /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2590
- Build the range expression which starts from START_ELEM, and ends
2591
- at END_ELEM. The result are written to MBCSET and SBCSET.
2592
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2593
- mbcset->range_ends, is a pointer argument sinse we may
2594
- update it. */
2595
-
2596
- static reg_errcode_t
2597
- internal_function
2598
- # ifdef RE_ENABLE_I18N
2599
- build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
2600
- bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2601
- # else /* not RE_ENABLE_I18N */
2602
- build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2603
- bracket_elem_t *end_elem)
2604
- # endif /* not RE_ENABLE_I18N */
2605
- {
2606
- unsigned int start_ch, end_ch;
2607
- /* Equivalence Classes and Character Classes can't be a range start/end. */
2608
- if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2609
- || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2610
- 0))
2611
- return REG_ERANGE;
2612
-
2613
- /* We can handle no multi character collating elements without libc
2614
- support. */
2615
- if (BE ((start_elem->type == COLL_SYM
2616
- && strlen ((char *) start_elem->opr.name) > 1)
2617
- || (end_elem->type == COLL_SYM
2618
- && strlen ((char *) end_elem->opr.name) > 1), 0))
2619
- return REG_ECOLLATE;
2620
-
2621
- # ifdef RE_ENABLE_I18N
2622
- {
2623
- wchar_t wc;
2624
- wint_t start_wc;
2625
- wint_t end_wc;
2626
- wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2627
-
2628
- start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2629
- : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2630
- : 0));
2631
- end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2632
- : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2633
- : 0));
2634
- #ifdef GAWK
2635
- /*
2636
- * Fedora Core 2, maybe others, have broken `btowc' that returns -1
2637
- * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are
2638
- * unsigned, so we don't have sign extension problems.
2639
- */
2640
- start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2641
- ? start_ch : start_elem->opr.wch);
2642
- end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2643
- ? end_ch : end_elem->opr.wch);
2644
- #else
2645
- start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2646
- ? __btowc (start_ch) : start_elem->opr.wch);
2647
- end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2648
- ? __btowc (end_ch) : end_elem->opr.wch);
2649
- #endif
2650
- if (start_wc == WEOF || end_wc == WEOF)
2651
- return REG_ECOLLATE;
2652
- cmp_buf[0] = start_wc;
2653
- cmp_buf[4] = end_wc;
2654
- if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2655
- return REG_ERANGE;
2656
-
2657
- /* Got valid collation sequence values, add them as a new entry.
2658
- However, for !_LIBC we have no collation elements: if the
2659
- character set is single byte, the single byte character set
2660
- that we build below suffices. parse_bracket_exp passes
2661
- no MBCSET if dfa->mb_cur_max == 1. */
2662
- if (mbcset)
2663
- {
2664
- /* Check the space of the arrays. */
2665
- if (BE (*range_alloc == mbcset->nranges, 0))
2666
- {
2667
- /* There is not enough space, need realloc. */
2668
- wchar_t *new_array_start, *new_array_end;
2669
- int new_nranges;
2670
-
2671
- /* +1 in case of mbcset->nranges is 0. */
2672
- new_nranges = 2 * mbcset->nranges + 1;
2673
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
2674
- are NULL if *range_alloc == 0. */
2675
- new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2676
- new_nranges);
2677
- new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2678
- new_nranges);
2679
-
2680
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2681
- return REG_ESPACE;
2682
-
2683
- mbcset->range_starts = new_array_start;
2684
- mbcset->range_ends = new_array_end;
2685
- *range_alloc = new_nranges;
2686
- }
2687
-
2688
- mbcset->range_starts[mbcset->nranges] = start_wc;
2689
- mbcset->range_ends[mbcset->nranges++] = end_wc;
2690
- }
2691
-
2692
- /* Build the table for single byte characters. */
2693
- for (wc = 0; wc < SBC_MAX; ++wc)
2694
- {
2695
- cmp_buf[2] = wc;
2696
- if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2697
- && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2698
- bitset_set (sbcset, wc);
2699
- }
2700
- }
2701
- # else /* not RE_ENABLE_I18N */
2702
- {
2703
- unsigned int ch;
2704
- start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2705
- : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2706
- : 0));
2707
- end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2708
- : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2709
- : 0));
2710
- if (start_ch > end_ch)
2711
- return REG_ERANGE;
2712
- /* Build the table for single byte characters. */
2713
- for (ch = 0; ch < SBC_MAX; ++ch)
2714
- if (start_ch <= ch && ch <= end_ch)
2715
- bitset_set (sbcset, ch);
2716
- }
2717
- # endif /* not RE_ENABLE_I18N */
2718
- return REG_NOERROR;
2719
- }
2720
- #endif /* not _LIBC */
2721
-
2722
- #ifndef _LIBC
2723
- /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2724
- Build the collating element which is represented by NAME.
2725
- The result are written to MBCSET and SBCSET.
2726
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2727
- pointer argument since we may update it. */
2728
-
2729
- static reg_errcode_t
2730
- internal_function
2731
- # ifdef RE_ENABLE_I18N
2732
- build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2733
- int *coll_sym_alloc, const unsigned char *name)
2734
- # else /* not RE_ENABLE_I18N */
2735
- build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2736
- # endif /* not RE_ENABLE_I18N */
2737
- {
2738
- size_t name_len = strlen ((const char *) name);
2739
- if (BE (name_len != 1, 0))
2740
- return REG_ECOLLATE;
2741
- else
2742
- {
2743
- bitset_set (sbcset, name[0]);
2744
- return REG_NOERROR;
2745
- }
2746
- }
2747
- #endif /* not _LIBC */
2748
-
2749
- /* This function parse bracket expression like "[abc]", "[a-c]",
2750
- "[[.a-a.]]" etc. */
2751
-
2752
- static bin_tree_t *
2753
- parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2754
- reg_syntax_t syntax, reg_errcode_t *err)
2755
- {
2756
- #ifdef _LIBC
2757
- const unsigned char *collseqmb;
2758
- const char *collseqwc;
2759
- uint32_t nrules;
2760
- int32_t table_size;
2761
- const int32_t *symb_table;
2762
- const unsigned char *extra;
2763
-
2764
- /* Local function for parse_bracket_exp used in _LIBC environement.
2765
- Seek the collating symbol entry correspondings to NAME.
2766
- Return the index of the symbol in the SYMB_TABLE. */
2767
-
2768
- auto inline int32_t
2769
- __attribute ((always_inline))
2770
- seek_collating_symbol_entry (name, name_len)
2771
- const unsigned char *name;
2772
- size_t name_len;
2773
- {
2774
- int32_t hash = elem_hash ((const char *) name, name_len);
2775
- int32_t elem = hash % table_size;
2776
- if (symb_table[2 * elem] != 0)
2777
- {
2778
- int32_t second = hash % (table_size - 2) + 1;
2779
-
2780
- do
2781
- {
2782
- /* First compare the hashing value. */
2783
- if (symb_table[2 * elem] == hash
2784
- /* Compare the length of the name. */
2785
- && name_len == extra[symb_table[2 * elem + 1]]
2786
- /* Compare the name. */
2787
- && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2788
- name_len) == 0)
2789
- {
2790
- /* Yep, this is the entry. */
2791
- break;
2792
- }
2793
-
2794
- /* Next entry. */
2795
- elem += second;
2796
- }
2797
- while (symb_table[2 * elem] != 0);
2798
- }
2799
- return elem;
2800
- }
2801
-
2802
- /* Local function for parse_bracket_exp used in _LIBC environment.
2803
- Look up the collation sequence value of BR_ELEM.
2804
- Return the value if succeeded, UINT_MAX otherwise. */
2805
-
2806
- auto inline unsigned int
2807
- __attribute ((always_inline))
2808
- lookup_collation_sequence_value (br_elem)
2809
- bracket_elem_t *br_elem;
2810
- {
2811
- if (br_elem->type == SB_CHAR)
2812
- {
2813
- /*
2814
- if (MB_CUR_MAX == 1)
2815
- */
2816
- if (nrules == 0)
2817
- return collseqmb[br_elem->opr.ch];
2818
- else
2819
- {
2820
- wint_t wc = __btowc (br_elem->opr.ch);
2821
- return __collseq_table_lookup (collseqwc, wc);
2822
- }
2823
- }
2824
- else if (br_elem->type == MB_CHAR)
2825
- {
2826
- if (nrules != 0)
2827
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2828
- }
2829
- else if (br_elem->type == COLL_SYM)
2830
- {
2831
- size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2832
- if (nrules != 0)
2833
- {
2834
- int32_t elem, idx;
2835
- elem = seek_collating_symbol_entry (br_elem->opr.name,
2836
- sym_name_len);
2837
- if (symb_table[2 * elem] != 0)
2838
- {
2839
- /* We found the entry. */
2840
- idx = symb_table[2 * elem + 1];
2841
- /* Skip the name of collating element name. */
2842
- idx += 1 + extra[idx];
2843
- /* Skip the byte sequence of the collating element. */
2844
- idx += 1 + extra[idx];
2845
- /* Adjust for the alignment. */
2846
- idx = (idx + 3) & ~3;
2847
- /* Skip the multibyte collation sequence value. */
2848
- idx += sizeof (unsigned int);
2849
- /* Skip the wide char sequence of the collating element. */
2850
- idx += sizeof (unsigned int) *
2851
- (1 + *(unsigned int *) (extra + idx));
2852
- /* Return the collation sequence value. */
2853
- return *(unsigned int *) (extra + idx);
2854
- }
2855
- else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2856
- {
2857
- /* No valid character. Match it as a single byte
2858
- character. */
2859
- return collseqmb[br_elem->opr.name[0]];
2860
- }
2861
- }
2862
- else if (sym_name_len == 1)
2863
- return collseqmb[br_elem->opr.name[0]];
2864
- }
2865
- return UINT_MAX;
2866
- }
2867
-
2868
- /* Local function for parse_bracket_exp used in _LIBC environement.
2869
- Build the range expression which starts from START_ELEM, and ends
2870
- at END_ELEM. The result are written to MBCSET and SBCSET.
2871
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2872
- mbcset->range_ends, is a pointer argument sinse we may
2873
- update it. */
2874
-
2875
- auto inline reg_errcode_t
2876
- __attribute ((always_inline))
2877
- build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2878
- re_charset_t *mbcset;
2879
- int *range_alloc;
2880
- bitset_t sbcset;
2881
- bracket_elem_t *start_elem, *end_elem;
2882
- {
2883
- unsigned int ch;
2884
- uint32_t start_collseq;
2885
- uint32_t end_collseq;
2886
-
2887
- /* Equivalence Classes and Character Classes can't be a range
2888
- start/end. */
2889
- if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2890
- || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2891
- 0))
2892
- return REG_ERANGE;
2893
-
2894
- start_collseq = lookup_collation_sequence_value (start_elem);
2895
- end_collseq = lookup_collation_sequence_value (end_elem);
2896
- /* Check start/end collation sequence values. */
2897
- if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
2898
- return REG_ECOLLATE;
2899
- if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2900
- return REG_ERANGE;
2901
-
2902
- /* Got valid collation sequence values, add them as a new entry.
2903
- However, if we have no collation elements, and the character set
2904
- is single byte, the single byte character set that we
2905
- build below suffices. */
2906
- if (nrules > 0 || dfa->mb_cur_max > 1)
2907
- {
2908
- /* Check the space of the arrays. */
2909
- if (BE (*range_alloc == mbcset->nranges, 0))
2910
- {
2911
- /* There is not enough space, need realloc. */
2912
- uint32_t *new_array_start;
2913
- uint32_t *new_array_end;
2914
- int new_nranges;
2915
-
2916
- /* +1 in case of mbcset->nranges is 0. */
2917
- new_nranges = 2 * mbcset->nranges + 1;
2918
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2919
- new_nranges);
2920
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2921
- new_nranges);
2922
-
2923
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2924
- return REG_ESPACE;
2925
-
2926
- mbcset->range_starts = new_array_start;
2927
- mbcset->range_ends = new_array_end;
2928
- *range_alloc = new_nranges;
2929
- }
2930
-
2931
- mbcset->range_starts[mbcset->nranges] = start_collseq;
2932
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
2933
- }
2934
-
2935
- /* Build the table for single byte characters. */
2936
- for (ch = 0; ch < SBC_MAX; ch++)
2937
- {
2938
- uint32_t ch_collseq;
2939
- /*
2940
- if (MB_CUR_MAX == 1)
2941
- */
2942
- if (nrules == 0)
2943
- ch_collseq = collseqmb[ch];
2944
- else
2945
- ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2946
- if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2947
- bitset_set (sbcset, ch);
2948
- }
2949
- return REG_NOERROR;
2950
- }
2951
-
2952
- /* Local function for parse_bracket_exp used in _LIBC environement.
2953
- Build the collating element which is represented by NAME.
2954
- The result are written to MBCSET and SBCSET.
2955
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2956
- pointer argument sinse we may update it. */
2957
-
2958
- auto inline reg_errcode_t
2959
- __attribute ((always_inline))
2960
- build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2961
- re_charset_t *mbcset;
2962
- int *coll_sym_alloc;
2963
- bitset_t sbcset;
2964
- const unsigned char *name;
2965
- {
2966
- int32_t elem, idx;
2967
- size_t name_len = strlen ((const char *) name);
2968
- if (nrules != 0)
2969
- {
2970
- elem = seek_collating_symbol_entry (name, name_len);
2971
- if (symb_table[2 * elem] != 0)
2972
- {
2973
- /* We found the entry. */
2974
- idx = symb_table[2 * elem + 1];
2975
- /* Skip the name of collating element name. */
2976
- idx += 1 + extra[idx];
2977
- }
2978
- else if (symb_table[2 * elem] == 0 && name_len == 1)
2979
- {
2980
- /* No valid character, treat it as a normal
2981
- character. */
2982
- bitset_set (sbcset, name[0]);
2983
- return REG_NOERROR;
2984
- }
2985
- else
2986
- return REG_ECOLLATE;
2987
-
2988
- /* Got valid collation sequence, add it as a new entry. */
2989
- /* Check the space of the arrays. */
2990
- if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
2991
- {
2992
- /* Not enough, realloc it. */
2993
- /* +1 in case of mbcset->ncoll_syms is 0. */
2994
- int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
2995
- /* Use realloc since mbcset->coll_syms is NULL
2996
- if *alloc == 0. */
2997
- int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
2998
- new_coll_sym_alloc);
2999
- if (BE (new_coll_syms == NULL, 0))
3000
- return REG_ESPACE;
3001
- mbcset->coll_syms = new_coll_syms;
3002
- *coll_sym_alloc = new_coll_sym_alloc;
3003
- }
3004
- mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3005
- return REG_NOERROR;
3006
- }
3007
- else
3008
- {
3009
- if (BE (name_len != 1, 0))
3010
- return REG_ECOLLATE;
3011
- else
3012
- {
3013
- bitset_set (sbcset, name[0]);
3014
- return REG_NOERROR;
3015
- }
3016
- }
3017
- }
3018
- #endif
3019
-
3020
- re_token_t br_token;
3021
- re_bitset_ptr_t sbcset;
3022
- #ifdef RE_ENABLE_I18N
3023
- re_charset_t *mbcset;
3024
- int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
3025
- int equiv_class_alloc = 0, char_class_alloc = 0;
3026
- #endif /* not RE_ENABLE_I18N */
3027
- int non_match = 0;
3028
- bin_tree_t *work_tree;
3029
- int token_len;
3030
- int first_round = 1;
3031
- #ifdef _LIBC
3032
- collseqmb = (const unsigned char *)
3033
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3034
- nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3035
- if (nrules)
3036
- {
3037
- /*
3038
- if (MB_CUR_MAX > 1)
3039
- */
3040
- collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3041
- table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3042
- symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3043
- _NL_COLLATE_SYMB_TABLEMB);
3044
- extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3045
- _NL_COLLATE_SYMB_EXTRAMB);
3046
- }
3047
- #endif
3048
- sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3049
- #ifdef RE_ENABLE_I18N
3050
- mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3051
- #endif /* RE_ENABLE_I18N */
3052
- #ifdef RE_ENABLE_I18N
3053
- if (BE (sbcset == NULL || mbcset == NULL, 0))
3054
- #else
3055
- if (BE (sbcset == NULL, 0))
3056
- #endif /* RE_ENABLE_I18N */
3057
- {
3058
- *err = REG_ESPACE;
3059
- return NULL;
3060
- }
3061
-
3062
- token_len = peek_token_bracket (token, regexp, syntax);
3063
- if (BE (token->type == END_OF_RE, 0))
3064
- {
3065
- *err = REG_BADPAT;
3066
- goto parse_bracket_exp_free_return;
3067
- }
3068
- if (token->type == OP_NON_MATCH_LIST)
3069
- {
3070
- #ifdef RE_ENABLE_I18N
3071
- mbcset->non_match = 1;
3072
- #endif /* not RE_ENABLE_I18N */
3073
- non_match = 1;
3074
- if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3075
- bitset_set (sbcset, '\n');
3076
- re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3077
- token_len = peek_token_bracket (token, regexp, syntax);
3078
- if (BE (token->type == END_OF_RE, 0))
3079
- {
3080
- *err = REG_BADPAT;
3081
- goto parse_bracket_exp_free_return;
3082
- }
3083
- }
3084
-
3085
- /* We treat the first ']' as a normal character. */
3086
- if (token->type == OP_CLOSE_BRACKET)
3087
- token->type = CHARACTER;
3088
-
3089
- while (1)
3090
- {
3091
- bracket_elem_t start_elem, end_elem;
3092
- unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3093
- unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3094
- reg_errcode_t ret;
3095
- int token_len2 = 0, is_range_exp = 0;
3096
- re_token_t token2;
3097
-
3098
- start_elem.opr.name = start_name_buf;
3099
- ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3100
- syntax, first_round);
3101
- if (BE (ret != REG_NOERROR, 0))
3102
- {
3103
- *err = ret;
3104
- goto parse_bracket_exp_free_return;
3105
- }
3106
- first_round = 0;
3107
-
3108
- /* Get information about the next token. We need it in any case. */
3109
- token_len = peek_token_bracket (token, regexp, syntax);
3110
-
3111
- /* Do not check for ranges if we know they are not allowed. */
3112
- if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3113
- {
3114
- if (BE (token->type == END_OF_RE, 0))
3115
- {
3116
- *err = REG_EBRACK;
3117
- goto parse_bracket_exp_free_return;
3118
- }
3119
- if (token->type == OP_CHARSET_RANGE)
3120
- {
3121
- re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3122
- token_len2 = peek_token_bracket (&token2, regexp, syntax);
3123
- if (BE (token2.type == END_OF_RE, 0))
3124
- {
3125
- *err = REG_EBRACK;
3126
- goto parse_bracket_exp_free_return;
3127
- }
3128
- if (token2.type == OP_CLOSE_BRACKET)
3129
- {
3130
- /* We treat the last '-' as a normal character. */
3131
- re_string_skip_bytes (regexp, -token_len);
3132
- token->type = CHARACTER;
3133
- }
3134
- else
3135
- is_range_exp = 1;
3136
- }
3137
- }
3138
-
3139
- if (is_range_exp == 1)
3140
- {
3141
- end_elem.opr.name = end_name_buf;
3142
- ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3143
- dfa, syntax, 1);
3144
- if (BE (ret != REG_NOERROR, 0))
3145
- {
3146
- *err = ret;
3147
- goto parse_bracket_exp_free_return;
3148
- }
3149
-
3150
- token_len = peek_token_bracket (token, regexp, syntax);
3151
-
3152
- #ifdef _LIBC
3153
- *err = build_range_exp (sbcset, mbcset, &range_alloc,
3154
- &start_elem, &end_elem);
3155
- #else
3156
- # ifdef RE_ENABLE_I18N
3157
- *err = build_range_exp (sbcset,
3158
- dfa->mb_cur_max > 1 ? mbcset : NULL,
3159
- &range_alloc, &start_elem, &end_elem);
3160
- # else
3161
- *err = build_range_exp (sbcset, &start_elem, &end_elem);
3162
- # endif
3163
- #endif /* RE_ENABLE_I18N */
3164
- if (BE (*err != REG_NOERROR, 0))
3165
- goto parse_bracket_exp_free_return;
3166
- }
3167
- else
3168
- {
3169
- switch (start_elem.type)
3170
- {
3171
- case SB_CHAR:
3172
- bitset_set (sbcset, start_elem.opr.ch);
3173
- break;
3174
- #ifdef RE_ENABLE_I18N
3175
- case MB_CHAR:
3176
- /* Check whether the array has enough space. */
3177
- if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3178
- {
3179
- wchar_t *new_mbchars;
3180
- /* Not enough, realloc it. */
3181
- /* +1 in case of mbcset->nmbchars is 0. */
3182
- mbchar_alloc = 2 * mbcset->nmbchars + 1;
3183
- /* Use realloc since array is NULL if *alloc == 0. */
3184
- new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3185
- mbchar_alloc);
3186
- if (BE (new_mbchars == NULL, 0))
3187
- goto parse_bracket_exp_espace;
3188
- mbcset->mbchars = new_mbchars;
3189
- }
3190
- mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3191
- break;
3192
- #endif /* RE_ENABLE_I18N */
3193
- case EQUIV_CLASS:
3194
- *err = build_equiv_class (sbcset,
3195
- #ifdef RE_ENABLE_I18N
3196
- mbcset, &equiv_class_alloc,
3197
- #endif /* RE_ENABLE_I18N */
3198
- start_elem.opr.name);
3199
- if (BE (*err != REG_NOERROR, 0))
3200
- goto parse_bracket_exp_free_return;
3201
- break;
3202
- case COLL_SYM:
3203
- *err = build_collating_symbol (sbcset,
3204
- #ifdef RE_ENABLE_I18N
3205
- mbcset, &coll_sym_alloc,
3206
- #endif /* RE_ENABLE_I18N */
3207
- start_elem.opr.name);
3208
- if (BE (*err != REG_NOERROR, 0))
3209
- goto parse_bracket_exp_free_return;
3210
- break;
3211
- case CHAR_CLASS:
3212
- *err = build_charclass (regexp->trans, sbcset,
3213
- #ifdef RE_ENABLE_I18N
3214
- mbcset, &char_class_alloc,
3215
- #endif /* RE_ENABLE_I18N */
3216
- (const char *) start_elem.opr.name, syntax);
3217
- if (BE (*err != REG_NOERROR, 0))
3218
- goto parse_bracket_exp_free_return;
3219
- break;
3220
- default:
3221
- assert (0);
3222
- break;
3223
- }
3224
- }
3225
- if (BE (token->type == END_OF_RE, 0))
3226
- {
3227
- *err = REG_EBRACK;
3228
- goto parse_bracket_exp_free_return;
3229
- }
3230
- if (token->type == OP_CLOSE_BRACKET)
3231
- break;
3232
- }
3233
-
3234
- re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3235
-
3236
- /* If it is non-matching list. */
3237
- if (non_match)
3238
- bitset_not (sbcset);
3239
-
3240
- #ifdef RE_ENABLE_I18N
3241
- /* Ensure only single byte characters are set. */
3242
- if (dfa->mb_cur_max > 1)
3243
- bitset_mask (sbcset, dfa->sb_char);
3244
-
3245
- if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
3246
- || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3247
- || mbcset->non_match)))
3248
- {
3249
- bin_tree_t *mbc_tree;
3250
- int sbc_idx;
3251
- /* Build a tree for complex bracket. */
3252
- dfa->has_mb_node = 1;
3253
- br_token.type = COMPLEX_BRACKET;
3254
- br_token.opr.mbcset = mbcset;
3255
- mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3256
- if (BE (mbc_tree == NULL, 0))
3257
- goto parse_bracket_exp_espace;
3258
- for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3259
- if (sbcset[sbc_idx])
3260
- break;
3261
- /* If there are no bits set in sbcset, there is no point
3262
- of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3263
- if (sbc_idx < BITSET_WORDS)
3264
- {
3265
- /* Build a tree for simple bracket. */
3266
- br_token.type = SIMPLE_BRACKET;
3267
- br_token.opr.sbcset = sbcset;
3268
- work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3269
- if (BE (work_tree == NULL, 0))
3270
- goto parse_bracket_exp_espace;
3271
-
3272
- /* Then join them by ALT node. */
3273
- work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3274
- if (BE (work_tree == NULL, 0))
3275
- goto parse_bracket_exp_espace;
3276
- }
3277
- else
3278
- {
3279
- re_free (sbcset);
3280
- work_tree = mbc_tree;
3281
- }
3282
- }
3283
- else
3284
- #endif /* not RE_ENABLE_I18N */
3285
- {
3286
- #ifdef RE_ENABLE_I18N
3287
- free_charset (mbcset);
3288
- #endif
3289
- /* Build a tree for simple bracket. */
3290
- br_token.type = SIMPLE_BRACKET;
3291
- br_token.opr.sbcset = sbcset;
3292
- work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3293
- if (BE (work_tree == NULL, 0))
3294
- goto parse_bracket_exp_espace;
3295
- }
3296
- return work_tree;
3297
-
3298
- parse_bracket_exp_espace:
3299
- *err = REG_ESPACE;
3300
- parse_bracket_exp_free_return:
3301
- re_free (sbcset);
3302
- #ifdef RE_ENABLE_I18N
3303
- free_charset (mbcset);
3304
- #endif /* RE_ENABLE_I18N */
3305
- return NULL;
3306
- }
3307
-
3308
- /* Parse an element in the bracket expression. */
3309
-
3310
- static reg_errcode_t
3311
- parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3312
- re_token_t *token, int token_len, UNUSED re_dfa_t *dfa,
3313
- reg_syntax_t syntax, int accept_hyphen)
3314
- {
3315
- #ifdef RE_ENABLE_I18N
3316
- int cur_char_size;
3317
- cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3318
- if (cur_char_size > 1)
3319
- {
3320
- elem->type = MB_CHAR;
3321
- elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3322
- re_string_skip_bytes (regexp, cur_char_size);
3323
- return REG_NOERROR;
3324
- }
3325
- #endif /* RE_ENABLE_I18N */
3326
- re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3327
- if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3328
- || token->type == OP_OPEN_EQUIV_CLASS)
3329
- return parse_bracket_symbol (elem, regexp, token);
3330
- if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3331
- {
3332
- /* A '-' must only appear as anything but a range indicator before
3333
- the closing bracket. Everything else is an error. */
3334
- re_token_t token2;
3335
- (void) peek_token_bracket (&token2, regexp, syntax);
3336
- if (token2.type != OP_CLOSE_BRACKET)
3337
- /* The actual error value is not standardized since this whole
3338
- case is undefined. But ERANGE makes good sense. */
3339
- return REG_ERANGE;
3340
- }
3341
- elem->type = SB_CHAR;
3342
- elem->opr.ch = token->opr.c;
3343
- return REG_NOERROR;
3344
- }
3345
-
3346
- /* Parse a bracket symbol in the bracket expression. Bracket symbols are
3347
- such as [:<character_class>:], [.<collating_element>.], and
3348
- [=<equivalent_class>=]. */
3349
-
3350
- static reg_errcode_t
3351
- parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3352
- re_token_t *token)
3353
- {
3354
- unsigned char ch, delim = token->opr.c;
3355
- int i = 0;
3356
- if (re_string_eoi(regexp))
3357
- return REG_EBRACK;
3358
- for (;; ++i)
3359
- {
3360
- if (i >= BRACKET_NAME_BUF_SIZE)
3361
- return REG_EBRACK;
3362
- if (token->type == OP_OPEN_CHAR_CLASS)
3363
- ch = re_string_fetch_byte_case (regexp);
3364
- else
3365
- ch = re_string_fetch_byte (regexp);
3366
- if (re_string_eoi(regexp))
3367
- return REG_EBRACK;
3368
- if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3369
- break;
3370
- elem->opr.name[i] = ch;
3371
- }
3372
- re_string_skip_bytes (regexp, 1);
3373
- elem->opr.name[i] = '\0';
3374
- switch (token->type)
3375
- {
3376
- case OP_OPEN_COLL_ELEM:
3377
- elem->type = COLL_SYM;
3378
- break;
3379
- case OP_OPEN_EQUIV_CLASS:
3380
- elem->type = EQUIV_CLASS;
3381
- break;
3382
- case OP_OPEN_CHAR_CLASS:
3383
- elem->type = CHAR_CLASS;
3384
- break;
3385
- default:
3386
- break;
3387
- }
3388
- return REG_NOERROR;
3389
- }
3390
-
3391
- /* Helper function for parse_bracket_exp.
3392
- Build the equivalence class which is represented by NAME.
3393
- The result are written to MBCSET and SBCSET.
3394
- EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3395
- is a pointer argument sinse we may update it. */
3396
-
3397
- static reg_errcode_t
3398
- #ifdef RE_ENABLE_I18N
3399
- build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3400
- int *equiv_class_alloc, const unsigned char *name)
3401
- #else /* not RE_ENABLE_I18N */
3402
- build_equiv_class (bitset_t sbcset, const unsigned char *name)
3403
- #endif /* not RE_ENABLE_I18N */
3404
- {
3405
- #ifdef _LIBC
3406
- uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3407
- if (nrules != 0)
3408
- {
3409
- const int32_t *table, *indirect;
3410
- const unsigned char *weights, *extra, *cp;
3411
- unsigned char char_buf[2];
3412
- int32_t idx1, idx2;
3413
- unsigned int ch;
3414
- size_t len;
3415
- /* This #include defines a local function! */
3416
- # include <locale/weight.h>
3417
- /* Calculate the index for equivalence class. */
3418
- cp = name;
3419
- table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3420
- weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3421
- _NL_COLLATE_WEIGHTMB);
3422
- extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3423
- _NL_COLLATE_EXTRAMB);
3424
- indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3425
- _NL_COLLATE_INDIRECTMB);
3426
- idx1 = findidx (&cp);
3427
- if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
3428
- /* This isn't a valid character. */
3429
- return REG_ECOLLATE;
3430
-
3431
- /* Build single byte matcing table for this equivalence class. */
3432
- char_buf[1] = (unsigned char) '\0';
3433
- len = weights[idx1 & 0xffffff];
3434
- for (ch = 0; ch < SBC_MAX; ++ch)
3435
- {
3436
- char_buf[0] = ch;
3437
- cp = char_buf;
3438
- idx2 = findidx (&cp);
3439
- /*
3440
- idx2 = table[ch];
3441
- */
3442
- if (idx2 == 0)
3443
- /* This isn't a valid character. */
3444
- continue;
3445
- /* Compare only if the length matches and the collation rule
3446
- index is the same. */
3447
- if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
3448
- {
3449
- int cnt = 0;
3450
-
3451
- while (cnt <= len &&
3452
- weights[(idx1 & 0xffffff) + 1 + cnt]
3453
- == weights[(idx2 & 0xffffff) + 1 + cnt])
3454
- ++cnt;
3455
-
3456
- if (cnt > len)
3457
- bitset_set (sbcset, ch);
3458
- }
3459
- }
3460
- /* Check whether the array has enough space. */
3461
- if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3462
- {
3463
- /* Not enough, realloc it. */
3464
- /* +1 in case of mbcset->nequiv_classes is 0. */
3465
- int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3466
- /* Use realloc since the array is NULL if *alloc == 0. */
3467
- int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3468
- int32_t,
3469
- new_equiv_class_alloc);
3470
- if (BE (new_equiv_classes == NULL, 0))
3471
- return REG_ESPACE;
3472
- mbcset->equiv_classes = new_equiv_classes;
3473
- *equiv_class_alloc = new_equiv_class_alloc;
3474
- }
3475
- mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3476
- }
3477
- else
3478
- #endif /* _LIBC */
3479
- {
3480
- if (BE (strlen ((const char *) name) != 1, 0))
3481
- return REG_ECOLLATE;
3482
- bitset_set (sbcset, *name);
3483
- }
3484
- return REG_NOERROR;
3485
- }
3486
-
3487
- /* Helper function for parse_bracket_exp.
3488
- Build the character class which is represented by NAME.
3489
- The result are written to MBCSET and SBCSET.
3490
- CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3491
- is a pointer argument sinse we may update it. */
3492
-
3493
- static reg_errcode_t
3494
- #ifdef RE_ENABLE_I18N
3495
- build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3496
- re_charset_t *mbcset, int *char_class_alloc,
3497
- const char *class_name, reg_syntax_t syntax)
3498
- #else /* not RE_ENABLE_I18N */
3499
- build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3500
- const char *class_name, reg_syntax_t syntax)
3501
- #endif /* not RE_ENABLE_I18N */
3502
- {
3503
- int i;
3504
-
3505
- /* In case of REG_ICASE "upper" and "lower" match the both of
3506
- upper and lower cases. */
3507
- if ((syntax & RE_ICASE)
3508
- && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0))
3509
- class_name = "alpha";
3510
-
3511
- #ifdef RE_ENABLE_I18N
3512
- /* Check the space of the arrays. */
3513
- if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3514
- {
3515
- /* Not enough, realloc it. */
3516
- /* +1 in case of mbcset->nchar_classes is 0. */
3517
- int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3518
- /* Use realloc since array is NULL if *alloc == 0. */
3519
- wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3520
- new_char_class_alloc);
3521
- if (BE (new_char_classes == NULL, 0))
3522
- return REG_ESPACE;
3523
- mbcset->char_classes = new_char_classes;
3524
- *char_class_alloc = new_char_class_alloc;
3525
- }
3526
- mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name);
3527
- #endif /* RE_ENABLE_I18N */
3528
-
3529
- #define BUILD_CHARCLASS_LOOP(ctype_func) \
3530
- do { \
3531
- if (BE (trans != NULL, 0)) \
3532
- { \
3533
- for (i = 0; i < SBC_MAX; ++i) \
3534
- if (ctype_func (i)) \
3535
- bitset_set (sbcset, trans[i]); \
3536
- } \
3537
- else \
3538
- { \
3539
- for (i = 0; i < SBC_MAX; ++i) \
3540
- if (ctype_func (i)) \
3541
- bitset_set (sbcset, i); \
3542
- } \
3543
- } while (0)
3544
-
3545
- if (strcmp (class_name, "alnum") == 0)
3546
- BUILD_CHARCLASS_LOOP (isalnum);
3547
- else if (strcmp (class_name, "cntrl") == 0)
3548
- BUILD_CHARCLASS_LOOP (iscntrl);
3549
- else if (strcmp (class_name, "lower") == 0)
3550
- BUILD_CHARCLASS_LOOP (islower);
3551
- else if (strcmp (class_name, "space") == 0)
3552
- BUILD_CHARCLASS_LOOP (isspace);
3553
- else if (strcmp (class_name, "alpha") == 0)
3554
- BUILD_CHARCLASS_LOOP (isalpha);
3555
- else if (strcmp (class_name, "digit") == 0)
3556
- BUILD_CHARCLASS_LOOP (isdigit);
3557
- else if (strcmp (class_name, "print") == 0)
3558
- BUILD_CHARCLASS_LOOP (isprint);
3559
- else if (strcmp (class_name, "upper") == 0)
3560
- BUILD_CHARCLASS_LOOP (isupper);
3561
- else if (strcmp (class_name, "blank") == 0)
3562
- #ifndef GAWK
3563
- BUILD_CHARCLASS_LOOP (isblank);
3564
- #else
3565
- /* see comments above */
3566
- BUILD_CHARCLASS_LOOP (is_blank);
3567
- #endif
3568
- else if (strcmp (class_name, "graph") == 0)
3569
- BUILD_CHARCLASS_LOOP (isgraph);
3570
- else if (strcmp (class_name, "punct") == 0)
3571
- BUILD_CHARCLASS_LOOP (ispunct);
3572
- else if (strcmp (class_name, "xdigit") == 0)
3573
- BUILD_CHARCLASS_LOOP (isxdigit);
3574
- else
3575
- return REG_ECTYPE;
3576
-
3577
- return REG_NOERROR;
3578
- }
3579
-
3580
- static bin_tree_t *
3581
- build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3582
- const char *class_name,
3583
- const char *extra, int non_match,
3584
- reg_errcode_t *err)
3585
- {
3586
- re_bitset_ptr_t sbcset;
3587
- #ifdef RE_ENABLE_I18N
3588
- re_charset_t *mbcset;
3589
- int alloc = 0;
3590
- #endif /* not RE_ENABLE_I18N */
3591
- reg_errcode_t ret;
3592
- re_token_t br_token;
3593
- bin_tree_t *tree;
3594
-
3595
- sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3596
- #ifdef RE_ENABLE_I18N
3597
- mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3598
- #endif /* RE_ENABLE_I18N */
3599
-
3600
- #ifdef RE_ENABLE_I18N
3601
- if (BE (sbcset == NULL || mbcset == NULL, 0))
3602
- #else /* not RE_ENABLE_I18N */
3603
- if (BE (sbcset == NULL, 0))
3604
- #endif /* not RE_ENABLE_I18N */
3605
- {
3606
- *err = REG_ESPACE;
3607
- return NULL;
3608
- }
3609
-
3610
- if (non_match)
3611
- {
3612
- #ifdef RE_ENABLE_I18N
3613
- mbcset->non_match = 1;
3614
- #endif /* not RE_ENABLE_I18N */
3615
- }
3616
-
3617
- /* We don't care the syntax in this case. */
3618
- ret = build_charclass (trans, sbcset,
3619
- #ifdef RE_ENABLE_I18N
3620
- mbcset, &alloc,
3621
- #endif /* RE_ENABLE_I18N */
3622
- class_name, 0);
3623
-
3624
- if (BE (ret != REG_NOERROR, 0))
3625
- {
3626
- re_free (sbcset);
3627
- #ifdef RE_ENABLE_I18N
3628
- free_charset (mbcset);
3629
- #endif /* RE_ENABLE_I18N */
3630
- *err = ret;
3631
- return NULL;
3632
- }
3633
- /* \w match '_' also. */
3634
- for (; *extra; extra++)
3635
- bitset_set (sbcset, *extra);
3636
-
3637
- /* If it is non-matching list. */
3638
- if (non_match)
3639
- bitset_not (sbcset);
3640
-
3641
- #ifdef RE_ENABLE_I18N
3642
- /* Ensure only single byte characters are set. */
3643
- if (dfa->mb_cur_max > 1)
3644
- bitset_mask (sbcset, dfa->sb_char);
3645
- #endif
3646
-
3647
- /* Build a tree for simple bracket. */
3648
- br_token.type = SIMPLE_BRACKET;
3649
- br_token.opr.sbcset = sbcset;
3650
- tree = create_token_tree (dfa, NULL, NULL, &br_token);
3651
- if (BE (tree == NULL, 0))
3652
- goto build_word_op_espace;
3653
-
3654
- #ifdef RE_ENABLE_I18N
3655
- if (dfa->mb_cur_max > 1)
3656
- {
3657
- bin_tree_t *mbc_tree;
3658
- /* Build a tree for complex bracket. */
3659
- br_token.type = COMPLEX_BRACKET;
3660
- br_token.opr.mbcset = mbcset;
3661
- dfa->has_mb_node = 1;
3662
- mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3663
- if (BE (mbc_tree == NULL, 0))
3664
- goto build_word_op_espace;
3665
- /* Then join them by ALT node. */
3666
- tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3667
- if (BE (mbc_tree != NULL, 1))
3668
- return tree;
3669
- }
3670
- else
3671
- {
3672
- free_charset (mbcset);
3673
- return tree;
3674
- }
3675
- #else /* not RE_ENABLE_I18N */
3676
- return tree;
3677
- #endif /* not RE_ENABLE_I18N */
3678
-
3679
- build_word_op_espace:
3680
- re_free (sbcset);
3681
- #ifdef RE_ENABLE_I18N
3682
- free_charset (mbcset);
3683
- #endif /* RE_ENABLE_I18N */
3684
- *err = REG_ESPACE;
3685
- return NULL;
3686
- }
3687
-
3688
- /* This is intended for the expressions like "a{1,3}".
3689
- Fetch a number from `input', and return the number.
3690
- Return -1, if the number field is empty like "{,1}".
3691
- Return -2, If an error is occured. */
3692
-
3693
- static int
3694
- fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3695
- {
3696
- int num = -1;
3697
- unsigned char c;
3698
- while (1)
3699
- {
3700
- fetch_token (token, input, syntax);
3701
- c = token->opr.c;
3702
- if (BE (token->type == END_OF_RE, 0))
3703
- return -2;
3704
- if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3705
- break;
3706
- num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3707
- ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3708
- num = (num > RE_DUP_MAX) ? -2 : num;
3709
- }
3710
- return num;
3711
- }
3712
-
3713
- #ifdef RE_ENABLE_I18N
3714
- static void
3715
- free_charset (re_charset_t *cset)
3716
- {
3717
- re_free (cset->mbchars);
3718
- # ifdef _LIBC
3719
- re_free (cset->coll_syms);
3720
- re_free (cset->equiv_classes);
3721
- re_free (cset->range_starts);
3722
- re_free (cset->range_ends);
3723
- # endif
3724
- re_free (cset->char_classes);
3725
- re_free (cset);
3726
- }
3727
- #endif /* RE_ENABLE_I18N */
3728
-
3729
- /* Functions for binary tree operation. */
3730
-
3731
- /* Create a tree node. */
3732
-
3733
- static bin_tree_t *
3734
- create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3735
- re_token_type_t type)
3736
- {
3737
- re_token_t t;
3738
- t.type = type;
3739
- return create_token_tree (dfa, left, right, &t);
3740
- }
3741
-
3742
- static bin_tree_t *
3743
- create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3744
- const re_token_t *token)
3745
- {
3746
- bin_tree_t *tree;
3747
- if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3748
- {
3749
- bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3750
-
3751
- if (storage == NULL)
3752
- return NULL;
3753
- storage->next = dfa->str_tree_storage;
3754
- dfa->str_tree_storage = storage;
3755
- dfa->str_tree_storage_idx = 0;
3756
- }
3757
- tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3758
-
3759
- tree->parent = NULL;
3760
- tree->left = left;
3761
- tree->right = right;
3762
- tree->token = *token;
3763
- tree->token.duplicated = 0;
3764
- tree->token.opt_subexp = 0;
3765
- tree->first = NULL;
3766
- tree->next = NULL;
3767
- tree->node_idx = -1;
3768
-
3769
- if (left != NULL)
3770
- left->parent = tree;
3771
- if (right != NULL)
3772
- right->parent = tree;
3773
- return tree;
3774
- }
3775
-
3776
- /* Mark the tree SRC as an optional subexpression.
3777
- To be called from preorder or postorder. */
3778
-
3779
- static reg_errcode_t
3780
- mark_opt_subexp (void *extra, bin_tree_t *node)
3781
- {
3782
- int idx = (int) (long) extra;
3783
- if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3784
- node->token.opt_subexp = 1;
3785
-
3786
- return REG_NOERROR;
3787
- }
3788
-
3789
- /* Free the allocated memory inside NODE. */
3790
-
3791
- static void
3792
- free_token (re_token_t *node)
3793
- {
3794
- #ifdef RE_ENABLE_I18N
3795
- if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3796
- free_charset (node->opr.mbcset);
3797
- else
3798
- #endif /* RE_ENABLE_I18N */
3799
- if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3800
- re_free (node->opr.sbcset);
3801
- }
3802
-
3803
- /* Worker function for tree walking. Free the allocated memory inside NODE
3804
- and its children. */
3805
-
3806
- static reg_errcode_t
3807
- free_tree (UNUSED void *extra, bin_tree_t *node)
3808
- {
3809
- free_token (&node->token);
3810
- return REG_NOERROR;
3811
- }
3812
-
3813
-
3814
- /* Duplicate the node SRC, and return new node. This is a preorder
3815
- visit similar to the one implemented by the generic visitor, but
3816
- we need more infrastructure to maintain two parallel trees --- so,
3817
- it's easier to duplicate. */
3818
-
3819
- static bin_tree_t *
3820
- duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3821
- {
3822
- const bin_tree_t *node;
3823
- bin_tree_t *dup_root;
3824
- bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
3825
-
3826
- for (node = root; ; )
3827
- {
3828
- /* Create a new tree and link it back to the current parent. */
3829
- *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3830
- if (*p_new == NULL)
3831
- return NULL;
3832
- (*p_new)->parent = dup_node;
3833
- (*p_new)->token.duplicated = 1;
3834
- dup_node = *p_new;
3835
-
3836
- /* Go to the left node, or up and to the right. */
3837
- if (node->left)
3838
- {
3839
- node = node->left;
3840
- p_new = &dup_node->left;
3841
- }
3842
- else
3843
- {
3844
- const bin_tree_t *prev = NULL;
3845
- while (node->right == prev || node->right == NULL)
3846
- {
3847
- prev = node;
3848
- node = node->parent;
3849
- dup_node = dup_node->parent;
3850
- if (!node)
3851
- return dup_root;
3852
- }
3853
- node = node->right;
3854
- p_new = &dup_node->right;
3855
- }
3856
- }
3857
- }