glib2 3.0.7-x64-mingw32 → 3.0.8-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (1062) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +24 -12
  3. data/ext/glib2/extconf.rb +1 -0
  4. data/ext/glib2/glib2.def +4 -0
  5. data/ext/glib2/rbglib-variant.c +163 -12
  6. data/ext/glib2/rbglib.c +31 -8
  7. data/ext/glib2/rbglib.h +15 -1
  8. data/ext/glib2/rbglib2conversions.h +3 -0
  9. data/ext/glib2/rbglib_iochannel.c +0 -3
  10. data/ext/glib2/rbglib_iochannel_win32_socket.c +0 -1
  11. data/ext/glib2/rbglib_matchinfo.c +179 -0
  12. data/ext/glib2/rbglib_regex.c +484 -0
  13. data/ext/glib2/rbgobj_value.c +7 -1
  14. data/ext/glib2/rbgprivate.h +2 -0
  15. data/ext/glib2/rbgutil.c +7 -0
  16. data/ext/glib2/rbgutil.h +2 -0
  17. data/lib/2.2/glib2.so +0 -0
  18. data/lib/2.3/glib2.so +0 -0
  19. data/lib/glib2.rb +2 -1
  20. data/lib/glib2/regex.rb +29 -0
  21. data/lib/gnome2/rake/external-package.rb +6 -1
  22. data/lib/gnome2/rake/package-task.rb +1 -1
  23. data/lib/gnome2/rake/package.rb +9 -0
  24. data/lib/gnome2/rake/windows-binary-build-task.rb +35 -11
  25. data/lib/mkmf-gnome2.rb +3 -1
  26. data/test/test-match-info.rb +113 -0
  27. data/test/test-regex.rb +320 -0
  28. data/vendor/local/bin/asn1Coding.exe +0 -0
  29. data/vendor/local/bin/asn1Decoding.exe +0 -0
  30. data/vendor/local/bin/asn1Parser.exe +0 -0
  31. data/vendor/local/bin/envsubst.exe +0 -0
  32. data/vendor/local/bin/gdbus.exe +0 -0
  33. data/vendor/local/bin/gettext.exe +0 -0
  34. data/vendor/local/bin/gettext.sh +1 -1
  35. data/vendor/local/bin/gio-querymodules.exe +0 -0
  36. data/vendor/local/bin/glib-compile-resources.exe +0 -0
  37. data/vendor/local/bin/glib-compile-schemas.exe +0 -0
  38. data/vendor/local/bin/glib-genmarshal.exe +0 -0
  39. data/vendor/local/bin/glib-gettextize +1 -1
  40. data/vendor/local/bin/glib-mkenums +2 -2
  41. data/vendor/local/bin/gobject-query.exe +0 -0
  42. data/vendor/local/bin/gresource.exe +0 -0
  43. data/vendor/local/bin/gsettings.exe +0 -0
  44. data/vendor/local/bin/gspawn-win64-helper-console.exe +0 -0
  45. data/vendor/local/bin/gspawn-win64-helper.exe +0 -0
  46. data/vendor/local/bin/iconv.exe +0 -0
  47. data/vendor/local/bin/idn.exe +0 -0
  48. data/vendor/local/bin/libasprintf-0.dll +0 -0
  49. data/vendor/local/bin/libcharset-1.dll +0 -0
  50. data/vendor/local/bin/libffi-6.dll +0 -0
  51. data/vendor/local/bin/libgio-2.0-0.dll +0 -0
  52. data/vendor/local/bin/libglib-2.0-0.dll +0 -0
  53. data/vendor/local/bin/libgmodule-2.0-0.dll +0 -0
  54. data/vendor/local/bin/libgmp-10.dll +0 -0
  55. data/vendor/local/bin/libgnutls-30.def +993 -984
  56. data/vendor/local/bin/libgnutls-30.dll +0 -0
  57. data/vendor/local/bin/libgobject-2.0-0.dll +0 -0
  58. data/vendor/local/bin/libgthread-2.0-0.dll +0 -0
  59. data/vendor/local/bin/libhogweed-4-2.dll +0 -0
  60. data/vendor/local/bin/libiconv-2.dll +0 -0
  61. data/vendor/local/bin/libidn-11.dll +0 -0
  62. data/vendor/local/bin/libintl-8.dll +0 -0
  63. data/vendor/local/bin/{libnettle-6-1.dll → libnettle-6-2.dll} +0 -0
  64. data/vendor/local/bin/libp11-kit-0.dll +0 -0
  65. data/vendor/local/bin/libpcre-1.dll +0 -0
  66. data/vendor/local/bin/libpcrecpp-0.dll +0 -0
  67. data/vendor/local/bin/libpcreposix-0.dll +0 -0
  68. data/vendor/local/bin/libtasn1-6.dll +0 -0
  69. data/vendor/local/bin/nettle-hash.exe +0 -0
  70. data/vendor/local/bin/nettle-lfib-stream.exe +0 -0
  71. data/vendor/local/bin/nettle-pbkdf2.exe +0 -0
  72. data/vendor/local/bin/ngettext.exe +0 -0
  73. data/vendor/local/bin/p11-kit.exe +0 -0
  74. data/vendor/local/bin/pcre-config +133 -0
  75. data/vendor/local/bin/pcregrep.exe +0 -0
  76. data/vendor/local/bin/pcretest.exe +0 -0
  77. data/vendor/local/bin/pkcs1-conv.exe +0 -0
  78. data/vendor/local/bin/sexp-conv.exe +0 -0
  79. data/vendor/local/bin/trust.exe +0 -0
  80. data/vendor/local/include/glib-2.0/gio/gdatagrambased.h +144 -0
  81. data/vendor/local/include/glib-2.0/gio/gdbusconnection.h +9 -1
  82. data/vendor/local/include/glib-2.0/gio/gdtlsclientconnection.h +75 -0
  83. data/vendor/local/include/glib-2.0/gio/gdtlsconnection.h +191 -0
  84. data/vendor/local/include/glib-2.0/gio/gdtlsserverconnection.h +69 -0
  85. data/vendor/local/include/glib-2.0/gio/gfileinfo.h +14 -0
  86. data/vendor/local/include/glib-2.0/gio/gio-autocleanups.h +1 -0
  87. data/vendor/local/include/glib-2.0/gio/gio.h +4 -0
  88. data/vendor/local/include/glib-2.0/gio/gioenums.h +69 -13
  89. data/vendor/local/include/glib-2.0/gio/gioenumtypes.h +2 -0
  90. data/vendor/local/include/glib-2.0/gio/giotypes.h +77 -0
  91. data/vendor/local/include/glib-2.0/gio/gliststore.h +5 -0
  92. data/vendor/local/include/glib-2.0/gio/gnetworkmonitor.h +3 -0
  93. data/vendor/local/include/glib-2.0/gio/gsettings.h +1 -1
  94. data/vendor/local/include/glib-2.0/gio/gsettingsschema.h +3 -0
  95. data/vendor/local/include/glib-2.0/gio/gsimpleasyncresult.h +25 -25
  96. data/vendor/local/include/glib-2.0/gio/gsocket.h +7 -0
  97. data/vendor/local/include/glib-2.0/gio/gsocketconnectable.h +6 -0
  98. data/vendor/local/include/glib-2.0/gio/gsocketlistener.h +4 -1
  99. data/vendor/local/include/glib-2.0/gio/gtlsbackend.h +14 -0
  100. data/vendor/local/include/glib-2.0/gio/gtlsclientconnection.h +7 -0
  101. data/vendor/local/include/glib-2.0/glib/gasyncqueue.h +13 -0
  102. data/vendor/local/include/glib-2.0/glib/gbacktrace.h +2 -0
  103. data/vendor/local/include/glib-2.0/glib/glib-autocleanups.h +12 -4
  104. data/vendor/local/include/glib-2.0/glib/gmacros.h +46 -10
  105. data/vendor/local/include/glib-2.0/glib/gmain.h +3 -1
  106. data/vendor/local/include/glib-2.0/glib/gmem.h +7 -5
  107. data/vendor/local/include/glib-2.0/glib/gmessages.h +8 -8
  108. data/vendor/local/include/glib-2.0/glib/gnode.h +2 -2
  109. data/vendor/local/include/glib-2.0/glib/gpoll.h +2 -9
  110. data/vendor/local/include/glib-2.0/glib/gsequence.h +2 -0
  111. data/vendor/local/include/glib-2.0/glib/gstdio.h +1 -1
  112. data/vendor/local/include/glib-2.0/glib/gstrfuncs.h +1 -0
  113. data/vendor/local/include/glib-2.0/glib/gtestutils.h +15 -2
  114. data/vendor/local/include/glib-2.0/glib/gthreadpool.h +4 -0
  115. data/vendor/local/include/glib-2.0/glib/gtrashstack.h +10 -57
  116. data/vendor/local/include/glib-2.0/glib/gtypes.h +61 -6
  117. data/vendor/local/include/glib-2.0/glib/gunicode.h +15 -1
  118. data/vendor/local/include/glib-2.0/glib/gutils.h +36 -57
  119. data/vendor/local/include/glib-2.0/glib/gversionmacros.h +59 -10
  120. data/vendor/local/include/glib-2.0/gobject/gboxed.h +3 -3
  121. data/vendor/local/include/glib-2.0/gobject/gclosure.h +2 -1
  122. data/vendor/local/include/glib-2.0/gobject/glib-types.h +0 -7
  123. data/vendor/local/include/glib-2.0/gobject/gobject-autocleanups.h +0 -1
  124. data/vendor/local/include/glib-2.0/gobject/gobject.h +8 -4
  125. data/vendor/local/include/glib-2.0/gobject/gparam.h +5 -2
  126. data/vendor/local/include/glib-2.0/gobject/gsignal.h +3 -0
  127. data/vendor/local/include/glib-2.0/gobject/gtype.h +47 -26
  128. data/vendor/local/include/glib-2.0/gobject/gtypemodule.h +1 -0
  129. data/vendor/local/include/gmp.h +56 -24
  130. data/vendor/local/include/gnutls/gnutls.h +79 -39
  131. data/vendor/local/include/gnutls/pkcs11.h +6 -0
  132. data/vendor/local/include/gnutls/pkcs7.h +17 -4
  133. data/vendor/local/include/gnutls/x509.h +8 -0
  134. data/vendor/local/include/libintl.h +1 -1
  135. data/vendor/local/include/libtasn1.h +1 -1
  136. data/vendor/local/include/nettle/rsa.h +72 -0
  137. data/vendor/local/include/nettle/sha3.h +4 -0
  138. data/vendor/local/include/nettle/version.h +8 -1
  139. data/vendor/local/include/p11-kit-1/p11-kit/p11-kit.h +1 -0
  140. data/vendor/local/include/pcre.h +677 -0
  141. data/vendor/local/include/pcre_scanner.h +172 -0
  142. data/vendor/local/include/pcre_stringpiece.h +180 -0
  143. data/vendor/local/include/pcrecpp.h +710 -0
  144. data/vendor/local/include/pcrecpparg.h +174 -0
  145. data/vendor/local/include/pcreposix.h +146 -0
  146. data/vendor/local/lib/gio/modules/libgiognutls.a +0 -0
  147. data/vendor/local/lib/gio/modules/libgiognutls.dll +0 -0
  148. data/vendor/local/lib/gio/modules/libgiognutls.dll.a +0 -0
  149. data/vendor/local/lib/gio/modules/libgiognutls.la +3 -3
  150. data/vendor/local/lib/glib-2.0/include/glibconfig.h +4 -16
  151. data/vendor/local/lib/libasprintf.a +0 -0
  152. data/vendor/local/lib/libasprintf.dll.a +0 -0
  153. data/vendor/local/lib/libcharset.a +0 -0
  154. data/vendor/local/lib/libcharset.dll.a +0 -0
  155. data/vendor/local/lib/libffi.a +0 -0
  156. data/vendor/local/lib/libffi.dll.a +0 -0
  157. data/vendor/local/lib/libgio-2.0.dll.a +0 -0
  158. data/vendor/local/lib/libgio-2.0.la +5 -5
  159. data/vendor/local/lib/libglib-2.0.dll.a +0 -0
  160. data/vendor/local/lib/libglib-2.0.la +5 -5
  161. data/vendor/local/lib/libgmodule-2.0.dll.a +0 -0
  162. data/vendor/local/lib/libgmodule-2.0.la +5 -5
  163. data/vendor/local/lib/libgmp.dll.a +0 -0
  164. data/vendor/local/lib/libgmp.la +4 -4
  165. data/vendor/local/lib/libgnutls.dll.a +0 -0
  166. data/vendor/local/lib/libgnutls.la +4 -4
  167. data/vendor/local/lib/libgobject-2.0.dll.a +0 -0
  168. data/vendor/local/lib/libgobject-2.0.la +5 -5
  169. data/vendor/local/lib/libgthread-2.0.dll.a +0 -0
  170. data/vendor/local/lib/libgthread-2.0.la +5 -5
  171. data/vendor/local/lib/libhogweed.a +0 -0
  172. data/vendor/local/lib/libhogweed.dll.a +0 -0
  173. data/vendor/local/lib/libiconv.dll.a +0 -0
  174. data/vendor/local/lib/libidn.a +0 -0
  175. data/vendor/local/lib/libidn.dll.a +0 -0
  176. data/vendor/local/lib/libidn.la +1 -1
  177. data/vendor/local/lib/libintl.a +0 -0
  178. data/vendor/local/lib/libintl.dll.a +0 -0
  179. data/vendor/local/lib/libintl.la +2 -2
  180. data/vendor/local/lib/libnettle.a +0 -0
  181. data/vendor/local/lib/libnettle.dll.a +0 -0
  182. data/vendor/local/lib/libp11-kit.dll.a +0 -0
  183. data/vendor/local/lib/libp11-kit.la +1 -1
  184. data/vendor/local/lib/libpcre.a +0 -0
  185. data/vendor/local/lib/libpcre.dll.a +0 -0
  186. data/vendor/local/lib/libpcre.la +41 -0
  187. data/vendor/local/lib/libpcrecpp.a +0 -0
  188. data/vendor/local/lib/libpcrecpp.dll.a +0 -0
  189. data/vendor/local/lib/libpcrecpp.la +41 -0
  190. data/vendor/local/lib/libpcreposix.a +0 -0
  191. data/vendor/local/lib/libpcreposix.dll.a +0 -0
  192. data/vendor/local/lib/libpcreposix.la +41 -0
  193. data/vendor/local/lib/libtasn1.a +0 -0
  194. data/vendor/local/lib/libtasn1.dll.a +0 -0
  195. data/vendor/local/lib/libtasn1.la +1 -1
  196. data/vendor/local/lib/p11-kit/p11-kit-remote.exe +0 -0
  197. data/vendor/local/lib/pkcs11/p11-kit-trust.dll +0 -0
  198. data/vendor/local/lib/pkcs11/p11-kit-trust.dll.a +0 -0
  199. data/vendor/local/lib/pkcs11/p11-kit-trust.la +1 -1
  200. data/vendor/local/lib/pkgconfig/gio-2.0.pc +1 -1
  201. data/vendor/local/lib/pkgconfig/gio-windows-2.0.pc +1 -1
  202. data/vendor/local/lib/pkgconfig/glib-2.0.pc +3 -3
  203. data/vendor/local/lib/pkgconfig/gmodule-2.0.pc +1 -1
  204. data/vendor/local/lib/pkgconfig/gmodule-export-2.0.pc +1 -1
  205. data/vendor/local/lib/pkgconfig/gmodule-no-export-2.0.pc +1 -1
  206. data/vendor/local/lib/pkgconfig/gnutls.pc +2 -2
  207. data/vendor/local/lib/pkgconfig/gobject-2.0.pc +1 -1
  208. data/vendor/local/lib/pkgconfig/gthread-2.0.pc +1 -1
  209. data/vendor/local/lib/pkgconfig/hogweed.pc +1 -1
  210. data/vendor/local/lib/pkgconfig/libidn.pc +1 -1
  211. data/vendor/local/lib/pkgconfig/libpcre.pc +13 -0
  212. data/vendor/local/lib/pkgconfig/libpcrecpp.pc +12 -0
  213. data/vendor/local/lib/pkgconfig/libpcreposix.pc +13 -0
  214. data/vendor/local/lib/pkgconfig/libtasn1.pc +1 -1
  215. data/vendor/local/lib/pkgconfig/nettle.pc +1 -1
  216. data/vendor/local/lib/pkgconfig/p11-kit-1.pc +1 -1
  217. data/vendor/local/share/aclocal/glib-gettext.m4 +3 -2
  218. data/vendor/local/share/doc/pcre/AUTHORS +45 -0
  219. data/vendor/local/share/doc/pcre/COPYING +5 -0
  220. data/vendor/local/share/doc/pcre/ChangeLog +5918 -0
  221. data/vendor/local/share/doc/pcre/LICENCE +93 -0
  222. data/vendor/local/share/doc/pcre/NEWS +716 -0
  223. data/vendor/local/share/doc/pcre/README +1002 -0
  224. data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +772 -0
  225. data/vendor/local/share/doc/pcre/html/README.txt +1002 -0
  226. data/vendor/local/share/doc/pcre/html/index.html +185 -0
  227. data/vendor/local/share/doc/pcre/html/pcre-config.html +109 -0
  228. data/vendor/local/share/doc/pcre/html/pcre.html +224 -0
  229. data/vendor/local/share/doc/pcre/html/pcre16.html +384 -0
  230. data/vendor/local/share/doc/pcre/html/pcre32.html +382 -0
  231. data/vendor/local/share/doc/pcre/html/pcre_assign_jit_stack.html +76 -0
  232. data/vendor/local/share/doc/pcre/html/pcre_compile.html +111 -0
  233. data/vendor/local/share/doc/pcre/html/pcre_compile2.html +115 -0
  234. data/vendor/local/share/doc/pcre/html/pcre_config.html +94 -0
  235. data/vendor/local/share/doc/pcre/html/pcre_copy_named_substring.html +65 -0
  236. data/vendor/local/share/doc/pcre/html/pcre_copy_substring.html +61 -0
  237. data/vendor/local/share/doc/pcre/html/pcre_dfa_exec.html +129 -0
  238. data/vendor/local/share/doc/pcre/html/pcre_exec.html +111 -0
  239. data/vendor/local/share/doc/pcre/html/pcre_free_study.html +46 -0
  240. data/vendor/local/share/doc/pcre/html/pcre_free_substring.html +46 -0
  241. data/vendor/local/share/doc/pcre/html/pcre_free_substring_list.html +46 -0
  242. data/vendor/local/share/doc/pcre/html/pcre_fullinfo.html +118 -0
  243. data/vendor/local/share/doc/pcre/html/pcre_get_named_substring.html +68 -0
  244. data/vendor/local/share/doc/pcre/html/pcre_get_stringnumber.html +57 -0
  245. data/vendor/local/share/doc/pcre/html/pcre_get_stringtable_entries.html +60 -0
  246. data/vendor/local/share/doc/pcre/html/pcre_get_substring.html +64 -0
  247. data/vendor/local/share/doc/pcre/html/pcre_get_substring_list.html +61 -0
  248. data/vendor/local/share/doc/pcre/html/pcre_jit_exec.html +108 -0
  249. data/vendor/local/share/doc/pcre/html/pcre_jit_stack_alloc.html +55 -0
  250. data/vendor/local/share/doc/pcre/html/pcre_jit_stack_free.html +48 -0
  251. data/vendor/local/share/doc/pcre/html/pcre_maketables.html +48 -0
  252. data/vendor/local/share/doc/pcre/html/pcre_pattern_to_host_byte_order.html +58 -0
  253. data/vendor/local/share/doc/pcre/html/pcre_refcount.html +51 -0
  254. data/vendor/local/share/doc/pcre/html/pcre_study.html +68 -0
  255. data/vendor/local/share/doc/pcre/html/pcre_utf16_to_host_byte_order.html +57 -0
  256. data/vendor/local/share/doc/pcre/html/pcre_utf32_to_host_byte_order.html +57 -0
  257. data/vendor/local/share/doc/pcre/html/pcre_version.html +46 -0
  258. data/vendor/local/share/doc/pcre/html/pcreapi.html +2922 -0
  259. data/vendor/local/share/doc/pcre/html/pcrebuild.html +534 -0
  260. data/vendor/local/share/doc/pcre/html/pcrecallout.html +286 -0
  261. data/vendor/local/share/doc/pcre/html/pcrecompat.html +235 -0
  262. data/vendor/local/share/doc/pcre/html/pcrecpp.html +368 -0
  263. data/vendor/local/share/doc/pcre/html/pcredemo.html +426 -0
  264. data/vendor/local/share/doc/pcre/html/pcregrep.html +759 -0
  265. data/vendor/local/share/doc/pcre/html/pcrejit.html +452 -0
  266. data/vendor/local/share/doc/pcre/html/pcrelimits.html +90 -0
  267. data/vendor/local/share/doc/pcre/html/pcrematching.html +242 -0
  268. data/vendor/local/share/doc/pcre/html/pcrepartial.html +509 -0
  269. data/vendor/local/share/doc/pcre/html/pcrepattern.html +3273 -0
  270. data/vendor/local/share/doc/pcre/html/pcreperform.html +195 -0
  271. data/vendor/local/share/doc/pcre/html/pcreposix.html +290 -0
  272. data/vendor/local/share/doc/pcre/html/pcreprecompile.html +163 -0
  273. data/vendor/local/share/doc/pcre/html/pcresample.html +110 -0
  274. data/vendor/local/share/doc/pcre/html/pcrestack.html +225 -0
  275. data/vendor/local/share/doc/pcre/html/pcresyntax.html +561 -0
  276. data/vendor/local/share/doc/pcre/html/pcretest.html +1158 -0
  277. data/vendor/local/share/doc/pcre/html/pcreunicode.html +262 -0
  278. data/vendor/local/share/doc/pcre/pcre-config.txt +86 -0
  279. data/vendor/local/share/doc/pcre/pcre.txt +10455 -0
  280. data/vendor/local/share/doc/pcre/pcregrep.txt +741 -0
  281. data/vendor/local/share/doc/pcre/pcretest.txt +1087 -0
  282. data/vendor/local/share/gdb/auto-load/{libglib-2.0.so.0.4400.1-gdb.py → home/vagrant/ruby-gnome2.win64/glib2/vendor/local/lib/libglib-2.0.so.0.4800.0-gdb.py} +0 -0
  283. data/vendor/local/share/gdb/auto-load/{libgobject-2.0.so.0.4400.1-gdb.py → home/vagrant/ruby-gnome2.win64/glib2/vendor/local/lib/libgobject-2.0.so.0.4800.0-gdb.py} +0 -0
  284. data/vendor/local/share/gettext/ABOUT-NLS +382 -413
  285. data/vendor/local/share/gettext/its/gschema.its +25 -0
  286. data/vendor/local/share/gettext/its/gschema.loc +10 -0
  287. data/vendor/local/share/glib-2.0/codegen/__init__.pyc +0 -0
  288. data/vendor/local/share/glib-2.0/codegen/__init__.pyo +0 -0
  289. data/vendor/local/share/glib-2.0/codegen/codegen.py +22 -0
  290. data/vendor/local/share/glib-2.0/codegen/codegen.pyc +0 -0
  291. data/vendor/local/share/glib-2.0/codegen/codegen.pyo +0 -0
  292. data/vendor/local/share/glib-2.0/codegen/codegen_docbook.pyc +0 -0
  293. data/vendor/local/share/glib-2.0/codegen/codegen_docbook.pyo +0 -0
  294. data/vendor/local/share/glib-2.0/codegen/codegen_main.pyc +0 -0
  295. data/vendor/local/share/glib-2.0/codegen/codegen_main.pyo +0 -0
  296. data/vendor/local/share/glib-2.0/codegen/config.py +1 -1
  297. data/vendor/local/share/glib-2.0/codegen/config.pyc +0 -0
  298. data/vendor/local/share/glib-2.0/codegen/config.pyo +0 -0
  299. data/vendor/local/share/glib-2.0/codegen/dbustypes.pyc +0 -0
  300. data/vendor/local/share/glib-2.0/codegen/dbustypes.pyo +0 -0
  301. data/vendor/local/share/glib-2.0/codegen/parser.pyc +0 -0
  302. data/vendor/local/share/glib-2.0/codegen/parser.pyo +0 -0
  303. data/vendor/local/share/glib-2.0/codegen/utils.pyc +0 -0
  304. data/vendor/local/share/glib-2.0/codegen/utils.pyo +0 -0
  305. data/vendor/local/share/glib-2.0/gdb/glib.py +4 -0
  306. data/vendor/local/share/glib-2.0/schemas/gschemas.compiled +0 -0
  307. data/vendor/local/share/gtk-doc/html/gio/GAction.html +58 -64
  308. data/vendor/local/share/gtk-doc/html/gio/GActionGroup.html +75 -80
  309. data/vendor/local/share/gtk-doc/html/gio/GActionMap.html +50 -51
  310. data/vendor/local/share/gtk-doc/html/gio/GAppInfo.html +72 -97
  311. data/vendor/local/share/gtk-doc/html/gio/GAppInfoMonitor.html +7 -8
  312. data/vendor/local/share/gtk-doc/html/gio/GApplication.html +174 -160
  313. data/vendor/local/share/gtk-doc/html/gio/GApplicationCommandLine.html +127 -132
  314. data/vendor/local/share/gtk-doc/html/gio/GAsyncInitable.html +90 -92
  315. data/vendor/local/share/gtk-doc/html/gio/GAsyncResult.html +43 -46
  316. data/vendor/local/share/gtk-doc/html/gio/GBufferedInputStream.html +17 -26
  317. data/vendor/local/share/gtk-doc/html/gio/GBufferedOutputStream.html +12 -17
  318. data/vendor/local/share/gtk-doc/html/gio/GBytesIcon.html +10 -11
  319. data/vendor/local/share/gtk-doc/html/gio/GCancellable.html +45 -52
  320. data/vendor/local/share/gtk-doc/html/gio/GCharsetConverter.html +14 -18
  321. data/vendor/local/share/gtk-doc/html/gio/GConverter.html +20 -21
  322. data/vendor/local/share/gtk-doc/html/gio/GCredentials.html +41 -48
  323. data/vendor/local/share/gtk-doc/html/gio/GDBusActionGroup.html +9 -10
  324. data/vendor/local/share/gtk-doc/html/gio/GDBusAuthObserver.html +34 -40
  325. data/vendor/local/share/gtk-doc/html/gio/GDBusConnection.html +337 -258
  326. data/vendor/local/share/gtk-doc/html/gio/GDBusInterface.html +23 -22
  327. data/vendor/local/share/gtk-doc/html/gio/GDBusInterfaceSkeleton.html +44 -50
  328. data/vendor/local/share/gtk-doc/html/gio/GDBusMenuModel.html +9 -10
  329. data/vendor/local/share/gtk-doc/html/gio/GDBusMessage.html +143 -158
  330. data/vendor/local/share/gtk-doc/html/gio/GDBusMethodInvocation.html +59 -54
  331. data/vendor/local/share/gtk-doc/html/gio/GDBusObject.html +19 -22
  332. data/vendor/local/share/gtk-doc/html/gio/GDBusObjectManager.html +25 -27
  333. data/vendor/local/share/gtk-doc/html/gio/GDBusObjectManagerClient.html +48 -52
  334. data/vendor/local/share/gtk-doc/html/gio/GDBusObjectManagerServer.html +34 -31
  335. data/vendor/local/share/gtk-doc/html/gio/GDBusObjectProxy.html +15 -17
  336. data/vendor/local/share/gtk-doc/html/gio/GDBusObjectSkeleton.html +25 -28
  337. data/vendor/local/share/gtk-doc/html/gio/GDBusProxy.html +105 -122
  338. data/vendor/local/share/gtk-doc/html/gio/GDBusServer.html +32 -39
  339. data/vendor/local/share/gtk-doc/html/gio/GDataInputStream.html +40 -51
  340. data/vendor/local/share/gtk-doc/html/gio/GDataOutputStream.html +17 -28
  341. data/vendor/local/share/gtk-doc/html/gio/GDatagramBased.html +766 -0
  342. data/vendor/local/share/gtk-doc/html/gio/GDrive.html +56 -75
  343. data/vendor/local/share/gtk-doc/html/gio/GDtlsClientConnection.html +468 -0
  344. data/vendor/local/share/gtk-doc/html/gio/GDtlsConnection.html +1521 -0
  345. data/vendor/local/share/gtk-doc/html/gio/GDtlsServerConnection.html +208 -0
  346. data/vendor/local/share/gtk-doc/html/gio/GEmblem.html +16 -19
  347. data/vendor/local/share/gtk-doc/html/gio/GEmblemedIcon.html +16 -17
  348. data/vendor/local/share/gtk-doc/html/gio/GFile.html +204 -240
  349. data/vendor/local/share/gtk-doc/html/gio/GFileDescriptorBased.html +9 -11
  350. data/vendor/local/share/gtk-doc/html/gio/GFileEnumerator.html +36 -41
  351. data/vendor/local/share/gtk-doc/html/gio/GFileIOStream.html +13 -15
  352. data/vendor/local/share/gtk-doc/html/gio/GFileIcon.html +8 -9
  353. data/vendor/local/share/gtk-doc/html/gio/GFileInfo.html +128 -145
  354. data/vendor/local/share/gtk-doc/html/gio/GFileInputStream.html +8 -9
  355. data/vendor/local/share/gtk-doc/html/gio/GFileMonitor.html +75 -36
  356. data/vendor/local/share/gtk-doc/html/gio/GFileOutputStream.html +9 -11
  357. data/vendor/local/share/gtk-doc/html/gio/GFilenameCompleter.html +10 -13
  358. data/vendor/local/share/gtk-doc/html/gio/GFilterInputStream.html +9 -11
  359. data/vendor/local/share/gtk-doc/html/gio/GFilterOutputStream.html +9 -11
  360. data/vendor/local/share/gtk-doc/html/gio/GIOModule.html +41 -43
  361. data/vendor/local/share/gtk-doc/html/gio/GIOStream.html +56 -44
  362. data/vendor/local/share/gtk-doc/html/gio/GIcon.html +20 -23
  363. data/vendor/local/share/gtk-doc/html/gio/GInetAddress.html +103 -118
  364. data/vendor/local/share/gtk-doc/html/gio/GInetAddressMask.html +39 -47
  365. data/vendor/local/share/gtk-doc/html/gio/GInetSocketAddress.html +31 -37
  366. data/vendor/local/share/gtk-doc/html/gio/GInitable.html +17 -19
  367. data/vendor/local/share/gtk-doc/html/gio/GInputStream.html +32 -44
  368. data/vendor/local/share/gtk-doc/html/gio/GListModel.html +28 -29
  369. data/vendor/local/share/gtk-doc/html/gio/GListStore.html +80 -35
  370. data/vendor/local/share/gtk-doc/html/gio/GLoadableIcon.html +10 -11
  371. data/vendor/local/share/gtk-doc/html/gio/GMemoryInputStream.html +11 -15
  372. data/vendor/local/share/gtk-doc/html/gio/GMemoryOutputStream.html +38 -38
  373. data/vendor/local/share/gtk-doc/html/gio/GMenu.html +138 -145
  374. data/vendor/local/share/gtk-doc/html/gio/GMenuModel.html +84 -94
  375. data/vendor/local/share/gtk-doc/html/gio/GMount.html +54 -66
  376. data/vendor/local/share/gtk-doc/html/gio/GMountOperation.html +34 -42
  377. data/vendor/local/share/gtk-doc/html/gio/GNetworkAddress.html +26 -28
  378. data/vendor/local/share/gtk-doc/html/gio/GNetworkMonitor.html +97 -39
  379. data/vendor/local/share/gtk-doc/html/gio/GNetworkService.html +18 -23
  380. data/vendor/local/share/gtk-doc/html/gio/GNotification.html +49 -47
  381. data/vendor/local/share/gtk-doc/html/gio/GOutputStream.html +39 -56
  382. data/vendor/local/share/gtk-doc/html/gio/GPermission.html +26 -34
  383. data/vendor/local/share/gtk-doc/html/gio/GPollableInputStream.html +16 -20
  384. data/vendor/local/share/gtk-doc/html/gio/GPollableOutputStream.html +16 -20
  385. data/vendor/local/share/gtk-doc/html/gio/GPropertyAction.html +33 -20
  386. data/vendor/local/share/gtk-doc/html/gio/GProxy.html +30 -32
  387. data/vendor/local/share/gtk-doc/html/gio/GProxyAddress.html +42 -51
  388. data/vendor/local/share/gtk-doc/html/gio/GProxyResolver.html +17 -19
  389. data/vendor/local/share/gtk-doc/html/gio/GRemoteActionGroup.html +15 -16
  390. data/vendor/local/share/gtk-doc/html/gio/GResolver.html +46 -47
  391. data/vendor/local/share/gtk-doc/html/gio/GResource.html +79 -74
  392. data/vendor/local/share/gtk-doc/html/gio/GSeekable.html +12 -18
  393. data/vendor/local/share/gtk-doc/html/gio/GSettings.html +325 -200
  394. data/vendor/local/share/gtk-doc/html/gio/GSettingsBackend.html +28 -29
  395. data/vendor/local/share/gtk-doc/html/gio/GSimpleAction.html +46 -49
  396. data/vendor/local/share/gtk-doc/html/gio/GSimpleActionGroup.html +21 -23
  397. data/vendor/local/share/gtk-doc/html/gio/GSimpleAsyncResult.html +258 -162
  398. data/vendor/local/share/gtk-doc/html/gio/GSimpleIOStream.html +11 -13
  399. data/vendor/local/share/gtk-doc/html/gio/GSimplePermission.html +7 -9
  400. data/vendor/local/share/gtk-doc/html/gio/GSimpleProxyResolver.html +16 -18
  401. data/vendor/local/share/gtk-doc/html/gio/GSocket.html +470 -281
  402. data/vendor/local/share/gtk-doc/html/gio/GSocketAddress.html +27 -32
  403. data/vendor/local/share/gtk-doc/html/gio/GSocketClient.html +76 -85
  404. data/vendor/local/share/gtk-doc/html/gio/GSocketConnectable.html +118 -72
  405. data/vendor/local/share/gtk-doc/html/gio/GSocketConnection.html +28 -33
  406. data/vendor/local/share/gtk-doc/html/gio/GSocketControlMessage.html +26 -30
  407. data/vendor/local/share/gtk-doc/html/gio/GSocketListener.html +180 -62
  408. data/vendor/local/share/gtk-doc/html/gio/GSocketService.html +51 -28
  409. data/vendor/local/share/gtk-doc/html/gio/GSrvTarget.html +25 -31
  410. data/vendor/local/share/gtk-doc/html/gio/GSubprocess.html +63 -75
  411. data/vendor/local/share/gtk-doc/html/gio/GSubprocessLauncher.html +49 -54
  412. data/vendor/local/share/gtk-doc/html/gio/GTask.html +406 -404
  413. data/vendor/local/share/gtk-doc/html/gio/GTcpConnection.html +11 -13
  414. data/vendor/local/share/gtk-doc/html/gio/GTcpWrapperConnection.html +9 -11
  415. data/vendor/local/share/gtk-doc/html/gio/GTestDBus.html +36 -38
  416. data/vendor/local/share/gtk-doc/html/gio/GThemedIcon.html +30 -31
  417. data/vendor/local/share/gtk-doc/html/gio/GThreadedSocketService.html +11 -14
  418. data/vendor/local/share/gtk-doc/html/gio/GTlsBackend.html +172 -46
  419. data/vendor/local/share/gtk-doc/html/gio/GTlsCertificate.html +26 -32
  420. data/vendor/local/share/gtk-doc/html/gio/GTlsClientConnection.html +96 -37
  421. data/vendor/local/share/gtk-doc/html/gio/GTlsConnection.html +64 -66
  422. data/vendor/local/share/gtk-doc/html/gio/GTlsDatabase.html +256 -63
  423. data/vendor/local/share/gtk-doc/html/gio/GTlsFileDatabase.html +15 -16
  424. data/vendor/local/share/gtk-doc/html/gio/GTlsInteraction.html +150 -49
  425. data/vendor/local/share/gtk-doc/html/gio/GTlsPassword.html +46 -51
  426. data/vendor/local/share/gtk-doc/html/gio/GTlsServerConnection.html +21 -18
  427. data/vendor/local/share/gtk-doc/html/gio/GUnixConnection.html +21 -26
  428. data/vendor/local/share/gtk-doc/html/gio/GUnixCredentialsMessage.html +21 -25
  429. data/vendor/local/share/gtk-doc/html/gio/GUnixFDList.html +31 -37
  430. data/vendor/local/share/gtk-doc/html/gio/GUnixFDMessage.html +24 -28
  431. data/vendor/local/share/gtk-doc/html/gio/GUnixInputStream.html +15 -19
  432. data/vendor/local/share/gtk-doc/html/gio/GUnixOutputStream.html +15 -19
  433. data/vendor/local/share/gtk-doc/html/gio/GUnixSocketAddress.html +43 -47
  434. data/vendor/local/share/gtk-doc/html/gio/GVfs.html +22 -24
  435. data/vendor/local/share/gtk-doc/html/gio/GVolume.html +43 -54
  436. data/vendor/local/share/gtk-doc/html/gio/GVolumeMonitor.html +26 -27
  437. data/vendor/local/share/gtk-doc/html/gio/GZlibCompressor.html +15 -17
  438. data/vendor/local/share/gtk-doc/html/gio/GZlibDecompressor.html +11 -13
  439. data/vendor/local/share/gtk-doc/html/gio/annotation-glossary.html +21 -6
  440. data/vendor/local/share/gtk-doc/html/gio/api-index-2-18.html +3 -4
  441. data/vendor/local/share/gtk-doc/html/gio/api-index-2-20.html +3 -4
  442. data/vendor/local/share/gtk-doc/html/gio/api-index-2-22.html +3 -4
  443. data/vendor/local/share/gtk-doc/html/gio/api-index-2-24.html +3 -4
  444. data/vendor/local/share/gtk-doc/html/gio/api-index-2-26.html +3 -4
  445. data/vendor/local/share/gtk-doc/html/gio/api-index-2-28.html +3 -4
  446. data/vendor/local/share/gtk-doc/html/gio/api-index-2-30.html +11 -4
  447. data/vendor/local/share/gtk-doc/html/gio/api-index-2-32.html +3 -4
  448. data/vendor/local/share/gtk-doc/html/gio/api-index-2-34.html +3 -4
  449. data/vendor/local/share/gtk-doc/html/gio/api-index-2-36.html +3 -4
  450. data/vendor/local/share/gtk-doc/html/gio/api-index-2-38.html +3 -4
  451. data/vendor/local/share/gtk-doc/html/gio/api-index-2-40.html +3 -4
  452. data/vendor/local/share/gtk-doc/html/gio/api-index-2-42.html +11 -5
  453. data/vendor/local/share/gtk-doc/html/gio/api-index-2-44.html +5 -6
  454. data/vendor/local/share/gtk-doc/html/gio/api-index-2-46.html +211 -0
  455. data/vendor/local/share/gtk-doc/html/gio/api-index-2-48.html +283 -0
  456. data/vendor/local/share/gtk-doc/html/gio/api-index-deprecated.html +107 -4
  457. data/vendor/local/share/gtk-doc/html/gio/api-index-full.html +459 -8
  458. data/vendor/local/share/gtk-doc/html/gio/application.html +5 -6
  459. data/vendor/local/share/gtk-doc/html/gio/async.html +3 -4
  460. data/vendor/local/share/gtk-doc/html/gio/ch01.html +3 -4
  461. data/vendor/local/share/gtk-doc/html/gio/ch02.html +3 -4
  462. data/vendor/local/share/gtk-doc/html/gio/ch03.html +3 -4
  463. data/vendor/local/share/gtk-doc/html/gio/ch32.html +29 -172
  464. data/vendor/local/share/gtk-doc/html/gio/ch33.html +187 -27
  465. data/vendor/local/share/gtk-doc/html/gio/ch33s02.html +15 -34
  466. data/vendor/local/share/gtk-doc/html/gio/ch33s03.html +16 -137
  467. data/vendor/local/share/gtk-doc/html/gio/ch34.html +26 -87
  468. data/vendor/local/share/gtk-doc/html/gio/ch34s02.html +37 -121
  469. data/vendor/local/share/gtk-doc/html/gio/ch34s03.html +132 -176
  470. data/vendor/local/share/gtk-doc/html/gio/ch34s04.html +19 -71
  471. data/vendor/local/share/gtk-doc/html/gio/ch34s05.html +24 -908
  472. data/vendor/local/share/gtk-doc/html/gio/{ch33s06.html → ch34s06.html} +9 -10
  473. data/vendor/local/share/gtk-doc/html/gio/{ch33s07.html → ch34s07.html} +9 -10
  474. data/vendor/local/share/gtk-doc/html/gio/ch35.html +94 -0
  475. data/vendor/local/share/gtk-doc/html/gio/ch35s02.html +141 -0
  476. data/vendor/local/share/gtk-doc/html/gio/ch35s03.html +201 -0
  477. data/vendor/local/share/gtk-doc/html/gio/ch35s04.html +96 -0
  478. data/vendor/local/share/gtk-doc/html/gio/conversion.html +3 -4
  479. data/vendor/local/share/gtk-doc/html/gio/data-models.html +4 -5
  480. data/vendor/local/share/gtk-doc/html/gio/extending-gio.html +3 -4
  481. data/vendor/local/share/gtk-doc/html/gio/extending.html +3 -4
  482. data/vendor/local/share/gtk-doc/html/gio/failable_initialization.html +3 -4
  483. data/vendor/local/share/gtk-doc/html/gio/file_mon.html +3 -4
  484. data/vendor/local/share/gtk-doc/html/gio/file_ops.html +3 -4
  485. data/vendor/local/share/gtk-doc/html/gio/gapplication-tool.html +11 -12
  486. data/vendor/local/share/gtk-doc/html/gio/gdbus-codegen.html +199 -200
  487. data/vendor/local/share/gtk-doc/html/gio/gdbus-convenience.html +3 -4
  488. data/vendor/local/share/gtk-doc/html/gio/gdbus-example-gdbus-codegen.html +89 -0
  489. data/vendor/local/share/gtk-doc/html/gio/gdbus-lowlevel.html +3 -4
  490. data/vendor/local/share/gtk-doc/html/gio/gdbus.html +9 -10
  491. data/vendor/local/share/gtk-doc/html/gio/gio-D-Bus-Addresses.html +24 -28
  492. data/vendor/local/share/gtk-doc/html/gio/gio-D-Bus-Introspection-Data.html +77 -87
  493. data/vendor/local/share/gtk-doc/html/gio/gio-D-Bus-Utilities.html +19 -27
  494. data/vendor/local/share/gtk-doc/html/gio/gio-Desktop-file-based-GAppInfo.html +43 -57
  495. data/vendor/local/share/gtk-doc/html/gio/gio-Extension-Points.html +47 -52
  496. data/vendor/local/share/gtk-doc/html/gio/gio-GActionGroup-exporter.html +11 -13
  497. data/vendor/local/share/gtk-doc/html/gio/gio-GContentType.html +20 -27
  498. data/vendor/local/share/gtk-doc/html/gio/gio-GConverterInputstream.html +9 -11
  499. data/vendor/local/share/gtk-doc/html/gio/gio-GConverterOutputstream.html +9 -11
  500. data/vendor/local/share/gtk-doc/html/gio/gio-GDBusError.html +59 -67
  501. data/vendor/local/share/gtk-doc/html/gio/gio-GFileAttribute.html +16 -21
  502. data/vendor/local/share/gtk-doc/html/gio/gio-GIOError.html +21 -17
  503. data/vendor/local/share/gtk-doc/html/gio/gio-GIOScheduler.html +9 -12
  504. data/vendor/local/share/gtk-doc/html/gio/gio-GMenuModel-exporter.html +11 -13
  505. data/vendor/local/share/gtk-doc/html/gio/gio-GSettingsSchema-GSettingsSchemaSource.html +147 -114
  506. data/vendor/local/share/gtk-doc/html/gio/gio-GWin32InputStream.html +12 -16
  507. data/vendor/local/share/gtk-doc/html/gio/gio-GWin32OutputStream.html +13 -17
  508. data/vendor/local/share/gtk-doc/html/gio/gio-GWin32RegistryKey.html +2066 -0
  509. data/vendor/local/share/gtk-doc/html/gio/gio-Owning-Bus-Names.html +29 -32
  510. data/vendor/local/share/gtk-doc/html/gio/gio-TLS-Overview.html +16 -13
  511. data/vendor/local/share/gtk-doc/html/gio/gio-Unix-Mounts.html +54 -75
  512. data/vendor/local/share/gtk-doc/html/gio/gio-Watching-Bus-Names.html +27 -30
  513. data/vendor/local/share/gtk-doc/html/gio/gio-gnetworking.h.html +5 -6
  514. data/vendor/local/share/gtk-doc/html/gio/gio-gpollableutils.html +16 -21
  515. data/vendor/local/share/gtk-doc/html/gio/gio-hierarchy.html +11 -8
  516. data/vendor/local/share/gtk-doc/html/gio/gio-querymodules.html +4 -5
  517. data/vendor/local/share/gtk-doc/html/gio/gio.devhelp2 +194 -140
  518. data/vendor/local/share/gtk-doc/html/gio/glib-compile-resources.html +6 -7
  519. data/vendor/local/share/gtk-doc/html/gio/glib-compile-schemas.html +12 -6
  520. data/vendor/local/share/gtk-doc/html/gio/gresource-tool.html +5 -6
  521. data/vendor/local/share/gtk-doc/html/gio/gsettings-tool.html +5 -6
  522. data/vendor/local/share/gtk-doc/html/gio/highlevel-socket.html +3 -4
  523. data/vendor/local/share/gtk-doc/html/gio/icons.html +3 -4
  524. data/vendor/local/share/gtk-doc/html/gio/index.html +41 -45
  525. data/vendor/local/share/gtk-doc/html/gio/index.sgml +204 -137
  526. data/vendor/local/share/gtk-doc/html/gio/migrating.html +22 -44
  527. data/vendor/local/share/gtk-doc/html/gio/networking.html +6 -4
  528. data/vendor/local/share/gtk-doc/html/gio/permissions.html +3 -4
  529. data/vendor/local/share/gtk-doc/html/gio/pt01.html +3 -4
  530. data/vendor/local/share/gtk-doc/html/gio/pt02.html +20 -5
  531. data/vendor/local/share/gtk-doc/html/gio/registry.html +32 -0
  532. data/vendor/local/share/gtk-doc/html/gio/resolver.html +3 -4
  533. data/vendor/local/share/gtk-doc/html/gio/resources.html +3 -4
  534. data/vendor/local/share/gtk-doc/html/gio/running-gio-apps.html +3 -4
  535. data/vendor/local/share/gtk-doc/html/gio/settings.html +3 -4
  536. data/vendor/local/share/gtk-doc/html/gio/streaming.html +3 -4
  537. data/vendor/local/share/gtk-doc/html/gio/style.css +9 -6
  538. data/vendor/local/share/gtk-doc/html/gio/subprocesses.html +3 -4
  539. data/vendor/local/share/gtk-doc/html/gio/testing.html +3 -4
  540. data/vendor/local/share/gtk-doc/html/gio/tls.html +12 -4
  541. data/vendor/local/share/gtk-doc/html/gio/tools.html +3 -4
  542. data/vendor/local/share/gtk-doc/html/gio/types.html +3 -4
  543. data/vendor/local/share/gtk-doc/html/gio/utils.html +3 -4
  544. data/vendor/local/share/gtk-doc/html/gio/volume_mon.html +3 -4
  545. data/vendor/local/share/gtk-doc/html/glib/annotation-glossary.html +16 -6
  546. data/vendor/local/share/gtk-doc/html/glib/api-index-2-10.html +3 -4
  547. data/vendor/local/share/gtk-doc/html/glib/api-index-2-12.html +3 -4
  548. data/vendor/local/share/gtk-doc/html/glib/api-index-2-14.html +3 -4
  549. data/vendor/local/share/gtk-doc/html/glib/api-index-2-16.html +3 -4
  550. data/vendor/local/share/gtk-doc/html/glib/api-index-2-18.html +3 -4
  551. data/vendor/local/share/gtk-doc/html/glib/api-index-2-2.html +3 -4
  552. data/vendor/local/share/gtk-doc/html/glib/api-index-2-20.html +3 -4
  553. data/vendor/local/share/gtk-doc/html/glib/api-index-2-22.html +3 -4
  554. data/vendor/local/share/gtk-doc/html/glib/api-index-2-24.html +3 -4
  555. data/vendor/local/share/gtk-doc/html/glib/api-index-2-26.html +3 -4
  556. data/vendor/local/share/gtk-doc/html/glib/api-index-2-28.html +3 -4
  557. data/vendor/local/share/gtk-doc/html/glib/api-index-2-30.html +3 -4
  558. data/vendor/local/share/gtk-doc/html/glib/api-index-2-32.html +3 -4
  559. data/vendor/local/share/gtk-doc/html/glib/api-index-2-34.html +3 -4
  560. data/vendor/local/share/gtk-doc/html/glib/api-index-2-36.html +3 -4
  561. data/vendor/local/share/gtk-doc/html/glib/api-index-2-38.html +3 -4
  562. data/vendor/local/share/gtk-doc/html/glib/api-index-2-4.html +3 -4
  563. data/vendor/local/share/gtk-doc/html/glib/api-index-2-40.html +7 -4
  564. data/vendor/local/share/gtk-doc/html/glib/api-index-2-42.html +12 -6
  565. data/vendor/local/share/gtk-doc/html/glib/api-index-2-44.html +5 -6
  566. data/vendor/local/share/gtk-doc/html/glib/api-index-2-46.html +68 -0
  567. data/vendor/local/share/gtk-doc/html/glib/api-index-2-48.html +68 -0
  568. data/vendor/local/share/gtk-doc/html/glib/api-index-2-6.html +3 -4
  569. data/vendor/local/share/gtk-doc/html/glib/api-index-2-8.html +3 -4
  570. data/vendor/local/share/gtk-doc/html/glib/api-index-deprecated.html +46 -8
  571. data/vendor/local/share/gtk-doc/html/glib/api-index-full.html +80 -9
  572. data/vendor/local/share/gtk-doc/html/glib/deprecated.html +3 -4
  573. data/vendor/local/share/gtk-doc/html/glib/glib-Arrays.html +68 -85
  574. data/vendor/local/share/gtk-doc/html/glib/glib-Asynchronous-Queues.html +254 -84
  575. data/vendor/local/share/gtk-doc/html/glib/glib-Atomic-Operations.html +130 -144
  576. data/vendor/local/share/gtk-doc/html/glib/glib-Automatic-String-Completion.html +19 -23
  577. data/vendor/local/share/gtk-doc/html/glib/glib-Balanced-Binary-Trees.html +33 -46
  578. data/vendor/local/share/gtk-doc/html/glib/glib-Base64-Encoding.html +22 -26
  579. data/vendor/local/share/gtk-doc/html/glib/glib-Basic-Types.html +89 -82
  580. data/vendor/local/share/gtk-doc/html/glib/glib-Bookmark-file-parser.html +87 -112
  581. data/vendor/local/share/gtk-doc/html/glib/glib-Bounds-checked-integer-arithmetic.html +384 -0
  582. data/vendor/local/share/gtk-doc/html/glib/glib-Byte-Arrays.html +100 -114
  583. data/vendor/local/share/gtk-doc/html/glib/glib-Byte-Order-Macros.html +83 -143
  584. data/vendor/local/share/gtk-doc/html/glib/glib-Caches.html +14 -19
  585. data/vendor/local/share/gtk-doc/html/glib/glib-Character-Set-Conversion.html +49 -64
  586. data/vendor/local/share/gtk-doc/html/glib/glib-Commandline-option-parser.html +141 -156
  587. data/vendor/local/share/gtk-doc/html/glib/glib-Data-Checksums.html +32 -38
  588. data/vendor/local/share/gtk-doc/html/glib/glib-Data-HMACs.html +24 -31
  589. data/vendor/local/share/gtk-doc/html/glib/glib-Datasets.html +28 -32
  590. data/vendor/local/share/gtk-doc/html/glib/glib-Date-and-Time-Functions.html +108 -141
  591. data/vendor/local/share/gtk-doc/html/glib/glib-Deprecated-Thread-APIs.html +98 -113
  592. data/vendor/local/share/gtk-doc/html/glib/glib-Double-ended-Queues.html +89 -112
  593. data/vendor/local/share/gtk-doc/html/glib/glib-Doubly-Linked-Lists.html +93 -125
  594. data/vendor/local/share/gtk-doc/html/glib/glib-Dynamic-Loading-of-Modules.html +50 -58
  595. data/vendor/local/share/gtk-doc/html/glib/glib-Error-Reporting.html +189 -171
  596. data/vendor/local/share/gtk-doc/html/glib/glib-File-Utilities.html +99 -134
  597. data/vendor/local/share/gtk-doc/html/glib/glib-GDateTime.html +172 -217
  598. data/vendor/local/share/gtk-doc/html/glib/glib-GTimeZone.html +32 -42
  599. data/vendor/local/share/gtk-doc/html/glib/glib-GVariant.html +430 -451
  600. data/vendor/local/share/gtk-doc/html/glib/glib-GVariantType.html +53 -66
  601. data/vendor/local/share/gtk-doc/html/glib/glib-Glob-style-pattern-matching.html +12 -18
  602. data/vendor/local/share/gtk-doc/html/glib/glib-Hash-Tables.html +167 -192
  603. data/vendor/local/share/gtk-doc/html/glib/glib-Hook-Functions.html +58 -78
  604. data/vendor/local/share/gtk-doc/html/glib/glib-Hostname-Utilities.html +14 -20
  605. data/vendor/local/share/gtk-doc/html/glib/glib-I18N.html +70 -79
  606. data/vendor/local/share/gtk-doc/html/glib/glib-IO-Channels.html +68 -102
  607. data/vendor/local/share/gtk-doc/html/glib/glib-Key-value-file-parser.html +166 -183
  608. data/vendor/local/share/gtk-doc/html/glib/glib-Keyed-Data-Lists.html +33 -41
  609. data/vendor/local/share/gtk-doc/html/glib/glib-Lexical-Scanner.html +80 -92
  610. data/vendor/local/share/gtk-doc/html/glib/glib-Memory-Allocation.html +101 -119
  611. data/vendor/local/share/gtk-doc/html/glib/glib-Memory-Slices.html +102 -75
  612. data/vendor/local/share/gtk-doc/html/glib/glib-Message-Logging.html +133 -60
  613. data/vendor/local/share/gtk-doc/html/glib/glib-Miscellaneous-Macros.html +192 -162
  614. data/vendor/local/share/gtk-doc/html/glib/glib-Miscellaneous-Utility-Functions.html +107 -155
  615. data/vendor/local/share/gtk-doc/html/glib/glib-N-ary-Trees.html +69 -103
  616. data/vendor/local/share/gtk-doc/html/glib/glib-Numerical-Definitions.html +12 -16
  617. data/vendor/local/share/gtk-doc/html/glib/glib-Perl-compatible-regular-expressions.html +199 -230
  618. data/vendor/local/share/gtk-doc/html/glib/glib-Pointer-Arrays.html +54 -67
  619. data/vendor/local/share/gtk-doc/html/glib/glib-Quarks.html +15 -22
  620. data/vendor/local/share/gtk-doc/html/glib/glib-Random-Numbers.html +24 -39
  621. data/vendor/local/share/gtk-doc/html/glib/glib-Relations-and-Tuples.html +29 -36
  622. data/vendor/local/share/gtk-doc/html/glib/glib-Sequences.html +187 -173
  623. data/vendor/local/share/gtk-doc/html/glib/glib-Shell-related-Utilities.html +13 -17
  624. data/vendor/local/share/gtk-doc/html/glib/glib-Simple-XML-Subset-Parser.html +104 -117
  625. data/vendor/local/share/gtk-doc/html/glib/glib-Singly-Linked-Lists.html +62 -89
  626. data/vendor/local/share/gtk-doc/html/glib/glib-Spawning-Processes.html +26 -38
  627. data/vendor/local/share/gtk-doc/html/glib/glib-Standard-Macros.html +15 -23
  628. data/vendor/local/share/gtk-doc/html/glib/glib-String-Chunks.html +13 -18
  629. data/vendor/local/share/gtk-doc/html/glib/glib-String-Utility-Functions.html +210 -261
  630. data/vendor/local/share/gtk-doc/html/glib/glib-Strings.html +95 -101
  631. data/vendor/local/share/gtk-doc/html/glib/glib-Testing.html +306 -268
  632. data/vendor/local/share/gtk-doc/html/glib/glib-The-Main-Event-Loop.html +373 -369
  633. data/vendor/local/share/gtk-doc/html/glib/glib-Thread-Pools.html +79 -45
  634. data/vendor/local/share/gtk-doc/html/glib/glib-Threads.html +316 -319
  635. data/vendor/local/share/gtk-doc/html/glib/glib-Timers.html +12 -15
  636. data/vendor/local/share/gtk-doc/html/glib/glib-Trash-Stacks.html +44 -26
  637. data/vendor/local/share/gtk-doc/html/glib/glib-Type-Conversion-Macros.html +20 -21
  638. data/vendor/local/share/gtk-doc/html/glib/glib-UNIX-specific-utilities-and-integration.html +44 -53
  639. data/vendor/local/share/gtk-doc/html/glib/glib-URI-Functions.html +21 -26
  640. data/vendor/local/share/gtk-doc/html/glib/glib-Unicode-Manipulation.html +200 -220
  641. data/vendor/local/share/gtk-doc/html/glib/glib-Version-Information.html +51 -28
  642. data/vendor/local/share/gtk-doc/html/glib/glib-Warnings-and-Assertions.html +38 -41
  643. data/vendor/local/share/gtk-doc/html/glib/glib-Windows-Compatibility-Functions.html +25 -34
  644. data/vendor/local/share/gtk-doc/html/glib/glib-building.html +3 -4
  645. data/vendor/local/share/gtk-doc/html/glib/glib-changes.html +3 -4
  646. data/vendor/local/share/gtk-doc/html/glib/glib-compiling.html +3 -4
  647. data/vendor/local/share/gtk-doc/html/glib/glib-core.html +3 -4
  648. data/vendor/local/share/gtk-doc/html/glib/glib-cross-compiling.html +3 -4
  649. data/vendor/local/share/gtk-doc/html/glib/glib-data-types.html +3 -4
  650. data/vendor/local/share/gtk-doc/html/glib/glib-fundamentals.html +6 -4
  651. data/vendor/local/share/gtk-doc/html/glib/glib-gettextize.html +3 -4
  652. data/vendor/local/share/gtk-doc/html/glib/glib-programming.html +4 -5
  653. data/vendor/local/share/gtk-doc/html/glib/glib-regex-syntax.html +14 -15
  654. data/vendor/local/share/gtk-doc/html/glib/glib-resources.html +3 -4
  655. data/vendor/local/share/gtk-doc/html/glib/glib-running.html +3 -4
  656. data/vendor/local/share/gtk-doc/html/glib/glib-utilities.html +3 -4
  657. data/vendor/local/share/gtk-doc/html/glib/glib.devhelp2 +44 -59
  658. data/vendor/local/share/gtk-doc/html/glib/glib.html +3 -4
  659. data/vendor/local/share/gtk-doc/html/glib/gtester-report.html +3 -4
  660. data/vendor/local/share/gtk-doc/html/glib/gtester.html +3 -4
  661. data/vendor/local/share/gtk-doc/html/glib/gvariant-format-strings.html +149 -149
  662. data/vendor/local/share/gtk-doc/html/glib/gvariant-text.html +11 -12
  663. data/vendor/local/share/gtk-doc/html/glib/index.html +9 -5
  664. data/vendor/local/share/gtk-doc/html/glib/index.sgml +36 -2
  665. data/vendor/local/share/gtk-doc/html/glib/style.css +9 -6
  666. data/vendor/local/share/gtk-doc/html/glib/tools.html +3 -4
  667. data/vendor/local/share/gtk-doc/html/gobject/GBinding.html +53 -57
  668. data/vendor/local/share/gtk-doc/html/gobject/GTypeModule.html +72 -77
  669. data/vendor/local/share/gtk-doc/html/gobject/GTypePlugin.html +19 -20
  670. data/vendor/local/share/gtk-doc/html/gobject/annotation-glossary.html +14 -6
  671. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-10.html +3 -4
  672. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-12.html +3 -4
  673. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-14.html +3 -4
  674. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-18.html +3 -4
  675. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-2.html +3 -4
  676. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-22.html +3 -4
  677. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-24.html +3 -4
  678. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-26.html +3 -4
  679. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-28.html +3 -4
  680. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-30.html +3 -4
  681. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-32.html +3 -4
  682. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-34.html +3 -4
  683. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-36.html +3 -4
  684. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-38.html +3 -4
  685. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-4.html +3 -4
  686. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-40.html +3 -4
  687. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-42.html +3 -4
  688. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-44.html +5 -6
  689. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-46.html +34 -0
  690. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-6.html +3 -4
  691. data/vendor/local/share/gtk-doc/html/gobject/api-index-2-8.html +3 -4
  692. data/vendor/local/share/gtk-doc/html/gobject/api-index-deprecated.html +3 -4
  693. data/vendor/local/share/gtk-doc/html/gobject/api-index-full.html +7 -8
  694. data/vendor/local/share/gtk-doc/html/gobject/ch01s02.html +22 -17
  695. data/vendor/local/share/gtk-doc/html/gobject/chapter-gobject.html +85 -135
  696. data/vendor/local/share/gtk-doc/html/gobject/chapter-gtype.html +80 -144
  697. data/vendor/local/share/gtk-doc/html/gobject/chapter-intro.html +7 -8
  698. data/vendor/local/share/gtk-doc/html/gobject/chapter-signal.html +59 -75
  699. data/vendor/local/share/gtk-doc/html/gobject/glib-genmarshal.html +20 -21
  700. data/vendor/local/share/gtk-doc/html/gobject/glib-mkenums.html +19 -20
  701. data/vendor/local/share/gtk-doc/html/gobject/gobject-Boxed-Types.html +68 -89
  702. data/vendor/local/share/gtk-doc/html/gobject/gobject-Closures.html +227 -218
  703. data/vendor/local/share/gtk-doc/html/gobject/gobject-Enumeration-and-Flag-Types.html +48 -57
  704. data/vendor/local/share/gtk-doc/html/gobject/gobject-GParamSpec.html +125 -96
  705. data/vendor/local/share/gtk-doc/html/gobject/gobject-Generic-values.html +78 -90
  706. data/vendor/local/share/gtk-doc/html/gobject/gobject-Signals.html +143 -138
  707. data/vendor/local/share/gtk-doc/html/gobject/gobject-Standard-Parameter-and-Value-Types.html +226 -285
  708. data/vendor/local/share/gtk-doc/html/gobject/gobject-The-Base-Object-Type.html +306 -289
  709. data/vendor/local/share/gtk-doc/html/gobject/gobject-Type-Information.html +450 -448
  710. data/vendor/local/share/gtk-doc/html/gobject/gobject-Value-arrays.html +22 -23
  711. data/vendor/local/share/gtk-doc/html/gobject/gobject-Varargs-Value-Collection.html +11 -12
  712. data/vendor/local/share/gtk-doc/html/gobject/gobject-memory.html +32 -91
  713. data/vendor/local/share/gtk-doc/html/gobject/gobject-properties.html +134 -133
  714. data/vendor/local/share/gtk-doc/html/gobject/gobject-query.html +3 -4
  715. data/vendor/local/share/gtk-doc/html/gobject/gobject.devhelp2 +4 -8
  716. data/vendor/local/share/gtk-doc/html/gobject/gtype-conventions.html +89 -98
  717. data/vendor/local/share/gtk-doc/html/gobject/gtype-instantiable-classed.html +119 -112
  718. data/vendor/local/share/gtk-doc/html/gobject/gtype-non-instantiable-classed.html +199 -183
  719. data/vendor/local/share/gtk-doc/html/gobject/gtype-non-instantiable.html +30 -30
  720. data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-chainup.html +32 -33
  721. data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-code.html +80 -32
  722. data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-construction.html +73 -53
  723. data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-destruction.html +52 -50
  724. data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-methods.html +255 -133
  725. data/vendor/local/share/gtk-doc/html/gobject/howto-gobject.html +147 -111
  726. data/vendor/local/share/gtk-doc/html/gobject/howto-interface-implement.html +65 -122
  727. data/vendor/local/share/gtk-doc/html/gobject/howto-interface-override.html +93 -79
  728. data/vendor/local/share/gtk-doc/html/gobject/howto-interface-prerequisite.html +80 -54
  729. data/vendor/local/share/gtk-doc/html/gobject/howto-interface-properties.html +82 -87
  730. data/vendor/local/share/gtk-doc/html/gobject/howto-interface.html +126 -62
  731. data/vendor/local/share/gtk-doc/html/gobject/howto-signals.html +61 -54
  732. data/vendor/local/share/gtk-doc/html/gobject/index.html +10 -10
  733. data/vendor/local/share/gtk-doc/html/gobject/index.sgml +4 -2
  734. data/vendor/local/share/gtk-doc/html/gobject/pr01.html +3 -4
  735. data/vendor/local/share/gtk-doc/html/gobject/pt01.html +3 -4
  736. data/vendor/local/share/gtk-doc/html/gobject/pt02.html +10 -11
  737. data/vendor/local/share/gtk-doc/html/gobject/pt03.html +3 -4
  738. data/vendor/local/share/gtk-doc/html/gobject/rn01.html +3 -4
  739. data/vendor/local/share/gtk-doc/html/gobject/rn02.html +3 -4
  740. data/vendor/local/share/gtk-doc/html/gobject/signal.html +98 -198
  741. data/vendor/local/share/gtk-doc/html/gobject/style.css +9 -6
  742. data/vendor/local/share/gtk-doc/html/gobject/tools-ginspector.html +3 -4
  743. data/vendor/local/share/gtk-doc/html/gobject/tools-gob.html +3 -4
  744. data/vendor/local/share/gtk-doc/html/gobject/tools-gtkdoc.html +10 -11
  745. data/vendor/local/share/gtk-doc/html/gobject/tools-refdb.html +3 -4
  746. data/vendor/local/share/gtk-doc/html/gobject/tools-vala.html +3 -4
  747. data/vendor/local/share/gtk-doc/html/p11-kit/config-example.html +3 -4
  748. data/vendor/local/share/gtk-doc/html/p11-kit/config-files.html +3 -4
  749. data/vendor/local/share/gtk-doc/html/p11-kit/config.html +3 -4
  750. data/vendor/local/share/gtk-doc/html/p11-kit/devel-building-style.html +3 -4
  751. data/vendor/local/share/gtk-doc/html/p11-kit/devel-building.html +3 -4
  752. data/vendor/local/share/gtk-doc/html/p11-kit/devel-commands.html +3 -4
  753. data/vendor/local/share/gtk-doc/html/p11-kit/devel-debugging.html +3 -4
  754. data/vendor/local/share/gtk-doc/html/p11-kit/devel-paths.html +3 -4
  755. data/vendor/local/share/gtk-doc/html/p11-kit/devel-testing.html +3 -4
  756. data/vendor/local/share/gtk-doc/html/p11-kit/devel.html +3 -4
  757. data/vendor/local/share/gtk-doc/html/p11-kit/gtk-doc.css +9 -6
  758. data/vendor/local/share/gtk-doc/html/p11-kit/index.html +2 -3
  759. data/vendor/local/share/gtk-doc/html/p11-kit/index.sgml +22 -0
  760. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit-Deprecated.html +3 -13
  761. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit-Future.html +3 -14
  762. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit-Modules.html +3 -15
  763. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit-PIN-Callbacks.html +3 -14
  764. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit-URIs.html +3 -18
  765. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit-Utilities.html +3 -8
  766. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit.devhelp2 +22 -1
  767. data/vendor/local/share/gtk-doc/html/p11-kit/p11-kit.html +2 -3
  768. data/vendor/local/share/gtk-doc/html/p11-kit/pkcs11-conf.html +7 -8
  769. data/vendor/local/share/gtk-doc/html/p11-kit/reference.html +3 -4
  770. data/vendor/local/share/gtk-doc/html/p11-kit/sharing-managed.html +3 -4
  771. data/vendor/local/share/gtk-doc/html/p11-kit/sharing.html +3 -4
  772. data/vendor/local/share/gtk-doc/html/p11-kit/tools.html +3 -4
  773. data/vendor/local/share/gtk-doc/html/p11-kit/trust-disable.html +3 -4
  774. data/vendor/local/share/gtk-doc/html/p11-kit/trust-glib-networking.html +3 -4
  775. data/vendor/local/share/gtk-doc/html/p11-kit/trust-module.html +3 -4
  776. data/vendor/local/share/gtk-doc/html/p11-kit/trust-nss.html +3 -4
  777. data/vendor/local/share/gtk-doc/html/p11-kit/trust.html +3 -4
  778. data/vendor/local/share/info/gmp.info +153 -158
  779. data/vendor/local/share/info/gmp.info-1 +131 -178
  780. data/vendor/local/share/info/gmp.info-2 +250 -193
  781. data/vendor/local/share/info/libtasn1.info +2 -2
  782. data/vendor/local/share/license/gmp/AUTHORS +1 -1
  783. data/vendor/local/share/license/pcre/AUTHORS +45 -0
  784. data/vendor/local/share/license/pcre/COPYING +5 -0
  785. data/vendor/local/share/locale/be/LC_MESSAGES/gettext-runtime.mo +0 -0
  786. data/vendor/local/share/locale/bg/LC_MESSAGES/gettext-runtime.mo +0 -0
  787. data/vendor/local/share/locale/bg/LC_MESSAGES/glib20.mo +0 -0
  788. data/vendor/local/share/locale/ca/LC_MESSAGES/gettext-runtime.mo +0 -0
  789. data/vendor/local/share/locale/ca/LC_MESSAGES/glib20.mo +0 -0
  790. data/vendor/local/share/locale/cs/LC_MESSAGES/gettext-runtime.mo +0 -0
  791. data/vendor/local/share/locale/cs/LC_MESSAGES/glib20.mo +0 -0
  792. data/vendor/local/share/locale/cs/LC_MESSAGES/gnutls.mo +0 -0
  793. data/vendor/local/share/locale/da/LC_MESSAGES/gettext-runtime.mo +0 -0
  794. data/vendor/local/share/locale/da/LC_MESSAGES/glib20.mo +0 -0
  795. data/vendor/local/share/locale/de/LC_MESSAGES/gettext-runtime.mo +0 -0
  796. data/vendor/local/share/locale/de/LC_MESSAGES/glib20.mo +0 -0
  797. data/vendor/local/share/locale/de/LC_MESSAGES/gnutls.mo +0 -0
  798. data/vendor/local/share/locale/el/LC_MESSAGES/gettext-runtime.mo +0 -0
  799. data/vendor/local/share/locale/el/LC_MESSAGES/glib20.mo +0 -0
  800. data/vendor/local/share/locale/en@boldquot/LC_MESSAGES/gettext-runtime.mo +0 -0
  801. data/vendor/local/share/locale/en@boldquot/LC_MESSAGES/gnutls.mo +0 -0
  802. data/vendor/local/share/locale/en@quot/LC_MESSAGES/gettext-runtime.mo +0 -0
  803. data/vendor/local/share/locale/en@quot/LC_MESSAGES/gnutls.mo +0 -0
  804. data/vendor/local/share/locale/eo/LC_MESSAGES/gettext-runtime.mo +0 -0
  805. data/vendor/local/share/locale/eo/LC_MESSAGES/gnutls.mo +0 -0
  806. data/vendor/local/share/locale/es/LC_MESSAGES/gettext-runtime.mo +0 -0
  807. data/vendor/local/share/locale/es/LC_MESSAGES/glib20.mo +0 -0
  808. data/vendor/local/share/locale/et/LC_MESSAGES/gettext-runtime.mo +0 -0
  809. data/vendor/local/share/locale/eu/LC_MESSAGES/glib20.mo +0 -0
  810. data/vendor/local/share/locale/fi/LC_MESSAGES/gettext-runtime.mo +0 -0
  811. data/vendor/local/share/locale/fi/LC_MESSAGES/glib20.mo +0 -0
  812. data/vendor/local/share/locale/fi/LC_MESSAGES/gnutls.mo +0 -0
  813. data/vendor/local/share/locale/fr/LC_MESSAGES/gettext-runtime.mo +0 -0
  814. data/vendor/local/share/locale/fr/LC_MESSAGES/glib20.mo +0 -0
  815. data/vendor/local/share/locale/fr/LC_MESSAGES/gnutls.mo +0 -0
  816. data/vendor/local/share/locale/ga/LC_MESSAGES/gettext-runtime.mo +0 -0
  817. data/vendor/local/share/locale/gd/LC_MESSAGES/glib20.mo +0 -0
  818. data/vendor/local/share/locale/gl/LC_MESSAGES/gettext-runtime.mo +0 -0
  819. data/vendor/local/share/locale/gl/LC_MESSAGES/glib20.mo +0 -0
  820. data/vendor/local/share/locale/he/LC_MESSAGES/glib20.mo +0 -0
  821. data/vendor/local/share/locale/hr/LC_MESSAGES/gettext-runtime.mo +0 -0
  822. data/vendor/local/share/locale/hu/LC_MESSAGES/gettext-runtime.mo +0 -0
  823. data/vendor/local/share/locale/hu/LC_MESSAGES/glib20.mo +0 -0
  824. data/vendor/local/share/locale/id/LC_MESSAGES/gettext-runtime.mo +0 -0
  825. data/vendor/local/share/locale/id/LC_MESSAGES/glib20.mo +0 -0
  826. data/vendor/local/share/locale/is/LC_MESSAGES/glib20.mo +0 -0
  827. data/vendor/local/share/locale/it/LC_MESSAGES/gettext-runtime.mo +0 -0
  828. data/vendor/local/share/locale/it/LC_MESSAGES/glib20.mo +0 -0
  829. data/vendor/local/share/locale/it/LC_MESSAGES/gnutls.mo +0 -0
  830. data/vendor/local/share/locale/ja/LC_MESSAGES/gettext-runtime.mo +0 -0
  831. data/vendor/local/share/locale/ja/LC_MESSAGES/glib-networking.mo +0 -0
  832. data/vendor/local/share/locale/kk/LC_MESSAGES/glib20.mo +0 -0
  833. data/vendor/local/share/locale/ko/LC_MESSAGES/gettext-runtime.mo +0 -0
  834. data/vendor/local/share/locale/ko/LC_MESSAGES/glib20.mo +0 -0
  835. data/vendor/local/share/locale/lt/LC_MESSAGES/glib20.mo +0 -0
  836. data/vendor/local/share/locale/lv/LC_MESSAGES/glib20.mo +0 -0
  837. data/vendor/local/share/locale/ms/LC_MESSAGES/gnutls.mo +0 -0
  838. data/vendor/local/share/locale/nb/LC_MESSAGES/gettext-runtime.mo +0 -0
  839. data/vendor/local/share/locale/nb/LC_MESSAGES/glib20.mo +0 -0
  840. data/vendor/local/share/locale/nl/LC_MESSAGES/gettext-runtime.mo +0 -0
  841. data/vendor/local/share/locale/nl/LC_MESSAGES/gnutls.mo +0 -0
  842. data/vendor/local/share/locale/nn/LC_MESSAGES/gettext-runtime.mo +0 -0
  843. data/vendor/local/share/locale/oc/LC_MESSAGES/glib-networking.mo +0 -0
  844. data/vendor/local/share/locale/oc/LC_MESSAGES/glib20.mo +0 -0
  845. data/vendor/local/share/locale/pl/LC_MESSAGES/gettext-runtime.mo +0 -0
  846. data/vendor/local/share/locale/pl/LC_MESSAGES/glib20.mo +0 -0
  847. data/vendor/local/share/locale/pl/LC_MESSAGES/gnutls.mo +0 -0
  848. data/vendor/local/share/locale/pt/LC_MESSAGES/gettext-runtime.mo +0 -0
  849. data/vendor/local/share/locale/pt/LC_MESSAGES/glib-networking.mo +0 -0
  850. data/vendor/local/share/locale/pt/LC_MESSAGES/glib20.mo +0 -0
  851. data/vendor/local/share/locale/pt_BR/LC_MESSAGES/gettext-runtime.mo +0 -0
  852. data/vendor/local/share/locale/pt_BR/LC_MESSAGES/glib20.mo +0 -0
  853. data/vendor/local/share/locale/ro/LC_MESSAGES/gettext-runtime.mo +0 -0
  854. data/vendor/local/share/locale/ru/LC_MESSAGES/gettext-runtime.mo +0 -0
  855. data/vendor/local/share/locale/ru/LC_MESSAGES/glib20.mo +0 -0
  856. data/vendor/local/share/locale/sk/LC_MESSAGES/gettext-runtime.mo +0 -0
  857. data/vendor/local/share/locale/sk/LC_MESSAGES/glib20.mo +0 -0
  858. data/vendor/local/share/locale/sl/LC_MESSAGES/gettext-runtime.mo +0 -0
  859. data/vendor/local/share/locale/sl/LC_MESSAGES/glib20.mo +0 -0
  860. data/vendor/local/share/locale/sr/LC_MESSAGES/gettext-runtime.mo +0 -0
  861. data/vendor/local/share/locale/sr/LC_MESSAGES/glib20.mo +0 -0
  862. data/vendor/local/share/locale/sr@latin/LC_MESSAGES/glib20.mo +0 -0
  863. data/vendor/local/share/locale/sv/LC_MESSAGES/gettext-runtime.mo +0 -0
  864. data/vendor/local/share/locale/sv/LC_MESSAGES/glib20.mo +0 -0
  865. data/vendor/local/share/locale/sv/LC_MESSAGES/gnutls.mo +0 -0
  866. data/vendor/local/share/locale/th/LC_MESSAGES/glib20.mo +0 -0
  867. data/vendor/local/share/locale/tr/LC_MESSAGES/gettext-runtime.mo +0 -0
  868. data/vendor/local/share/locale/tr/LC_MESSAGES/glib20.mo +0 -0
  869. data/vendor/local/share/locale/uk/LC_MESSAGES/gettext-runtime.mo +0 -0
  870. data/vendor/local/share/locale/uk/LC_MESSAGES/gnutls.mo +0 -0
  871. data/vendor/local/share/locale/vi/LC_MESSAGES/gettext-runtime.mo +0 -0
  872. data/vendor/local/share/locale/vi/LC_MESSAGES/glib20.mo +0 -0
  873. data/vendor/local/share/locale/vi/LC_MESSAGES/gnutls.mo +0 -0
  874. data/vendor/local/share/locale/zh_CN/LC_MESSAGES/gettext-runtime.mo +0 -0
  875. data/vendor/local/share/locale/zh_CN/LC_MESSAGES/glib20.mo +0 -0
  876. data/vendor/local/share/locale/zh_CN/LC_MESSAGES/gnutls.mo +0 -0
  877. data/vendor/local/share/locale/zh_HK/LC_MESSAGES/gettext-runtime.mo +0 -0
  878. data/vendor/local/share/locale/zh_TW/LC_MESSAGES/gettext-runtime.mo +0 -0
  879. data/vendor/local/share/locale/zh_TW/LC_MESSAGES/glib20.mo +0 -0
  880. data/vendor/local/share/man/man1/asn1Coding.1 +1 -1
  881. data/vendor/local/share/man/man1/asn1Decoding.1 +1 -1
  882. data/vendor/local/share/man/man1/asn1Parser.1 +1 -1
  883. data/vendor/local/share/man/man1/envsubst.1 +1 -1
  884. data/vendor/local/share/man/man1/gapplication.1 +2 -2
  885. data/vendor/local/share/man/man1/gdbus-codegen.1 +23 -22
  886. data/vendor/local/share/man/man1/gdbus.1 +2 -2
  887. data/vendor/local/share/man/man1/gettext.1 +1 -1
  888. data/vendor/local/share/man/man1/gio-querymodules.1 +2 -2
  889. data/vendor/local/share/man/man1/glib-compile-resources.1 +2 -2
  890. data/vendor/local/share/man/man1/glib-compile-schemas.1 +7 -2
  891. data/vendor/local/share/man/man1/glib-genmarshal.1 +2 -2
  892. data/vendor/local/share/man/man1/glib-gettextize.1 +2 -2
  893. data/vendor/local/share/man/man1/glib-mkenums.1 +2 -2
  894. data/vendor/local/share/man/man1/gobject-query.1 +2 -2
  895. data/vendor/local/share/man/man1/gresource.1 +2 -2
  896. data/vendor/local/share/man/man1/gsettings.1 +2 -2
  897. data/vendor/local/share/man/man1/gtester-report.1 +2 -2
  898. data/vendor/local/share/man/man1/gtester.1 +2 -2
  899. data/vendor/local/share/man/man1/ngettext.1 +1 -1
  900. data/vendor/local/share/man/man1/pcre-config.1 +92 -0
  901. data/vendor/local/share/man/man1/pcregrep.1 +683 -0
  902. data/vendor/local/share/man/man1/pcretest.1 +1156 -0
  903. data/vendor/local/share/man/man3/asn1_array2tree.3 +1 -1
  904. data/vendor/local/share/man/man3/asn1_bit_der.3 +1 -1
  905. data/vendor/local/share/man/man3/asn1_check_version.3 +1 -1
  906. data/vendor/local/share/man/man3/asn1_copy_node.3 +1 -1
  907. data/vendor/local/share/man/man3/asn1_create_element.3 +1 -1
  908. data/vendor/local/share/man/man3/asn1_decode_simple_ber.3 +1 -1
  909. data/vendor/local/share/man/man3/asn1_decode_simple_der.3 +1 -1
  910. data/vendor/local/share/man/man3/asn1_delete_element.3 +1 -1
  911. data/vendor/local/share/man/man3/asn1_delete_structure.3 +1 -1
  912. data/vendor/local/share/man/man3/asn1_delete_structure2.3 +1 -1
  913. data/vendor/local/share/man/man3/asn1_der_coding.3 +1 -1
  914. data/vendor/local/share/man/man3/asn1_der_decoding.3 +1 -1
  915. data/vendor/local/share/man/man3/asn1_der_decoding2.3 +1 -1
  916. data/vendor/local/share/man/man3/asn1_der_decoding_element.3 +1 -1
  917. data/vendor/local/share/man/man3/asn1_der_decoding_startEnd.3 +1 -1
  918. data/vendor/local/share/man/man3/asn1_dup_node.3 +1 -1
  919. data/vendor/local/share/man/man3/asn1_encode_simple_der.3 +1 -1
  920. data/vendor/local/share/man/man3/asn1_expand_any_defined_by.3 +1 -1
  921. data/vendor/local/share/man/man3/asn1_expand_octet_string.3 +1 -1
  922. data/vendor/local/share/man/man3/asn1_find_node.3 +1 -1
  923. data/vendor/local/share/man/man3/asn1_find_structure_from_oid.3 +1 -1
  924. data/vendor/local/share/man/man3/asn1_get_bit_der.3 +1 -1
  925. data/vendor/local/share/man/man3/asn1_get_length_ber.3 +1 -1
  926. data/vendor/local/share/man/man3/asn1_get_length_der.3 +1 -1
  927. data/vendor/local/share/man/man3/asn1_get_object_id_der.3 +1 -1
  928. data/vendor/local/share/man/man3/asn1_get_octet_der.3 +1 -1
  929. data/vendor/local/share/man/man3/asn1_get_tag_der.3 +1 -1
  930. data/vendor/local/share/man/man3/asn1_length_der.3 +1 -1
  931. data/vendor/local/share/man/man3/asn1_number_of_elements.3 +1 -1
  932. data/vendor/local/share/man/man3/asn1_octet_der.3 +1 -1
  933. data/vendor/local/share/man/man3/asn1_parser2array.3 +1 -1
  934. data/vendor/local/share/man/man3/asn1_parser2tree.3 +1 -1
  935. data/vendor/local/share/man/man3/asn1_perror.3 +1 -1
  936. data/vendor/local/share/man/man3/asn1_print_structure.3 +1 -1
  937. data/vendor/local/share/man/man3/asn1_read_node_value.3 +1 -1
  938. data/vendor/local/share/man/man3/asn1_read_tag.3 +1 -1
  939. data/vendor/local/share/man/man3/asn1_read_value.3 +1 -1
  940. data/vendor/local/share/man/man3/asn1_read_value_type.3 +1 -1
  941. data/vendor/local/share/man/man3/asn1_strerror.3 +1 -1
  942. data/vendor/local/share/man/man3/asn1_write_value.3 +1 -1
  943. data/vendor/local/share/man/man3/bind_textdomain_codeset.3 +1 -1
  944. data/vendor/local/share/man/man3/bindtextdomain.3 +1 -1
  945. data/vendor/local/share/man/man3/gettext.3 +1 -1
  946. data/vendor/local/share/man/man3/ngettext.3 +1 -1
  947. data/vendor/local/share/man/man3/pcre.3 +230 -0
  948. data/vendor/local/share/man/man3/pcre16.3 +371 -0
  949. data/vendor/local/share/man/man3/pcre16_assign_jit_stack.3 +59 -0
  950. data/vendor/local/share/man/man3/pcre16_compile.3 +96 -0
  951. data/vendor/local/share/man/man3/pcre16_compile2.3 +101 -0
  952. data/vendor/local/share/man/man3/pcre16_config.3 +79 -0
  953. data/vendor/local/share/man/man3/pcre16_copy_named_substring.3 +51 -0
  954. data/vendor/local/share/man/man3/pcre16_copy_substring.3 +47 -0
  955. data/vendor/local/share/man/man3/pcre16_dfa_exec.3 +118 -0
  956. data/vendor/local/share/man/man3/pcre16_exec.3 +99 -0
  957. data/vendor/local/share/man/man3/pcre16_free_study.3 +31 -0
  958. data/vendor/local/share/man/man3/pcre16_free_substring.3 +31 -0
  959. data/vendor/local/share/man/man3/pcre16_free_substring_list.3 +31 -0
  960. data/vendor/local/share/man/man3/pcre16_fullinfo.3 +103 -0
  961. data/vendor/local/share/man/man3/pcre16_get_named_substring.3 +54 -0
  962. data/vendor/local/share/man/man3/pcre16_get_stringnumber.3 +43 -0
  963. data/vendor/local/share/man/man3/pcre16_get_stringtable_entries.3 +46 -0
  964. data/vendor/local/share/man/man3/pcre16_get_substring.3 +50 -0
  965. data/vendor/local/share/man/man3/pcre16_get_substring_list.3 +47 -0
  966. data/vendor/local/share/man/man3/pcre16_jit_exec.3 +96 -0
  967. data/vendor/local/share/man/man3/pcre16_jit_stack_alloc.3 +43 -0
  968. data/vendor/local/share/man/man3/pcre16_jit_stack_free.3 +35 -0
  969. data/vendor/local/share/man/man3/pcre16_maketables.3 +33 -0
  970. data/vendor/local/share/man/man3/pcre16_pattern_to_host_byte_order.3 +44 -0
  971. data/vendor/local/share/man/man3/pcre16_refcount.3 +36 -0
  972. data/vendor/local/share/man/man3/pcre16_study.3 +54 -0
  973. data/vendor/local/share/man/man3/pcre16_utf16_to_host_byte_order.3 +45 -0
  974. data/vendor/local/share/man/man3/pcre16_version.3 +31 -0
  975. data/vendor/local/share/man/man3/pcre32.3 +369 -0
  976. data/vendor/local/share/man/man3/pcre32_assign_jit_stack.3 +59 -0
  977. data/vendor/local/share/man/man3/pcre32_compile.3 +96 -0
  978. data/vendor/local/share/man/man3/pcre32_compile2.3 +101 -0
  979. data/vendor/local/share/man/man3/pcre32_config.3 +79 -0
  980. data/vendor/local/share/man/man3/pcre32_copy_named_substring.3 +51 -0
  981. data/vendor/local/share/man/man3/pcre32_copy_substring.3 +47 -0
  982. data/vendor/local/share/man/man3/pcre32_dfa_exec.3 +118 -0
  983. data/vendor/local/share/man/man3/pcre32_exec.3 +99 -0
  984. data/vendor/local/share/man/man3/pcre32_free_study.3 +31 -0
  985. data/vendor/local/share/man/man3/pcre32_free_substring.3 +31 -0
  986. data/vendor/local/share/man/man3/pcre32_free_substring_list.3 +31 -0
  987. data/vendor/local/share/man/man3/pcre32_fullinfo.3 +103 -0
  988. data/vendor/local/share/man/man3/pcre32_get_named_substring.3 +54 -0
  989. data/vendor/local/share/man/man3/pcre32_get_stringnumber.3 +43 -0
  990. data/vendor/local/share/man/man3/pcre32_get_stringtable_entries.3 +46 -0
  991. data/vendor/local/share/man/man3/pcre32_get_substring.3 +50 -0
  992. data/vendor/local/share/man/man3/pcre32_get_substring_list.3 +47 -0
  993. data/vendor/local/share/man/man3/pcre32_jit_exec.3 +96 -0
  994. data/vendor/local/share/man/man3/pcre32_jit_stack_alloc.3 +43 -0
  995. data/vendor/local/share/man/man3/pcre32_jit_stack_free.3 +35 -0
  996. data/vendor/local/share/man/man3/pcre32_maketables.3 +33 -0
  997. data/vendor/local/share/man/man3/pcre32_pattern_to_host_byte_order.3 +44 -0
  998. data/vendor/local/share/man/man3/pcre32_refcount.3 +36 -0
  999. data/vendor/local/share/man/man3/pcre32_study.3 +54 -0
  1000. data/vendor/local/share/man/man3/pcre32_utf32_to_host_byte_order.3 +45 -0
  1001. data/vendor/local/share/man/man3/pcre32_version.3 +31 -0
  1002. data/vendor/local/share/man/man3/pcre_assign_jit_stack.3 +59 -0
  1003. data/vendor/local/share/man/man3/pcre_compile.3 +96 -0
  1004. data/vendor/local/share/man/man3/pcre_compile2.3 +101 -0
  1005. data/vendor/local/share/man/man3/pcre_config.3 +79 -0
  1006. data/vendor/local/share/man/man3/pcre_copy_named_substring.3 +51 -0
  1007. data/vendor/local/share/man/man3/pcre_copy_substring.3 +47 -0
  1008. data/vendor/local/share/man/man3/pcre_dfa_exec.3 +118 -0
  1009. data/vendor/local/share/man/man3/pcre_exec.3 +99 -0
  1010. data/vendor/local/share/man/man3/pcre_free_study.3 +31 -0
  1011. data/vendor/local/share/man/man3/pcre_free_substring.3 +31 -0
  1012. data/vendor/local/share/man/man3/pcre_free_substring_list.3 +31 -0
  1013. data/vendor/local/share/man/man3/pcre_fullinfo.3 +103 -0
  1014. data/vendor/local/share/man/man3/pcre_get_named_substring.3 +54 -0
  1015. data/vendor/local/share/man/man3/pcre_get_stringnumber.3 +43 -0
  1016. data/vendor/local/share/man/man3/pcre_get_stringtable_entries.3 +46 -0
  1017. data/vendor/local/share/man/man3/pcre_get_substring.3 +50 -0
  1018. data/vendor/local/share/man/man3/pcre_get_substring_list.3 +47 -0
  1019. data/vendor/local/share/man/man3/pcre_jit_exec.3 +96 -0
  1020. data/vendor/local/share/man/man3/pcre_jit_stack_alloc.3 +43 -0
  1021. data/vendor/local/share/man/man3/pcre_jit_stack_free.3 +35 -0
  1022. data/vendor/local/share/man/man3/pcre_maketables.3 +33 -0
  1023. data/vendor/local/share/man/man3/pcre_pattern_to_host_byte_order.3 +44 -0
  1024. data/vendor/local/share/man/man3/pcre_refcount.3 +36 -0
  1025. data/vendor/local/share/man/man3/pcre_study.3 +54 -0
  1026. data/vendor/local/share/man/man3/pcre_utf16_to_host_byte_order.3 +45 -0
  1027. data/vendor/local/share/man/man3/pcre_utf32_to_host_byte_order.3 +45 -0
  1028. data/vendor/local/share/man/man3/pcre_version.3 +31 -0
  1029. data/vendor/local/share/man/man3/pcreapi.3 +2919 -0
  1030. data/vendor/local/share/man/man3/pcrebuild.3 +550 -0
  1031. data/vendor/local/share/man/man3/pcrecallout.3 +255 -0
  1032. data/vendor/local/share/man/man3/pcrecompat.3 +200 -0
  1033. data/vendor/local/share/man/man3/pcrecpp.3 +348 -0
  1034. data/vendor/local/share/man/man3/pcredemo.3 +424 -0
  1035. data/vendor/local/share/man/man3/pcrejit.3 +431 -0
  1036. data/vendor/local/share/man/man3/pcrelimits.3 +71 -0
  1037. data/vendor/local/share/man/man3/pcrematching.3 +214 -0
  1038. data/vendor/local/share/man/man3/pcrepartial.3 +476 -0
  1039. data/vendor/local/share/man/man3/pcrepattern.3 +3301 -0
  1040. data/vendor/local/share/man/man3/pcreperform.3 +177 -0
  1041. data/vendor/local/share/man/man3/pcreposix.3 +267 -0
  1042. data/vendor/local/share/man/man3/pcreprecompile.3 +155 -0
  1043. data/vendor/local/share/man/man3/pcresample.3 +99 -0
  1044. data/vendor/local/share/man/man3/pcrestack.3 +215 -0
  1045. data/vendor/local/share/man/man3/pcresyntax.3 +540 -0
  1046. data/vendor/local/share/man/man3/pcreunicode.3 +249 -0
  1047. data/vendor/local/share/man/man3/textdomain.3 +1 -1
  1048. metadata +233 -22
  1049. data/lib/2.0/glib2.so +0 -0
  1050. data/lib/2.1/glib2.so +0 -0
  1051. data/vendor/local/bin/libhogweed-4-1.dll +0 -0
  1052. data/vendor/local/share/gtk-doc/html/gio/ExampleAnimal.html +0 -1537
  1053. data/vendor/local/share/gtk-doc/html/gio/ExampleCat.html +0 -679
  1054. data/vendor/local/share/gtk-doc/html/gio/ExampleObject.html +0 -572
  1055. data/vendor/local/share/gtk-doc/html/gio/ExampleObjectManagerClient.html +0 -561
  1056. data/vendor/local/share/gtk-doc/html/gio/ch31.html +0 -75
  1057. data/vendor/local/share/gtk-doc/html/gio/ch32s02.html +0 -41
  1058. data/vendor/local/share/gtk-doc/html/gio/ch32s03.html +0 -39
  1059. data/vendor/local/share/gtk-doc/html/gio/ch33s04.html +0 -46
  1060. data/vendor/local/share/gtk-doc/html/gio/ch33s05.html +0 -48
  1061. data/vendor/local/share/gtk-doc/html/gio/gdbus-org.gtk.GDBus.Example.ObjectManager.Animal.html +0 -161
  1062. data/vendor/local/share/gtk-doc/html/gio/gdbus-org.gtk.GDBus.Example.ObjectManager.Cat.html +0 -51
@@ -0,0 +1,3301 @@
1
+ .TH PCREPATTERN 3 "14 June 2015" "PCRE 8.38"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH "PCRE REGULAR EXPRESSION DETAILS"
5
+ .rs
6
+ .sp
7
+ The syntax and semantics of the regular expressions that are supported by PCRE
8
+ are described in detail below. There is a quick-reference syntax summary in the
9
+ .\" HREF
10
+ \fBpcresyntax\fP
11
+ .\"
12
+ page. PCRE tries to match Perl syntax and semantics as closely as it can. PCRE
13
+ also supports some alternative regular expression syntax (which does not
14
+ conflict with the Perl syntax) in order to provide some compatibility with
15
+ regular expressions in Python, .NET, and Oniguruma.
16
+ .P
17
+ Perl's regular expressions are described in its own documentation, and
18
+ regular expressions in general are covered in a number of books, some of which
19
+ have copious examples. Jeffrey Friedl's "Mastering Regular Expressions",
20
+ published by O'Reilly, covers regular expressions in great detail. This
21
+ description of PCRE's regular expressions is intended as reference material.
22
+ .P
23
+ This document discusses the patterns that are supported by PCRE when one of its
24
+ main matching functions, \fBpcre_exec()\fP (8-bit) or \fBpcre[16|32]_exec()\fP
25
+ (16- or 32-bit), is used. PCRE also has alternative matching functions,
26
+ \fBpcre_dfa_exec()\fP and \fBpcre[16|32]_dfa_exec()\fP, which match using a
27
+ different algorithm that is not Perl-compatible. Some of the features discussed
28
+ below are not available when DFA matching is used. The advantages and
29
+ disadvantages of the alternative functions, and how they differ from the normal
30
+ functions, are discussed in the
31
+ .\" HREF
32
+ \fBpcrematching\fP
33
+ .\"
34
+ page.
35
+ .
36
+ .
37
+ .SH "SPECIAL START-OF-PATTERN ITEMS"
38
+ .rs
39
+ .sp
40
+ A number of options that can be passed to \fBpcre_compile()\fP can also be set
41
+ by special items at the start of a pattern. These are not Perl-compatible, but
42
+ are provided to make these options accessible to pattern writers who are not
43
+ able to change the program that processes the pattern. Any number of these
44
+ items may appear, but they must all be together right at the start of the
45
+ pattern string, and the letters must be in upper case.
46
+ .
47
+ .
48
+ .SS "UTF support"
49
+ .rs
50
+ .sp
51
+ The original operation of PCRE was on strings of one-byte characters. However,
52
+ there is now also support for UTF-8 strings in the original library, an
53
+ extra library that supports 16-bit and UTF-16 character strings, and a
54
+ third library that supports 32-bit and UTF-32 character strings. To use these
55
+ features, PCRE must be built to include appropriate support. When using UTF
56
+ strings you must either call the compiling function with the PCRE_UTF8,
57
+ PCRE_UTF16, or PCRE_UTF32 option, or the pattern must start with one of
58
+ these special sequences:
59
+ .sp
60
+ (*UTF8)
61
+ (*UTF16)
62
+ (*UTF32)
63
+ (*UTF)
64
+ .sp
65
+ (*UTF) is a generic sequence that can be used with any of the libraries.
66
+ Starting a pattern with such a sequence is equivalent to setting the relevant
67
+ option. How setting a UTF mode affects pattern matching is mentioned in several
68
+ places below. There is also a summary of features in the
69
+ .\" HREF
70
+ \fBpcreunicode\fP
71
+ .\"
72
+ page.
73
+ .P
74
+ Some applications that allow their users to supply patterns may wish to
75
+ restrict them to non-UTF data for security reasons. If the PCRE_NEVER_UTF
76
+ option is set at compile time, (*UTF) etc. are not allowed, and their
77
+ appearance causes an error.
78
+ .
79
+ .
80
+ .SS "Unicode property support"
81
+ .rs
82
+ .sp
83
+ Another special sequence that may appear at the start of a pattern is (*UCP).
84
+ This has the same effect as setting the PCRE_UCP option: it causes sequences
85
+ such as \ed and \ew to use Unicode properties to determine character types,
86
+ instead of recognizing only characters with codes less than 128 via a lookup
87
+ table.
88
+ .
89
+ .
90
+ .SS "Disabling auto-possessification"
91
+ .rs
92
+ .sp
93
+ If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting
94
+ the PCRE_NO_AUTO_POSSESS option at compile time. This stops PCRE from making
95
+ quantifiers possessive when what follows cannot match the repeated item. For
96
+ example, by default a+b is treated as a++b. For more details, see the
97
+ .\" HREF
98
+ \fBpcreapi\fP
99
+ .\"
100
+ documentation.
101
+ .
102
+ .
103
+ .SS "Disabling start-up optimizations"
104
+ .rs
105
+ .sp
106
+ If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
107
+ PCRE_NO_START_OPTIMIZE option either at compile or matching time. This disables
108
+ several optimizations for quickly reaching "no match" results. For more
109
+ details, see the
110
+ .\" HREF
111
+ \fBpcreapi\fP
112
+ .\"
113
+ documentation.
114
+ .
115
+ .
116
+ .\" HTML <a name="newlines"></a>
117
+ .SS "Newline conventions"
118
+ .rs
119
+ .sp
120
+ PCRE supports five different conventions for indicating line breaks in
121
+ strings: a single CR (carriage return) character, a single LF (linefeed)
122
+ character, the two-character sequence CRLF, any of the three preceding, or any
123
+ Unicode newline sequence. The
124
+ .\" HREF
125
+ \fBpcreapi\fP
126
+ .\"
127
+ page has
128
+ .\" HTML <a href="pcreapi.html#newlines">
129
+ .\" </a>
130
+ further discussion
131
+ .\"
132
+ about newlines, and shows how to set the newline convention in the
133
+ \fIoptions\fP arguments for the compiling and matching functions.
134
+ .P
135
+ It is also possible to specify a newline convention by starting a pattern
136
+ string with one of the following five sequences:
137
+ .sp
138
+ (*CR) carriage return
139
+ (*LF) linefeed
140
+ (*CRLF) carriage return, followed by linefeed
141
+ (*ANYCRLF) any of the three above
142
+ (*ANY) all Unicode newline sequences
143
+ .sp
144
+ These override the default and the options given to the compiling function. For
145
+ example, on a Unix system where LF is the default newline sequence, the pattern
146
+ .sp
147
+ (*CR)a.b
148
+ .sp
149
+ changes the convention to CR. That pattern matches "a\enb" because LF is no
150
+ longer a newline. If more than one of these settings is present, the last one
151
+ is used.
152
+ .P
153
+ The newline convention affects where the circumflex and dollar assertions are
154
+ true. It also affects the interpretation of the dot metacharacter when
155
+ PCRE_DOTALL is not set, and the behaviour of \eN. However, it does not affect
156
+ what the \eR escape sequence matches. By default, this is any Unicode newline
157
+ sequence, for Perl compatibility. However, this can be changed; see the
158
+ description of \eR in the section entitled
159
+ .\" HTML <a href="#newlineseq">
160
+ .\" </a>
161
+ "Newline sequences"
162
+ .\"
163
+ below. A change of \eR setting can be combined with a change of newline
164
+ convention.
165
+ .
166
+ .
167
+ .SS "Setting match and recursion limits"
168
+ .rs
169
+ .sp
170
+ The caller of \fBpcre_exec()\fP can set a limit on the number of times the
171
+ internal \fBmatch()\fP function is called and on the maximum depth of
172
+ recursive calls. These facilities are provided to catch runaway matches that
173
+ are provoked by patterns with huge matching trees (a typical example is a
174
+ pattern with nested unlimited repeats) and to avoid running out of system stack
175
+ by too much recursion. When one of these limits is reached, \fBpcre_exec()\fP
176
+ gives an error return. The limits can also be set by items at the start of the
177
+ pattern of the form
178
+ .sp
179
+ (*LIMIT_MATCH=d)
180
+ (*LIMIT_RECURSION=d)
181
+ .sp
182
+ where d is any number of decimal digits. However, the value of the setting must
183
+ be less than the value set (or defaulted) by the caller of \fBpcre_exec()\fP
184
+ for it to have any effect. In other words, the pattern writer can lower the
185
+ limits set by the programmer, but not raise them. If there is more than one
186
+ setting of one of these limits, the lower value is used.
187
+ .
188
+ .
189
+ .SH "EBCDIC CHARACTER CODES"
190
+ .rs
191
+ .sp
192
+ PCRE can be compiled to run in an environment that uses EBCDIC as its character
193
+ code rather than ASCII or Unicode (typically a mainframe system). In the
194
+ sections below, character code values are ASCII or Unicode; in an EBCDIC
195
+ environment these characters may have different code values, and there are no
196
+ code points greater than 255.
197
+ .
198
+ .
199
+ .SH "CHARACTERS AND METACHARACTERS"
200
+ .rs
201
+ .sp
202
+ A regular expression is a pattern that is matched against a subject string from
203
+ left to right. Most characters stand for themselves in a pattern, and match the
204
+ corresponding characters in the subject. As a trivial example, the pattern
205
+ .sp
206
+ The quick brown fox
207
+ .sp
208
+ matches a portion of a subject string that is identical to itself. When
209
+ caseless matching is specified (the PCRE_CASELESS option), letters are matched
210
+ independently of case. In a UTF mode, PCRE always understands the concept of
211
+ case for characters whose values are less than 128, so caseless matching is
212
+ always possible. For characters with higher values, the concept of case is
213
+ supported if PCRE is compiled with Unicode property support, but not otherwise.
214
+ If you want to use caseless matching for characters 128 and above, you must
215
+ ensure that PCRE is compiled with Unicode property support as well as with
216
+ UTF support.
217
+ .P
218
+ The power of regular expressions comes from the ability to include alternatives
219
+ and repetitions in the pattern. These are encoded in the pattern by the use of
220
+ \fImetacharacters\fP, which do not stand for themselves but instead are
221
+ interpreted in some special way.
222
+ .P
223
+ There are two different sets of metacharacters: those that are recognized
224
+ anywhere in the pattern except within square brackets, and those that are
225
+ recognized within square brackets. Outside square brackets, the metacharacters
226
+ are as follows:
227
+ .sp
228
+ \e general escape character with several uses
229
+ ^ assert start of string (or line, in multiline mode)
230
+ $ assert end of string (or line, in multiline mode)
231
+ . match any character except newline (by default)
232
+ [ start character class definition
233
+ | start of alternative branch
234
+ ( start subpattern
235
+ ) end subpattern
236
+ ? extends the meaning of (
237
+ also 0 or 1 quantifier
238
+ also quantifier minimizer
239
+ * 0 or more quantifier
240
+ + 1 or more quantifier
241
+ also "possessive quantifier"
242
+ { start min/max quantifier
243
+ .sp
244
+ Part of a pattern that is in square brackets is called a "character class". In
245
+ a character class the only metacharacters are:
246
+ .sp
247
+ \e general escape character
248
+ ^ negate the class, but only if the first character
249
+ - indicates character range
250
+ .\" JOIN
251
+ [ POSIX character class (only if followed by POSIX
252
+ syntax)
253
+ ] terminates the character class
254
+ .sp
255
+ The following sections describe the use of each of the metacharacters.
256
+ .
257
+ .
258
+ .SH BACKSLASH
259
+ .rs
260
+ .sp
261
+ The backslash character has several uses. Firstly, if it is followed by a
262
+ character that is not a number or a letter, it takes away any special meaning
263
+ that character may have. This use of backslash as an escape character applies
264
+ both inside and outside character classes.
265
+ .P
266
+ For example, if you want to match a * character, you write \e* in the pattern.
267
+ This escaping action applies whether or not the following character would
268
+ otherwise be interpreted as a metacharacter, so it is always safe to precede a
269
+ non-alphanumeric with backslash to specify that it stands for itself. In
270
+ particular, if you want to match a backslash, you write \e\e.
271
+ .P
272
+ In a UTF mode, only ASCII numbers and letters have any special meaning after a
273
+ backslash. All other characters (in particular, those whose codepoints are
274
+ greater than 127) are treated as literals.
275
+ .P
276
+ If a pattern is compiled with the PCRE_EXTENDED option, most white space in the
277
+ pattern (other than in a character class), and characters between a # outside a
278
+ character class and the next newline, inclusive, are ignored. An escaping
279
+ backslash can be used to include a white space or # character as part of the
280
+ pattern.
281
+ .P
282
+ If you want to remove the special meaning from a sequence of characters, you
283
+ can do so by putting them between \eQ and \eE. This is different from Perl in
284
+ that $ and @ are handled as literals in \eQ...\eE sequences in PCRE, whereas in
285
+ Perl, $ and @ cause variable interpolation. Note the following examples:
286
+ .sp
287
+ Pattern PCRE matches Perl matches
288
+ .sp
289
+ .\" JOIN
290
+ \eQabc$xyz\eE abc$xyz abc followed by the
291
+ contents of $xyz
292
+ \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz
293
+ \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz
294
+ .sp
295
+ The \eQ...\eE sequence is recognized both inside and outside character classes.
296
+ An isolated \eE that is not preceded by \eQ is ignored. If \eQ is not followed
297
+ by \eE later in the pattern, the literal interpretation continues to the end of
298
+ the pattern (that is, \eE is assumed at the end). If the isolated \eQ is inside
299
+ a character class, this causes an error, because the character class is not
300
+ terminated.
301
+ .
302
+ .
303
+ .\" HTML <a name="digitsafterbackslash"></a>
304
+ .SS "Non-printing characters"
305
+ .rs
306
+ .sp
307
+ A second use of backslash provides a way of encoding non-printing characters
308
+ in patterns in a visible manner. There is no restriction on the appearance of
309
+ non-printing characters, apart from the binary zero that terminates a pattern,
310
+ but when a pattern is being prepared by text editing, it is often easier to use
311
+ one of the following escape sequences than the binary character it represents.
312
+ In an ASCII or Unicode environment, these escapes are as follows:
313
+ .sp
314
+ \ea alarm, that is, the BEL character (hex 07)
315
+ \ecx "control-x", where x is any ASCII character
316
+ \ee escape (hex 1B)
317
+ \ef form feed (hex 0C)
318
+ \en linefeed (hex 0A)
319
+ \er carriage return (hex 0D)
320
+ \et tab (hex 09)
321
+ \e0dd character with octal code 0dd
322
+ \eddd character with octal code ddd, or back reference
323
+ \eo{ddd..} character with octal code ddd..
324
+ \exhh character with hex code hh
325
+ \ex{hhh..} character with hex code hhh.. (non-JavaScript mode)
326
+ \euhhhh character with hex code hhhh (JavaScript mode only)
327
+ .sp
328
+ The precise effect of \ecx on ASCII characters is as follows: if x is a lower
329
+ case letter, it is converted to upper case. Then bit 6 of the character (hex
330
+ 40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is 5A),
331
+ but \ec{ becomes hex 3B ({ is 7B), and \ec; becomes hex 7B (; is 3B). If the
332
+ data item (byte or 16-bit value) following \ec has a value greater than 127, a
333
+ compile-time error occurs. This locks out non-ASCII characters in all modes.
334
+ .P
335
+ When PCRE is compiled in EBCDIC mode, \ea, \ee, \ef, \en, \er, and \et
336
+ generate the appropriate EBCDIC code values. The \ec escape is processed
337
+ as specified for Perl in the \fBperlebcdic\fP document. The only characters
338
+ that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], ^, _, or ?. Any
339
+ other character provokes a compile-time error. The sequence \ec@ encodes
340
+ character code 0; the letters (in either case) encode characters 1-26 (hex 01
341
+ to hex 1A); [, \e, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and
342
+ \ec? becomes either 255 (hex FF) or 95 (hex 5F).
343
+ .P
344
+ Thus, apart from \ec?, these escapes generate the same character code values as
345
+ they do in an ASCII environment, though the meanings of the values mostly
346
+ differ. For example, \ecG always generates code value 7, which is BEL in ASCII
347
+ but DEL in EBCDIC.
348
+ .P
349
+ The sequence \ec? generates DEL (127, hex 7F) in an ASCII environment, but
350
+ because 127 is not a control character in EBCDIC, Perl makes it generate the
351
+ APC character. Unfortunately, there are several variants of EBCDIC. In most of
352
+ them the APC character has the value 255 (hex FF), but in the one Perl calls
353
+ POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC
354
+ values, PCRE makes \ec? generate 95; otherwise it generates 255.
355
+ .P
356
+ After \e0 up to two further octal digits are read. If there are fewer than two
357
+ digits, just those that are present are used. Thus the sequence \e0\ex\e015
358
+ specifies two binary zeros followed by a CR character (code value 13). Make
359
+ sure you supply two digits after the initial zero if the pattern character that
360
+ follows is itself an octal digit.
361
+ .P
362
+ The escape \eo must be followed by a sequence of octal digits, enclosed in
363
+ braces. An error occurs if this is not the case. This escape is a recent
364
+ addition to Perl; it provides a way of specifying character code points as octal
365
+ numbers greater than 0777, and it also allows octal numbers and back references
366
+ to be unambiguously specified.
367
+ .P
368
+ For greater clarity and unambiguity, it is best to avoid following \e by a
369
+ digit greater than zero. Instead, use \eo{} or \ex{} to specify character
370
+ numbers, and \eg{} to specify back references. The following paragraphs
371
+ describe the old, ambiguous syntax.
372
+ .P
373
+ The handling of a backslash followed by a digit other than 0 is complicated,
374
+ and Perl has changed in recent releases, causing PCRE also to change. Outside a
375
+ character class, PCRE reads the digit and any following digits as a decimal
376
+ number. If the number is less than 8, or if there have been at least that many
377
+ previous capturing left parentheses in the expression, the entire sequence is
378
+ taken as a \fIback reference\fP. A description of how this works is given
379
+ .\" HTML <a href="#backreferences">
380
+ .\" </a>
381
+ later,
382
+ .\"
383
+ following the discussion of
384
+ .\" HTML <a href="#subpattern">
385
+ .\" </a>
386
+ parenthesized subpatterns.
387
+ .\"
388
+ .P
389
+ Inside a character class, or if the decimal number following \e is greater than
390
+ 7 and there have not been that many capturing subpatterns, PCRE handles \e8 and
391
+ \e9 as the literal characters "8" and "9", and otherwise re-reads up to three
392
+ octal digits following the backslash, using them to generate a data character.
393
+ Any subsequent digits stand for themselves. For example:
394
+ .sp
395
+ \e040 is another way of writing an ASCII space
396
+ .\" JOIN
397
+ \e40 is the same, provided there are fewer than 40
398
+ previous capturing subpatterns
399
+ \e7 is always a back reference
400
+ .\" JOIN
401
+ \e11 might be a back reference, or another way of
402
+ writing a tab
403
+ \e011 is always a tab
404
+ \e0113 is a tab followed by the character "3"
405
+ .\" JOIN
406
+ \e113 might be a back reference, otherwise the
407
+ character with octal code 113
408
+ .\" JOIN
409
+ \e377 might be a back reference, otherwise
410
+ the value 255 (decimal)
411
+ .\" JOIN
412
+ \e81 is either a back reference, or the two
413
+ characters "8" and "1"
414
+ .sp
415
+ Note that octal values of 100 or greater that are specified using this syntax
416
+ must not be introduced by a leading zero, because no more than three octal
417
+ digits are ever read.
418
+ .P
419
+ By default, after \ex that is not followed by {, from zero to two hexadecimal
420
+ digits are read (letters can be in upper or lower case). Any number of
421
+ hexadecimal digits may appear between \ex{ and }. If a character other than
422
+ a hexadecimal digit appears between \ex{ and }, or if there is no terminating
423
+ }, an error occurs.
424
+ .P
425
+ If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \ex is
426
+ as just described only when it is followed by two hexadecimal digits.
427
+ Otherwise, it matches a literal "x" character. In JavaScript mode, support for
428
+ code points greater than 256 is provided by \eu, which must be followed by
429
+ four hexadecimal digits; otherwise it matches a literal "u" character.
430
+ .P
431
+ Characters whose value is less than 256 can be defined by either of the two
432
+ syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
433
+ way they are handled. For example, \exdc is exactly the same as \ex{dc} (or
434
+ \eu00dc in JavaScript mode).
435
+ .
436
+ .
437
+ .SS "Constraints on character values"
438
+ .rs
439
+ .sp
440
+ Characters that are specified using octal or hexadecimal numbers are
441
+ limited to certain values, as follows:
442
+ .sp
443
+ 8-bit non-UTF mode less than 0x100
444
+ 8-bit UTF-8 mode no greater than 0x10ffff and a valid codepoint
445
+ 16-bit non-UTF mode less than 0x10000
446
+ 16-bit UTF-16 mode no greater than 0x10ffff and a valid codepoint
447
+ 32-bit non-UTF mode less than 0x100000000
448
+ 32-bit UTF-32 mode no greater than 0x10ffff and a valid codepoint
449
+ .sp
450
+ Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
451
+ "surrogate" codepoints), and 0xffef.
452
+ .
453
+ .
454
+ .SS "Escape sequences in character classes"
455
+ .rs
456
+ .sp
457
+ All the sequences that define a single character value can be used both inside
458
+ and outside character classes. In addition, inside a character class, \eb is
459
+ interpreted as the backspace character (hex 08).
460
+ .P
461
+ \eN is not allowed in a character class. \eB, \eR, and \eX are not special
462
+ inside a character class. Like other unrecognized escape sequences, they are
463
+ treated as the literal characters "B", "R", and "X" by default, but cause an
464
+ error if the PCRE_EXTRA option is set. Outside a character class, these
465
+ sequences have different meanings.
466
+ .
467
+ .
468
+ .SS "Unsupported escape sequences"
469
+ .rs
470
+ .sp
471
+ In Perl, the sequences \el, \eL, \eu, and \eU are recognized by its string
472
+ handler and used to modify the case of following characters. By default, PCRE
473
+ does not support these escape sequences. However, if the PCRE_JAVASCRIPT_COMPAT
474
+ option is set, \eU matches a "U" character, and \eu can be used to define a
475
+ character by code point, as described in the previous section.
476
+ .
477
+ .
478
+ .SS "Absolute and relative back references"
479
+ .rs
480
+ .sp
481
+ The sequence \eg followed by an unsigned or a negative number, optionally
482
+ enclosed in braces, is an absolute or relative back reference. A named back
483
+ reference can be coded as \eg{name}. Back references are discussed
484
+ .\" HTML <a href="#backreferences">
485
+ .\" </a>
486
+ later,
487
+ .\"
488
+ following the discussion of
489
+ .\" HTML <a href="#subpattern">
490
+ .\" </a>
491
+ parenthesized subpatterns.
492
+ .\"
493
+ .
494
+ .
495
+ .SS "Absolute and relative subroutine calls"
496
+ .rs
497
+ .sp
498
+ For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or
499
+ a number enclosed either in angle brackets or single quotes, is an alternative
500
+ syntax for referencing a subpattern as a "subroutine". Details are discussed
501
+ .\" HTML <a href="#onigurumasubroutines">
502
+ .\" </a>
503
+ later.
504
+ .\"
505
+ Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
506
+ synonymous. The former is a back reference; the latter is a
507
+ .\" HTML <a href="#subpatternsassubroutines">
508
+ .\" </a>
509
+ subroutine
510
+ .\"
511
+ call.
512
+ .
513
+ .
514
+ .\" HTML <a name="genericchartypes"></a>
515
+ .SS "Generic character types"
516
+ .rs
517
+ .sp
518
+ Another use of backslash is for specifying generic character types:
519
+ .sp
520
+ \ed any decimal digit
521
+ \eD any character that is not a decimal digit
522
+ \eh any horizontal white space character
523
+ \eH any character that is not a horizontal white space character
524
+ \es any white space character
525
+ \eS any character that is not a white space character
526
+ \ev any vertical white space character
527
+ \eV any character that is not a vertical white space character
528
+ \ew any "word" character
529
+ \eW any "non-word" character
530
+ .sp
531
+ There is also the single sequence \eN, which matches a non-newline character.
532
+ This is the same as
533
+ .\" HTML <a href="#fullstopdot">
534
+ .\" </a>
535
+ the "." metacharacter
536
+ .\"
537
+ when PCRE_DOTALL is not set. Perl also uses \eN to match characters by name;
538
+ PCRE does not support this.
539
+ .P
540
+ Each pair of lower and upper case escape sequences partitions the complete set
541
+ of characters into two disjoint sets. Any given character matches one, and only
542
+ one, of each pair. The sequences can appear both inside and outside character
543
+ classes. They each match one character of the appropriate type. If the current
544
+ matching point is at the end of the subject string, all of them fail, because
545
+ there is no character to match.
546
+ .P
547
+ For compatibility with Perl, \es did not use to match the VT character (code
548
+ 11), which made it different from the POSIX "space" class. However, Perl
549
+ added VT at release 5.18, and PCRE followed suit at release 8.34. The default
550
+ \es characters are now HT (9), LF (10), VT (11), FF (12), CR (13), and space
551
+ (32), which are defined as white space in the "C" locale. This list may vary if
552
+ locale-specific matching is taking place. For example, in some locales the
553
+ "non-breaking space" character (\exA0) is recognized as white space, and in
554
+ others the VT character is not.
555
+ .P
556
+ A "word" character is an underscore or any character that is a letter or digit.
557
+ By default, the definition of letters and digits is controlled by PCRE's
558
+ low-valued character tables, and may vary if locale-specific matching is taking
559
+ place (see
560
+ .\" HTML <a href="pcreapi.html#localesupport">
561
+ .\" </a>
562
+ "Locale support"
563
+ .\"
564
+ in the
565
+ .\" HREF
566
+ \fBpcreapi\fP
567
+ .\"
568
+ page). For example, in a French locale such as "fr_FR" in Unix-like systems,
569
+ or "french" in Windows, some character codes greater than 127 are used for
570
+ accented letters, and these are then matched by \ew. The use of locales with
571
+ Unicode is discouraged.
572
+ .P
573
+ By default, characters whose code points are greater than 127 never match \ed,
574
+ \es, or \ew, and always match \eD, \eS, and \eW, although this may vary for
575
+ characters in the range 128-255 when locale-specific matching is happening.
576
+ These escape sequences retain their original meanings from before Unicode
577
+ support was available, mainly for efficiency reasons. If PCRE is compiled with
578
+ Unicode property support, and the PCRE_UCP option is set, the behaviour is
579
+ changed so that Unicode properties are used to determine character types, as
580
+ follows:
581
+ .sp
582
+ \ed any character that matches \ep{Nd} (decimal digit)
583
+ \es any character that matches \ep{Z} or \eh or \ev
584
+ \ew any character that matches \ep{L} or \ep{N}, plus underscore
585
+ .sp
586
+ The upper case escapes match the inverse sets of characters. Note that \ed
587
+ matches only decimal digits, whereas \ew matches any Unicode digit, as well as
588
+ any Unicode letter, and underscore. Note also that PCRE_UCP affects \eb and
589
+ \eB because they are defined in terms of \ew and \eW. Matching these sequences
590
+ is noticeably slower when PCRE_UCP is set.
591
+ .P
592
+ The sequences \eh, \eH, \ev, and \eV are features that were added to Perl at
593
+ release 5.10. In contrast to the other sequences, which match only ASCII
594
+ characters by default, these always match certain high-valued code points,
595
+ whether or not PCRE_UCP is set. The horizontal space characters are:
596
+ .sp
597
+ U+0009 Horizontal tab (HT)
598
+ U+0020 Space
599
+ U+00A0 Non-break space
600
+ U+1680 Ogham space mark
601
+ U+180E Mongolian vowel separator
602
+ U+2000 En quad
603
+ U+2001 Em quad
604
+ U+2002 En space
605
+ U+2003 Em space
606
+ U+2004 Three-per-em space
607
+ U+2005 Four-per-em space
608
+ U+2006 Six-per-em space
609
+ U+2007 Figure space
610
+ U+2008 Punctuation space
611
+ U+2009 Thin space
612
+ U+200A Hair space
613
+ U+202F Narrow no-break space
614
+ U+205F Medium mathematical space
615
+ U+3000 Ideographic space
616
+ .sp
617
+ The vertical space characters are:
618
+ .sp
619
+ U+000A Linefeed (LF)
620
+ U+000B Vertical tab (VT)
621
+ U+000C Form feed (FF)
622
+ U+000D Carriage return (CR)
623
+ U+0085 Next line (NEL)
624
+ U+2028 Line separator
625
+ U+2029 Paragraph separator
626
+ .sp
627
+ In 8-bit, non-UTF-8 mode, only the characters with codepoints less than 256 are
628
+ relevant.
629
+ .
630
+ .
631
+ .\" HTML <a name="newlineseq"></a>
632
+ .SS "Newline sequences"
633
+ .rs
634
+ .sp
635
+ Outside a character class, by default, the escape sequence \eR matches any
636
+ Unicode newline sequence. In 8-bit non-UTF-8 mode \eR is equivalent to the
637
+ following:
638
+ .sp
639
+ (?>\er\en|\en|\ex0b|\ef|\er|\ex85)
640
+ .sp
641
+ This is an example of an "atomic group", details of which are given
642
+ .\" HTML <a href="#atomicgroup">
643
+ .\" </a>
644
+ below.
645
+ .\"
646
+ This particular group matches either the two-character sequence CR followed by
647
+ LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab,
648
+ U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next
649
+ line, U+0085). The two-character sequence is treated as a single unit that
650
+ cannot be split.
651
+ .P
652
+ In other modes, two additional characters whose codepoints are greater than 255
653
+ are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029).
654
+ Unicode character property support is not needed for these characters to be
655
+ recognized.
656
+ .P
657
+ It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the
658
+ complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
659
+ either at compile time or when the pattern is matched. (BSR is an abbreviation
660
+ for "backslash R".) This can be made the default when PCRE is built; if this is
661
+ the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
662
+ It is also possible to specify these settings by starting a pattern string with
663
+ one of the following sequences:
664
+ .sp
665
+ (*BSR_ANYCRLF) CR, LF, or CRLF only
666
+ (*BSR_UNICODE) any Unicode newline sequence
667
+ .sp
668
+ These override the default and the options given to the compiling function, but
669
+ they can themselves be overridden by options given to a matching function. Note
670
+ that these special settings, which are not Perl-compatible, are recognized only
671
+ at the very start of a pattern, and that they must be in upper case. If more
672
+ than one of them is present, the last one is used. They can be combined with a
673
+ change of newline convention; for example, a pattern can start with:
674
+ .sp
675
+ (*ANY)(*BSR_ANYCRLF)
676
+ .sp
677
+ They can also be combined with the (*UTF8), (*UTF16), (*UTF32), (*UTF) or
678
+ (*UCP) special sequences. Inside a character class, \eR is treated as an
679
+ unrecognized escape sequence, and so matches the letter "R" by default, but
680
+ causes an error if PCRE_EXTRA is set.
681
+ .
682
+ .
683
+ .\" HTML <a name="uniextseq"></a>
684
+ .SS Unicode character properties
685
+ .rs
686
+ .sp
687
+ When PCRE is built with Unicode character property support, three additional
688
+ escape sequences that match characters with specific properties are available.
689
+ When in 8-bit non-UTF-8 mode, these sequences are of course limited to testing
690
+ characters whose codepoints are less than 256, but they do work in this mode.
691
+ The extra escape sequences are:
692
+ .sp
693
+ \ep{\fIxx\fP} a character with the \fIxx\fP property
694
+ \eP{\fIxx\fP} a character without the \fIxx\fP property
695
+ \eX a Unicode extended grapheme cluster
696
+ .sp
697
+ The property names represented by \fIxx\fP above are limited to the Unicode
698
+ script names, the general category properties, "Any", which matches any
699
+ character (including newline), and some special PCRE properties (described
700
+ in the
701
+ .\" HTML <a href="#extraprops">
702
+ .\" </a>
703
+ next section).
704
+ .\"
705
+ Other Perl properties such as "InMusicalSymbols" are not currently supported by
706
+ PCRE. Note that \eP{Any} does not match any characters, so always causes a
707
+ match failure.
708
+ .P
709
+ Sets of Unicode characters are defined as belonging to certain scripts. A
710
+ character from one of these sets can be matched using a script name. For
711
+ example:
712
+ .sp
713
+ \ep{Greek}
714
+ \eP{Han}
715
+ .sp
716
+ Those that are not part of an identified script are lumped together as
717
+ "Common". The current list of scripts is:
718
+ .P
719
+ Arabic,
720
+ Armenian,
721
+ Avestan,
722
+ Balinese,
723
+ Bamum,
724
+ Bassa_Vah,
725
+ Batak,
726
+ Bengali,
727
+ Bopomofo,
728
+ Brahmi,
729
+ Braille,
730
+ Buginese,
731
+ Buhid,
732
+ Canadian_Aboriginal,
733
+ Carian,
734
+ Caucasian_Albanian,
735
+ Chakma,
736
+ Cham,
737
+ Cherokee,
738
+ Common,
739
+ Coptic,
740
+ Cuneiform,
741
+ Cypriot,
742
+ Cyrillic,
743
+ Deseret,
744
+ Devanagari,
745
+ Duployan,
746
+ Egyptian_Hieroglyphs,
747
+ Elbasan,
748
+ Ethiopic,
749
+ Georgian,
750
+ Glagolitic,
751
+ Gothic,
752
+ Grantha,
753
+ Greek,
754
+ Gujarati,
755
+ Gurmukhi,
756
+ Han,
757
+ Hangul,
758
+ Hanunoo,
759
+ Hebrew,
760
+ Hiragana,
761
+ Imperial_Aramaic,
762
+ Inherited,
763
+ Inscriptional_Pahlavi,
764
+ Inscriptional_Parthian,
765
+ Javanese,
766
+ Kaithi,
767
+ Kannada,
768
+ Katakana,
769
+ Kayah_Li,
770
+ Kharoshthi,
771
+ Khmer,
772
+ Khojki,
773
+ Khudawadi,
774
+ Lao,
775
+ Latin,
776
+ Lepcha,
777
+ Limbu,
778
+ Linear_A,
779
+ Linear_B,
780
+ Lisu,
781
+ Lycian,
782
+ Lydian,
783
+ Mahajani,
784
+ Malayalam,
785
+ Mandaic,
786
+ Manichaean,
787
+ Meetei_Mayek,
788
+ Mende_Kikakui,
789
+ Meroitic_Cursive,
790
+ Meroitic_Hieroglyphs,
791
+ Miao,
792
+ Modi,
793
+ Mongolian,
794
+ Mro,
795
+ Myanmar,
796
+ Nabataean,
797
+ New_Tai_Lue,
798
+ Nko,
799
+ Ogham,
800
+ Ol_Chiki,
801
+ Old_Italic,
802
+ Old_North_Arabian,
803
+ Old_Permic,
804
+ Old_Persian,
805
+ Old_South_Arabian,
806
+ Old_Turkic,
807
+ Oriya,
808
+ Osmanya,
809
+ Pahawh_Hmong,
810
+ Palmyrene,
811
+ Pau_Cin_Hau,
812
+ Phags_Pa,
813
+ Phoenician,
814
+ Psalter_Pahlavi,
815
+ Rejang,
816
+ Runic,
817
+ Samaritan,
818
+ Saurashtra,
819
+ Sharada,
820
+ Shavian,
821
+ Siddham,
822
+ Sinhala,
823
+ Sora_Sompeng,
824
+ Sundanese,
825
+ Syloti_Nagri,
826
+ Syriac,
827
+ Tagalog,
828
+ Tagbanwa,
829
+ Tai_Le,
830
+ Tai_Tham,
831
+ Tai_Viet,
832
+ Takri,
833
+ Tamil,
834
+ Telugu,
835
+ Thaana,
836
+ Thai,
837
+ Tibetan,
838
+ Tifinagh,
839
+ Tirhuta,
840
+ Ugaritic,
841
+ Vai,
842
+ Warang_Citi,
843
+ Yi.
844
+ .P
845
+ Each character has exactly one Unicode general category property, specified by
846
+ a two-letter abbreviation. For compatibility with Perl, negation can be
847
+ specified by including a circumflex between the opening brace and the property
848
+ name. For example, \ep{^Lu} is the same as \eP{Lu}.
849
+ .P
850
+ If only one letter is specified with \ep or \eP, it includes all the general
851
+ category properties that start with that letter. In this case, in the absence
852
+ of negation, the curly brackets in the escape sequence are optional; these two
853
+ examples have the same effect:
854
+ .sp
855
+ \ep{L}
856
+ \epL
857
+ .sp
858
+ The following general category property codes are supported:
859
+ .sp
860
+ C Other
861
+ Cc Control
862
+ Cf Format
863
+ Cn Unassigned
864
+ Co Private use
865
+ Cs Surrogate
866
+ .sp
867
+ L Letter
868
+ Ll Lower case letter
869
+ Lm Modifier letter
870
+ Lo Other letter
871
+ Lt Title case letter
872
+ Lu Upper case letter
873
+ .sp
874
+ M Mark
875
+ Mc Spacing mark
876
+ Me Enclosing mark
877
+ Mn Non-spacing mark
878
+ .sp
879
+ N Number
880
+ Nd Decimal number
881
+ Nl Letter number
882
+ No Other number
883
+ .sp
884
+ P Punctuation
885
+ Pc Connector punctuation
886
+ Pd Dash punctuation
887
+ Pe Close punctuation
888
+ Pf Final punctuation
889
+ Pi Initial punctuation
890
+ Po Other punctuation
891
+ Ps Open punctuation
892
+ .sp
893
+ S Symbol
894
+ Sc Currency symbol
895
+ Sk Modifier symbol
896
+ Sm Mathematical symbol
897
+ So Other symbol
898
+ .sp
899
+ Z Separator
900
+ Zl Line separator
901
+ Zp Paragraph separator
902
+ Zs Space separator
903
+ .sp
904
+ The special property L& is also supported: it matches a character that has
905
+ the Lu, Ll, or Lt property, in other words, a letter that is not classified as
906
+ a modifier or "other".
907
+ .P
908
+ The Cs (Surrogate) property applies only to characters in the range U+D800 to
909
+ U+DFFF. Such characters are not valid in Unicode strings and so
910
+ cannot be tested by PCRE, unless UTF validity checking has been turned off
911
+ (see the discussion of PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK and
912
+ PCRE_NO_UTF32_CHECK in the
913
+ .\" HREF
914
+ \fBpcreapi\fP
915
+ .\"
916
+ page). Perl does not support the Cs property.
917
+ .P
918
+ The long synonyms for property names that Perl supports (such as \ep{Letter})
919
+ are not supported by PCRE, nor is it permitted to prefix any of these
920
+ properties with "Is".
921
+ .P
922
+ No character that is in the Unicode table has the Cn (unassigned) property.
923
+ Instead, this property is assumed for any code point that is not in the
924
+ Unicode table.
925
+ .P
926
+ Specifying caseless matching does not affect these escape sequences. For
927
+ example, \ep{Lu} always matches only upper case letters. This is different from
928
+ the behaviour of current versions of Perl.
929
+ .P
930
+ Matching characters by Unicode property is not fast, because PCRE has to do a
931
+ multistage table lookup in order to find a character's property. That is why
932
+ the traditional escape sequences such as \ed and \ew do not use Unicode
933
+ properties in PCRE by default, though you can make them do so by setting the
934
+ PCRE_UCP option or by starting the pattern with (*UCP).
935
+ .
936
+ .
937
+ .SS Extended grapheme clusters
938
+ .rs
939
+ .sp
940
+ The \eX escape matches any number of Unicode characters that form an "extended
941
+ grapheme cluster", and treats the sequence as an atomic group
942
+ .\" HTML <a href="#atomicgroup">
943
+ .\" </a>
944
+ (see below).
945
+ .\"
946
+ Up to and including release 8.31, PCRE matched an earlier, simpler definition
947
+ that was equivalent to
948
+ .sp
949
+ (?>\ePM\epM*)
950
+ .sp
951
+ That is, it matched a character without the "mark" property, followed by zero
952
+ or more characters with the "mark" property. Characters with the "mark"
953
+ property are typically non-spacing accents that affect the preceding character.
954
+ .P
955
+ This simple definition was extended in Unicode to include more complicated
956
+ kinds of composite character by giving each character a grapheme breaking
957
+ property, and creating rules that use these properties to define the boundaries
958
+ of extended grapheme clusters. In releases of PCRE later than 8.31, \eX matches
959
+ one of these clusters.
960
+ .P
961
+ \eX always matches at least one character. Then it decides whether to add
962
+ additional characters according to the following rules for ending a cluster:
963
+ .P
964
+ 1. End at the end of the subject string.
965
+ .P
966
+ 2. Do not end between CR and LF; otherwise end after any control character.
967
+ .P
968
+ 3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
969
+ are of five types: L, V, T, LV, and LVT. An L character may be followed by an
970
+ L, V, LV, or LVT character; an LV or V character may be followed by a V or T
971
+ character; an LVT or T character may be followed only by a T character.
972
+ .P
973
+ 4. Do not end before extending characters or spacing marks. Characters with
974
+ the "mark" property always have the "extend" grapheme breaking property.
975
+ .P
976
+ 5. Do not end after prepend characters.
977
+ .P
978
+ 6. Otherwise, end the cluster.
979
+ .
980
+ .
981
+ .\" HTML <a name="extraprops"></a>
982
+ .SS PCRE's additional properties
983
+ .rs
984
+ .sp
985
+ As well as the standard Unicode properties described above, PCRE supports four
986
+ more that make it possible to convert traditional escape sequences such as \ew
987
+ and \es to use Unicode properties. PCRE uses these non-standard, non-Perl
988
+ properties internally when PCRE_UCP is set. However, they may also be used
989
+ explicitly. These properties are:
990
+ .sp
991
+ Xan Any alphanumeric character
992
+ Xps Any POSIX space character
993
+ Xsp Any Perl space character
994
+ Xwd Any Perl "word" character
995
+ .sp
996
+ Xan matches characters that have either the L (letter) or the N (number)
997
+ property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
998
+ carriage return, and any other character that has the Z (separator) property.
999
+ Xsp is the same as Xps; it used to exclude vertical tab, for Perl
1000
+ compatibility, but Perl changed, and so PCRE followed at release 8.34. Xwd
1001
+ matches the same characters as Xan, plus underscore.
1002
+ .P
1003
+ There is another non-standard property, Xuc, which matches any character that
1004
+ can be represented by a Universal Character Name in C++ and other programming
1005
+ languages. These are the characters $, @, ` (grave accent), and all characters
1006
+ with Unicode code points greater than or equal to U+00A0, except for the
1007
+ surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are
1008
+ excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH
1009
+ where H is a hexadecimal digit. Note that the Xuc property does not match these
1010
+ sequences but the characters that they represent.)
1011
+ .
1012
+ .
1013
+ .\" HTML <a name="resetmatchstart"></a>
1014
+ .SS "Resetting the match start"
1015
+ .rs
1016
+ .sp
1017
+ The escape sequence \eK causes any previously matched characters not to be
1018
+ included in the final matched sequence. For example, the pattern:
1019
+ .sp
1020
+ foo\eKbar
1021
+ .sp
1022
+ matches "foobar", but reports that it has matched "bar". This feature is
1023
+ similar to a lookbehind assertion
1024
+ .\" HTML <a href="#lookbehind">
1025
+ .\" </a>
1026
+ (described below).
1027
+ .\"
1028
+ However, in this case, the part of the subject before the real match does not
1029
+ have to be of fixed length, as lookbehind assertions do. The use of \eK does
1030
+ not interfere with the setting of
1031
+ .\" HTML <a href="#subpattern">
1032
+ .\" </a>
1033
+ captured substrings.
1034
+ .\"
1035
+ For example, when the pattern
1036
+ .sp
1037
+ (foo)\eKbar
1038
+ .sp
1039
+ matches "foobar", the first substring is still set to "foo".
1040
+ .P
1041
+ Perl documents that the use of \eK within assertions is "not well defined". In
1042
+ PCRE, \eK is acted upon when it occurs inside positive assertions, but is
1043
+ ignored in negative assertions. Note that when a pattern such as (?=ab\eK)
1044
+ matches, the reported start of the match can be greater than the end of the
1045
+ match.
1046
+ .
1047
+ .
1048
+ .\" HTML <a name="smallassertions"></a>
1049
+ .SS "Simple assertions"
1050
+ .rs
1051
+ .sp
1052
+ The final use of backslash is for certain simple assertions. An assertion
1053
+ specifies a condition that has to be met at a particular point in a match,
1054
+ without consuming any characters from the subject string. The use of
1055
+ subpatterns for more complicated assertions is described
1056
+ .\" HTML <a href="#bigassertions">
1057
+ .\" </a>
1058
+ below.
1059
+ .\"
1060
+ The backslashed assertions are:
1061
+ .sp
1062
+ \eb matches at a word boundary
1063
+ \eB matches when not at a word boundary
1064
+ \eA matches at the start of the subject
1065
+ \eZ matches at the end of the subject
1066
+ also matches before a newline at the end of the subject
1067
+ \ez matches only at the end of the subject
1068
+ \eG matches at the first matching position in the subject
1069
+ .sp
1070
+ Inside a character class, \eb has a different meaning; it matches the backspace
1071
+ character. If any other of these assertions appears in a character class, by
1072
+ default it matches the corresponding literal character (for example, \eB
1073
+ matches the letter B). However, if the PCRE_EXTRA option is set, an "invalid
1074
+ escape sequence" error is generated instead.
1075
+ .P
1076
+ A word boundary is a position in the subject string where the current character
1077
+ and the previous character do not both match \ew or \eW (i.e. one matches
1078
+ \ew and the other matches \eW), or the start or end of the string if the
1079
+ first or last character matches \ew, respectively. In a UTF mode, the meanings
1080
+ of \ew and \eW can be changed by setting the PCRE_UCP option. When this is
1081
+ done, it also affects \eb and \eB. Neither PCRE nor Perl has a separate "start
1082
+ of word" or "end of word" metasequence. However, whatever follows \eb normally
1083
+ determines which it is. For example, the fragment \eba matches "a" at the start
1084
+ of a word.
1085
+ .P
1086
+ The \eA, \eZ, and \ez assertions differ from the traditional circumflex and
1087
+ dollar (described in the next section) in that they only ever match at the very
1088
+ start and end of the subject string, whatever options are set. Thus, they are
1089
+ independent of multiline mode. These three assertions are not affected by the
1090
+ PCRE_NOTBOL or PCRE_NOTEOL options, which affect only the behaviour of the
1091
+ circumflex and dollar metacharacters. However, if the \fIstartoffset\fP
1092
+ argument of \fBpcre_exec()\fP is non-zero, indicating that matching is to start
1093
+ at a point other than the beginning of the subject, \eA can never match. The
1094
+ difference between \eZ and \ez is that \eZ matches before a newline at the end
1095
+ of the string as well as at the very end, whereas \ez matches only at the end.
1096
+ .P
1097
+ The \eG assertion is true only when the current matching position is at the
1098
+ start point of the match, as specified by the \fIstartoffset\fP argument of
1099
+ \fBpcre_exec()\fP. It differs from \eA when the value of \fIstartoffset\fP is
1100
+ non-zero. By calling \fBpcre_exec()\fP multiple times with appropriate
1101
+ arguments, you can mimic Perl's /g option, and it is in this kind of
1102
+ implementation where \eG can be useful.
1103
+ .P
1104
+ Note, however, that PCRE's interpretation of \eG, as the start of the current
1105
+ match, is subtly different from Perl's, which defines it as the end of the
1106
+ previous match. In Perl, these can be different when the previously matched
1107
+ string was empty. Because PCRE does just one match at a time, it cannot
1108
+ reproduce this behaviour.
1109
+ .P
1110
+ If all the alternatives of a pattern begin with \eG, the expression is anchored
1111
+ to the starting match position, and the "anchored" flag is set in the compiled
1112
+ regular expression.
1113
+ .
1114
+ .
1115
+ .SH "CIRCUMFLEX AND DOLLAR"
1116
+ .rs
1117
+ .sp
1118
+ The circumflex and dollar metacharacters are zero-width assertions. That is,
1119
+ they test for a particular condition being true without consuming any
1120
+ characters from the subject string.
1121
+ .P
1122
+ Outside a character class, in the default matching mode, the circumflex
1123
+ character is an assertion that is true only if the current matching point is at
1124
+ the start of the subject string. If the \fIstartoffset\fP argument of
1125
+ \fBpcre_exec()\fP is non-zero, circumflex can never match if the PCRE_MULTILINE
1126
+ option is unset. Inside a character class, circumflex has an entirely different
1127
+ meaning
1128
+ .\" HTML <a href="#characterclass">
1129
+ .\" </a>
1130
+ (see below).
1131
+ .\"
1132
+ .P
1133
+ Circumflex need not be the first character of the pattern if a number of
1134
+ alternatives are involved, but it should be the first thing in each alternative
1135
+ in which it appears if the pattern is ever to match that branch. If all
1136
+ possible alternatives start with a circumflex, that is, if the pattern is
1137
+ constrained to match only at the start of the subject, it is said to be an
1138
+ "anchored" pattern. (There are also other constructs that can cause a pattern
1139
+ to be anchored.)
1140
+ .P
1141
+ The dollar character is an assertion that is true only if the current matching
1142
+ point is at the end of the subject string, or immediately before a newline at
1143
+ the end of the string (by default). Note, however, that it does not actually
1144
+ match the newline. Dollar need not be the last character of the pattern if a
1145
+ number of alternatives are involved, but it should be the last item in any
1146
+ branch in which it appears. Dollar has no special meaning in a character class.
1147
+ .P
1148
+ The meaning of dollar can be changed so that it matches only at the very end of
1149
+ the string, by setting the PCRE_DOLLAR_ENDONLY option at compile time. This
1150
+ does not affect the \eZ assertion.
1151
+ .P
1152
+ The meanings of the circumflex and dollar characters are changed if the
1153
+ PCRE_MULTILINE option is set. When this is the case, a circumflex matches
1154
+ immediately after internal newlines as well as at the start of the subject
1155
+ string. It does not match after a newline that ends the string. A dollar
1156
+ matches before any newlines in the string, as well as at the very end, when
1157
+ PCRE_MULTILINE is set. When newline is specified as the two-character
1158
+ sequence CRLF, isolated CR and LF characters do not indicate newlines.
1159
+ .P
1160
+ For example, the pattern /^abc$/ matches the subject string "def\enabc" (where
1161
+ \en represents a newline) in multiline mode, but not otherwise. Consequently,
1162
+ patterns that are anchored in single line mode because all branches start with
1163
+ ^ are not anchored in multiline mode, and a match for circumflex is possible
1164
+ when the \fIstartoffset\fP argument of \fBpcre_exec()\fP is non-zero. The
1165
+ PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
1166
+ .P
1167
+ Note that the sequences \eA, \eZ, and \ez can be used to match the start and
1168
+ end of the subject in both modes, and if all branches of a pattern start with
1169
+ \eA it is always anchored, whether or not PCRE_MULTILINE is set.
1170
+ .
1171
+ .
1172
+ .\" HTML <a name="fullstopdot"></a>
1173
+ .SH "FULL STOP (PERIOD, DOT) AND \eN"
1174
+ .rs
1175
+ .sp
1176
+ Outside a character class, a dot in the pattern matches any one character in
1177
+ the subject string except (by default) a character that signifies the end of a
1178
+ line.
1179
+ .P
1180
+ When a line ending is defined as a single character, dot never matches that
1181
+ character; when the two-character sequence CRLF is used, dot does not match CR
1182
+ if it is immediately followed by LF, but otherwise it matches all characters
1183
+ (including isolated CRs and LFs). When any Unicode line endings are being
1184
+ recognized, dot does not match CR or LF or any of the other line ending
1185
+ characters.
1186
+ .P
1187
+ The behaviour of dot with regard to newlines can be changed. If the PCRE_DOTALL
1188
+ option is set, a dot matches any one character, without exception. If the
1189
+ two-character sequence CRLF is present in the subject string, it takes two dots
1190
+ to match it.
1191
+ .P
1192
+ The handling of dot is entirely independent of the handling of circumflex and
1193
+ dollar, the only relationship being that they both involve newlines. Dot has no
1194
+ special meaning in a character class.
1195
+ .P
1196
+ The escape sequence \eN behaves like a dot, except that it is not affected by
1197
+ the PCRE_DOTALL option. In other words, it matches any character except one
1198
+ that signifies the end of a line. Perl also uses \eN to match characters by
1199
+ name; PCRE does not support this.
1200
+ .
1201
+ .
1202
+ .SH "MATCHING A SINGLE DATA UNIT"
1203
+ .rs
1204
+ .sp
1205
+ Outside a character class, the escape sequence \eC matches any one data unit,
1206
+ whether or not a UTF mode is set. In the 8-bit library, one data unit is one
1207
+ byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is
1208
+ a 32-bit unit. Unlike a dot, \eC always
1209
+ matches line-ending characters. The feature is provided in Perl in order to
1210
+ match individual bytes in UTF-8 mode, but it is unclear how it can usefully be
1211
+ used. Because \eC breaks up characters into individual data units, matching one
1212
+ unit with \eC in a UTF mode means that the rest of the string may start with a
1213
+ malformed UTF character. This has undefined results, because PCRE assumes that
1214
+ it is dealing with valid UTF strings (and by default it checks this at the
1215
+ start of processing unless the PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK or
1216
+ PCRE_NO_UTF32_CHECK option is used).
1217
+ .P
1218
+ PCRE does not allow \eC to appear in lookbehind assertions
1219
+ .\" HTML <a href="#lookbehind">
1220
+ .\" </a>
1221
+ (described below)
1222
+ .\"
1223
+ in a UTF mode, because this would make it impossible to calculate the length of
1224
+ the lookbehind.
1225
+ .P
1226
+ In general, the \eC escape sequence is best avoided. However, one
1227
+ way of using it that avoids the problem of malformed UTF characters is to use a
1228
+ lookahead to check the length of the next character, as in this pattern, which
1229
+ could be used with a UTF-8 string (ignore white space and line breaks):
1230
+ .sp
1231
+ (?| (?=[\ex00-\ex7f])(\eC) |
1232
+ (?=[\ex80-\ex{7ff}])(\eC)(\eC) |
1233
+ (?=[\ex{800}-\ex{ffff}])(\eC)(\eC)(\eC) |
1234
+ (?=[\ex{10000}-\ex{1fffff}])(\eC)(\eC)(\eC)(\eC))
1235
+ .sp
1236
+ A group that starts with (?| resets the capturing parentheses numbers in each
1237
+ alternative (see
1238
+ .\" HTML <a href="#dupsubpatternnumber">
1239
+ .\" </a>
1240
+ "Duplicate Subpattern Numbers"
1241
+ .\"
1242
+ below). The assertions at the start of each branch check the next UTF-8
1243
+ character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The
1244
+ character's individual bytes are then captured by the appropriate number of
1245
+ groups.
1246
+ .
1247
+ .
1248
+ .\" HTML <a name="characterclass"></a>
1249
+ .SH "SQUARE BRACKETS AND CHARACTER CLASSES"
1250
+ .rs
1251
+ .sp
1252
+ An opening square bracket introduces a character class, terminated by a closing
1253
+ square bracket. A closing square bracket on its own is not special by default.
1254
+ However, if the PCRE_JAVASCRIPT_COMPAT option is set, a lone closing square
1255
+ bracket causes a compile-time error. If a closing square bracket is required as
1256
+ a member of the class, it should be the first data character in the class
1257
+ (after an initial circumflex, if present) or escaped with a backslash.
1258
+ .P
1259
+ A character class matches a single character in the subject. In a UTF mode, the
1260
+ character may be more than one data unit long. A matched character must be in
1261
+ the set of characters defined by the class, unless the first character in the
1262
+ class definition is a circumflex, in which case the subject character must not
1263
+ be in the set defined by the class. If a circumflex is actually required as a
1264
+ member of the class, ensure it is not the first character, or escape it with a
1265
+ backslash.
1266
+ .P
1267
+ For example, the character class [aeiou] matches any lower case vowel, while
1268
+ [^aeiou] matches any character that is not a lower case vowel. Note that a
1269
+ circumflex is just a convenient notation for specifying the characters that
1270
+ are in the class by enumerating those that are not. A class that starts with a
1271
+ circumflex is not an assertion; it still consumes a character from the subject
1272
+ string, and therefore it fails if the current pointer is at the end of the
1273
+ string.
1274
+ .P
1275
+ In UTF-8 (UTF-16, UTF-32) mode, characters with values greater than 255 (0xffff)
1276
+ can be included in a class as a literal string of data units, or by using the
1277
+ \ex{ escaping mechanism.
1278
+ .P
1279
+ When caseless matching is set, any letters in a class represent both their
1280
+ upper case and lower case versions, so for example, a caseless [aeiou] matches
1281
+ "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
1282
+ caseful version would. In a UTF mode, PCRE always understands the concept of
1283
+ case for characters whose values are less than 128, so caseless matching is
1284
+ always possible. For characters with higher values, the concept of case is
1285
+ supported if PCRE is compiled with Unicode property support, but not otherwise.
1286
+ If you want to use caseless matching in a UTF mode for characters 128 and
1287
+ above, you must ensure that PCRE is compiled with Unicode property support as
1288
+ well as with UTF support.
1289
+ .P
1290
+ Characters that might indicate line breaks are never treated in any special way
1291
+ when matching character classes, whatever line-ending sequence is in use, and
1292
+ whatever setting of the PCRE_DOTALL and PCRE_MULTILINE options is used. A class
1293
+ such as [^a] always matches one of these characters.
1294
+ .P
1295
+ The minus (hyphen) character can be used to specify a range of characters in a
1296
+ character class. For example, [d-m] matches any letter between d and m,
1297
+ inclusive. If a minus character is required in a class, it must be escaped with
1298
+ a backslash or appear in a position where it cannot be interpreted as
1299
+ indicating a range, typically as the first or last character in the class, or
1300
+ immediately after a range. For example, [b-d-z] matches letters in the range b
1301
+ to d, a hyphen character, or z.
1302
+ .P
1303
+ It is not possible to have the literal character "]" as the end character of a
1304
+ range. A pattern such as [W-]46] is interpreted as a class of two characters
1305
+ ("W" and "-") followed by a literal string "46]", so it would match "W46]" or
1306
+ "-46]". However, if the "]" is escaped with a backslash it is interpreted as
1307
+ the end of range, so [W-\e]46] is interpreted as a class containing a range
1308
+ followed by two other characters. The octal or hexadecimal representation of
1309
+ "]" can also be used to end a range.
1310
+ .P
1311
+ An error is generated if a POSIX character class (see below) or an escape
1312
+ sequence other than one that defines a single character appears at a point
1313
+ where a range ending character is expected. For example, [z-\exff] is valid,
1314
+ but [A-\ed] and [A-[:digit:]] are not.
1315
+ .P
1316
+ Ranges operate in the collating sequence of character values. They can also be
1317
+ used for characters specified numerically, for example [\e000-\e037]. Ranges
1318
+ can include any characters that are valid for the current mode.
1319
+ .P
1320
+ If a range that includes letters is used when caseless matching is set, it
1321
+ matches the letters in either case. For example, [W-c] is equivalent to
1322
+ [][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character
1323
+ tables for a French locale are in use, [\exc8-\excb] matches accented E
1324
+ characters in both cases. In UTF modes, PCRE supports the concept of case for
1325
+ characters with values greater than 127 only when it is compiled with Unicode
1326
+ property support.
1327
+ .P
1328
+ The character escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es, \eS, \ev,
1329
+ \eV, \ew, and \eW may appear in a character class, and add the characters that
1330
+ they match to the class. For example, [\edABCDEF] matches any hexadecimal
1331
+ digit. In UTF modes, the PCRE_UCP option affects the meanings of \ed, \es, \ew
1332
+ and their upper case partners, just as it does when they appear outside a
1333
+ character class, as described in the section entitled
1334
+ .\" HTML <a href="#genericchartypes">
1335
+ .\" </a>
1336
+ "Generic character types"
1337
+ .\"
1338
+ above. The escape sequence \eb has a different meaning inside a character
1339
+ class; it matches the backspace character. The sequences \eB, \eN, \eR, and \eX
1340
+ are not special inside a character class. Like any other unrecognized escape
1341
+ sequences, they are treated as the literal characters "B", "N", "R", and "X" by
1342
+ default, but cause an error if the PCRE_EXTRA option is set.
1343
+ .P
1344
+ A circumflex can conveniently be used with the upper case character types to
1345
+ specify a more restricted set of characters than the matching lower case type.
1346
+ For example, the class [^\eW_] matches any letter or digit, but not underscore,
1347
+ whereas [\ew] includes underscore. A positive character class should be read as
1348
+ "something OR something OR ..." and a negative class as "NOT something AND NOT
1349
+ something AND NOT ...".
1350
+ .P
1351
+ The only metacharacters that are recognized in character classes are backslash,
1352
+ hyphen (only where it can be interpreted as specifying a range), circumflex
1353
+ (only at the start), opening square bracket (only when it can be interpreted as
1354
+ introducing a POSIX class name, or for a special compatibility feature - see
1355
+ the next two sections), and the terminating closing square bracket. However,
1356
+ escaping other non-alphanumeric characters does no harm.
1357
+ .
1358
+ .
1359
+ .SH "POSIX CHARACTER CLASSES"
1360
+ .rs
1361
+ .sp
1362
+ Perl supports the POSIX notation for character classes. This uses names
1363
+ enclosed by [: and :] within the enclosing square brackets. PCRE also supports
1364
+ this notation. For example,
1365
+ .sp
1366
+ [01[:alpha:]%]
1367
+ .sp
1368
+ matches "0", "1", any alphabetic character, or "%". The supported class names
1369
+ are:
1370
+ .sp
1371
+ alnum letters and digits
1372
+ alpha letters
1373
+ ascii character codes 0 - 127
1374
+ blank space or tab only
1375
+ cntrl control characters
1376
+ digit decimal digits (same as \ed)
1377
+ graph printing characters, excluding space
1378
+ lower lower case letters
1379
+ print printing characters, including space
1380
+ punct printing characters, excluding letters and digits and space
1381
+ space white space (the same as \es from PCRE 8.34)
1382
+ upper upper case letters
1383
+ word "word" characters (same as \ew)
1384
+ xdigit hexadecimal digits
1385
+ .sp
1386
+ The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13),
1387
+ and space (32). If locale-specific matching is taking place, the list of space
1388
+ characters may be different; there may be fewer or more of them. "Space" used
1389
+ to be different to \es, which did not include VT, for Perl compatibility.
1390
+ However, Perl changed at release 5.18, and PCRE followed at release 8.34.
1391
+ "Space" and \es now match the same set of characters.
1392
+ .P
1393
+ The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
1394
+ 5.8. Another Perl extension is negation, which is indicated by a ^ character
1395
+ after the colon. For example,
1396
+ .sp
1397
+ [12[:^digit:]]
1398
+ .sp
1399
+ matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
1400
+ syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
1401
+ supported, and an error is given if they are encountered.
1402
+ .P
1403
+ By default, characters with values greater than 127 do not match any of the
1404
+ POSIX character classes. However, if the PCRE_UCP option is passed to
1405
+ \fBpcre_compile()\fP, some of the classes are changed so that Unicode character
1406
+ properties are used. This is achieved by replacing certain POSIX classes by
1407
+ other sequences, as follows:
1408
+ .sp
1409
+ [:alnum:] becomes \ep{Xan}
1410
+ [:alpha:] becomes \ep{L}
1411
+ [:blank:] becomes \eh
1412
+ [:digit:] becomes \ep{Nd}
1413
+ [:lower:] becomes \ep{Ll}
1414
+ [:space:] becomes \ep{Xps}
1415
+ [:upper:] becomes \ep{Lu}
1416
+ [:word:] becomes \ep{Xwd}
1417
+ .sp
1418
+ Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX
1419
+ classes are handled specially in UCP mode:
1420
+ .TP 10
1421
+ [:graph:]
1422
+ This matches characters that have glyphs that mark the page when printed. In
1423
+ Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf
1424
+ properties, except for:
1425
+ .sp
1426
+ U+061C Arabic Letter Mark
1427
+ U+180E Mongolian Vowel Separator
1428
+ U+2066 - U+2069 Various "isolate"s
1429
+ .sp
1430
+ .TP 10
1431
+ [:print:]
1432
+ This matches the same characters as [:graph:] plus space characters that are
1433
+ not controls, that is, characters with the Zs property.
1434
+ .TP 10
1435
+ [:punct:]
1436
+ This matches all characters that have the Unicode P (punctuation) property,
1437
+ plus those characters whose code points are less than 128 that have the S
1438
+ (Symbol) property.
1439
+ .P
1440
+ The other POSIX classes are unchanged, and match only characters with code
1441
+ points less than 128.
1442
+ .
1443
+ .
1444
+ .SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES"
1445
+ .rs
1446
+ .sp
1447
+ In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly
1448
+ syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of
1449
+ word". PCRE treats these items as follows:
1450
+ .sp
1451
+ [[:<:]] is converted to \eb(?=\ew)
1452
+ [[:>:]] is converted to \eb(?<=\ew)
1453
+ .sp
1454
+ Only these exact character sequences are recognized. A sequence such as
1455
+ [a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is
1456
+ not compatible with Perl. It is provided to help migrations from other
1457
+ environments, and is best not used in any new patterns. Note that \eb matches
1458
+ at the start and the end of a word (see
1459
+ .\" HTML <a href="#smallassertions">
1460
+ .\" </a>
1461
+ "Simple assertions"
1462
+ .\"
1463
+ above), and in a Perl-style pattern the preceding or following character
1464
+ normally shows which is wanted, without the need for the assertions that are
1465
+ used above in order to give exactly the POSIX behaviour.
1466
+ .
1467
+ .
1468
+ .SH "VERTICAL BAR"
1469
+ .rs
1470
+ .sp
1471
+ Vertical bar characters are used to separate alternative patterns. For example,
1472
+ the pattern
1473
+ .sp
1474
+ gilbert|sullivan
1475
+ .sp
1476
+ matches either "gilbert" or "sullivan". Any number of alternatives may appear,
1477
+ and an empty alternative is permitted (matching the empty string). The matching
1478
+ process tries each alternative in turn, from left to right, and the first one
1479
+ that succeeds is used. If the alternatives are within a subpattern
1480
+ .\" HTML <a href="#subpattern">
1481
+ .\" </a>
1482
+ (defined below),
1483
+ .\"
1484
+ "succeeds" means matching the rest of the main pattern as well as the
1485
+ alternative in the subpattern.
1486
+ .
1487
+ .
1488
+ .SH "INTERNAL OPTION SETTING"
1489
+ .rs
1490
+ .sp
1491
+ The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
1492
+ PCRE_EXTENDED options (which are Perl-compatible) can be changed from within
1493
+ the pattern by a sequence of Perl option letters enclosed between "(?" and ")".
1494
+ The option letters are
1495
+ .sp
1496
+ i for PCRE_CASELESS
1497
+ m for PCRE_MULTILINE
1498
+ s for PCRE_DOTALL
1499
+ x for PCRE_EXTENDED
1500
+ .sp
1501
+ For example, (?im) sets caseless, multiline matching. It is also possible to
1502
+ unset these options by preceding the letter with a hyphen, and a combined
1503
+ setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and
1504
+ PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also
1505
+ permitted. If a letter appears both before and after the hyphen, the option is
1506
+ unset.
1507
+ .P
1508
+ The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA can be
1509
+ changed in the same way as the Perl-compatible options by using the characters
1510
+ J, U and X respectively.
1511
+ .P
1512
+ When one of these option changes occurs at top level (that is, not inside
1513
+ subpattern parentheses), the change applies to the remainder of the pattern
1514
+ that follows. If the change is placed right at the start of a pattern, PCRE
1515
+ extracts it into the global options (and it will therefore show up in data
1516
+ extracted by the \fBpcre_fullinfo()\fP function).
1517
+ .P
1518
+ An option change within a subpattern (see below for a description of
1519
+ subpatterns) affects only that part of the subpattern that follows it, so
1520
+ .sp
1521
+ (a(?i)b)c
1522
+ .sp
1523
+ matches abc and aBc and no other strings (assuming PCRE_CASELESS is not used).
1524
+ By this means, options can be made to have different settings in different
1525
+ parts of the pattern. Any changes made in one alternative do carry on
1526
+ into subsequent branches within the same subpattern. For example,
1527
+ .sp
1528
+ (a(?i)b|c)
1529
+ .sp
1530
+ matches "ab", "aB", "c", and "C", even though when matching "C" the first
1531
+ branch is abandoned before the option setting. This is because the effects of
1532
+ option settings happen at compile time. There would be some very weird
1533
+ behaviour otherwise.
1534
+ .P
1535
+ \fBNote:\fP There are other PCRE-specific options that can be set by the
1536
+ application when the compiling or matching functions are called. In some cases
1537
+ the pattern can contain special leading sequences such as (*CRLF) to override
1538
+ what the application has set or what has been defaulted. Details are given in
1539
+ the section entitled
1540
+ .\" HTML <a href="#newlineseq">
1541
+ .\" </a>
1542
+ "Newline sequences"
1543
+ .\"
1544
+ above. There are also the (*UTF8), (*UTF16), (*UTF32), and (*UCP) leading
1545
+ sequences that can be used to set UTF and Unicode property modes; they are
1546
+ equivalent to setting the PCRE_UTF8, PCRE_UTF16, PCRE_UTF32 and the PCRE_UCP
1547
+ options, respectively. The (*UTF) sequence is a generic version that can be
1548
+ used with any of the libraries. However, the application can set the
1549
+ PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences.
1550
+ .
1551
+ .
1552
+ .\" HTML <a name="subpattern"></a>
1553
+ .SH SUBPATTERNS
1554
+ .rs
1555
+ .sp
1556
+ Subpatterns are delimited by parentheses (round brackets), which can be nested.
1557
+ Turning part of a pattern into a subpattern does two things:
1558
+ .sp
1559
+ 1. It localizes a set of alternatives. For example, the pattern
1560
+ .sp
1561
+ cat(aract|erpillar|)
1562
+ .sp
1563
+ matches "cataract", "caterpillar", or "cat". Without the parentheses, it would
1564
+ match "cataract", "erpillar" or an empty string.
1565
+ .sp
1566
+ 2. It sets up the subpattern as a capturing subpattern. This means that, when
1567
+ the whole pattern matches, that portion of the subject string that matched the
1568
+ subpattern is passed back to the caller via the \fIovector\fP argument of the
1569
+ matching function. (This applies only to the traditional matching functions;
1570
+ the DFA matching functions do not support capturing.)
1571
+ .P
1572
+ Opening parentheses are counted from left to right (starting from 1) to obtain
1573
+ numbers for the capturing subpatterns. For example, if the string "the red
1574
+ king" is matched against the pattern
1575
+ .sp
1576
+ the ((red|white) (king|queen))
1577
+ .sp
1578
+ the captured substrings are "red king", "red", and "king", and are numbered 1,
1579
+ 2, and 3, respectively.
1580
+ .P
1581
+ The fact that plain parentheses fulfil two functions is not always helpful.
1582
+ There are often times when a grouping subpattern is required without a
1583
+ capturing requirement. If an opening parenthesis is followed by a question mark
1584
+ and a colon, the subpattern does not do any capturing, and is not counted when
1585
+ computing the number of any subsequent capturing subpatterns. For example, if
1586
+ the string "the white queen" is matched against the pattern
1587
+ .sp
1588
+ the ((?:red|white) (king|queen))
1589
+ .sp
1590
+ the captured substrings are "white queen" and "queen", and are numbered 1 and
1591
+ 2. The maximum number of capturing subpatterns is 65535.
1592
+ .P
1593
+ As a convenient shorthand, if any option settings are required at the start of
1594
+ a non-capturing subpattern, the option letters may appear between the "?" and
1595
+ the ":". Thus the two patterns
1596
+ .sp
1597
+ (?i:saturday|sunday)
1598
+ (?:(?i)saturday|sunday)
1599
+ .sp
1600
+ match exactly the same set of strings. Because alternative branches are tried
1601
+ from left to right, and options are not reset until the end of the subpattern
1602
+ is reached, an option setting in one branch does affect subsequent branches, so
1603
+ the above patterns match "SUNDAY" as well as "Saturday".
1604
+ .
1605
+ .
1606
+ .\" HTML <a name="dupsubpatternnumber"></a>
1607
+ .SH "DUPLICATE SUBPATTERN NUMBERS"
1608
+ .rs
1609
+ .sp
1610
+ Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
1611
+ the same numbers for its capturing parentheses. Such a subpattern starts with
1612
+ (?| and is itself a non-capturing subpattern. For example, consider this
1613
+ pattern:
1614
+ .sp
1615
+ (?|(Sat)ur|(Sun))day
1616
+ .sp
1617
+ Because the two alternatives are inside a (?| group, both sets of capturing
1618
+ parentheses are numbered one. Thus, when the pattern matches, you can look
1619
+ at captured substring number one, whichever alternative matched. This construct
1620
+ is useful when you want to capture part, but not all, of one of a number of
1621
+ alternatives. Inside a (?| group, parentheses are numbered as usual, but the
1622
+ number is reset at the start of each branch. The numbers of any capturing
1623
+ parentheses that follow the subpattern start after the highest number used in
1624
+ any branch. The following example is taken from the Perl documentation. The
1625
+ numbers underneath show in which buffer the captured content will be stored.
1626
+ .sp
1627
+ # before ---------------branch-reset----------- after
1628
+ / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
1629
+ # 1 2 2 3 2 3 4
1630
+ .sp
1631
+ A back reference to a numbered subpattern uses the most recent value that is
1632
+ set for that number by any subpattern. The following pattern matches "abcabc"
1633
+ or "defdef":
1634
+ .sp
1635
+ /(?|(abc)|(def))\e1/
1636
+ .sp
1637
+ In contrast, a subroutine call to a numbered subpattern always refers to the
1638
+ first one in the pattern with the given number. The following pattern matches
1639
+ "abcabc" or "defabc":
1640
+ .sp
1641
+ /(?|(abc)|(def))(?1)/
1642
+ .sp
1643
+ If a
1644
+ .\" HTML <a href="#conditions">
1645
+ .\" </a>
1646
+ condition test
1647
+ .\"
1648
+ for a subpattern's having matched refers to a non-unique number, the test is
1649
+ true if any of the subpatterns of that number have matched.
1650
+ .P
1651
+ An alternative approach to using this "branch reset" feature is to use
1652
+ duplicate named subpatterns, as described in the next section.
1653
+ .
1654
+ .
1655
+ .SH "NAMED SUBPATTERNS"
1656
+ .rs
1657
+ .sp
1658
+ Identifying capturing parentheses by number is simple, but it can be very hard
1659
+ to keep track of the numbers in complicated regular expressions. Furthermore,
1660
+ if an expression is modified, the numbers may change. To help with this
1661
+ difficulty, PCRE supports the naming of subpatterns. This feature was not
1662
+ added to Perl until release 5.10. Python had the feature earlier, and PCRE
1663
+ introduced it at release 4.0, using the Python syntax. PCRE now supports both
1664
+ the Perl and the Python syntax. Perl allows identically numbered subpatterns to
1665
+ have different names, but PCRE does not.
1666
+ .P
1667
+ In PCRE, a subpattern can be named in one of three ways: (?<name>...) or
1668
+ (?'name'...) as in Perl, or (?P<name>...) as in Python. References to capturing
1669
+ parentheses from other parts of the pattern, such as
1670
+ .\" HTML <a href="#backreferences">
1671
+ .\" </a>
1672
+ back references,
1673
+ .\"
1674
+ .\" HTML <a href="#recursion">
1675
+ .\" </a>
1676
+ recursion,
1677
+ .\"
1678
+ and
1679
+ .\" HTML <a href="#conditions">
1680
+ .\" </a>
1681
+ conditions,
1682
+ .\"
1683
+ can be made by name as well as by number.
1684
+ .P
1685
+ Names consist of up to 32 alphanumeric characters and underscores, but must
1686
+ start with a non-digit. Named capturing parentheses are still allocated numbers
1687
+ as well as names, exactly as if the names were not present. The PCRE API
1688
+ provides function calls for extracting the name-to-number translation table
1689
+ from a compiled pattern. There is also a convenience function for extracting a
1690
+ captured substring by name.
1691
+ .P
1692
+ By default, a name must be unique within a pattern, but it is possible to relax
1693
+ this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate
1694
+ names are also always permitted for subpatterns with the same number, set up as
1695
+ described in the previous section.) Duplicate names can be useful for patterns
1696
+ where only one instance of the named parentheses can match. Suppose you want to
1697
+ match the name of a weekday, either as a 3-letter abbreviation or as the full
1698
+ name, and in both cases you want to extract the abbreviation. This pattern
1699
+ (ignoring the line breaks) does the job:
1700
+ .sp
1701
+ (?<DN>Mon|Fri|Sun)(?:day)?|
1702
+ (?<DN>Tue)(?:sday)?|
1703
+ (?<DN>Wed)(?:nesday)?|
1704
+ (?<DN>Thu)(?:rsday)?|
1705
+ (?<DN>Sat)(?:urday)?
1706
+ .sp
1707
+ There are five capturing substrings, but only one is ever set after a match.
1708
+ (An alternative way of solving this problem is to use a "branch reset"
1709
+ subpattern, as described in the previous section.)
1710
+ .P
1711
+ The convenience function for extracting the data by name returns the substring
1712
+ for the first (and in this example, the only) subpattern of that name that
1713
+ matched. This saves searching to find which numbered subpattern it was.
1714
+ .P
1715
+ If you make a back reference to a non-unique named subpattern from elsewhere in
1716
+ the pattern, the subpatterns to which the name refers are checked in the order
1717
+ in which they appear in the overall pattern. The first one that is set is used
1718
+ for the reference. For example, this pattern matches both "foofoo" and
1719
+ "barbar" but not "foobar" or "barfoo":
1720
+ .sp
1721
+ (?:(?<n>foo)|(?<n>bar))\ek<n>
1722
+ .sp
1723
+ .P
1724
+ If you make a subroutine call to a non-unique named subpattern, the one that
1725
+ corresponds to the first occurrence of the name is used. In the absence of
1726
+ duplicate numbers (see the previous section) this is the one with the lowest
1727
+ number.
1728
+ .P
1729
+ If you use a named reference in a condition
1730
+ test (see the
1731
+ .\"
1732
+ .\" HTML <a href="#conditions">
1733
+ .\" </a>
1734
+ section about conditions
1735
+ .\"
1736
+ below), either to check whether a subpattern has matched, or to check for
1737
+ recursion, all subpatterns with the same name are tested. If the condition is
1738
+ true for any one of them, the overall condition is true. This is the same
1739
+ behaviour as testing by number. For further details of the interfaces for
1740
+ handling named subpatterns, see the
1741
+ .\" HREF
1742
+ \fBpcreapi\fP
1743
+ .\"
1744
+ documentation.
1745
+ .P
1746
+ \fBWarning:\fP You cannot use different names to distinguish between two
1747
+ subpatterns with the same number because PCRE uses only the numbers when
1748
+ matching. For this reason, an error is given at compile time if different names
1749
+ are given to subpatterns with the same number. However, you can always give the
1750
+ same name to subpatterns with the same number, even when PCRE_DUPNAMES is not
1751
+ set.
1752
+ .
1753
+ .
1754
+ .SH REPETITION
1755
+ .rs
1756
+ .sp
1757
+ Repetition is specified by quantifiers, which can follow any of the following
1758
+ items:
1759
+ .sp
1760
+ a literal data character
1761
+ the dot metacharacter
1762
+ the \eC escape sequence
1763
+ the \eX escape sequence
1764
+ the \eR escape sequence
1765
+ an escape such as \ed or \epL that matches a single character
1766
+ a character class
1767
+ a back reference (see next section)
1768
+ a parenthesized subpattern (including assertions)
1769
+ a subroutine call to a subpattern (recursive or otherwise)
1770
+ .sp
1771
+ The general repetition quantifier specifies a minimum and maximum number of
1772
+ permitted matches, by giving the two numbers in curly brackets (braces),
1773
+ separated by a comma. The numbers must be less than 65536, and the first must
1774
+ be less than or equal to the second. For example:
1775
+ .sp
1776
+ z{2,4}
1777
+ .sp
1778
+ matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special
1779
+ character. If the second number is omitted, but the comma is present, there is
1780
+ no upper limit; if the second number and the comma are both omitted, the
1781
+ quantifier specifies an exact number of required matches. Thus
1782
+ .sp
1783
+ [aeiou]{3,}
1784
+ .sp
1785
+ matches at least 3 successive vowels, but may match many more, while
1786
+ .sp
1787
+ \ed{8}
1788
+ .sp
1789
+ matches exactly 8 digits. An opening curly bracket that appears in a position
1790
+ where a quantifier is not allowed, or one that does not match the syntax of a
1791
+ quantifier, is taken as a literal character. For example, {,6} is not a
1792
+ quantifier, but a literal string of four characters.
1793
+ .P
1794
+ In UTF modes, quantifiers apply to characters rather than to individual data
1795
+ units. Thus, for example, \ex{100}{2} matches two characters, each of
1796
+ which is represented by a two-byte sequence in a UTF-8 string. Similarly,
1797
+ \eX{3} matches three Unicode extended grapheme clusters, each of which may be
1798
+ several data units long (and they may be of different lengths).
1799
+ .P
1800
+ The quantifier {0} is permitted, causing the expression to behave as if the
1801
+ previous item and the quantifier were not present. This may be useful for
1802
+ subpatterns that are referenced as
1803
+ .\" HTML <a href="#subpatternsassubroutines">
1804
+ .\" </a>
1805
+ subroutines
1806
+ .\"
1807
+ from elsewhere in the pattern (but see also the section entitled
1808
+ .\" HTML <a href="#subdefine">
1809
+ .\" </a>
1810
+ "Defining subpatterns for use by reference only"
1811
+ .\"
1812
+ below). Items other than subpatterns that have a {0} quantifier are omitted
1813
+ from the compiled pattern.
1814
+ .P
1815
+ For convenience, the three most common quantifiers have single-character
1816
+ abbreviations:
1817
+ .sp
1818
+ * is equivalent to {0,}
1819
+ + is equivalent to {1,}
1820
+ ? is equivalent to {0,1}
1821
+ .sp
1822
+ It is possible to construct infinite loops by following a subpattern that can
1823
+ match no characters with a quantifier that has no upper limit, for example:
1824
+ .sp
1825
+ (a?)*
1826
+ .sp
1827
+ Earlier versions of Perl and PCRE used to give an error at compile time for
1828
+ such patterns. However, because there are cases where this can be useful, such
1829
+ patterns are now accepted, but if any repetition of the subpattern does in fact
1830
+ match no characters, the loop is forcibly broken.
1831
+ .P
1832
+ By default, the quantifiers are "greedy", that is, they match as much as
1833
+ possible (up to the maximum number of permitted times), without causing the
1834
+ rest of the pattern to fail. The classic example of where this gives problems
1835
+ is in trying to match comments in C programs. These appear between /* and */
1836
+ and within the comment, individual * and / characters may appear. An attempt to
1837
+ match C comments by applying the pattern
1838
+ .sp
1839
+ /\e*.*\e*/
1840
+ .sp
1841
+ to the string
1842
+ .sp
1843
+ /* first comment */ not comment /* second comment */
1844
+ .sp
1845
+ fails, because it matches the entire string owing to the greediness of the .*
1846
+ item.
1847
+ .P
1848
+ However, if a quantifier is followed by a question mark, it ceases to be
1849
+ greedy, and instead matches the minimum number of times possible, so the
1850
+ pattern
1851
+ .sp
1852
+ /\e*.*?\e*/
1853
+ .sp
1854
+ does the right thing with the C comments. The meaning of the various
1855
+ quantifiers is not otherwise changed, just the preferred number of matches.
1856
+ Do not confuse this use of question mark with its use as a quantifier in its
1857
+ own right. Because it has two uses, it can sometimes appear doubled, as in
1858
+ .sp
1859
+ \ed??\ed
1860
+ .sp
1861
+ which matches one digit by preference, but can match two if that is the only
1862
+ way the rest of the pattern matches.
1863
+ .P
1864
+ If the PCRE_UNGREEDY option is set (an option that is not available in Perl),
1865
+ the quantifiers are not greedy by default, but individual ones can be made
1866
+ greedy by following them with a question mark. In other words, it inverts the
1867
+ default behaviour.
1868
+ .P
1869
+ When a parenthesized subpattern is quantified with a minimum repeat count that
1870
+ is greater than 1 or with a limited maximum, more memory is required for the
1871
+ compiled pattern, in proportion to the size of the minimum or maximum.
1872
+ .P
1873
+ If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent
1874
+ to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is
1875
+ implicitly anchored, because whatever follows will be tried against every
1876
+ character position in the subject string, so there is no point in retrying the
1877
+ overall match at any position after the first. PCRE normally treats such a
1878
+ pattern as though it were preceded by \eA.
1879
+ .P
1880
+ In cases where it is known that the subject string contains no newlines, it is
1881
+ worth setting PCRE_DOTALL in order to obtain this optimization, or
1882
+ alternatively using ^ to indicate anchoring explicitly.
1883
+ .P
1884
+ However, there are some cases where the optimization cannot be used. When .*
1885
+ is inside capturing parentheses that are the subject of a back reference
1886
+ elsewhere in the pattern, a match at the start may fail where a later one
1887
+ succeeds. Consider, for example:
1888
+ .sp
1889
+ (.*)abc\e1
1890
+ .sp
1891
+ If the subject is "xyz123abc123" the match point is the fourth character. For
1892
+ this reason, such a pattern is not implicitly anchored.
1893
+ .P
1894
+ Another case where implicit anchoring is not applied is when the leading .* is
1895
+ inside an atomic group. Once again, a match at the start may fail where a later
1896
+ one succeeds. Consider this pattern:
1897
+ .sp
1898
+ (?>.*?a)b
1899
+ .sp
1900
+ It matches "ab" in the subject "aab". The use of the backtracking control verbs
1901
+ (*PRUNE) and (*SKIP) also disables this optimization.
1902
+ .P
1903
+ When a capturing subpattern is repeated, the value captured is the substring
1904
+ that matched the final iteration. For example, after
1905
+ .sp
1906
+ (tweedle[dume]{3}\es*)+
1907
+ .sp
1908
+ has matched "tweedledum tweedledee" the value of the captured substring is
1909
+ "tweedledee". However, if there are nested capturing subpatterns, the
1910
+ corresponding captured values may have been set in previous iterations. For
1911
+ example, after
1912
+ .sp
1913
+ /(a|(b))+/
1914
+ .sp
1915
+ matches "aba" the value of the second captured substring is "b".
1916
+ .
1917
+ .
1918
+ .\" HTML <a name="atomicgroup"></a>
1919
+ .SH "ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS"
1920
+ .rs
1921
+ .sp
1922
+ With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
1923
+ repetition, failure of what follows normally causes the repeated item to be
1924
+ re-evaluated to see if a different number of repeats allows the rest of the
1925
+ pattern to match. Sometimes it is useful to prevent this, either to change the
1926
+ nature of the match, or to cause it to fail earlier than it otherwise might, when
1927
+ the author of the pattern knows there is no point in carrying on.
1928
+ .P
1929
+ Consider, for example, the pattern \ed+foo when applied to the subject line
1930
+ .sp
1931
+ 123456bar
1932
+ .sp
1933
+ After matching all 6 digits and then failing to match "foo", the normal
1934
+ action of the matcher is to try again with only 5 digits matching the \ed+
1935
+ item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
1936
+ (a term taken from Jeffrey Friedl's book) provides the means for specifying
1937
+ that once a subpattern has matched, it is not to be re-evaluated in this way.
1938
+ .P
1939
+ If we use atomic grouping for the previous example, the matcher gives up
1940
+ immediately on failing to match "foo" the first time. The notation is a kind of
1941
+ special parenthesis, starting with (?> as in this example:
1942
+ .sp
1943
+ (?>\ed+)foo
1944
+ .sp
1945
+ This kind of parenthesis "locks up" the part of the pattern it contains once
1946
+ it has matched, and a failure further into the pattern is prevented from
1947
+ backtracking into it. Backtracking past it to previous items, however, works as
1948
+ normal.
1949
+ .P
1950
+ An alternative description is that a subpattern of this type matches the string
1951
+ of characters that an identical standalone pattern would match, if anchored at
1952
+ the current point in the subject string.
1953
+ .P
1954
+ Atomic grouping subpatterns are not capturing subpatterns. Simple cases such as
1955
+ the above example can be thought of as a maximizing repeat that must swallow
1956
+ everything it can. So, while both \ed+ and \ed+? are prepared to adjust the
1957
+ number of digits they match in order to make the rest of the pattern match,
1958
+ (?>\ed+) can only match an entire sequence of digits.
1959
+ .P
1960
+ Atomic groups in general can of course contain arbitrarily complicated
1961
+ subpatterns, and can be nested. However, when the subpattern for an atomic
1962
+ group is just a single repeated item, as in the example above, a simpler
1963
+ notation, called a "possessive quantifier" can be used. This consists of an
1964
+ additional + character following a quantifier. Using this notation, the
1965
+ previous example can be rewritten as
1966
+ .sp
1967
+ \ed++foo
1968
+ .sp
1969
+ Note that a possessive quantifier can be used with an entire group, for
1970
+ example:
1971
+ .sp
1972
+ (abc|xyz){2,3}+
1973
+ .sp
1974
+ Possessive quantifiers are always greedy; the setting of the PCRE_UNGREEDY
1975
+ option is ignored. They are a convenient notation for the simpler forms of
1976
+ atomic group. However, there is no difference in the meaning of a possessive
1977
+ quantifier and the equivalent atomic group, though there may be a performance
1978
+ difference; possessive quantifiers should be slightly faster.
1979
+ .P
1980
+ The possessive quantifier syntax is an extension to the Perl 5.8 syntax.
1981
+ Jeffrey Friedl originated the idea (and the name) in the first edition of his
1982
+ book. Mike McCloskey liked it, so implemented it when he built Sun's Java
1983
+ package, and PCRE copied it from there. It ultimately found its way into Perl
1984
+ at release 5.10.
1985
+ .P
1986
+ PCRE has an optimization that automatically "possessifies" certain simple
1987
+ pattern constructs. For example, the sequence A+B is treated as A++B because
1988
+ there is no point in backtracking into a sequence of A's when B must follow.
1989
+ .P
1990
+ When a pattern contains an unlimited repeat inside a subpattern that can itself
1991
+ be repeated an unlimited number of times, the use of an atomic group is the
1992
+ only way to avoid some failing matches taking a very long time indeed. The
1993
+ pattern
1994
+ .sp
1995
+ (\eD+|<\ed+>)*[!?]
1996
+ .sp
1997
+ matches an unlimited number of substrings that either consist of non-digits, or
1998
+ digits enclosed in <>, followed by either ! or ?. When it matches, it runs
1999
+ quickly. However, if it is applied to
2000
+ .sp
2001
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
2002
+ .sp
2003
+ it takes a long time before reporting failure. This is because the string can
2004
+ be divided between the internal \eD+ repeat and the external * repeat in a
2005
+ large number of ways, and all have to be tried. (The example uses [!?] rather
2006
+ than a single character at the end, because both PCRE and Perl have an
2007
+ optimization that allows for fast failure when a single character is used. They
2008
+ remember the last single character that is required for a match, and fail early
2009
+ if it is not present in the string.) If the pattern is changed so that it uses
2010
+ an atomic group, like this:
2011
+ .sp
2012
+ ((?>\eD+)|<\ed+>)*[!?]
2013
+ .sp
2014
+ sequences of non-digits cannot be broken, and failure happens quickly.
2015
+ .
2016
+ .
2017
+ .\" HTML <a name="backreferences"></a>
2018
+ .SH "BACK REFERENCES"
2019
+ .rs
2020
+ .sp
2021
+ Outside a character class, a backslash followed by a digit greater than 0 (and
2022
+ possibly further digits) is a back reference to a capturing subpattern earlier
2023
+ (that is, to its left) in the pattern, provided there have been that many
2024
+ previous capturing left parentheses.
2025
+ .P
2026
+ However, if the decimal number following the backslash is less than 10, it is
2027
+ always taken as a back reference, and causes an error only if there are not
2028
+ that many capturing left parentheses in the entire pattern. In other words, the
2029
+ parentheses that are referenced need not be to the left of the reference for
2030
+ numbers less than 10. A "forward back reference" of this type can make sense
2031
+ when a repetition is involved and the subpattern to the right has participated
2032
+ in an earlier iteration.
2033
+ .P
2034
+ It is not possible to have a numerical "forward back reference" to a subpattern
2035
+ whose number is 10 or more using this syntax because a sequence such as \e50 is
2036
+ interpreted as a character defined in octal. See the subsection entitled
2037
+ "Non-printing characters"
2038
+ .\" HTML <a href="#digitsafterbackslash">
2039
+ .\" </a>
2040
+ above
2041
+ .\"
2042
+ for further details of the handling of digits following a backslash. There is
2043
+ no such problem when named parentheses are used. A back reference to any
2044
+ subpattern is possible using named parentheses (see below).
2045
+ .P
2046
+ Another way of avoiding the ambiguity inherent in the use of digits following a
2047
+ backslash is to use the \eg escape sequence. This escape must be followed by an
2048
+ unsigned number or a negative number, optionally enclosed in braces. These
2049
+ examples are all identical:
2050
+ .sp
2051
+ (ring), \e1
2052
+ (ring), \eg1
2053
+ (ring), \eg{1}
2054
+ .sp
2055
+ An unsigned number specifies an absolute reference without the ambiguity that
2056
+ is present in the older syntax. It is also useful when literal digits follow
2057
+ the reference. A negative number is a relative reference. Consider this
2058
+ example:
2059
+ .sp
2060
+ (abc(def)ghi)\eg{-1}
2061
+ .sp
2062
+ The sequence \eg{-1} is a reference to the most recently started capturing
2063
+ subpattern before \eg, that is, it is equivalent to \e2 in this example.
2064
+ Similarly, \eg{-2} would be equivalent to \e1. The use of relative references
2065
+ can be helpful in long patterns, and also in patterns that are created by
2066
+ joining together fragments that contain references within themselves.
2067
+ .P
2068
+ A back reference matches whatever actually matched the capturing subpattern in
2069
+ the current subject string, rather than anything matching the subpattern
2070
+ itself (see
2071
+ .\" HTML <a href="#subpatternsassubroutines">
2072
+ .\" </a>
2073
+ "Subpatterns as subroutines"
2074
+ .\"
2075
+ below for a way of doing that). So the pattern
2076
+ .sp
2077
+ (sens|respons)e and \e1ibility
2078
+ .sp
2079
+ matches "sense and sensibility" and "response and responsibility", but not
2080
+ "sense and responsibility". If caseful matching is in force at the time of the
2081
+ back reference, the case of letters is relevant. For example,
2082
+ .sp
2083
+ ((?i)rah)\es+\e1
2084
+ .sp
2085
+ matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
2086
+ capturing subpattern is matched caselessly.
2087
+ .P
2088
+ There are several different ways of writing back references to named
2089
+ subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or
2090
+ \ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
2091
+ back reference syntax, in which \eg can be used for both numeric and named
2092
+ references, is also supported. We could rewrite the above example in any of
2093
+ the following ways:
2094
+ .sp
2095
+ (?<p1>(?i)rah)\es+\ek<p1>
2096
+ (?'p1'(?i)rah)\es+\ek{p1}
2097
+ (?P<p1>(?i)rah)\es+(?P=p1)
2098
+ (?<p1>(?i)rah)\es+\eg{p1}
2099
+ .sp
2100
+ A subpattern that is referenced by name may appear in the pattern before or
2101
+ after the reference.
2102
+ .P
2103
+ There may be more than one back reference to the same subpattern. If a
2104
+ subpattern has not actually been used in a particular match, any back
2105
+ references to it always fail by default. For example, the pattern
2106
+ .sp
2107
+ (a|(bc))\e2
2108
+ .sp
2109
+ always fails if it starts to match "a" rather than "bc". However, if the
2110
+ PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back reference to an
2111
+ unset value matches an empty string.
2112
+ .P
2113
+ Because there may be many capturing parentheses in a pattern, all digits
2114
+ following a backslash are taken as part of a potential back reference number.
2115
+ If the pattern continues with a digit character, some delimiter must be used to
2116
+ terminate the back reference. If the PCRE_EXTENDED option is set, this can be
2117
+ white space. Otherwise, the \eg{ syntax or an empty comment (see
2118
+ .\" HTML <a href="#comments">
2119
+ .\" </a>
2120
+ "Comments"
2121
+ .\"
2122
+ below) can be used.
2123
+ .
2124
+ .SS "Recursive back references"
2125
+ .rs
2126
+ .sp
2127
+ A back reference that occurs inside the parentheses to which it refers fails
2128
+ when the subpattern is first used, so, for example, (a\e1) never matches.
2129
+ However, such references can be useful inside repeated subpatterns. For
2130
+ example, the pattern
2131
+ .sp
2132
+ (a|b\e1)+
2133
+ .sp
2134
+ matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
2135
+ the subpattern, the back reference matches the character string corresponding
2136
+ to the previous iteration. In order for this to work, the pattern must be such
2137
+ that the first iteration does not need to match the back reference. This can be
2138
+ done using alternation, as in the example above, or by a quantifier with a
2139
+ minimum of zero.
2140
+ .P
2141
+ Back references of this type cause the group that they reference to be treated
2142
+ as an
2143
+ .\" HTML <a href="#atomicgroup">
2144
+ .\" </a>
2145
+ atomic group.
2146
+ .\"
2147
+ Once the whole group has been matched, a subsequent matching failure cannot
2148
+ cause backtracking into the middle of the group.
2149
+ .
2150
+ .
2151
+ .\" HTML <a name="bigassertions"></a>
2152
+ .SH ASSERTIONS
2153
+ .rs
2154
+ .sp
2155
+ An assertion is a test on the characters following or preceding the current
2156
+ matching point that does not actually consume any characters. The simple
2157
+ assertions coded as \eb, \eB, \eA, \eG, \eZ, \ez, ^ and $ are described
2158
+ .\" HTML <a href="#smallassertions">
2159
+ .\" </a>
2160
+ above.
2161
+ .\"
2162
+ .P
2163
+ More complicated assertions are coded as subpatterns. There are two kinds:
2164
+ those that look ahead of the current position in the subject string, and those
2165
+ that look behind it. An assertion subpattern is matched in the normal way,
2166
+ except that it does not cause the current matching position to be changed.
2167
+ .P
2168
+ Assertion subpatterns are not capturing subpatterns. If such an assertion
2169
+ contains capturing subpatterns within it, these are counted for the purposes of
2170
+ numbering the capturing subpatterns in the whole pattern. However, substring
2171
+ capturing is carried out only for positive assertions. (Perl sometimes, but not
2172
+ always, does do capturing in negative assertions.)
2173
+ .P
2174
+ For compatibility with Perl, assertion subpatterns may be repeated; though
2175
+ it makes no sense to assert the same thing several times, the side effect of
2176
+ capturing parentheses may occasionally be useful. In practice, there are only three
2177
+ cases:
2178
+ .sp
2179
+ (1) If the quantifier is {0}, the assertion is never obeyed during matching.
2180
+ However, it may contain internal capturing parenthesized groups that are called
2181
+ from elsewhere via the
2182
+ .\" HTML <a href="#subpatternsassubroutines">
2183
+ .\" </a>
2184
+ subroutine mechanism.
2185
+ .\"
2186
+ .sp
2187
+ (2) If the quantifier is {0,n} where n is greater than zero, it is treated as if it
2188
+ were {0,1}. At run time, the rest of the pattern match is tried with and
2189
+ without the assertion, the order depending on the greediness of the quantifier.
2190
+ .sp
2191
+ (3) If the minimum repetition is greater than zero, the quantifier is ignored.
2192
+ The assertion is obeyed just once when encountered during matching.
2193
+ .
2194
+ .
2195
+ .SS "Lookahead assertions"
2196
+ .rs
2197
+ .sp
2198
+ Lookahead assertions start with (?= for positive assertions and (?! for
2199
+ negative assertions. For example,
2200
+ .sp
2201
+ \ew+(?=;)
2202
+ .sp
2203
+ matches a word followed by a semicolon, but does not include the semicolon in
2204
+ the match, and
2205
+ .sp
2206
+ foo(?!bar)
2207
+ .sp
2208
+ matches any occurrence of "foo" that is not followed by "bar". Note that the
2209
+ apparently similar pattern
2210
+ .sp
2211
+ (?!foo)bar
2212
+ .sp
2213
+ does not find an occurrence of "bar" that is preceded by something other than
2214
+ "foo"; it finds any occurrence of "bar" whatsoever, because the assertion
2215
+ (?!foo) is always true when the next three characters are "bar". A
2216
+ lookbehind assertion is needed to achieve the other effect.
2217
+ .P
2218
+ If you want to force a matching failure at some point in a pattern, the most
2219
+ convenient way to do it is with (?!) because an empty string always matches, so
2220
+ an assertion that requires there not to be an empty string must always fail.
2221
+ The backtracking control verb (*FAIL) or (*F) is a synonym for (?!).
2222
+ .
2223
+ .
2224
+ .\" HTML <a name="lookbehind"></a>
2225
+ .SS "Lookbehind assertions"
2226
+ .rs
2227
+ .sp
2228
+ Lookbehind assertions start with (?<= for positive assertions and (?<! for
2229
+ negative assertions. For example,
2230
+ .sp
2231
+ (?<!foo)bar
2232
+ .sp
2233
+ does find an occurrence of "bar" that is not preceded by "foo". The contents of
2234
+ a lookbehind assertion are restricted such that all the strings it matches must
2235
+ have a fixed length. However, if there are several top-level alternatives, they
2236
+ do not all have to have the same fixed length. Thus
2237
+ .sp
2238
+ (?<=bullock|donkey)
2239
+ .sp
2240
+ is permitted, but
2241
+ .sp
2242
+ (?<!dogs?|cats?)
2243
+ .sp
2244
+ causes an error at compile time. Branches that match different length strings
2245
+ are permitted only at the top level of a lookbehind assertion. This is an
2246
+ extension compared with Perl, which requires all branches to match the same
2247
+ length of string. An assertion such as
2248
+ .sp
2249
+ (?<=ab(c|de))
2250
+ .sp
2251
+ is not permitted, because its single top-level branch can match two different
2252
+ lengths, but it is acceptable to PCRE if rewritten to use two top-level
2253
+ branches:
2254
+ .sp
2255
+ (?<=abc|abde)
2256
+ .sp
2257
+ In some cases, the escape sequence \eK
2258
+ .\" HTML <a href="#resetmatchstart">
2259
+ .\" </a>
2260
+ (see above)
2261
+ .\"
2262
+ can be used instead of a lookbehind assertion to get round the fixed-length
2263
+ restriction.
2264
+ .P
2265
+ The implementation of lookbehind assertions is, for each alternative, to
2266
+ temporarily move the current position back by the fixed length and then try to
2267
+ match. If there are insufficient characters before the current position, the
2268
+ assertion fails.
2269
+ .P
2270
+ In a UTF mode, PCRE does not allow the \eC escape (which matches a single data
2271
+ unit even in a UTF mode) to appear in lookbehind assertions, because it makes
2272
+ it impossible to calculate the length of the lookbehind. The \eX and \eR
2273
+ escapes, which can match different numbers of data units, are also not
2274
+ permitted.
2275
+ .P
2276
+ .\" HTML <a href="#subpatternsassubroutines">
2277
+ .\" </a>
2278
+ "Subroutine"
2279
+ .\"
2280
+ calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
2281
+ as the subpattern matches a fixed-length string.
2282
+ .\" HTML <a href="#recursion">
2283
+ .\" </a>
2284
+ Recursion,
2285
+ .\"
2286
+ however, is not supported.
2287
+ .P
2288
+ Possessive quantifiers can be used in conjunction with lookbehind assertions to
2289
+ specify efficient matching of fixed-length strings at the end of subject
2290
+ strings. Consider a simple pattern such as
2291
+ .sp
2292
+ abcd$
2293
+ .sp
2294
+ when applied to a long string that does not match. Because matching proceeds
2295
+ from left to right, PCRE will look for each "a" in the subject and then see if
2296
+ what follows matches the rest of the pattern. If the pattern is specified as
2297
+ .sp
2298
+ ^.*abcd$
2299
+ .sp
2300
+ the initial .* matches the entire string at first, but when this fails (because
2301
+ there is no following "a"), it backtracks to match all but the last character,
2302
+ then all but the last two characters, and so on. Once again the search for "a"
2303
+ covers the entire string, from right to left, so we are no better off. However,
2304
+ if the pattern is written as
2305
+ .sp
2306
+ ^.*+(?<=abcd)
2307
+ .sp
2308
+ there can be no backtracking for the .*+ item; it can match only the entire
2309
+ string. The subsequent lookbehind assertion does a single test on the last four
2310
+ characters. If it fails, the match fails immediately. For long strings, this
2311
+ approach makes a significant difference to the processing time.
2312
+ .
2313
+ .
2314
+ .SS "Using multiple assertions"
2315
+ .rs
2316
+ .sp
2317
+ Several assertions (of any sort) may occur in succession. For example,
2318
+ .sp
2319
+ (?<=\ed{3})(?<!999)foo
2320
+ .sp
2321
+ matches "foo" preceded by three digits that are not "999". Notice that each of
2322
+ the assertions is applied independently at the same point in the subject
2323
+ string. First there is a check that the previous three characters are all
2324
+ digits, and then there is a check that the same three characters are not "999".
2325
+ This pattern does \fInot\fP match "foo" preceded by six characters, the first
2326
+ three of which are digits and the last three of which are not "999". For example, it
2327
+ doesn't match "123abcfoo". A pattern to do that is
2328
+ .sp
2329
+ (?<=\ed{3}...)(?<!999)foo
2330
+ .sp
2331
+ This time the first assertion looks at the preceding six characters, checking
2332
+ that the first three are digits, and then the second assertion checks that the
2333
+ preceding three characters are not "999".
2334
+ .P
2335
+ Assertions can be nested in any combination. For example,
2336
+ .sp
2337
+ (?<=(?<!foo)bar)baz
2338
+ .sp
2339
+ matches an occurrence of "baz" that is preceded by "bar" which in turn is not
2340
+ preceded by "foo", while
2341
+ .sp
2342
+ (?<=\ed{3}(?!999)...)foo
2343
+ .sp
2344
+ is another pattern that matches "foo" preceded by three digits and any three
2345
+ characters that are not "999".
2346
+ .
2347
+ .
2348
+ .\" HTML <a name="conditions"></a>
2349
+ .SH "CONDITIONAL SUBPATTERNS"
2350
+ .rs
2351
+ .sp
2352
+ It is possible to cause the matching process to obey a subpattern
2353
+ conditionally or to choose between two alternative subpatterns, depending on
2354
+ the result of an assertion, or whether a specific capturing subpattern has
2355
+ already been matched. The two possible forms of conditional subpattern are:
2356
+ .sp
2357
+ (?(condition)yes-pattern)
2358
+ (?(condition)yes-pattern|no-pattern)
2359
+ .sp
2360
+ If the condition is satisfied, the yes-pattern is used; otherwise the
2361
+ no-pattern (if present) is used. If there are more than two alternatives in the
2362
+ subpattern, a compile-time error occurs. Each of the two alternatives may
2363
+ itself contain nested subpatterns of any form, including conditional
2364
+ subpatterns; the restriction to two alternatives applies only at the level of
2365
+ the condition. This pattern fragment is an example where the alternatives are
2366
+ complex:
2367
+ .sp
2368
+ (?(1) (A|B|C) | (D | (?(2)E|F) | E) )
2369
+ .sp
2370
+ .P
2371
+ There are four kinds of condition: references to subpatterns, references to
2372
+ recursion, a pseudo-condition called DEFINE, and assertions.
2373
+ .
2374
+ .SS "Checking for a used subpattern by number"
2375
+ .rs
2376
+ .sp
2377
+ If the text between the parentheses consists of a sequence of digits, the
2378
+ condition is true if a capturing subpattern of that number has previously
2379
+ matched. If there is more than one capturing subpattern with the same number
2380
+ (see the earlier
2381
+ .\"
2382
+ .\" HTML <a href="#recursion">
2383
+ .\" </a>
2384
+ section about duplicate subpattern numbers),
2385
+ .\"
2386
+ the condition is true if any of them have matched. An alternative notation is
2387
+ to precede the digits with a plus or minus sign. In this case, the subpattern
2388
+ number is relative rather than absolute. The most recently opened parentheses
2389
+ can be referenced by (?(-1), the next most recent by (?(-2), and so on. Inside
2390
+ loops it can also make sense to refer to subsequent groups. The next
2391
+ parentheses to be opened can be referenced as (?(+1), and so on. (The value
2392
+ zero in any of these forms is not used; it provokes a compile-time error.)
2393
+ .P
2394
+ Consider the following pattern, which contains non-significant white space to
2395
+ make it more readable (assume the PCRE_EXTENDED option) and to divide it into
2396
+ three parts for ease of discussion:
2397
+ .sp
2398
+ ( \e( )? [^()]+ (?(1) \e) )
2399
+ .sp
2400
+ The first part matches an optional opening parenthesis, and if that
2401
+ character is present, sets it as the first captured substring. The second part
2402
+ matches one or more characters that are not parentheses. The third part is a
2403
+ conditional subpattern that tests whether or not the first set of parentheses
2404
+ matched. If they did, that is, if the subject started with an opening parenthesis,
2405
+ the condition is true, and so the yes-pattern is executed and a closing
2406
+ parenthesis is required. Otherwise, since no-pattern is not present, the
2407
+ subpattern matches nothing. In other words, this pattern matches a sequence of
2408
+ non-parentheses, optionally enclosed in parentheses.
2409
+ .P
2410
+ If you were embedding this pattern in a larger one, you could use a relative
2411
+ reference:
2412
+ .sp
2413
+ ...other stuff... ( \e( )? [^()]+ (?(-1) \e) ) ...
2414
+ .sp
2415
+ This makes the fragment independent of the parentheses in the larger pattern.
2416
+ .
2417
+ .SS "Checking for a used subpattern by name"
2418
+ .rs
2419
+ .sp
2420
+ Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
2421
+ subpattern by name. For compatibility with earlier versions of PCRE, which had
2422
+ this facility before Perl, the syntax (?(name)...) is also recognized.
2423
+ .P
2424
+ Rewriting the above example to use a named subpattern gives this:
2425
+ .sp
2426
+ (?<OPEN> \e( )? [^()]+ (?(<OPEN>) \e) )
2427
+ .sp
2428
+ If the name used in a condition of this kind is a duplicate, the test is
2429
+ applied to all subpatterns of the same name, and is true if any one of them has
2430
+ matched.
2431
+ .
2432
+ .SS "Checking for pattern recursion"
2433
+ .rs
2434
+ .sp
2435
+ If the condition is the string (R), and there is no subpattern with the name R,
2436
+ the condition is true if a recursive call to the whole pattern or any
2437
+ subpattern has been made. If digits or a name preceded by ampersand follow the
2438
+ letter R, for example:
2439
+ .sp
2440
+ (?(R3)...) or (?(R&name)...)
2441
+ .sp
2442
+ the condition is true if the most recent recursion is into a subpattern whose
2443
+ number or name is given. This condition does not check the entire recursion
2444
+ stack. If the name used in a condition of this kind is a duplicate, the test is
2445
+ applied to all subpatterns of the same name, and is true if any one of them is
2446
+ the most recent recursion.
2447
+ .P
2448
+ At "top level", all these recursion test conditions are false.
2449
+ .\" HTML <a href="#recursion">
2450
+ .\" </a>
2451
+ The syntax for recursive patterns
2452
+ .\"
2453
+ is described below.
2454
+ .
2455
+ .\" HTML <a name="subdefine"></a>
2456
+ .SS "Defining subpatterns for use by reference only"
2457
+ .rs
2458
+ .sp
2459
+ If the condition is the string (DEFINE), and there is no subpattern with the
2460
+ name DEFINE, the condition is always false. In this case, there may be only one
2461
+ alternative in the subpattern. It is always skipped if control reaches this
2462
+ point in the pattern; the idea of DEFINE is that it can be used to define
2463
+ subroutines that can be referenced from elsewhere. (The use of
2464
+ .\" HTML <a href="#subpatternsassubroutines">
2465
+ .\" </a>
2466
+ subroutines
2467
+ .\"
2468
+ is described below.) For example, a pattern to match an IPv4 address such as
2469
+ "192.168.23.245" could be written like this (ignore white space and line
2470
+ breaks):
2471
+ .sp
2472
+ (?(DEFINE) (?<byte> 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) )
2473
+ \eb (?&byte) (\e.(?&byte)){3} \eb
2474
+ .sp
2475
+ The first part of the pattern is a DEFINE group inside which another group
2476
+ named "byte" is defined. This matches an individual component of an IPv4
2477
+ address (a number less than 256). When matching takes place, this part of the
2478
+ pattern is skipped because DEFINE acts like a false condition. The rest of the
2479
+ pattern uses references to the named group to match the four dot-separated
2480
+ components of an IPv4 address, insisting on a word boundary at each end.
2481
+ .
2482
+ .SS "Assertion conditions"
2483
+ .rs
2484
+ .sp
2485
+ If the condition is not in any of the above formats, it must be an assertion.
2486
+ This may be a positive or negative lookahead or lookbehind assertion. Consider
2487
+ this pattern, again containing non-significant white space, and with the two
2488
+ alternatives on the second line:
2489
+ .sp
2490
+ (?(?=[^a-z]*[a-z])
2491
+ \ed{2}-[a-z]{3}-\ed{2} | \ed{2}-\ed{2}-\ed{2} )
2492
+ .sp
2493
+ The condition is a positive lookahead assertion that matches an optional
2494
+ sequence of non-letters followed by a letter. In other words, it tests for the
2495
+ presence of at least one letter in the subject. If a letter is found, the
2496
+ subject is matched against the first alternative; otherwise it is matched
2497
+ against the second. This pattern matches strings in one of the two forms
2498
+ dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
2499
+ .
2500
+ .
2501
+ .\" HTML <a name="comments"></a>
2502
+ .SH COMMENTS
2503
+ .rs
2504
+ .sp
2505
+ There are two ways of including comments in patterns that are processed by
2506
+ PCRE. In both cases, the start of the comment must not be in a character class,
2507
+ nor in the middle of any other sequence of related characters such as (?: or a
2508
+ subpattern name or number. The characters that make up a comment play no part
2509
+ in the pattern matching.
2510
+ .P
2511
+ The sequence (?# marks the start of a comment that continues up to the next
2512
+ closing parenthesis. Nested parentheses are not permitted. If the PCRE_EXTENDED
2513
+ option is set, an unescaped # character also introduces a comment, which in
2514
+ this case continues to immediately after the next newline character or
2515
+ character sequence in the pattern. Which characters are interpreted as newlines
2516
+ is controlled by the options passed to a compiling function or by a special
2517
+ sequence at the start of the pattern, as described in the section entitled
2518
+ .\" HTML <a href="#newlines">
2519
+ .\" </a>
2520
+ "Newline conventions"
2521
+ .\"
2522
+ above. Note that the end of this type of comment is a literal newline sequence
2523
+ in the pattern; escape sequences that happen to represent a newline do not
2524
+ count. For example, consider this pattern when PCRE_EXTENDED is set, and the
2525
+ default newline convention is in force:
2526
+ .sp
2527
+ abc #comment \en still comment
2528
+ .sp
2529
+ On encountering the # character, \fBpcre_compile()\fP skips along, looking for
2530
+ a newline in the pattern. The sequence \en is still literal at this stage, so
2531
+ it does not terminate the comment. Only an actual character with the code value
2532
+ 0x0a (the default newline) does so.
2533
+ .
2534
+ .
2535
+ .\" HTML <a name="recursion"></a>
2536
+ .SH "RECURSIVE PATTERNS"
2537
+ .rs
2538
+ .sp
2539
+ Consider the problem of matching a string in parentheses, allowing for
2540
+ unlimited nested parentheses. Without the use of recursion, the best that can
2541
+ be done is to use a pattern that matches up to some fixed depth of nesting. It
2542
+ is not possible to handle an arbitrary nesting depth.
2543
+ .P
2544
+ For some time, Perl has provided a facility that allows regular expressions to
2545
+ recurse (amongst other things). It does this by interpolating Perl code in the
2546
+ expression at run time, and the code can refer to the expression itself. A Perl
2547
+ pattern using code interpolation to solve the parentheses problem can be
2548
+ created like this:
2549
+ .sp
2550
+ $re = qr{\e( (?: (?>[^()]+) | (?p{$re}) )* \e)}x;
2551
+ .sp
2552
+ The (?p{...}) item interpolates Perl code at run time, and in this case refers
2553
+ recursively to the pattern in which it appears.
2554
+ .P
2555
+ Obviously, PCRE cannot support the interpolation of Perl code. Instead, it
2556
+ supports special syntax for recursion of the entire pattern, and also for
2557
+ individual subpattern recursion. After its introduction in PCRE and Python,
2558
+ this kind of recursion was subsequently introduced into Perl at release 5.10.
2559
+ .P
2560
+ A special item that consists of (? followed by a number greater than zero and a
2561
+ closing parenthesis is a recursive subroutine call of the subpattern of the
2562
+ given number, provided that it occurs inside that subpattern. (If not, it is a
2563
+ .\" HTML <a href="#subpatternsassubroutines">
2564
+ .\" </a>
2565
+ non-recursive subroutine
2566
+ .\"
2567
+ call, which is described in the next section.) The special item (?R) or (?0) is
2568
+ a recursive call of the entire regular expression.
2569
+ .P
2570
+ This PCRE pattern solves the nested parentheses problem (assume the
2571
+ PCRE_EXTENDED option is set so that white space is ignored):
2572
+ .sp
2573
+ \e( ( [^()]++ | (?R) )* \e)
2574
+ .sp
2575
+ First it matches an opening parenthesis. Then it matches any number of
2576
+ substrings which can either be a sequence of non-parentheses, or a recursive
2577
+ match of the pattern itself (that is, a correctly parenthesized substring).
2578
+ Finally there is a closing parenthesis. Note the use of a possessive quantifier
2579
+ to avoid backtracking into sequences of non-parentheses.
2580
+ .P
2581
+ If this were part of a larger pattern, you would not want to recurse the entire
2582
+ pattern, so instead you could use this:
2583
+ .sp
2584
+ ( \e( ( [^()]++ | (?1) )* \e) )
2585
+ .sp
2586
+ We have put the pattern into parentheses, and caused the recursion to refer to
2587
+ them instead of the whole pattern.
2588
+ .P
2589
+ In a larger pattern, keeping track of parenthesis numbers can be tricky. This
2590
+ is made easier by the use of relative references. Instead of (?1) in the
2591
+ pattern above you can write (?-2) to refer to the second most recently opened
2592
+ parentheses preceding the recursion. In other words, a negative number counts
2593
+ capturing parentheses leftwards from the point at which it is encountered.
2594
+ .P
2595
+ It is also possible to refer to subsequently opened parentheses, by writing
2596
+ references such as (?+2). However, these cannot be recursive because the
2597
+ reference is not inside the parentheses that are referenced. They are always
2598
+ .\" HTML <a href="#subpatternsassubroutines">
2599
+ .\" </a>
2600
+ non-recursive subroutine
2601
+ .\"
2602
+ calls, as described in the next section.
2603
+ .P
2604
+ An alternative approach is to use named parentheses instead. The Perl syntax
2605
+ for this is (?&name); PCRE's earlier syntax (?P>name) is also supported. We
2606
+ could rewrite the above example as follows:
2607
+ .sp
2608
+ (?<pn> \e( ( [^()]++ | (?&pn) )* \e) )
2609
+ .sp
2610
+ If there is more than one subpattern with the same name, the earliest one is
2611
+ used.
2612
+ .P
2613
+ This particular example pattern that we have been looking at contains nested
2614
+ unlimited repeats, and so the use of a possessive quantifier for matching
2615
+ strings of non-parentheses is important when applying the pattern to strings
2616
+ that do not match. For example, when this pattern is applied to
2617
+ .sp
2618
+ (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
2619
+ .sp
2620
+ it yields "no match" quickly. However, if a possessive quantifier is not used,
2621
+ the match runs for a very long time indeed because there are so many different
2622
+ ways the + and * repeats can carve up the subject, and all have to be tested
2623
+ before failure can be reported.
2624
+ .P
2625
+ At the end of a match, the values of capturing parentheses are those from
2626
+ the outermost level. If you want to obtain intermediate values, a callout
2627
+ function can be used (see below and the
2628
+ .\" HREF
2629
+ \fBpcrecallout\fP
2630
+ .\"
2631
+ documentation). If the pattern above is matched against
2632
+ .sp
2633
+ (ab(cd)ef)
2634
+ .sp
2635
+ the value for the inner capturing parentheses (numbered 2) is "ef", which is
2636
+ the last value taken on at the top level. If a capturing subpattern is not
2637
+ matched at the top level, its final captured value is unset, even if it was
2638
+ (temporarily) set at a deeper level during the matching process.
2639
+ .P
2640
+ If there are more than 15 capturing parentheses in a pattern, PCRE has to
2641
+ obtain extra memory to store data during a recursion, which it does by using
2642
+ \fBpcre_malloc\fP, freeing it via \fBpcre_free\fP afterwards. If no memory can
2643
+ be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.
2644
+ .P
2645
+ Do not confuse the (?R) item with the condition (R), which tests for recursion.
2646
+ Consider this pattern, which matches text in angle brackets, allowing for
2647
+ arbitrary nesting. Only digits are allowed in nested brackets (that is, when
2648
+ recursing), whereas any characters are permitted at the outer level.
2649
+ .sp
2650
+ < (?: (?(R) \ed++ | [^<>]*+) | (?R)) * >
2651
+ .sp
2652
+ In this pattern, (?(R) is the start of a conditional subpattern, with two
2653
+ different alternatives for the recursive and non-recursive cases. The (?R) item
2654
+ is the actual recursive call.
2655
+ .
2656
+ .
2657
+ .\" HTML <a name="recursiondifference"></a>
2658
+ .SS "Differences in recursion processing between PCRE and Perl"
2659
+ .rs
2660
+ .sp
2661
+ Recursion processing in PCRE differs from Perl in two important ways. In PCRE
2662
+ (like Python, but unlike Perl), a recursive subpattern call is always treated
2663
+ as an atomic group. That is, once it has matched some of the subject string, it
2664
+ is never re-entered, even if it contains untried alternatives and there is a
2665
+ subsequent matching failure. This can be illustrated by the following pattern,
2666
+ which purports to match a palindromic string that contains an odd number of
2667
+ characters (for example, "a", "aba", "abcba", "abcdcba"):
2668
+ .sp
2669
+ ^(.|(.)(?1)\e2)$
2670
+ .sp
2671
+ The idea is that it either matches a single character, or two identical
2672
+ characters surrounding a sub-palindrome. In Perl, this pattern works; in PCRE
2673
+ it does not if the subject is longer than three characters. Consider the
2674
+ subject string "abcba":
2675
+ .P
2676
+ At the top level, the first character is matched, but as it is not at the end
2677
+ of the string, the first alternative fails; the second alternative is taken
2678
+ and the recursion kicks in. The recursive call to subpattern 1 successfully
2679
+ matches the next character ("b"). (Note that the beginning and end of line
2680
+ tests are not part of the recursion).
2681
+ .P
2682
+ Back at the top level, the next character ("c") is compared with what
2683
+ subpattern 2 matched, which was "a". This fails. Because the recursion is
2684
+ treated as an atomic group, there are now no backtracking points, and so the
2685
+ entire match fails. (Perl is able, at this point, to re-enter the recursion and
2686
+ try the second alternative.) However, if the pattern is written with the
2687
+ alternatives in the other order, things are different:
2688
+ .sp
2689
+ ^((.)(?1)\e2|.)$
2690
+ .sp
2691
+ This time, the recursing alternative is tried first, and continues to recurse
2692
+ until it runs out of characters, at which point the recursion fails. But this
2693
+ time we do have another alternative to try at the higher level. That is the big
2694
+ difference: in the previous case the remaining alternative is at a deeper
2695
+ recursion level, which PCRE cannot use.
2696
+ .P
2697
+ To change the pattern so that it matches all palindromic strings, not just
2698
+ those with an odd number of characters, it is tempting to change the pattern to
2699
+ this:
2700
+ .sp
2701
+ ^((.)(?1)\e2|.?)$
2702
+ .sp
2703
+ Again, this works in Perl, but not in PCRE, and for the same reason. When a
2704
+ deeper recursion has matched a single character, it cannot be entered again in
2705
+ order to match an empty string. The solution is to separate the two cases, and
2706
+ write out the odd and even cases as alternatives at the higher level:
2707
+ .sp
2708
+ ^(?:((.)(?1)\e2|)|((.)(?3)\e4|.))
2709
+ .sp
2710
+ If you want to match typical palindromic phrases, the pattern has to ignore all
2711
+ non-word characters, which can be done like this:
2712
+ .sp
2713
+ ^\eW*+(?:((.)\eW*+(?1)\eW*+\e2|)|((.)\eW*+(?3)\eW*+\e4|\eW*+.\eW*+))\eW*+$
2714
+ .sp
2715
+ If run with the PCRE_CASELESS option, this pattern matches phrases such as "A
2716
+ man, a plan, a canal: Panama!" and it works well in both PCRE and Perl. Note
2717
+ the use of the possessive quantifier *+ to avoid backtracking into sequences of
2718
+ non-word characters. Without this, PCRE takes a great deal longer (ten times or
2719
+ more) to match typical phrases, and Perl takes so long that you think it has
2720
+ gone into a loop.
2721
+ .P
2722
+ \fBWARNING\fP: The palindrome-matching patterns above work only if the subject
2723
+ string does not start with a palindrome that is shorter than the entire string.
2724
+ For example, although "abcba" is correctly matched, if the subject is "ababa",
2725
+ PCRE finds the palindrome "aba" at the start, then fails at top level because
2726
+ the end of the string does not follow. Once again, it cannot jump back into the
2727
+ recursion to try other alternatives, so the entire match fails.
2728
+ .P
2729
+ The second way in which PCRE and Perl differ in their recursion processing is
2730
+ in the handling of captured values. In Perl, when a subpattern is called
2731
+ recursively or as a subroutine (see the next section), it has no access to any
2732
+ values that were captured outside the recursion, whereas in PCRE these values
2733
+ can be referenced. Consider this pattern:
2734
+ .sp
2735
+ ^(.)(\e1|a(?2))
2736
+ .sp
2737
+ In PCRE, this pattern matches "bab". The first capturing parentheses match "b",
2738
+ then in the second group, when the back reference \e1 fails to match "b", the
2739
+ second alternative matches "a" and then recurses. In the recursion, \e1 does
2740
+ now match "b" and so the whole match succeeds. In Perl, the pattern fails to
2741
+ match because inside the recursive call \e1 cannot access the externally set
2742
+ value.
2743
+ .
2744
+ .
2745
+ .\" HTML <a name="subpatternsassubroutines"></a>
2746
+ .SH "SUBPATTERNS AS SUBROUTINES"
2747
+ .rs
2748
+ .sp
2749
+ If the syntax for a recursive subpattern call (either by number or by
2750
+ name) is used outside the parentheses to which it refers, it operates like a
2751
+ subroutine in a programming language. The called subpattern may be defined
2752
+ before or after the reference. A numbered reference can be absolute or
2753
+ relative, as in these examples:
2754
+ .sp
2755
+ (...(absolute)...)...(?2)...
2756
+ (...(relative)...)...(?-1)...
2757
+ (...(?+1)...(relative)...
2758
+ .sp
2759
+ An earlier example pointed out that the pattern
2760
+ .sp
2761
+ (sens|respons)e and \e1ibility
2762
+ .sp
2763
+ matches "sense and sensibility" and "response and responsibility", but not
2764
+ "sense and responsibility". If instead the pattern
2765
+ .sp
2766
+ (sens|respons)e and (?1)ibility
2767
+ .sp
2768
+ is used, it does match "sense and responsibility" as well as the other two
2769
+ strings. Another example is given in the discussion of DEFINE above.
2770
+ .P
2771
+ All subroutine calls, whether recursive or not, are always treated as atomic
2772
+ groups. That is, once a subroutine has matched some of the subject string, it
2773
+ is never re-entered, even if it contains untried alternatives and there is a
2774
+ subsequent matching failure. Any capturing parentheses that are set during the
2775
+ subroutine call revert to their previous values afterwards.
2776
+ .P
2777
+ Processing options such as case-independence are fixed when a subpattern is
2778
+ defined, so if it is used as a subroutine, such options cannot be changed for
2779
+ different calls. For example, consider this pattern:
2780
+ .sp
2781
+ (abc)(?i:(?-1))
2782
+ .sp
2783
+ It matches "abcabc". It does not match "abcABC" because the change of
2784
+ processing option does not affect the called subpattern.
2785
+ .
2786
+ .
2787
+ .\" HTML <a name="onigurumasubroutines"></a>
2788
+ .SH "ONIGURUMA SUBROUTINE SYNTAX"
2789
+ .rs
2790
+ .sp
2791
+ For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or
2792
+ a number enclosed either in angle brackets or single quotes, is an alternative
2793
+ syntax for referencing a subpattern as a subroutine, possibly recursively. Here
2794
+ are two of the examples used above, rewritten using this syntax:
2795
+ .sp
2796
+ (?<pn> \e( ( (?>[^()]+) | \eg<pn> )* \e) )
2797
+ (sens|respons)e and \eg'1'ibility
2798
+ .sp
2799
+ PCRE supports an extension to Oniguruma: if a number is preceded by a
2800
+ plus or a minus sign it is taken as a relative reference. For example:
2801
+ .sp
2802
+ (abc)(?i:\eg<-1>)
2803
+ .sp
2804
+ Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
2805
+ synonymous. The former is a back reference; the latter is a subroutine call.
2806
+ .
2807
+ .
2808
+ .SH CALLOUTS
2809
+ .rs
2810
+ .sp
2811
+ Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
2812
+ code to be obeyed in the middle of matching a regular expression. This makes it
2813
+ possible, amongst other things, to extract different substrings that match the
2814
+ same pair of parentheses when there is a repetition.
2815
+ .P
2816
+ PCRE provides a similar feature, but of course it cannot obey arbitrary Perl
2817
+ code. The feature is called "callout". The caller of PCRE provides an external
2818
+ function by putting its entry point in the global variable \fIpcre_callout\fP
2819
+ (8-bit library) or \fIpcre[16|32]_callout\fP (16-bit or 32-bit library).
2820
+ By default, this variable contains NULL, which disables all calling out.
2821
+ .P
2822
+ Within a regular expression, (?C) indicates the points at which the external
2823
+ function is to be called. If you want to identify different callout points, you
2824
+ can put a number less than 256 after the letter C. The default value is zero.
2825
+ For example, this pattern has two callout points:
2826
+ .sp
2827
+ (?C1)abc(?C2)def
2828
+ .sp
2829
+ If the PCRE_AUTO_CALLOUT flag is passed to a compiling function, callouts are
2830
+ automatically installed before each item in the pattern. They are all numbered
2831
+ 255. If there is a conditional group in the pattern whose condition is an
2832
+ assertion, an additional callout is inserted just before the condition. An
2833
+ explicit callout may also be set at this position, as in this example:
2834
+ .sp
2835
+ (?(?C9)(?=a)abc|def)
2836
+ .sp
2837
+ Note that this applies only to assertion conditions, not to other types of
2838
+ condition.
2839
+ .P
2840
+ During matching, when PCRE reaches a callout point, the external function is
2841
+ called. It is provided with the number of the callout, the position in the
2842
+ pattern, and, optionally, one item of data originally supplied by the caller of
2843
+ the matching function. The callout function may cause matching to proceed, to
2844
+ backtrack, or to fail altogether.
2845
+ .P
2846
+ By default, PCRE implements a number of optimizations at compile time and
2847
+ matching time, and one side-effect is that sometimes callouts are skipped. If
2848
+ you need all possible callouts to happen, you need to set options that disable
2849
+ the relevant optimizations. More details, and a complete description of the
2850
+ interface to the callout function, are given in the
2851
+ .\" HREF
2852
+ \fBpcrecallout\fP
2853
+ .\"
2854
+ documentation.
2855
+ .
2856
+ .
2857
+ .\" HTML <a name="backtrackcontrol"></a>
2858
+ .SH "BACKTRACKING CONTROL"
2859
+ .rs
2860
+ .sp
2861
+ Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which
2862
+ are still described in the Perl documentation as "experimental and subject to
2863
+ change or removal in a future version of Perl". It goes on to say: "Their usage
2864
+ in production code should be noted to avoid problems during upgrades." The same
2865
+ remarks apply to the PCRE features described in this section.
2866
+ .P
2867
+ The new verbs make use of what was previously invalid syntax: an opening
2868
+ parenthesis followed by an asterisk. They are generally of the form
2869
+ (*VERB) or (*VERB:NAME). Some may take either form, possibly behaving
2870
+ differently depending on whether or not a name is present. A name is any
2871
+ sequence of characters that does not include a closing parenthesis. The maximum
2872
+ length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit
2873
+ libraries. If the name is empty, that is, if the closing parenthesis
2874
+ immediately follows the colon, the effect is as if the colon were not there.
2875
+ Any number of these verbs may occur in a pattern.
2876
+ .P
2877
+ Since these verbs are specifically related to backtracking, most of them can be
2878
+ used only when the pattern is to be matched using one of the traditional
2879
+ matching functions, because these use a backtracking algorithm. With the
2880
+ exception of (*FAIL), which behaves like a failing negative assertion, the
2881
+ backtracking control verbs cause an error if encountered by a DFA matching
2882
+ function.
2883
+ .P
2884
+ The behaviour of these verbs in
2885
+ .\" HTML <a href="#btrepeat">
2886
+ .\" </a>
2887
+ repeated groups,
2888
+ .\"
2889
+ .\" HTML <a href="#btassert">
2890
+ .\" </a>
2891
+ assertions,
2892
+ .\"
2893
+ and in
2894
+ .\" HTML <a href="#btsub">
2895
+ .\" </a>
2896
+ subpatterns called as subroutines
2897
+ .\"
2898
+ (whether or not recursively) is documented below.
2899
+ .
2900
+ .
2901
+ .\" HTML <a name="nooptimize"></a>
2902
+ .SS "Optimizations that affect backtracking verbs"
2903
+ .rs
2904
+ .sp
2905
+ PCRE contains some optimizations that are used to speed up matching by running
2906
+ some checks at the start of each match attempt. For example, it may know the
2907
+ minimum length of matching subject, or that a particular character must be
2908
+ present. When one of these optimizations bypasses the running of a match, any
2909
+ included backtracking verbs will not, of course, be processed. You can suppress
2910
+ the start-of-match optimizations by setting the PCRE_NO_START_OPTIMIZE option
2911
+ when calling \fBpcre_compile()\fP or \fBpcre_exec()\fP, or by starting the
2912
+ pattern with (*NO_START_OPT). There is more discussion of this option in the
2913
+ section entitled
2914
+ .\" HTML <a href="pcreapi.html#execoptions">
2915
+ .\" </a>
2916
+ "Option bits for \fBpcre_exec()\fP"
2917
+ .\"
2918
+ in the
2919
+ .\" HREF
2920
+ \fBpcreapi\fP
2921
+ .\"
2922
+ documentation.
2923
+ .P
2924
+ Experiments with Perl suggest that it too has similar optimizations, sometimes
2925
+ leading to anomalous results.
2926
+ .
2927
+ .
2928
+ .SS "Verbs that act immediately"
2929
+ .rs
2930
+ .sp
2931
+ The following verbs act as soon as they are encountered. They may not be
2932
+ followed by a name.
2933
+ .sp
2934
+ (*ACCEPT)
2935
+ .sp
2936
+ This verb causes the match to end successfully, skipping the remainder of the
2937
+ pattern. However, when it is inside a subpattern that is called as a
2938
+ subroutine, only that subpattern is ended successfully. Matching then continues
2939
+ at the outer level. If (*ACCEPT) is triggered in a positive assertion, the
2940
+ assertion succeeds; in a negative assertion, the assertion fails.
2941
+ .P
2942
+ If (*ACCEPT) is inside capturing parentheses, the data so far is captured. For
2943
+ example:
2944
+ .sp
2945
+ A((?:A|B(*ACCEPT)|C)D)
2946
+ .sp
2947
+ This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
2948
+ the outer parentheses.
2949
+ .sp
2950
+ (*FAIL) or (*F)
2951
+ .sp
2952
+ This verb causes a matching failure, forcing backtracking to occur. It is
2953
+ equivalent to (?!) but easier to read. The Perl documentation notes that it is
2954
+ probably useful only when combined with (?{}) or (??{}). Those are, of course,
2955
+ Perl features that are not present in PCRE. The nearest equivalent is the
2956
+ callout feature, as for example in this pattern:
2957
+ .sp
2958
+ a+(?C)(*FAIL)
2959
+ .sp
2960
+ A match with the string "aaaa" always fails, but the callout is taken before
2961
+ each backtrack happens (in this example, 10 times).
2962
+ .
2963
+ .
2964
+ .SS "Recording which path was taken"
2965
+ .rs
2966
+ .sp
2967
+ There is one verb whose main purpose is to track how a match was arrived at,
2968
+ though it also has a secondary use in conjunction with advancing the match
2969
+ starting point (see (*SKIP) below).
2970
+ .sp
2971
+ (*MARK:NAME) or (*:NAME)
2972
+ .sp
2973
+ A name is always required with this verb. There may be as many instances of
2974
+ (*MARK) as you like in a pattern, and their names do not have to be unique.
2975
+ .P
2976
+ When a match succeeds, the name of the last-encountered (*MARK:NAME),
2977
+ (*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to the
2978
+ caller as described in the section entitled
2979
+ .\" HTML <a href="pcreapi.html#extradata">
2980
+ .\" </a>
2981
+ "Extra data for \fBpcre_exec()\fP"
2982
+ .\"
2983
+ in the
2984
+ .\" HREF
2985
+ \fBpcreapi\fP
2986
+ .\"
2987
+ documentation. Here is an example of \fBpcretest\fP output, where the /K
2988
+ modifier requests the retrieval and outputting of (*MARK) data:
2989
+ .sp
2990
+ re> /X(*MARK:A)Y|X(*MARK:B)Z/K
2991
+ data> XY
2992
+ 0: XY
2993
+ MK: A
2994
+ XZ
2995
+ 0: XZ
2996
+ MK: B
2997
+ .sp
2998
+ The (*MARK) name is tagged with "MK:" in this output, and in this example it
2999
+ indicates which of the two alternatives matched. This is a more efficient way
3000
+ of obtaining this information than putting each alternative in its own
3001
+ capturing parentheses.
3002
+ .P
3003
+ If a verb with a name is encountered in a positive assertion that is true, the
3004
+ name is recorded and passed back if it is the last-encountered. This does not
3005
+ happen for negative assertions or failing positive assertions.
3006
+ .P
3007
+ After a partial match or a failed match, the last encountered name in the
3008
+ entire match process is returned. For example:
3009
+ .sp
3010
+ re> /X(*MARK:A)Y|X(*MARK:B)Z/K
3011
+ data> XP
3012
+ No match, mark = B
3013
+ .sp
3014
+ Note that in this unanchored example the mark is retained from the match
3015
+ attempt that started at the letter "X" in the subject. Subsequent match
3016
+ attempts starting at "P" and then with an empty string do not get as far as the
3017
+ (*MARK) item, but nevertheless do not reset it.
3018
+ .P
3019
+ If you are interested in (*MARK) values after failed matches, you should
3020
+ probably set the PCRE_NO_START_OPTIMIZE option
3021
+ .\" HTML <a href="#nooptimize">
3022
+ .\" </a>
3023
+ (see above)
3024
+ .\"
3025
+ to ensure that the match is always attempted.
3026
+ .
3027
+ .
3028
+ .SS "Verbs that act after backtracking"
3029
+ .rs
3030
+ .sp
3031
+ The following verbs do nothing when they are encountered. Matching continues
3032
+ with what follows, but if there is no subsequent match, causing a backtrack to
3033
+ the verb, a failure is forced. That is, backtracking cannot pass to the left of
3034
+ the verb. However, when one of these verbs appears inside an atomic group or an
3035
+ assertion that is true, its effect is confined to that group, because once the
3036
+ group has been matched, there is never any backtracking into it. In this
3037
+ situation, backtracking can "jump back" to the left of the entire atomic group
3038
+ or assertion. (Remember, as stated above, that this localization also
3039
+ applies in subroutine calls.)
3040
+ .P
3041
+ These verbs differ in exactly what kind of failure occurs when backtracking
3042
+ reaches them. The behaviour described below is what happens when the verb is
3043
+ not in a subroutine or an assertion. Subsequent sections cover these special
3044
+ cases.
3045
+ .sp
3046
+ (*COMMIT)
3047
+ .sp
3048
+ This verb, which may not be followed by a name, causes the whole match to fail
3049
+ outright if there is a later matching failure that causes backtracking to reach
3050
+ it. Even if the pattern is unanchored, no further attempts to find a match by
3051
+ advancing the starting point take place. If (*COMMIT) is the only backtracking
3052
+ verb that is encountered, once it has been passed \fBpcre_exec()\fP is
3053
+ committed to finding a match at the current starting point, or not at all. For
3054
+ example:
3055
+ .sp
3056
+ a+(*COMMIT)b
3057
+ .sp
3058
+ This matches "xxaab" but not "aacaab". It can be thought of as a kind of
3059
+ dynamic anchor, or "I've started, so I must finish." The name of the most
3060
+ recently passed (*MARK) in the path is passed back when (*COMMIT) forces a
3061
+ match failure.
3062
+ .P
3063
+ If there is more than one backtracking verb in a pattern, a different one that
3064
+ follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a
3065
+ match does not always guarantee that a match must be at this starting point.
3066
+ .P
3067
+ Note that (*COMMIT) at the start of a pattern is not the same as an anchor,
3068
+ unless PCRE's start-of-match optimizations are turned off, as shown in this
3069
+ output from \fBpcretest\fP:
3070
+ .sp
3071
+ re> /(*COMMIT)abc/
3072
+ data> xyzabc
3073
+ 0: abc
3074
+ data> xyzabc\eY
3075
+ No match
3076
+ .sp
3077
+ For this pattern, PCRE knows that any match must start with "a", so the
3078
+ optimization skips along the subject to "a" before applying the pattern to the
3079
+ first set of data. The match attempt then succeeds. In the second set of data,
3080
+ the escape sequence \eY is interpreted by the \fBpcretest\fP program. It causes
3081
+ the PCRE_NO_START_OPTIMIZE option to be set when \fBpcre_exec()\fP is called.
3082
+ This disables the optimization that skips along to the first character. The
3083
+ pattern is now applied starting at "x", and so the (*COMMIT) causes the match
3084
+ to fail without trying any other starting points.
3085
+ .sp
3086
+ (*PRUNE) or (*PRUNE:NAME)
3087
+ .sp
3088
+ This verb causes the match to fail at the current starting position in the
3089
+ subject if there is a later matching failure that causes backtracking to reach
3090
+ it. If the pattern is unanchored, the normal "bumpalong" advance to the next
3091
+ starting character then happens. Backtracking can occur as usual to the left of
3092
+ (*PRUNE), before it is reached, or when matching to the right of (*PRUNE), but
3093
+ if there is no match to the right, backtracking cannot cross (*PRUNE). In
3094
+ simple cases, the use of (*PRUNE) is just an alternative to an atomic group or
3095
+ possessive quantifier, but there are some uses of (*PRUNE) that cannot be
3096
+ expressed in any other way. In an anchored pattern (*PRUNE) has the same effect
3097
+ as (*COMMIT).
3098
+ .P
3099
+ The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
3100
+ It is like (*MARK:NAME) in that the name is remembered for passing back to the
3101
+ caller. However, (*SKIP:NAME) searches only for names set with (*MARK).
3102
+ .sp
3103
+ (*SKIP)
3104
+ .sp
3105
+ This verb, when given without a name, is like (*PRUNE), except that if the
3106
+ pattern is unanchored, the "bumpalong" advance is not to the next character,
3107
+ but to the position in the subject where (*SKIP) was encountered. (*SKIP)
3108
+ signifies that whatever text was matched leading up to it cannot be part of a
3109
+ successful match. Consider:
3110
+ .sp
3111
+ a+(*SKIP)b
3112
+ .sp
3113
+ If the subject is "aaaac...", after the first match attempt fails (starting at
3114
+ the first character in the string), the starting point skips on to start the
3115
+ next attempt at "c". Note that a possessive quantifier does not have the same
3116
+ effect as this example; although it would suppress backtracking during the
3117
+ first match attempt, the second attempt would start at the second character
3118
+ instead of skipping on to "c".
3119
+ .sp
3120
+ (*SKIP:NAME)
3121
+ .sp
3122
+ When (*SKIP) has an associated name, its behaviour is modified. When it is
3123
+ triggered, the previous path through the pattern is searched for the most
3124
+ recent (*MARK) that has the same name. If one is found, the "bumpalong" advance
3125
+ is to the subject position that corresponds to that (*MARK) instead of to where
3126
+ (*SKIP) was encountered. If no (*MARK) with a matching name is found, the
3127
+ (*SKIP) is ignored.
3128
+ .P
3129
+ Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores
3130
+ names that are set by (*PRUNE:NAME) or (*THEN:NAME).
3131
+ .sp
3132
+ (*THEN) or (*THEN:NAME)
3133
+ .sp
3134
+ This verb causes a skip to the next innermost alternative when backtracking
3135
+ reaches it. That is, it cancels any further backtracking within the current
3136
+ alternative. Its name comes from the observation that it can be used for a
3137
+ pattern-based if-then-else block:
3138
+ .sp
3139
+ ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
3140
+ .sp
3141
+ If the COND1 pattern matches, FOO is tried (and possibly further items after
3142
+ the end of the group if FOO succeeds); on failure, the matcher skips to the
3143
+ second alternative and tries COND2, without backtracking into COND1. If that
3144
+ succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no
3145
+ more alternatives, so there is a backtrack to whatever came before the entire
3146
+ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
3147
+ .P
3148
+ The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
3149
+ It is like (*MARK:NAME) in that the name is remembered for passing back to the
3150
+ caller. However, (*SKIP:NAME) searches only for names set with (*MARK).
3151
+ .P
3152
+ A subpattern that does not contain a | character is just a part of the
3153
+ enclosing alternative; it is not a nested alternation with only one
3154
+ alternative. The effect of (*THEN) extends beyond such a subpattern to the
3155
+ enclosing alternative. Consider this pattern, where A, B, etc. are complex
3156
+ pattern fragments that do not contain any | characters at this level:
3157
+ .sp
3158
+ A (B(*THEN)C) | D
3159
+ .sp
3160
+ If A and B are matched, but there is a failure in C, matching does not
3161
+ backtrack into A; instead it moves to the next alternative, that is, D.
3162
+ However, if the subpattern containing (*THEN) is given an alternative, it
3163
+ behaves differently:
3164
+ .sp
3165
+ A (B(*THEN)C | (*FAIL)) | D
3166
+ .sp
3167
+ The effect of (*THEN) is now confined to the inner subpattern. After a failure
3168
+ in C, matching moves to (*FAIL), which causes the whole subpattern to fail
3169
+ because there are no more alternatives to try. In this case, matching does now
3170
+ backtrack into A.
3171
+ .P
3172
+ Note that a conditional subpattern is not considered as having two
3173
+ alternatives, because only one is ever used. In other words, the | character in
3174
+ a conditional subpattern has a different meaning. Ignoring white space,
3175
+ consider:
3176
+ .sp
3177
+ ^.*? (?(?=a) a | b(*THEN)c )
3178
+ .sp
3179
+ If the subject is "ba", this pattern does not match. Because .*? is ungreedy,
3180
+ it initially matches zero characters. The condition (?=a) then fails, the
3181
+ character "b" is matched, but "c" is not. At this point, matching does not
3182
+ backtrack to .*? as might perhaps be expected from the presence of the |
3183
+ character. The conditional subpattern is part of the single alternative that
3184
+ comprises the whole pattern, and so the match fails. (If there was a backtrack
3185
+ into .*?, allowing it to match "b", the match would succeed.)
3186
+ .P
3187
+ The verbs just described provide four different "strengths" of control when
3188
+ subsequent matching fails. (*THEN) is the weakest, carrying on the match at the
3189
+ next alternative. (*PRUNE) comes next, failing the match at the current
3190
+ starting position, but allowing an advance to the next character (for an
3191
+ unanchored pattern). (*SKIP) is similar, except that the advance may be more
3192
+ than one character. (*COMMIT) is the strongest, causing the entire match to
3193
+ fail.
3194
+ .
3195
+ .
3196
+ .SS "More than one backtracking verb"
3197
+ .rs
3198
+ .sp
3199
+ If more than one backtracking verb is present in a pattern, the one that is
3200
+ backtracked onto first acts. For example, consider this pattern, where A, B,
3201
+ etc. are complex pattern fragments:
3202
+ .sp
3203
+ (A(*COMMIT)B(*THEN)C|ABD)
3204
+ .sp
3205
+ If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to
3206
+ fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes
3207
+ the next alternative (ABD) to be tried. This behaviour is consistent, but is
3208
+ not always the same as Perl's. It means that if two or more backtracking verbs
3209
+ appear in succession, all but the last of them have no effect. Consider this
3210
+ example:
3211
+ .sp
3212
+ ...(*COMMIT)(*PRUNE)...
3213
+ .sp
3214
+ If there is a matching failure to the right, backtracking onto (*PRUNE) causes
3215
+ it to be triggered, and its action is taken. There can never be a backtrack
3216
+ onto (*COMMIT).
3217
+ .
3218
+ .
3219
+ .\" HTML <a name="btrepeat"></a>
3220
+ .SS "Backtracking verbs in repeated groups"
3221
+ .rs
3222
+ .sp
3223
+ PCRE differs from Perl in its handling of backtracking verbs in repeated
3224
+ groups. For example, consider:
3225
+ .sp
3226
+ /(a(*COMMIT)b)+ac/
3227
+ .sp
3228
+ If the subject is "abac", Perl matches, but PCRE fails because the (*COMMIT) in
3229
+ the second repeat of the group acts.
3230
+ .
3231
+ .
3232
+ .\" HTML <a name="btassert"></a>
3233
+ .SS "Backtracking verbs in assertions"
3234
+ .rs
3235
+ .sp
3236
+ (*FAIL) in an assertion has its normal effect: it forces an immediate backtrack.
3237
+ .P
3238
+ (*ACCEPT) in a positive assertion causes the assertion to succeed without any
3239
+ further processing. In a negative assertion, (*ACCEPT) causes the assertion to
3240
+ fail without any further processing.
3241
+ .P
3242
+ The other backtracking verbs are not treated specially if they appear in a
3243
+ positive assertion. In particular, (*THEN) skips to the next alternative in the
3244
+ innermost enclosing group that has alternations, whether or not this is within
3245
+ the assertion.
3246
+ .P
3247
+ Negative assertions are, however, different, in order to ensure that changing a
3248
+ positive assertion into a negative assertion changes its result. Backtracking
3249
+ into (*COMMIT), (*SKIP), or (*PRUNE) causes a negative assertion to be true,
3250
+ without considering any further alternative branches in the assertion.
3251
+ Backtracking into (*THEN) causes it to skip to the next enclosing alternative
3252
+ within the assertion (the normal behaviour), but if the assertion does not have
3253
+ such an alternative, (*THEN) behaves like (*PRUNE).
3254
+ .
3255
+ .
3256
+ .\" HTML <a name="btsub"></a>
3257
+ .SS "Backtracking verbs in subroutines"
3258
+ .rs
3259
+ .sp
3260
+ These behaviours occur whether or not the subpattern is called recursively.
3261
+ Perl's treatment of subroutines is different in some cases.
3262
+ .P
3263
+ (*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
3264
+ an immediate backtrack.
3265
+ .P
3266
+ (*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to
3267
+ succeed without any further processing. Matching then continues after the
3268
+ subroutine call.
3269
+ .P
3270
+ (*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine cause
3271
+ the subroutine match to fail.
3272
+ .P
3273
+ (*THEN) skips to the next alternative in the innermost enclosing group within
3274
+ the subpattern that has alternatives. If there is no such group within the
3275
+ subpattern, (*THEN) causes the subroutine match to fail.
3276
+ .
3277
+ .
3278
+ .SH "SEE ALSO"
3279
+ .rs
3280
+ .sp
3281
+ \fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrematching\fP(3),
3282
+ \fBpcresyntax\fP(3), \fBpcre\fP(3), \fBpcre16\fP(3), \fBpcre32\fP(3).
3283
+ .
3284
+ .
3285
+ .SH AUTHOR
3286
+ .rs
3287
+ .sp
3288
+ .nf
3289
+ Philip Hazel
3290
+ University Computing Service
3291
+ Cambridge CB2 3QH, England.
3292
+ .fi
3293
+ .
3294
+ .
3295
+ .SH REVISION
3296
+ .rs
3297
+ .sp
3298
+ .nf
3299
+ Last updated: 14 June 2015
3300
+ Copyright (c) 1997-2015 University of Cambridge.
3301
+ .fi