rroonga 6.0.7-x64-mingw32 → 6.0.9-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (797) hide show
  1. checksums.yaml +4 -4
  2. data/doc/text/cross-compile.md +24 -23
  3. data/doc/text/news.md +10 -0
  4. data/ext/groonga/rb-grn-database.c +33 -0
  5. data/ext/groonga/rb-grn-id.c +19 -0
  6. data/ext/groonga/rb-grn-table.c +3 -1
  7. data/ext/groonga/rb-grn.h +1 -1
  8. data/lib/2.1/groonga.so +0 -0
  9. data/lib/2.2/groonga.so +0 -0
  10. data/lib/2.3/groonga.so +0 -0
  11. data/rroonga-build.rb +3 -3
  12. data/test/test-database.rb +21 -1
  13. data/test/test-id.rb +16 -0
  14. data/vendor/local/bin/grndb.exe +0 -0
  15. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  16. data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
  17. data/vendor/local/bin/groonga.exe +0 -0
  18. data/vendor/local/bin/libgroonga-0.dll +0 -0
  19. data/vendor/local/bin/libmecab-2.dll +0 -0
  20. data/vendor/local/bin/libmsgpackc.dll +0 -0
  21. data/vendor/local/bin/libonig-5.dll +0 -0
  22. data/vendor/local/bin/libpcre-1.dll +0 -0
  23. data/vendor/local/bin/libpcrecpp-0.dll +0 -0
  24. data/vendor/local/bin/libpcreposix-0.dll +0 -0
  25. data/vendor/local/bin/lz4.exe +0 -0
  26. data/vendor/local/bin/lz4c.exe +0 -0
  27. data/vendor/local/bin/lz4cat +0 -0
  28. data/vendor/local/bin/mecab.exe +0 -0
  29. data/vendor/local/bin/pcre-config +133 -0
  30. data/vendor/local/bin/pcregrep.exe +0 -0
  31. data/vendor/local/bin/pcretest.exe +0 -0
  32. data/vendor/local/bin/zlib1.dll +0 -0
  33. data/vendor/local/include/groonga/groonga/db.h +22 -0
  34. data/vendor/local/include/groonga/groonga/groonga.h +21 -1
  35. data/vendor/local/include/groonga/groonga/id.h +1 -0
  36. data/vendor/local/include/pcre.h +677 -0
  37. data/vendor/local/include/pcre_scanner.h +172 -0
  38. data/vendor/local/include/pcre_stringpiece.h +180 -0
  39. data/vendor/local/include/pcrecpp.h +710 -0
  40. data/vendor/local/include/pcrecpparg.h +174 -0
  41. data/vendor/local/include/pcreposix.h +146 -0
  42. data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
  43. data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
  44. data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
  45. data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
  46. data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
  47. data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
  48. data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
  49. data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
  50. data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
  51. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  52. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  53. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  54. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  55. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  56. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  57. data/vendor/local/lib/groonga/plugins/sharding/logical_table_remove.rb +253 -23
  58. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  59. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  60. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  61. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  62. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  63. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  64. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  65. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  66. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  67. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  68. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  69. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  70. data/vendor/local/lib/groonga/scripts/ruby/context.rb +19 -0
  71. data/vendor/local/lib/groonga/scripts/ruby/context/rc.rb +12 -4
  72. data/vendor/local/lib/groonga/scripts/ruby/database.rb +36 -18
  73. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +13 -10
  74. data/vendor/local/lib/libgroonga.a +0 -0
  75. data/vendor/local/lib/libgroonga.dll.a +0 -0
  76. data/vendor/local/lib/liblz4.a +0 -0
  77. data/vendor/local/lib/liblz4.dll +0 -0
  78. data/vendor/local/lib/liblz4.dll.1 +0 -0
  79. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  80. data/vendor/local/lib/libmecab.a +0 -0
  81. data/vendor/local/lib/libmecab.dll.a +0 -0
  82. data/vendor/local/lib/libmsgpackc.a +0 -0
  83. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  84. data/vendor/local/lib/libonig.a +0 -0
  85. data/vendor/local/lib/libonig.dll.a +0 -0
  86. data/vendor/local/lib/libpcre.a +0 -0
  87. data/vendor/local/lib/libpcre.dll.a +0 -0
  88. data/vendor/local/lib/libpcre.la +41 -0
  89. data/vendor/local/lib/libpcrecpp.a +0 -0
  90. data/vendor/local/lib/libpcrecpp.dll.a +0 -0
  91. data/vendor/local/lib/libpcrecpp.la +41 -0
  92. data/vendor/local/lib/libpcreposix.a +0 -0
  93. data/vendor/local/lib/libpcreposix.dll.a +0 -0
  94. data/vendor/local/lib/libpcreposix.la +41 -0
  95. data/vendor/local/lib/libz.a +0 -0
  96. data/vendor/local/lib/libz.dll.a +0 -0
  97. data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
  98. data/vendor/local/lib/pkgconfig/libpcre.pc +13 -0
  99. data/vendor/local/lib/pkgconfig/libpcrecpp.pc +12 -0
  100. data/vendor/local/lib/pkgconfig/libpcreposix.pc +13 -0
  101. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  102. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  103. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  104. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  105. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  106. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  107. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  108. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  109. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
  110. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +3 -3
  111. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  112. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
  113. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  114. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
  115. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  116. data/vendor/local/share/doc/groonga/en/html/_sources/limitations.txt +24 -5
  117. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +156 -4
  118. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_acquire.txt +1 -1
  119. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_release.txt +1 -1
  120. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +86 -0
  121. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_list.txt +23 -11
  122. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_copy.txt +64 -0
  123. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +88 -45
  124. data/vendor/local/share/doc/groonga/en/html/characteristic.html +5 -5
  125. data/vendor/local/share/doc/groonga/en/html/client.html +5 -5
  126. data/vendor/local/share/doc/groonga/en/html/community.html +5 -5
  127. data/vendor/local/share/doc/groonga/en/html/contribution.html +5 -5
  128. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +5 -5
  129. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +5 -5
  130. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +5 -5
  131. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +5 -5
  132. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +5 -5
  133. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +5 -5
  134. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +5 -5
  135. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +5 -5
  136. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +5 -5
  137. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +5 -5
  138. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +5 -5
  139. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +5 -5
  140. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +5 -5
  141. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +5 -5
  142. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +5 -5
  143. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +5 -5
  144. data/vendor/local/share/doc/groonga/en/html/development.html +5 -5
  145. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +5 -5
  146. data/vendor/local/share/doc/groonga/en/html/genindex.html +5 -5
  147. data/vendor/local/share/doc/groonga/en/html/index.html +15 -14
  148. data/vendor/local/share/doc/groonga/en/html/install.html +5 -5
  149. data/vendor/local/share/doc/groonga/en/html/install/centos.html +8 -8
  150. data/vendor/local/share/doc/groonga/en/html/install/debian.html +8 -8
  151. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +8 -8
  152. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +8 -8
  153. data/vendor/local/share/doc/groonga/en/html/install/others.html +8 -8
  154. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +8 -8
  155. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +8 -8
  156. data/vendor/local/share/doc/groonga/en/html/install/windows.html +14 -14
  157. data/vendor/local/share/doc/groonga/en/html/limitations.html +28 -9
  158. data/vendor/local/share/doc/groonga/en/html/news.html +196 -61
  159. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +5 -5
  160. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +5 -5
  161. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +5 -5
  162. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +5 -5
  163. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +5 -5
  164. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +5 -5
  165. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +5 -5
  166. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +5 -5
  167. data/vendor/local/share/doc/groonga/en/html/news/5.x.html +5 -5
  168. data/vendor/local/share/doc/groonga/en/html/news/senna.html +5 -5
  169. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  170. data/vendor/local/share/doc/groonga/en/html/reference.html +15 -14
  171. data/vendor/local/share/doc/groonga/en/html/reference/alias.html +5 -5
  172. data/vendor/local/share/doc/groonga/en/html/reference/api.html +5 -5
  173. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +5 -5
  174. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +5 -5
  175. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +5 -5
  176. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +5 -5
  177. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +5 -5
  178. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +5 -5
  179. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +5 -5
  180. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +5 -5
  181. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +5 -5
  182. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +5 -5
  183. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +5 -5
  184. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +5 -5
  185. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +5 -5
  186. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +5 -5
  187. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +5 -5
  188. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +5 -5
  189. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +5 -5
  190. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +5 -5
  191. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +5 -5
  192. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +5 -5
  193. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +5 -5
  194. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +5 -5
  195. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +5 -5
  196. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +5 -5
  197. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +5 -5
  198. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +5 -5
  199. data/vendor/local/share/doc/groonga/en/html/reference/column.html +5 -5
  200. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +5 -5
  201. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +5 -5
  202. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +5 -5
  203. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +5 -5
  204. data/vendor/local/share/doc/groonga/en/html/reference/command.html +15 -14
  205. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +5 -5
  206. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +5 -5
  207. data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +5 -5
  208. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +5 -5
  209. data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +5 -5
  210. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +5 -5
  211. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +5 -5
  212. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +5 -5
  213. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +5 -5
  214. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +5 -5
  215. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +5 -5
  216. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +5 -5
  217. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +5 -5
  218. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +5 -5
  219. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +5 -5
  220. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +5 -5
  221. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +5 -5
  222. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +5 -5
  223. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +5 -5
  224. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +5 -5
  225. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +5 -5
  226. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +5 -5
  227. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +5 -5
  228. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +5 -5
  229. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +6 -6
  230. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +5 -5
  231. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +6 -6
  232. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +5 -5
  233. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +5 -5
  234. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +5 -5
  235. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +5 -5
  236. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +5 -5
  237. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +5 -5
  238. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +5 -5
  239. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +5 -5
  240. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +98 -8
  241. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +5 -5
  242. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +5 -5
  243. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +5 -5
  244. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +5 -5
  245. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +32 -18
  246. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +5 -5
  247. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +5 -5
  248. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +5 -5
  249. data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +5 -5
  250. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +5 -5
  251. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +5 -5
  252. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +5 -5
  253. data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +5 -5
  254. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +5 -5
  255. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +5 -5
  256. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +5 -5
  257. data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +5 -5
  258. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +5 -5
  259. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +5 -5
  260. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +5 -5
  261. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +10 -10
  262. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +200 -0
  263. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +52 -52
  264. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +25 -25
  265. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +41 -41
  266. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +31 -31
  267. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +41 -41
  268. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +31 -31
  269. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +43 -43
  270. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +25 -25
  271. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +25 -25
  272. data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +5 -5
  273. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +5 -5
  274. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +5 -5
  275. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +5 -5
  276. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +5 -5
  277. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +5 -5
  278. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +5 -5
  279. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +5 -5
  280. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +5 -5
  281. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +5 -5
  282. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +5 -5
  283. data/vendor/local/share/doc/groonga/en/html/reference/function.html +5 -5
  284. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +5 -5
  285. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +5 -5
  286. data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +5 -5
  287. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +5 -5
  288. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +5 -5
  289. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +5 -5
  290. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +5 -5
  291. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +5 -5
  292. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +5 -5
  293. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +5 -5
  294. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +5 -5
  295. data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +5 -5
  296. data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +5 -5
  297. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +5 -5
  298. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +5 -5
  299. data/vendor/local/share/doc/groonga/en/html/reference/functions/record_number.html +5 -5
  300. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +5 -5
  301. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +5 -5
  302. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +5 -5
  303. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +5 -5
  304. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +5 -5
  305. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +5 -5
  306. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +5 -5
  307. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +5 -5
  308. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +5 -5
  309. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +5 -5
  310. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +5 -5
  311. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +5 -5
  312. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +5 -5
  313. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +5 -5
  314. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +5 -5
  315. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +5 -5
  316. data/vendor/local/share/doc/groonga/en/html/reference/log.html +5 -5
  317. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +5 -5
  318. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +5 -5
  319. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +5 -5
  320. data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +5 -5
  321. data/vendor/local/share/doc/groonga/en/html/reference/output.html +5 -5
  322. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +5 -5
  323. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +5 -5
  324. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +5 -5
  325. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +5 -5
  326. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +5 -5
  327. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +5 -5
  328. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +5 -5
  329. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +5 -5
  330. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +5 -5
  331. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +5 -5
  332. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +5 -5
  333. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +5 -5
  334. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +41 -34
  335. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +5 -5
  336. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +5 -5
  337. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +5 -5
  338. data/vendor/local/share/doc/groonga/en/html/reference/types.html +9 -9
  339. data/vendor/local/share/doc/groonga/en/html/search.html +5 -5
  340. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  341. data/vendor/local/share/doc/groonga/en/html/server.html +5 -5
  342. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +5 -5
  343. data/vendor/local/share/doc/groonga/en/html/server/http.html +5 -5
  344. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +5 -5
  345. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +5 -5
  346. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +5 -5
  347. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +5 -5
  348. data/vendor/local/share/doc/groonga/en/html/server/package.html +5 -5
  349. data/vendor/local/share/doc/groonga/en/html/spec.html +5 -5
  350. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +5 -5
  351. data/vendor/local/share/doc/groonga/en/html/spec/search.html +5 -5
  352. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +5 -5
  353. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +5 -5
  354. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +5 -5
  355. data/vendor/local/share/doc/groonga/en/html/tutorial.html +5 -5
  356. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +5 -5
  357. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +5 -5
  358. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +5 -5
  359. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +5 -5
  360. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +5 -5
  361. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +5 -5
  362. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +5 -5
  363. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +5 -5
  364. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +5 -5
  365. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +5 -5
  366. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +5 -5
  367. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  368. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  369. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
  370. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +3 -3
  371. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  372. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
  373. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  374. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
  375. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  376. data/vendor/local/share/doc/groonga/ja/html/_sources/limitations.txt +24 -5
  377. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +156 -4
  378. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_acquire.txt +1 -1
  379. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_release.txt +1 -1
  380. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +86 -0
  381. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_list.txt +23 -11
  382. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_copy.txt +64 -0
  383. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +88 -45
  384. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +5 -5
  385. data/vendor/local/share/doc/groonga/ja/html/client.html +5 -5
  386. data/vendor/local/share/doc/groonga/ja/html/community.html +5 -5
  387. data/vendor/local/share/doc/groonga/ja/html/contribution.html +5 -5
  388. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +5 -5
  389. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +5 -5
  390. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +5 -5
  391. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +5 -5
  392. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +5 -5
  393. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +5 -5
  394. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +5 -5
  395. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +5 -5
  396. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +5 -5
  397. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +5 -5
  398. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +5 -5
  399. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +5 -5
  400. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +5 -5
  401. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +5 -5
  402. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +5 -5
  403. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +5 -5
  404. data/vendor/local/share/doc/groonga/ja/html/development.html +5 -5
  405. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +5 -5
  406. data/vendor/local/share/doc/groonga/ja/html/genindex.html +5 -5
  407. data/vendor/local/share/doc/groonga/ja/html/index.html +15 -14
  408. data/vendor/local/share/doc/groonga/ja/html/install.html +5 -5
  409. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +8 -8
  410. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +8 -8
  411. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +8 -8
  412. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +8 -8
  413. data/vendor/local/share/doc/groonga/ja/html/install/others.html +8 -8
  414. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +8 -8
  415. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +8 -8
  416. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +14 -14
  417. data/vendor/local/share/doc/groonga/ja/html/limitations.html +21 -8
  418. data/vendor/local/share/doc/groonga/ja/html/news.html +185 -61
  419. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +5 -5
  420. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +5 -5
  421. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +5 -5
  422. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +5 -5
  423. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +5 -5
  424. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +5 -5
  425. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +5 -5
  426. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +5 -5
  427. data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +5 -5
  428. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +5 -5
  429. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  430. data/vendor/local/share/doc/groonga/ja/html/reference.html +15 -14
  431. data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +5 -5
  432. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +5 -5
  433. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +5 -5
  434. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +5 -5
  435. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +5 -5
  436. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +5 -5
  437. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +5 -5
  438. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +5 -5
  439. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +5 -5
  440. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +5 -5
  441. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +5 -5
  442. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +5 -5
  443. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +5 -5
  444. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +5 -5
  445. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +5 -5
  446. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +5 -5
  447. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +5 -5
  448. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +5 -5
  449. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +5 -5
  450. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +5 -5
  451. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +5 -5
  452. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +5 -5
  453. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +5 -5
  454. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +5 -5
  455. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +5 -5
  456. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +5 -5
  457. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +5 -5
  458. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +5 -5
  459. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +5 -5
  460. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +5 -5
  461. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +5 -5
  462. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +5 -5
  463. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +5 -5
  464. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +15 -14
  465. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +5 -5
  466. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +5 -5
  467. data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +5 -5
  468. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +5 -5
  469. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +5 -5
  470. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +5 -5
  471. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +5 -5
  472. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +5 -5
  473. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +5 -5
  474. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +5 -5
  475. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +5 -5
  476. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +5 -5
  477. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +5 -5
  478. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +5 -5
  479. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +5 -5
  480. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +5 -5
  481. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +5 -5
  482. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +5 -5
  483. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +5 -5
  484. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +5 -5
  485. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +5 -5
  486. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +5 -5
  487. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +5 -5
  488. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +5 -5
  489. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +6 -6
  490. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +5 -5
  491. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +6 -6
  492. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +5 -5
  493. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +5 -5
  494. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +5 -5
  495. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +5 -5
  496. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +5 -5
  497. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +5 -5
  498. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +5 -5
  499. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +5 -5
  500. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +88 -8
  501. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +5 -5
  502. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +5 -5
  503. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +5 -5
  504. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +5 -5
  505. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +103 -103
  506. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +6 -6
  507. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +5 -5
  508. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +5 -5
  509. data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +5 -5
  510. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +5 -5
  511. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +5 -5
  512. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +5 -5
  513. data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +5 -5
  514. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +5 -5
  515. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +5 -5
  516. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +5 -5
  517. data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +5 -5
  518. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +5 -5
  519. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +5 -5
  520. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +5 -5
  521. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +10 -10
  522. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +201 -0
  523. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +52 -52
  524. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +25 -25
  525. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +41 -41
  526. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +31 -31
  527. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +41 -41
  528. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +31 -31
  529. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +43 -43
  530. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +25 -25
  531. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +25 -25
  532. data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +5 -5
  533. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +5 -5
  534. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +5 -5
  535. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +5 -5
  536. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +5 -5
  537. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +5 -5
  538. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +5 -5
  539. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +5 -5
  540. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +6 -6
  541. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +5 -5
  542. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +5 -5
  543. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +5 -5
  544. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +5 -5
  545. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +5 -5
  546. data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +5 -5
  547. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +5 -5
  548. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +5 -5
  549. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +5 -5
  550. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +5 -5
  551. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +5 -5
  552. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +5 -5
  553. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +5 -5
  554. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +5 -5
  555. data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +5 -5
  556. data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +5 -5
  557. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +5 -5
  558. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +5 -5
  559. data/vendor/local/share/doc/groonga/ja/html/reference/functions/record_number.html +5 -5
  560. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +5 -5
  561. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +5 -5
  562. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +5 -5
  563. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +5 -5
  564. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +5 -5
  565. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +5 -5
  566. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +5 -5
  567. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +5 -5
  568. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +5 -5
  569. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +5 -5
  570. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +5 -5
  571. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +5 -5
  572. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +5 -5
  573. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +5 -5
  574. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +5 -5
  575. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +5 -5
  576. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +5 -5
  577. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +5 -5
  578. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +5 -5
  579. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +5 -5
  580. data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +5 -5
  581. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +5 -5
  582. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +5 -5
  583. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +5 -5
  584. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +5 -5
  585. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +5 -5
  586. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +5 -5
  587. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +5 -5
  588. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +5 -5
  589. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +5 -5
  590. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +5 -5
  591. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +5 -5
  592. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +5 -5
  593. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +5 -5
  594. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +33 -10
  595. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +5 -5
  596. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +5 -5
  597. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +5 -5
  598. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +9 -9
  599. data/vendor/local/share/doc/groonga/ja/html/search.html +5 -5
  600. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  601. data/vendor/local/share/doc/groonga/ja/html/server.html +5 -5
  602. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +5 -5
  603. data/vendor/local/share/doc/groonga/ja/html/server/http.html +5 -5
  604. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +5 -5
  605. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +5 -5
  606. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +5 -5
  607. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +5 -5
  608. data/vendor/local/share/doc/groonga/ja/html/server/package.html +5 -5
  609. data/vendor/local/share/doc/groonga/ja/html/spec.html +5 -5
  610. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +5 -5
  611. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +5 -5
  612. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +5 -5
  613. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +5 -5
  614. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +5 -5
  615. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +5 -5
  616. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +5 -5
  617. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +5 -5
  618. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +5 -5
  619. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +5 -5
  620. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +5 -5
  621. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +5 -5
  622. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +5 -5
  623. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +5 -5
  624. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +5 -5
  625. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +5 -5
  626. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +5 -5
  627. data/vendor/local/share/doc/pcre/AUTHORS +45 -0
  628. data/vendor/local/share/doc/pcre/COPYING +5 -0
  629. data/vendor/local/share/doc/pcre/ChangeLog +6010 -0
  630. data/vendor/local/share/doc/pcre/LICENCE +93 -0
  631. data/vendor/local/share/doc/pcre/NEWS +725 -0
  632. data/vendor/local/share/doc/pcre/README +1002 -0
  633. data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +772 -0
  634. data/vendor/local/share/doc/pcre/html/README.txt +1002 -0
  635. data/vendor/local/share/doc/pcre/html/index.html +185 -0
  636. data/vendor/local/share/doc/pcre/html/pcre-config.html +109 -0
  637. data/vendor/local/share/doc/pcre/html/pcre.html +224 -0
  638. data/vendor/local/share/doc/pcre/html/pcre16.html +384 -0
  639. data/vendor/local/share/doc/pcre/html/pcre32.html +382 -0
  640. data/vendor/local/share/doc/pcre/html/pcre_assign_jit_stack.html +76 -0
  641. data/vendor/local/share/doc/pcre/html/pcre_compile.html +111 -0
  642. data/vendor/local/share/doc/pcre/html/pcre_compile2.html +115 -0
  643. data/vendor/local/share/doc/pcre/html/pcre_config.html +94 -0
  644. data/vendor/local/share/doc/pcre/html/pcre_copy_named_substring.html +65 -0
  645. data/vendor/local/share/doc/pcre/html/pcre_copy_substring.html +61 -0
  646. data/vendor/local/share/doc/pcre/html/pcre_dfa_exec.html +129 -0
  647. data/vendor/local/share/doc/pcre/html/pcre_exec.html +111 -0
  648. data/vendor/local/share/doc/pcre/html/pcre_free_study.html +46 -0
  649. data/vendor/local/share/doc/pcre/html/pcre_free_substring.html +46 -0
  650. data/vendor/local/share/doc/pcre/html/pcre_free_substring_list.html +46 -0
  651. data/vendor/local/share/doc/pcre/html/pcre_fullinfo.html +118 -0
  652. data/vendor/local/share/doc/pcre/html/pcre_get_named_substring.html +68 -0
  653. data/vendor/local/share/doc/pcre/html/pcre_get_stringnumber.html +57 -0
  654. data/vendor/local/share/doc/pcre/html/pcre_get_stringtable_entries.html +60 -0
  655. data/vendor/local/share/doc/pcre/html/pcre_get_substring.html +64 -0
  656. data/vendor/local/share/doc/pcre/html/pcre_get_substring_list.html +61 -0
  657. data/vendor/local/share/doc/pcre/html/pcre_jit_exec.html +108 -0
  658. data/vendor/local/share/doc/pcre/html/pcre_jit_stack_alloc.html +55 -0
  659. data/vendor/local/share/doc/pcre/html/pcre_jit_stack_free.html +48 -0
  660. data/vendor/local/share/doc/pcre/html/pcre_maketables.html +48 -0
  661. data/vendor/local/share/doc/pcre/html/pcre_pattern_to_host_byte_order.html +58 -0
  662. data/vendor/local/share/doc/pcre/html/pcre_refcount.html +51 -0
  663. data/vendor/local/share/doc/pcre/html/pcre_study.html +68 -0
  664. data/vendor/local/share/doc/pcre/html/pcre_utf16_to_host_byte_order.html +57 -0
  665. data/vendor/local/share/doc/pcre/html/pcre_utf32_to_host_byte_order.html +57 -0
  666. data/vendor/local/share/doc/pcre/html/pcre_version.html +46 -0
  667. data/vendor/local/share/doc/pcre/html/pcreapi.html +2921 -0
  668. data/vendor/local/share/doc/pcre/html/pcrebuild.html +534 -0
  669. data/vendor/local/share/doc/pcre/html/pcrecallout.html +286 -0
  670. data/vendor/local/share/doc/pcre/html/pcrecompat.html +235 -0
  671. data/vendor/local/share/doc/pcre/html/pcrecpp.html +368 -0
  672. data/vendor/local/share/doc/pcre/html/pcredemo.html +426 -0
  673. data/vendor/local/share/doc/pcre/html/pcregrep.html +759 -0
  674. data/vendor/local/share/doc/pcre/html/pcrejit.html +452 -0
  675. data/vendor/local/share/doc/pcre/html/pcrelimits.html +90 -0
  676. data/vendor/local/share/doc/pcre/html/pcrematching.html +242 -0
  677. data/vendor/local/share/doc/pcre/html/pcrepartial.html +509 -0
  678. data/vendor/local/share/doc/pcre/html/pcrepattern.html +3273 -0
  679. data/vendor/local/share/doc/pcre/html/pcreperform.html +195 -0
  680. data/vendor/local/share/doc/pcre/html/pcreposix.html +290 -0
  681. data/vendor/local/share/doc/pcre/html/pcreprecompile.html +163 -0
  682. data/vendor/local/share/doc/pcre/html/pcresample.html +110 -0
  683. data/vendor/local/share/doc/pcre/html/pcrestack.html +225 -0
  684. data/vendor/local/share/doc/pcre/html/pcresyntax.html +561 -0
  685. data/vendor/local/share/doc/pcre/html/pcretest.html +1158 -0
  686. data/vendor/local/share/doc/pcre/html/pcreunicode.html +262 -0
  687. data/vendor/local/share/doc/pcre/pcre-config.txt +86 -0
  688. data/vendor/local/share/doc/pcre/pcre.txt +10454 -0
  689. data/vendor/local/share/doc/pcre/pcregrep.txt +741 -0
  690. data/vendor/local/share/doc/pcre/pcretest.txt +1087 -0
  691. data/vendor/local/share/groonga/html/admin.old/js/groonga-admin.ja.js +11 -6
  692. data/vendor/local/share/groonga/html/admin.old/js/groonga-admin.js +11 -6
  693. data/vendor/local/share/license/pcre/LICENCE +93 -0
  694. data/vendor/local/share/man/man1/pcre-config.1 +92 -0
  695. data/vendor/local/share/man/man1/pcregrep.1 +683 -0
  696. data/vendor/local/share/man/man1/pcretest.1 +1156 -0
  697. data/vendor/local/share/man/man3/pcre.3 +230 -0
  698. data/vendor/local/share/man/man3/pcre16.3 +371 -0
  699. data/vendor/local/share/man/man3/pcre16_assign_jit_stack.3 +59 -0
  700. data/vendor/local/share/man/man3/pcre16_compile.3 +96 -0
  701. data/vendor/local/share/man/man3/pcre16_compile2.3 +101 -0
  702. data/vendor/local/share/man/man3/pcre16_config.3 +79 -0
  703. data/vendor/local/share/man/man3/pcre16_copy_named_substring.3 +51 -0
  704. data/vendor/local/share/man/man3/pcre16_copy_substring.3 +47 -0
  705. data/vendor/local/share/man/man3/pcre16_dfa_exec.3 +118 -0
  706. data/vendor/local/share/man/man3/pcre16_exec.3 +99 -0
  707. data/vendor/local/share/man/man3/pcre16_free_study.3 +31 -0
  708. data/vendor/local/share/man/man3/pcre16_free_substring.3 +31 -0
  709. data/vendor/local/share/man/man3/pcre16_free_substring_list.3 +31 -0
  710. data/vendor/local/share/man/man3/pcre16_fullinfo.3 +103 -0
  711. data/vendor/local/share/man/man3/pcre16_get_named_substring.3 +54 -0
  712. data/vendor/local/share/man/man3/pcre16_get_stringnumber.3 +43 -0
  713. data/vendor/local/share/man/man3/pcre16_get_stringtable_entries.3 +46 -0
  714. data/vendor/local/share/man/man3/pcre16_get_substring.3 +50 -0
  715. data/vendor/local/share/man/man3/pcre16_get_substring_list.3 +47 -0
  716. data/vendor/local/share/man/man3/pcre16_jit_exec.3 +96 -0
  717. data/vendor/local/share/man/man3/pcre16_jit_stack_alloc.3 +43 -0
  718. data/vendor/local/share/man/man3/pcre16_jit_stack_free.3 +35 -0
  719. data/vendor/local/share/man/man3/pcre16_maketables.3 +33 -0
  720. data/vendor/local/share/man/man3/pcre16_pattern_to_host_byte_order.3 +44 -0
  721. data/vendor/local/share/man/man3/pcre16_refcount.3 +36 -0
  722. data/vendor/local/share/man/man3/pcre16_study.3 +54 -0
  723. data/vendor/local/share/man/man3/pcre16_utf16_to_host_byte_order.3 +45 -0
  724. data/vendor/local/share/man/man3/pcre16_version.3 +31 -0
  725. data/vendor/local/share/man/man3/pcre32.3 +369 -0
  726. data/vendor/local/share/man/man3/pcre32_assign_jit_stack.3 +59 -0
  727. data/vendor/local/share/man/man3/pcre32_compile.3 +96 -0
  728. data/vendor/local/share/man/man3/pcre32_compile2.3 +101 -0
  729. data/vendor/local/share/man/man3/pcre32_config.3 +79 -0
  730. data/vendor/local/share/man/man3/pcre32_copy_named_substring.3 +51 -0
  731. data/vendor/local/share/man/man3/pcre32_copy_substring.3 +47 -0
  732. data/vendor/local/share/man/man3/pcre32_dfa_exec.3 +118 -0
  733. data/vendor/local/share/man/man3/pcre32_exec.3 +99 -0
  734. data/vendor/local/share/man/man3/pcre32_free_study.3 +31 -0
  735. data/vendor/local/share/man/man3/pcre32_free_substring.3 +31 -0
  736. data/vendor/local/share/man/man3/pcre32_free_substring_list.3 +31 -0
  737. data/vendor/local/share/man/man3/pcre32_fullinfo.3 +103 -0
  738. data/vendor/local/share/man/man3/pcre32_get_named_substring.3 +54 -0
  739. data/vendor/local/share/man/man3/pcre32_get_stringnumber.3 +43 -0
  740. data/vendor/local/share/man/man3/pcre32_get_stringtable_entries.3 +46 -0
  741. data/vendor/local/share/man/man3/pcre32_get_substring.3 +50 -0
  742. data/vendor/local/share/man/man3/pcre32_get_substring_list.3 +47 -0
  743. data/vendor/local/share/man/man3/pcre32_jit_exec.3 +96 -0
  744. data/vendor/local/share/man/man3/pcre32_jit_stack_alloc.3 +43 -0
  745. data/vendor/local/share/man/man3/pcre32_jit_stack_free.3 +35 -0
  746. data/vendor/local/share/man/man3/pcre32_maketables.3 +33 -0
  747. data/vendor/local/share/man/man3/pcre32_pattern_to_host_byte_order.3 +44 -0
  748. data/vendor/local/share/man/man3/pcre32_refcount.3 +36 -0
  749. data/vendor/local/share/man/man3/pcre32_study.3 +54 -0
  750. data/vendor/local/share/man/man3/pcre32_utf32_to_host_byte_order.3 +45 -0
  751. data/vendor/local/share/man/man3/pcre32_version.3 +31 -0
  752. data/vendor/local/share/man/man3/pcre_assign_jit_stack.3 +59 -0
  753. data/vendor/local/share/man/man3/pcre_compile.3 +96 -0
  754. data/vendor/local/share/man/man3/pcre_compile2.3 +101 -0
  755. data/vendor/local/share/man/man3/pcre_config.3 +79 -0
  756. data/vendor/local/share/man/man3/pcre_copy_named_substring.3 +51 -0
  757. data/vendor/local/share/man/man3/pcre_copy_substring.3 +47 -0
  758. data/vendor/local/share/man/man3/pcre_dfa_exec.3 +118 -0
  759. data/vendor/local/share/man/man3/pcre_exec.3 +99 -0
  760. data/vendor/local/share/man/man3/pcre_free_study.3 +31 -0
  761. data/vendor/local/share/man/man3/pcre_free_substring.3 +31 -0
  762. data/vendor/local/share/man/man3/pcre_free_substring_list.3 +31 -0
  763. data/vendor/local/share/man/man3/pcre_fullinfo.3 +103 -0
  764. data/vendor/local/share/man/man3/pcre_get_named_substring.3 +54 -0
  765. data/vendor/local/share/man/man3/pcre_get_stringnumber.3 +43 -0
  766. data/vendor/local/share/man/man3/pcre_get_stringtable_entries.3 +46 -0
  767. data/vendor/local/share/man/man3/pcre_get_substring.3 +50 -0
  768. data/vendor/local/share/man/man3/pcre_get_substring_list.3 +47 -0
  769. data/vendor/local/share/man/man3/pcre_jit_exec.3 +96 -0
  770. data/vendor/local/share/man/man3/pcre_jit_stack_alloc.3 +43 -0
  771. data/vendor/local/share/man/man3/pcre_jit_stack_free.3 +35 -0
  772. data/vendor/local/share/man/man3/pcre_maketables.3 +33 -0
  773. data/vendor/local/share/man/man3/pcre_pattern_to_host_byte_order.3 +44 -0
  774. data/vendor/local/share/man/man3/pcre_refcount.3 +36 -0
  775. data/vendor/local/share/man/man3/pcre_study.3 +54 -0
  776. data/vendor/local/share/man/man3/pcre_utf16_to_host_byte_order.3 +45 -0
  777. data/vendor/local/share/man/man3/pcre_utf32_to_host_byte_order.3 +45 -0
  778. data/vendor/local/share/man/man3/pcre_version.3 +31 -0
  779. data/vendor/local/share/man/man3/pcreapi.3 +2918 -0
  780. data/vendor/local/share/man/man3/pcrebuild.3 +550 -0
  781. data/vendor/local/share/man/man3/pcrecallout.3 +255 -0
  782. data/vendor/local/share/man/man3/pcrecompat.3 +200 -0
  783. data/vendor/local/share/man/man3/pcrecpp.3 +348 -0
  784. data/vendor/local/share/man/man3/pcredemo.3 +424 -0
  785. data/vendor/local/share/man/man3/pcrejit.3 +431 -0
  786. data/vendor/local/share/man/man3/pcrelimits.3 +71 -0
  787. data/vendor/local/share/man/man3/pcrematching.3 +214 -0
  788. data/vendor/local/share/man/man3/pcrepartial.3 +476 -0
  789. data/vendor/local/share/man/man3/pcrepattern.3 +3301 -0
  790. data/vendor/local/share/man/man3/pcreperform.3 +177 -0
  791. data/vendor/local/share/man/man3/pcreposix.3 +267 -0
  792. data/vendor/local/share/man/man3/pcreprecompile.3 +155 -0
  793. data/vendor/local/share/man/man3/pcresample.3 +99 -0
  794. data/vendor/local/share/man/man3/pcrestack.3 +215 -0
  795. data/vendor/local/share/man/man3/pcresyntax.3 +540 -0
  796. data/vendor/local/share/man/man3/pcreunicode.3 +249 -0
  797. metadata +255 -59
@@ -0,0 +1,44 @@
1
+ .TH PCRE_PATTERN_TO_HOST_BYTE_ORDER 3 "24 June 2012" "PCRE 8.30"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH SYNOPSIS
5
+ .rs
6
+ .sp
7
+ .B #include <pcre.h>
8
+ .PP
9
+ .nf
10
+ .B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
11
+ .B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
12
+ .sp
13
+ .B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
14
+ .B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
15
+ .sp
16
+ .B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
17
+ .B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
18
+ .fi
19
+ .
20
+ .SH DESCRIPTION
21
+ .rs
22
+ .sp
23
+ This function ensures that the bytes in 2-byte and 4-byte values in a compiled
24
+ pattern are in the correct order for the current host. It is useful when a
25
+ pattern that has been compiled on one host is transferred to another that might
26
+ have different endianness. The arguments are:
27
+ .sp
28
+ \fIcode\fP A compiled regular expression
29
+ \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
30
+ or is NULL
31
+ \fItables\fP Pointer to character tables, or NULL to
32
+ set the built-in default
33
+ .sp
34
+ The result is 0 for success, a negative PCRE_ERROR_xxx value otherwise.
35
+ .P
36
+ There is a complete description of the PCRE native API in the
37
+ .\" HREF
38
+ \fBpcreapi\fP
39
+ .\"
40
+ page and a description of the POSIX API in the
41
+ .\" HREF
42
+ \fBpcreposix\fP
43
+ .\"
44
+ page.
@@ -0,0 +1,36 @@
1
+ .TH PCRE_REFCOUNT 3 "24 June 2012" "PCRE 8.30"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH SYNOPSIS
5
+ .rs
6
+ .sp
7
+ .B #include <pcre.h>
8
+ .PP
9
+ .SM
10
+ .B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
11
+ .PP
12
+ .B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
13
+ .PP
14
+ .B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
15
+ .
16
+ .SH DESCRIPTION
17
+ .rs
18
+ .sp
19
+ This function is used to maintain a reference count inside a data block that
20
+ contains a compiled pattern. Its arguments are:
21
+ .sp
22
+ \fIcode\fP Compiled regular expression
23
+ \fIadjust\fP Adjustment to reference value
24
+ .sp
25
+ The yield of the function is the adjusted reference value, which is constrained
26
+ to lie between 0 and 65535.
27
+ .P
28
+ There is a complete description of the PCRE native API in the
29
+ .\" HREF
30
+ \fBpcreapi\fP
31
+ .\"
32
+ page and a description of the POSIX API in the
33
+ .\" HREF
34
+ \fBpcreposix\fP
35
+ .\"
36
+ page.
@@ -0,0 +1,54 @@
1
+ .TH PCRE_STUDY 3 "24 June 2012" "PCRE 8.30"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH SYNOPSIS
5
+ .rs
6
+ .sp
7
+ .B #include <pcre.h>
8
+ .PP
9
+ .nf
10
+ .B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
11
+ .B " const char **\fIerrptr\fP);"
12
+ .sp
13
+ .B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
14
+ .B " const char **\fIerrptr\fP);"
15
+ .sp
16
+ .B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
17
+ .B " const char **\fIerrptr\fP);"
18
+ .fi
19
+ .
20
+ .SH DESCRIPTION
21
+ .rs
22
+ .sp
23
+ This function studies a compiled pattern, to see if additional information can
24
+ be extracted that might speed up matching. Its arguments are:
25
+ .sp
26
+ \fIcode\fP A compiled regular expression
27
+ \fIoptions\fP Options for \fBpcre[16|32]_study()\fP
28
+ \fIerrptr\fP Where to put an error message
29
+ .sp
30
+ If the function succeeds, it returns a value that can be passed to
31
+ \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP via their \fIextra\fP
32
+ arguments.
33
+ .P
34
+ If the function returns NULL, either it could not find any additional
35
+ information, or there was an error. You can tell the difference by looking at
36
+ the error value. It is NULL in the first case.
37
+ .P
38
+ The only option is PCRE_STUDY_JIT_COMPILE. It requests just-in-time compilation
39
+ if possible. If PCRE has been compiled without JIT support, this option is
40
+ ignored. See the
41
+ .\" HREF
42
+ \fBpcrejit\fP
43
+ .\"
44
+ page for further details.
45
+ .P
46
+ There is a complete description of the PCRE native API in the
47
+ .\" HREF
48
+ \fBpcreapi\fP
49
+ .\"
50
+ page and a description of the POSIX API in the
51
+ .\" HREF
52
+ \fBpcreposix\fP
53
+ .\"
54
+ page.
@@ -0,0 +1,45 @@
1
+ .TH PCRE_UTF16_TO_HOST_BYTE_ORDER 3 "21 January 2012" "PCRE 8.30"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH SYNOPSIS
5
+ .rs
6
+ .sp
7
+ .B #include <pcre.h>
8
+ .PP
9
+ .nf
10
+ .B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
11
+ .B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
12
+ .B " int \fIkeep_boms\fP);"
13
+ .fi
14
+ .
15
+ .
16
+ .SH DESCRIPTION
17
+ .rs
18
+ .sp
19
+ This function, which exists only in the 16-bit library, converts a UTF-16
20
+ string to the correct order for the current host, taking account of any byte
21
+ order marks (BOMs) within the string. Its arguments are:
22
+ .sp
23
+ \fIoutput\fP pointer to output buffer, may be the same as \fIinput\fP
24
+ \fIinput\fP pointer to input buffer
25
+ \fIlength\fP number of 16-bit units in the input, or negative for
26
+ a zero-terminated string
27
+ \fIhost_byte_order\fP a NULL value or a non-zero value pointed to means
28
+ start in host byte order
29
+ \fIkeep_boms\fP if non-zero, BOMs are copied to the output string
30
+ .sp
31
+ The result of the function is the number of 16-bit units placed into the output
32
+ buffer, including the zero terminator if the string was zero-terminated.
33
+ .P
34
+ If \fIhost_byte_order\fP is not NULL, it is set to indicate the byte order that
35
+ is current at the end of the string.
36
+ .P
37
+ There is a complete description of the PCRE native API in the
38
+ .\" HREF
39
+ \fBpcreapi\fP
40
+ .\"
41
+ page and a description of the POSIX API in the
42
+ .\" HREF
43
+ \fBpcreposix\fP
44
+ .\"
45
+ page.
@@ -0,0 +1,45 @@
1
+ .TH PCRE_UTF32_TO_HOST_BYTE_ORDER 3 "24 June 2012" "PCRE 8.30"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH SYNOPSIS
5
+ .rs
6
+ .sp
7
+ .B #include <pcre.h>
8
+ .PP
9
+ .nf
10
+ .B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
11
+ .B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
12
+ .B " int \fIkeep_boms\fP);"
13
+ .fi
14
+ .
15
+ .
16
+ .SH DESCRIPTION
17
+ .rs
18
+ .sp
19
+ This function, which exists only in the 32-bit library, converts a UTF-32
20
+ string to the correct order for the current host, taking account of any byte
21
+ order marks (BOMs) within the string. Its arguments are:
22
+ .sp
23
+ \fIoutput\fP pointer to output buffer, may be the same as \fIinput\fP
24
+ \fIinput\fP pointer to input buffer
25
+ \fIlength\fP number of 32-bit units in the input, or negative for
26
+ a zero-terminated string
27
+ \fIhost_byte_order\fP a NULL value or a non-zero value pointed to means
28
+ start in host byte order
29
+ \fIkeep_boms\fP if non-zero, BOMs are copied to the output string
30
+ .sp
31
+ The result of the function is the number of 32-bit units placed into the output
32
+ buffer, including the zero terminator if the string was zero-terminated.
33
+ .P
34
+ If \fIhost_byte_order\fP is not NULL, it is set to indicate the byte order that
35
+ is current at the end of the string.
36
+ .P
37
+ There is a complete description of the PCRE native API in the
38
+ .\" HREF
39
+ \fBpcreapi\fP
40
+ .\"
41
+ page and a description of the POSIX API in the
42
+ .\" HREF
43
+ \fBpcreposix\fP
44
+ .\"
45
+ page.
@@ -0,0 +1,31 @@
1
+ .TH PCRE_VERSION 3 "24 June 2012" "PCRE 8.30"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .SH SYNOPSIS
5
+ .rs
6
+ .sp
7
+ .B #include <pcre.h>
8
+ .PP
9
+ .SM
10
+ .B const char *pcre_version(void);
11
+ .PP
12
+ .B const char *pcre16_version(void);
13
+ .PP
14
+ .B const char *pcre32_version(void);
15
+ .
16
+ .SH DESCRIPTION
17
+ .rs
18
+ .sp
19
+ This function (even in the 16-bit and 32-bit libraries) returns a
20
+ zero-terminated, 8-bit character string that gives the version number of the
21
+ PCRE library and the date of its release.
22
+ .P
23
+ There is a complete description of the PCRE native API in the
24
+ .\" HREF
25
+ \fBpcreapi\fP
26
+ .\"
27
+ page and a description of the POSIX API in the
28
+ .\" HREF
29
+ \fBpcreposix\fP
30
+ .\"
31
+ page.
@@ -0,0 +1,2918 @@
1
+ .TH PCREAPI 3 "18 December 2015" "PCRE 8.39"
2
+ .SH NAME
3
+ PCRE - Perl-compatible regular expressions
4
+ .sp
5
+ .B #include <pcre.h>
6
+ .
7
+ .
8
+ .SH "PCRE NATIVE API BASIC FUNCTIONS"
9
+ .rs
10
+ .sp
11
+ .nf
12
+ .B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
13
+ .B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
14
+ .B " const unsigned char *\fItableptr\fP);"
15
+ .sp
16
+ .B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
17
+ .B " int *\fIerrorcodeptr\fP,"
18
+ .B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
19
+ .B " const unsigned char *\fItableptr\fP);"
20
+ .sp
21
+ .B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
22
+ .B " const char **\fIerrptr\fP);"
23
+ .sp
24
+ .B void pcre_free_study(pcre_extra *\fIextra\fP);
25
+ .sp
26
+ .B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
27
+ .B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
28
+ .B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
29
+ .sp
30
+ .B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
31
+ .B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
32
+ .B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
33
+ .B " int *\fIworkspace\fP, int \fIwscount\fP);"
34
+ .fi
35
+ .
36
+ .
37
+ .SH "PCRE NATIVE API STRING EXTRACTION FUNCTIONS"
38
+ .rs
39
+ .sp
40
+ .nf
41
+ .B int pcre_copy_named_substring(const pcre *\fIcode\fP,
42
+ .B " const char *\fIsubject\fP, int *\fIovector\fP,"
43
+ .B " int \fIstringcount\fP, const char *\fIstringname\fP,"
44
+ .B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
45
+ .sp
46
+ .B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
47
+ .B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
48
+ .B " int \fIbuffersize\fP);"
49
+ .sp
50
+ .B int pcre_get_named_substring(const pcre *\fIcode\fP,
51
+ .B " const char *\fIsubject\fP, int *\fIovector\fP,"
52
+ .B " int \fIstringcount\fP, const char *\fIstringname\fP,"
53
+ .B " const char **\fIstringptr\fP);"
54
+ .sp
55
+ .B int pcre_get_stringnumber(const pcre *\fIcode\fP,
56
+ .B " const char *\fIname\fP);"
57
+ .sp
58
+ .B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
59
+ .B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
60
+ .sp
61
+ .B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
62
+ .B " int \fIstringcount\fP, int \fIstringnumber\fP,"
63
+ .B " const char **\fIstringptr\fP);"
64
+ .sp
65
+ .B int pcre_get_substring_list(const char *\fIsubject\fP,
66
+ .B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
67
+ .sp
68
+ .B void pcre_free_substring(const char *\fIstringptr\fP);
69
+ .sp
70
+ .B void pcre_free_substring_list(const char **\fIstringptr\fP);
71
+ .fi
72
+ .
73
+ .
74
+ .SH "PCRE NATIVE API AUXILIARY FUNCTIONS"
75
+ .rs
76
+ .sp
77
+ .nf
78
+ .B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
79
+ .B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
80
+ .B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
81
+ .B " pcre_jit_stack *\fIjstack\fP);"
82
+ .sp
83
+ .B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
84
+ .sp
85
+ .B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
86
+ .sp
87
+ .B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
88
+ .B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
89
+ .sp
90
+ .B const unsigned char *pcre_maketables(void);
91
+ .sp
92
+ .B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
93
+ .B " int \fIwhat\fP, void *\fIwhere\fP);"
94
+ .sp
95
+ .B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
96
+ .sp
97
+ .B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
98
+ .sp
99
+ .B const char *pcre_version(void);
100
+ .sp
101
+ .B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
102
+ .B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
103
+ .fi
104
+ .
105
+ .
106
+ .SH "PCRE NATIVE API INDIRECTED FUNCTIONS"
107
+ .rs
108
+ .sp
109
+ .nf
110
+ .B void *(*pcre_malloc)(size_t);
111
+ .sp
112
+ .B void (*pcre_free)(void *);
113
+ .sp
114
+ .B void *(*pcre_stack_malloc)(size_t);
115
+ .sp
116
+ .B void (*pcre_stack_free)(void *);
117
+ .sp
118
+ .B int (*pcre_callout)(pcre_callout_block *);
119
+ .sp
120
+ .B int (*pcre_stack_guard)(void);
121
+ .fi
122
+ .
123
+ .
124
+ .SH "PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
125
+ .rs
126
+ .sp
127
+ As well as support for 8-bit character strings, PCRE also supports 16-bit
128
+ strings (from release 8.30) and 32-bit strings (from release 8.32), by means of
129
+ two additional libraries. They can be built as well as, or instead of, the
130
+ 8-bit library. To avoid too much complication, this document describes the
131
+ 8-bit versions of the functions, with only occasional references to the 16-bit
132
+ and 32-bit libraries.
133
+ .P
134
+ The 16-bit and 32-bit functions operate in the same way as their 8-bit
135
+ counterparts; they just use different data types for their arguments and
136
+ results, and their names start with \fBpcre16_\fP or \fBpcre32_\fP instead of
137
+ \fBpcre_\fP. For every option that has UTF8 in its name (for example,
138
+ PCRE_UTF8), there are corresponding 16-bit and 32-bit names with UTF8 replaced
139
+ by UTF16 or UTF32, respectively. This facility is in fact just cosmetic; the
140
+ 16-bit and 32-bit option names define the same bit values.
141
+ .P
142
+ References to bytes and UTF-8 in this document should be read as references to
143
+ 16-bit data units and UTF-16 when using the 16-bit library, or 32-bit data
144
+ units and UTF-32 when using the 32-bit library, unless specified otherwise.
145
+ More details of the specific differences for the 16-bit and 32-bit libraries
146
+ are given in the
147
+ .\" HREF
148
+ \fBpcre16\fP
149
+ .\"
150
+ and
151
+ .\" HREF
152
+ \fBpcre32\fP
153
+ .\"
154
+ pages.
155
+ .
156
+ .
157
+ .SH "PCRE API OVERVIEW"
158
+ .rs
159
+ .sp
160
+ PCRE has its own native API, which is described in this document. There are
161
+ also some wrapper functions (for the 8-bit library only) that correspond to the
162
+ POSIX regular expression API, but they do not give access to all the
163
+ functionality. They are described in the
164
+ .\" HREF
165
+ \fBpcreposix\fP
166
+ .\"
167
+ documentation. Both of these APIs define a set of C function calls. A C++
168
+ wrapper (again for the 8-bit library only) is also distributed with PCRE. It is
169
+ documented in the
170
+ .\" HREF
171
+ \fBpcrecpp\fP
172
+ .\"
173
+ page.
174
+ .P
175
+ The native API C function prototypes are defined in the header file
176
+ \fBpcre.h\fP, and on Unix-like systems the (8-bit) library itself is called
177
+ \fBlibpcre\fP. It can normally be accessed by adding \fB-lpcre\fP to the
178
+ command for linking an application that uses PCRE. The header file defines the
179
+ macros PCRE_MAJOR and PCRE_MINOR to contain the major and minor release numbers
180
+ for the library. Applications can use these to include support for different
181
+ releases of PCRE.
182
+ .P
183
+ In a Windows environment, if you want to statically link an application program
184
+ against a non-dll \fBpcre.a\fP file, you must define PCRE_STATIC before
185
+ including \fBpcre.h\fP or \fBpcrecpp.h\fP, because otherwise the
186
+ \fBpcre_malloc()\fP and \fBpcre_free()\fP exported functions will be declared
187
+ \fB__declspec(dllimport)\fP, with unwanted results.
188
+ .P
189
+ The functions \fBpcre_compile()\fP, \fBpcre_compile2()\fP, \fBpcre_study()\fP,
190
+ and \fBpcre_exec()\fP are used for compiling and matching regular expressions
191
+ in a Perl-compatible manner. A sample program that demonstrates the simplest
192
+ way of using them is provided in the file called \fIpcredemo.c\fP in the PCRE
193
+ source distribution. A listing of this program is given in the
194
+ .\" HREF
195
+ \fBpcredemo\fP
196
+ .\"
197
+ documentation, and the
198
+ .\" HREF
199
+ \fBpcresample\fP
200
+ .\"
201
+ documentation describes how to compile and run it.
202
+ .P
203
+ Just-in-time compiler support is an optional feature of PCRE that can be built
204
+ in appropriate hardware environments. It greatly speeds up the matching
205
+ performance of many patterns. Simple programs can easily request that it be
206
+ used if available, by setting an option that is ignored when it is not
207
+ relevant. More complicated programs might need to make use of the functions
208
+ \fBpcre_jit_stack_alloc()\fP, \fBpcre_jit_stack_free()\fP, and
209
+ \fBpcre_assign_jit_stack()\fP in order to control the JIT code's memory usage.
210
+ .P
211
+ From release 8.32 there is also a direct interface for JIT execution, which
212
+ gives improved performance. The JIT-specific functions are discussed in the
213
+ .\" HREF
214
+ \fBpcrejit\fP
215
+ .\"
216
+ documentation.
217
+ .P
218
+ A second matching function, \fBpcre_dfa_exec()\fP, which is not
219
+ Perl-compatible, is also provided. This uses a different algorithm for the
220
+ matching. The alternative algorithm finds all possible matches (at a given
221
+ point in the subject), and scans the subject just once (unless there are
222
+ lookbehind assertions). However, this algorithm does not return captured
223
+ substrings. A description of the two matching algorithms and their advantages
224
+ and disadvantages is given in the
225
+ .\" HREF
226
+ \fBpcrematching\fP
227
+ .\"
228
+ documentation.
229
+ .P
230
+ In addition to the main compiling and matching functions, there are convenience
231
+ functions for extracting captured substrings from a subject string that is
232
+ matched by \fBpcre_exec()\fP. They are:
233
+ .sp
234
+ \fBpcre_copy_substring()\fP
235
+ \fBpcre_copy_named_substring()\fP
236
+ \fBpcre_get_substring()\fP
237
+ \fBpcre_get_named_substring()\fP
238
+ \fBpcre_get_substring_list()\fP
239
+ \fBpcre_get_stringnumber()\fP
240
+ \fBpcre_get_stringtable_entries()\fP
241
+ .sp
242
+ \fBpcre_free_substring()\fP and \fBpcre_free_substring_list()\fP are also
243
+ provided, to free the memory used for extracted strings.
244
+ .P
245
+ The function \fBpcre_maketables()\fP is used to build a set of character tables
246
+ in the current locale for passing to \fBpcre_compile()\fP, \fBpcre_exec()\fP,
247
+ or \fBpcre_dfa_exec()\fP. This is an optional facility that is provided for
248
+ specialist use. Most commonly, no special tables are passed, in which case
249
+ internal tables that are generated when PCRE is built are used.
250
+ .P
251
+ The function \fBpcre_fullinfo()\fP is used to find out information about a
252
+ compiled pattern. The function \fBpcre_version()\fP returns a pointer to a
253
+ string containing the version of PCRE and its date of release.
254
+ .P
255
+ The function \fBpcre_refcount()\fP maintains a reference count in a data block
256
+ containing a compiled pattern. This is provided for the benefit of
257
+ object-oriented applications.
258
+ .P
259
+ The global variables \fBpcre_malloc\fP and \fBpcre_free\fP initially contain
260
+ the entry points of the standard \fBmalloc()\fP and \fBfree()\fP functions,
261
+ respectively. PCRE calls the memory management functions via these variables,
262
+ so a calling program can replace them if it wishes to intercept the calls. This
263
+ should be done before calling any PCRE functions.
264
+ .P
265
+ The global variables \fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP are also
266
+ indirections to memory management functions. These special functions are used
267
+ only when PCRE is compiled to use the heap for remembering data, instead of
268
+ recursive function calls, when running the \fBpcre_exec()\fP function. See the
269
+ .\" HREF
270
+ \fBpcrebuild\fP
271
+ .\"
272
+ documentation for details of how to do this. It is a non-standard way of
273
+ building PCRE, for use in environments that have limited stacks. Because of the
274
+ greater use of memory management, it runs more slowly. Separate functions are
275
+ provided so that special-purpose external code can be used for this case. When
276
+ used, these functions always allocate memory blocks of the same size. There is
277
+ a discussion about PCRE's stack usage in the
278
+ .\" HREF
279
+ \fBpcrestack\fP
280
+ .\"
281
+ documentation.
282
+ .P
283
+ The global variable \fBpcre_callout\fP initially contains NULL. It can be set
284
+ by the caller to a "callout" function, which PCRE will then call at specified
285
+ points during a matching operation. Details are given in the
286
+ .\" HREF
287
+ \fBpcrecallout\fP
288
+ .\"
289
+ documentation.
290
+ .P
291
+ The global variable \fBpcre_stack_guard\fP initially contains NULL. It can be
292
+ set by the caller to a function that is called by PCRE whenever it starts
293
+ to compile a parenthesized part of a pattern. When parentheses are nested, PCRE
294
+ uses recursive function calls, which use up the system stack. This function is
295
+ provided so that applications with restricted stacks can force a compilation
296
+ error if the stack runs out. The function should return zero if all is well, or
297
+ non-zero to force an error.
298
+ .
299
+ .
300
+ .\" HTML <a name="newlines"></a>
301
+ .SH NEWLINES
302
+ .rs
303
+ .sp
304
+ PCRE supports five different conventions for indicating line breaks in
305
+ strings: a single CR (carriage return) character, a single LF (linefeed)
306
+ character, the two-character sequence CRLF, any of the three preceding, or any
307
+ Unicode newline sequence. The Unicode newline sequences are the three just
308
+ mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed,
309
+ U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
310
+ (paragraph separator, U+2029).
311
+ .P
312
+ Each of the first three conventions is used by at least one operating system as
313
+ its standard newline sequence. When PCRE is built, a default can be specified.
314
+ If no default is specified, LF is used, which is the Unix standard. When PCRE is run, the
315
+ default can be overridden, either when a pattern is compiled, or when it is
316
+ matched.
317
+ .P
318
+ At compile time, the newline convention can be specified by the \fIoptions\fP
319
+ argument of \fBpcre_compile()\fP, or it can be specified by special text at the
320
+ start of the pattern itself; this overrides any other settings. See the
321
+ .\" HREF
322
+ \fBpcrepattern\fP
323
+ .\"
324
+ page for details of the special character sequences.
325
+ .P
326
+ In the PCRE documentation the word "newline" is used to mean "the character or
327
+ pair of characters that indicate a line break". The choice of newline
328
+ convention affects the handling of the dot, circumflex, and dollar
329
+ metacharacters, the handling of #-comments in /x mode, and, when CRLF is a
330
+ recognized line ending sequence, the match position advancement for a
331
+ non-anchored pattern. There is more detail about this in the
332
+ .\" HTML <a href="#execoptions">
333
+ .\" </a>
334
+ section on \fBpcre_exec()\fP options
335
+ .\"
336
+ below.
337
+ .P
338
+ The choice of newline convention does not affect the interpretation of
339
+ the \en or \er escape sequences, nor does it affect what \eR matches, which is
340
+ controlled in a similar way, but by separate options.
341
+ .
342
+ .
343
+ .SH MULTITHREADING
344
+ .rs
345
+ .sp
346
+ The PCRE functions can be used in multithreaded applications, with the
347
+ proviso that the memory management functions pointed to by \fBpcre_malloc\fP,
348
+ \fBpcre_free\fP, \fBpcre_stack_malloc\fP, and \fBpcre_stack_free\fP, and the
349
+ callout and stack-checking functions pointed to by \fBpcre_callout\fP and
350
+ \fBpcre_stack_guard\fP, are shared by all threads.
351
+ .P
352
+ The compiled form of a regular expression is not altered during matching, so
353
+ the same compiled pattern can safely be used by several threads at once.
354
+ .P
355
+ If the just-in-time optimization feature is being used, it needs separate
356
+ memory stack areas for each thread. See the
357
+ .\" HREF
358
+ \fBpcrejit\fP
359
+ .\"
360
+ documentation for more details.
361
+ .
362
+ .
363
+ .SH "SAVING PRECOMPILED PATTERNS FOR LATER USE"
364
+ .rs
365
+ .sp
366
+ The compiled form of a regular expression can be saved and re-used at a later
367
+ time, possibly by a different program, and even on a host other than the one on
368
+ which it was compiled. Details are given in the
369
+ .\" HREF
370
+ \fBpcreprecompile\fP
371
+ .\"
372
+ documentation, which includes a description of the
373
+ \fBpcre_pattern_to_host_byte_order()\fP function. However, compiling a regular
374
+ expression with one version of PCRE for use with a different version is not
375
+ guaranteed to work and may cause crashes.
376
+ .
377
+ .
378
+ .SH "CHECKING BUILD-TIME OPTIONS"
379
+ .rs
380
+ .sp
381
+ .B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
382
+ .PP
383
+ The function \fBpcre_config()\fP makes it possible for a PCRE client to
384
+ discover which optional features have been compiled into the PCRE library. The
385
+ .\" HREF
386
+ \fBpcrebuild\fP
387
+ .\"
388
+ documentation has more details about these optional features.
389
+ .P
390
+ The first argument for \fBpcre_config()\fP is an integer, specifying which
391
+ information is required; the second argument is a pointer to a variable into
392
+ which the information is placed. The returned value is zero on success, or the
393
+ negative error code PCRE_ERROR_BADOPTION if the value in the first argument is
394
+ not recognized. The following information is available:
395
+ .sp
396
+ PCRE_CONFIG_UTF8
397
+ .sp
398
+ The output is an integer that is set to one if UTF-8 support is available;
399
+ otherwise it is set to zero. This value should normally be given to the 8-bit
400
+ version of this function, \fBpcre_config()\fP. If it is given to the 16-bit
401
+ or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION.
402
+ .sp
403
+ PCRE_CONFIG_UTF16
404
+ .sp
405
+ The output is an integer that is set to one if UTF-16 support is available;
406
+ otherwise it is set to zero. This value should normally be given to the 16-bit
407
+ version of this function, \fBpcre16_config()\fP. If it is given to the 8-bit
408
+ or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION.
409
+ .sp
410
+ PCRE_CONFIG_UTF32
411
+ .sp
412
+ The output is an integer that is set to one if UTF-32 support is available;
413
+ otherwise it is set to zero. This value should normally be given to the 32-bit
414
+ version of this function, \fBpcre32_config()\fP. If it is given to the 8-bit
415
+ or 16-bit version of this function, the result is PCRE_ERROR_BADOPTION.
416
+ .sp
417
+ PCRE_CONFIG_UNICODE_PROPERTIES
418
+ .sp
419
+ The output is an integer that is set to one if support for Unicode character
420
+ properties is available; otherwise it is set to zero.
421
+ .sp
422
+ PCRE_CONFIG_JIT
423
+ .sp
424
+ The output is an integer that is set to one if support for just-in-time
425
+ compiling is available; otherwise it is set to zero.
426
+ .sp
427
+ PCRE_CONFIG_JITTARGET
428
+ .sp
429
+ The output is a pointer to a zero-terminated "const char *" string. If JIT
430
+ support is available, the string contains the name of the architecture for
431
+ which the JIT compiler is configured, for example "x86 32bit (little endian +
432
+ unaligned)". If JIT support is not available, the result is NULL.
433
+ .sp
434
+ PCRE_CONFIG_NEWLINE
435
+ .sp
436
+ The output is an integer whose value specifies the default character sequence
437
+ that is recognized as meaning "newline". The values that are supported in
438
+ ASCII/Unicode environments are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for
439
+ ANYCRLF, and -1 for ANY. In EBCDIC environments, CR, ANYCRLF, and ANY yield the
440
+ same values. However, the value for LF is normally 21, though some EBCDIC
441
+ environments use 37. The corresponding values for CRLF are 3349 and 3365. The
442
+ default should normally correspond to the standard sequence for your operating
443
+ system.
444
+ .sp
445
+ PCRE_CONFIG_BSR
446
+ .sp
447
+ The output is an integer whose value indicates what character sequences the \eR
448
+ escape sequence matches by default. A value of 0 means that \eR matches any
449
+ Unicode line ending sequence; a value of 1 means that \eR matches only CR, LF,
450
+ or CRLF. The default can be overridden when a pattern is compiled or matched.
451
+ .sp
452
+ PCRE_CONFIG_LINK_SIZE
453
+ .sp
454
+ The output is an integer that contains the number of bytes used for internal
455
+ linkage in compiled regular expressions. For the 8-bit library, the value can
456
+ be 2, 3, or 4. For the 16-bit library, the value is either 2 or 4 and is still
457
+ a number of bytes. For the 32-bit library, the value is either 2 or 4 and is
458
+ still a number of bytes. The default value of 2 is sufficient for all but the
459
+ most massive patterns, since it allows the compiled pattern to be up to 64K in
460
+ size. Larger values allow larger regular expressions to be compiled, at the
461
+ expense of slower matching.
462
+ .sp
463
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
464
+ .sp
465
+ The output is an integer that contains the threshold above which the POSIX
466
+ interface uses \fBmalloc()\fP for output vectors. Further details are given in
467
+ the
468
+ .\" HREF
469
+ \fBpcreposix\fP
470
+ .\"
471
+ documentation.
472
+ .sp
473
+ PCRE_CONFIG_PARENS_LIMIT
474
+ .sp
475
+ The output is a long integer that gives the maximum depth of nesting of
476
+ parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
477
+ of system stack used when a pattern is compiled. It is specified when PCRE is
478
+ built; the default is 250. This limit does not take into account the stack that
479
+ may already be used by the calling application. For finer control over
480
+ compilation stack usage, you can set a pointer to an external checking function
481
+ in \fBpcre_stack_guard\fP.
482
+ .sp
483
+ PCRE_CONFIG_MATCH_LIMIT
484
+ .sp
485
+ The output is a long integer that gives the default limit for the number of
486
+ internal matching function calls in a \fBpcre_exec()\fP execution. Further
487
+ details are given with \fBpcre_exec()\fP below.
488
+ .sp
489
+ PCRE_CONFIG_MATCH_LIMIT_RECURSION
490
+ .sp
491
+ The output is a long integer that gives the default limit for the depth of
492
+ recursion when calling the internal matching function in a \fBpcre_exec()\fP
493
+ execution. Further details are given with \fBpcre_exec()\fP below.
494
+ .sp
495
+ PCRE_CONFIG_STACKRECURSE
496
+ .sp
497
+ The output is an integer that is set to one if internal recursion when running
498
+ \fBpcre_exec()\fP is implemented by recursive function calls that use the stack
499
+ to remember their state. This is the usual way that PCRE is compiled. The
500
+ output is zero if PCRE was compiled to use blocks of data on the heap instead
501
+ of recursive function calls. In this case, \fBpcre_stack_malloc\fP and
502
+ \fBpcre_stack_free\fP are called to manage memory blocks on the heap, thus
503
+ avoiding the use of the stack.
504
+ .
505
+ .
506
+ .SH "COMPILING A PATTERN"
507
+ .rs
508
+ .sp
509
+ .nf
510
+ .B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
511
+ .B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
512
+ .B " const unsigned char *\fItableptr\fP);"
513
+ .sp
514
+ .B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
515
+ .B " int *\fIerrorcodeptr\fP,"
516
+ .B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
517
+ .B " const unsigned char *\fItableptr\fP);"
518
+ .fi
519
+ .P
520
+ Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be
521
+ called to compile a pattern into an internal form. The only difference between
522
+ the two interfaces is that \fBpcre_compile2()\fP has an additional argument,
523
+ \fIerrorcodeptr\fP, via which a numerical error code can be returned. To avoid
524
+ too much repetition, we refer just to \fBpcre_compile()\fP below, but the
525
+ information applies equally to \fBpcre_compile2()\fP.
526
+ .P
527
+ The pattern is a C string terminated by a binary zero, and is passed in the
528
+ \fIpattern\fP argument. A pointer to a single block of memory that is obtained
529
+ via \fBpcre_malloc\fP is returned. This contains the compiled code and related
530
+ data. The \fBpcre\fP type is defined for the returned block; this is a typedef
531
+ for a structure whose contents are not externally defined. It is up to the
532
+ caller to free the memory (via \fBpcre_free\fP) when it is no longer required.
533
+ .P
534
+ Although the compiled code of a PCRE regex is relocatable, that is, it does not
535
+ depend on memory location, the complete \fBpcre\fP data block is not
536
+ fully relocatable, because it may contain a copy of the \fItableptr\fP
537
+ argument, which is an address (see below).
538
+ .P
539
+ The \fIoptions\fP argument contains various bit settings that affect the
540
+ compilation. It should be zero if no options are required. The available
541
+ options are described below. Some of them (in particular, those that are
542
+ compatible with Perl, but some others as well) can also be set and unset from
543
+ within the pattern (see the detailed description in the
544
+ .\" HREF
545
+ \fBpcrepattern\fP
546
+ .\"
547
+ documentation). For those options that can be different in different parts of
548
+ the pattern, the contents of the \fIoptions\fP argument specifies their
549
+ settings at the start of compilation and execution. The PCRE_ANCHORED,
550
+ PCRE_BSR_\fIxxx\fP, PCRE_NEWLINE_\fIxxx\fP, PCRE_NO_UTF8_CHECK, and
551
+ PCRE_NO_START_OPTIMIZE options can be set at the time of matching as well as at
552
+ compile time.
553
+ .P
554
+ If \fIerrptr\fP is NULL, \fBpcre_compile()\fP returns NULL immediately.
555
+ Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fP returns
556
+ NULL, and sets the variable pointed to by \fIerrptr\fP to point to a textual
557
+ error message. This is a static string that is part of the library. You must
558
+ not try to free it. Normally, the offset from the start of the pattern to the
559
+ data unit that was being processed when the error was discovered is placed in
560
+ the variable pointed to by \fIerroffset\fP, which must not be NULL (if it is,
561
+ an immediate error is given). However, for an invalid UTF-8 or UTF-16 string,
562
+ the offset is that of the first data unit of the failing character.
563
+ .P
564
+ Some errors are not detected until the whole pattern has been scanned; in these
565
+ cases, the offset passed back is the length of the pattern. Note that the
566
+ offset is in data units, not characters, even in a UTF mode. It may sometimes
567
+ point into the middle of a UTF-8 or UTF-16 character.
568
+ .P
569
+ If \fBpcre_compile2()\fP is used instead of \fBpcre_compile()\fP, and the
570
+ \fIerrorcodeptr\fP argument is not NULL, a non-zero error code number is
571
+ returned via this argument in the event of an error. This is in addition to the
572
+ textual error message. Error codes and messages are listed below.
573
+ .P
574
+ If the final argument, \fItableptr\fP, is NULL, PCRE uses a default set of
575
+ character tables that are built when PCRE is compiled, using the default C
576
+ locale. Otherwise, \fItableptr\fP must be an address that is the result of a
577
+ call to \fBpcre_maketables()\fP. This value is stored with the compiled
578
+ pattern, and used again by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP when the
579
+ pattern is matched. For more discussion, see the section on locale support
580
+ below.
581
+ .P
582
+ This code fragment shows a typical straightforward call to \fBpcre_compile()\fP:
583
+ .sp
584
+ pcre *re;
585
+ const char *error;
586
+ int erroffset;
587
+ re = pcre_compile(
588
+ "^A.*Z", /* the pattern */
589
+ 0, /* default options */
590
+ &error, /* for error message */
591
+ &erroffset, /* for error offset */
592
+ NULL); /* use default character tables */
593
+ .sp
594
+ The following names for option bits are defined in the \fBpcre.h\fP header
595
+ file:
596
+ .sp
597
+ PCRE_ANCHORED
598
+ .sp
599
+ If this bit is set, the pattern is forced to be "anchored", that is, it is
600
+ constrained to match only at the first matching point in the string that is
601
+ being searched (the "subject string"). This effect can also be achieved by
602
+ appropriate constructs in the pattern itself, which is the only way to do it in
603
+ Perl.
604
+ .sp
605
+ PCRE_AUTO_CALLOUT
606
+ .sp
607
+ If this bit is set, \fBpcre_compile()\fP automatically inserts callout items,
608
+ all with number 255, before each pattern item. For discussion of the callout
609
+ facility, see the
610
+ .\" HREF
611
+ \fBpcrecallout\fP
612
+ .\"
613
+ documentation.
614
+ .sp
615
+ PCRE_BSR_ANYCRLF
616
+ PCRE_BSR_UNICODE
617
+ .sp
618
+ These options (which are mutually exclusive) control what the \eR escape
619
+ sequence matches. The choice is either to match only CR, LF, or CRLF, or to
620
+ match any Unicode newline sequence. The default is specified when PCRE is
621
+ built. It can be overridden from within the pattern, or by setting an option
622
+ when a compiled pattern is matched.
623
+ .sp
624
+ PCRE_CASELESS
625
+ .sp
626
+ If this bit is set, letters in the pattern match both upper and lower case
627
+ letters. It is equivalent to Perl's /i option, and it can be changed within a
628
+ pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the
629
+ concept of case for characters whose values are less than 128, so caseless
630
+ matching is always possible. For characters with higher values, the concept of
631
+ case is supported if PCRE is compiled with Unicode property support, but not
632
+ otherwise. If you want to use caseless matching for characters 128 and above,
633
+ you must ensure that PCRE is compiled with Unicode property support as well as
634
+ with UTF-8 support.
635
+ .sp
636
+ PCRE_DOLLAR_ENDONLY
637
+ .sp
638
+ If this bit is set, a dollar metacharacter in the pattern matches only at the
639
+ end of the subject string. Without this option, a dollar also matches
640
+ immediately before a newline at the end of the string (but not before any other
641
+ newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
642
+ There is no equivalent to this option in Perl, and no way to set it within a
643
+ pattern.
644
+ .sp
645
+ PCRE_DOTALL
646
+ .sp
647
+ If this bit is set, a dot metacharacter in the pattern matches a character of
648
+ any value, including one that indicates a newline. However, it only ever
649
+ matches one character, even if newlines are coded as CRLF. Without this option,
650
+ a dot does not match when the current position is at a newline. This option is
651
+ equivalent to Perl's /s option, and it can be changed within a pattern by a
652
+ (?s) option setting. A negative class such as [^a] always matches newline
653
+ characters, independent of the setting of this option.
654
+ .sp
655
+ PCRE_DUPNAMES
656
+ .sp
657
+ If this bit is set, names used to identify capturing subpatterns need not be
658
+ unique. This can be helpful for certain types of pattern when it is known that
659
+ only one instance of the named subpattern can ever be matched. There are more
660
+ details of named subpatterns below; see also the
661
+ .\" HREF
662
+ \fBpcrepattern\fP
663
+ .\"
664
+ documentation.
665
+ .sp
666
+ PCRE_EXTENDED
667
+ .sp
668
+ If this bit is set, most white space characters in the pattern are totally
669
+ ignored except when escaped or inside a character class. However, white space
670
+ is not allowed within sequences such as (?> that introduce various
671
+ parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
672
+ However, ignorable white space is permitted between an item and a following
673
+ quantifier and between a quantifier and a following + that indicates
674
+ possessiveness.
675
+ .P
676
+ White space formerly did not include the VT character (code 11), because Perl
677
+ did not treat this character as white space. However, Perl changed at release
678
+ 5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
679
+ .P
680
+ PCRE_EXTENDED also causes characters between an unescaped # outside a character
681
+ class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
682
+ equivalent to Perl's /x option, and it can be changed within a pattern by a
683
+ (?x) option setting.
684
+ .P
685
+ Which characters are interpreted as newlines is controlled by the options
686
+ passed to \fBpcre_compile()\fP or by a special sequence at the start of the
687
+ pattern, as described in the section entitled
688
+ .\" HTML <a href="pcrepattern.html#newlines">
689
+ .\" </a>
690
+ "Newline conventions"
691
+ .\"
692
+ in the \fBpcrepattern\fP documentation. Note that the end of this type of
693
+ comment is a literal newline sequence in the pattern; escape sequences that
694
+ happen to represent a newline do not count.
695
+ .P
696
+ This option makes it possible to include comments inside complicated patterns.
697
+ Note, however, that this applies only to data characters. White space characters
698
+ may never appear within special character sequences in a pattern, for example
699
+ within the sequence (?( that introduces a conditional subpattern.
700
+ .sp
701
+ PCRE_EXTRA
702
+ .sp
703
+ This option was invented in order to turn on additional functionality of PCRE
704
+ that is incompatible with Perl, but it is currently of very little use. When
705
+ set, any backslash in a pattern that is followed by a letter that has no
706
+ special meaning causes an error, thus reserving these combinations for future
707
+ expansion. By default, as in Perl, a backslash followed by a letter with no
708
+ special meaning is treated as a literal. (Perl can, however, be persuaded to
709
+ give an error for this, by running it with the -w option.) There are at present
710
+ no other features controlled by this option. It can also be set by a (?X)
711
+ option setting within a pattern.
712
+ .sp
713
+ PCRE_FIRSTLINE
714
+ .sp
715
+ If this option is set, an unanchored pattern is required to match before or at
716
+ the first newline in the subject string, though the matched text may continue
717
+ over the newline.
718
+ .sp
719
+ PCRE_JAVASCRIPT_COMPAT
720
+ .sp
721
+ If this option is set, PCRE's behaviour is changed in some ways so that it is
722
+ compatible with JavaScript rather than Perl. The changes are as follows:
723
+ .P
724
+ (1) A lone closing square bracket in a pattern causes a compile-time error,
725
+ because this is illegal in JavaScript (by default it is treated as a data
726
+ character). Thus, the pattern AB]CD becomes illegal when this option is set.
727
+ .P
728
+ (2) At run time, a back reference to an unset subpattern group matches an empty
729
+ string (by default this causes the current matching alternative to fail). A
730
+ pattern such as (\e1)(a) succeeds when this option is set (assuming it can find
731
+ an "a" in the subject), whereas it fails by default, for Perl compatibility.
732
+ .P
733
+ (3) \eU matches an upper case "U" character; by default \eU causes a compile
734
+ time error (Perl uses \eU to upper case subsequent characters).
735
+ .P
736
+ (4) \eu matches a lower case "u" character unless it is followed by four
737
+ hexadecimal digits, in which case the hexadecimal number defines the code point
738
+ to match. By default, \eu causes a compile time error (Perl uses it to upper
739
+ case the following character).
740
+ .P
741
+ (5) \ex matches a lower case "x" character unless it is followed by two
742
+ hexadecimal digits, in which case the hexadecimal number defines the code point
743
+ to match. By default, as in Perl, a hexadecimal number is always expected after
744
+ \ex, but it may have zero, one, or two digits (so, for example, \exz matches a
745
+ binary zero character followed by z).
746
+ .sp
747
+ PCRE_MULTILINE
748
+ .sp
749
+ By default, for the purposes of matching "start of line" and "end of line",
750
+ PCRE treats the subject string as consisting of a single line of characters,
751
+ even if it actually contains newlines. The "start of line" metacharacter (^)
752
+ matches only at the start of the string, and the "end of line" metacharacter
753
+ ($) matches only at the end of the string, or before a terminating newline
754
+ (except when PCRE_DOLLAR_ENDONLY is set). Note, however, that unless
755
+ PCRE_DOTALL is set, the "any character" metacharacter (.) does not match at a
756
+ newline. This behaviour (for ^, $, and dot) is the same as Perl.
757
+ .P
758
+ When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
759
+ match immediately following or immediately before internal newlines in the
760
+ subject string, respectively, as well as at the very start and end. This is
761
+ equivalent to Perl's /m option, and it can be changed within a pattern by a
762
+ (?m) option setting. If there are no newlines in a subject string, or no
763
+ occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
764
+ .sp
765
+ PCRE_NEVER_UTF
766
+ .sp
767
+ This option locks out interpretation of the pattern as UTF-8 (or UTF-16 or
768
+ UTF-32 in the 16-bit and 32-bit libraries). In particular, it prevents the
769
+ creator of the pattern from switching to UTF interpretation by starting the
770
+ pattern with (*UTF). This may be useful in applications that process patterns
771
+ from external sources. The combination of PCRE_UTF8 and PCRE_NEVER_UTF also
772
+ causes an error.
773
+ .sp
774
+ PCRE_NEWLINE_CR
775
+ PCRE_NEWLINE_LF
776
+ PCRE_NEWLINE_CRLF
777
+ PCRE_NEWLINE_ANYCRLF
778
+ PCRE_NEWLINE_ANY
779
+ .sp
780
+ These options override the default newline definition that was chosen when PCRE
781
+ was built. Setting the first or the second specifies that a newline is
782
+ indicated by a single character (CR or LF, respectively). Setting
783
+ PCRE_NEWLINE_CRLF specifies that a newline is indicated by the two-character
784
+ CRLF sequence. Setting PCRE_NEWLINE_ANYCRLF specifies that any of the three
785
+ preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies
786
+ that any Unicode newline sequence should be recognized.
787
+ .P
788
+ In an ASCII/Unicode environment, the Unicode newline sequences are the three
789
+ just mentioned, plus the single characters VT (vertical tab, U+000B), FF (form
790
+ feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
791
+ (paragraph separator, U+2029). For the 8-bit library, the last two are
792
+ recognized only in UTF-8 mode.
793
+ .P
794
+ When PCRE is compiled to run in an EBCDIC (mainframe) environment, the code for
795
+ CR is 0x0d, the same as ASCII. However, the character code for LF is normally
796
+ 0x15, though in some EBCDIC environments 0x25 is used. Whichever of these is
797
+ not LF is made to correspond to Unicode's NEL character. EBCDIC codes are all
798
+ less than 256. For more details, see the
799
+ .\" HREF
800
+ \fBpcrebuild\fP
801
+ .\"
802
+ documentation.
803
+ .P
804
+ The newline setting in the options word uses three bits that are treated
805
+ as a number, giving eight possibilities. Currently only six are used (default
806
+ plus the five values above). This means that if you set more than one newline
807
+ option, the combination may or may not be sensible. For example,
808
+ PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to PCRE_NEWLINE_CRLF, but
809
+ other combinations may yield unused numbers and cause an error.
810
+ .P
811
+ The only time that a line break in a pattern is specially recognized when
812
+ compiling is when PCRE_EXTENDED is set. CR and LF are white space characters,
813
+ and so are ignored in this mode. Also, an unescaped # outside a character class
814
+ indicates a comment that lasts until after the next line break sequence. In
815
+ other circumstances, line break sequences in patterns are treated as literal
816
+ data.
817
+ .P
818
+ The newline option that is set at compile time becomes the default that is used
819
+ for \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, but it can be overridden.
820
+ .sp
821
+ PCRE_NO_AUTO_CAPTURE
822
+ .sp
823
+ If this option is set, it disables the use of numbered capturing parentheses in
824
+ the pattern. Any opening parenthesis that is not followed by ? behaves as if it
825
+ were followed by ?: but named parentheses can still be used for capturing (and
826
+ they acquire numbers in the usual way). There is no equivalent of this option
827
+ in Perl.
828
+ .sp
829
+ PCRE_NO_AUTO_POSSESS
830
+ .sp
831
+ If this option is set, it disables "auto-possessification". This is an
832
+ optimization that, for example, turns a+b into a++b in order to avoid
833
+ backtracks into a+ that can never be successful. However, if callouts are in
834
+ use, auto-possessification means that some of them are never taken. You can set
835
+ this option if you want the matching functions to do a full unoptimized search
836
+ and run all the callouts, but it is mainly provided for testing purposes.
837
+ .sp
838
+ PCRE_NO_START_OPTIMIZE
839
+ .sp
840
+ This is an option that acts at matching time; that is, it is really an option
841
+ for \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. If it is set at compile time,
842
+ it is remembered with the compiled pattern and assumed at matching time. This
843
+ is necessary if you want to use JIT execution, because the JIT compiler needs
844
+ to know whether or not this option is set. For details see the discussion of
845
+ PCRE_NO_START_OPTIMIZE
846
+ .\" HTML <a href="#execoptions">
847
+ .\" </a>
848
+ below.
849
+ .\"
850
+ .sp
851
+ PCRE_UCP
852
+ .sp
853
+ This option changes the way PCRE processes \eB, \eb, \eD, \ed, \eS, \es, \eW,
854
+ \ew, and some of the POSIX character classes. By default, only ASCII characters
855
+ are recognized, but if PCRE_UCP is set, Unicode properties are used instead to
856
+ classify characters. More details are given in the section on
857
+ .\" HTML <a href="pcre.html#genericchartypes">
858
+ .\" </a>
859
+ generic character types
860
+ .\"
861
+ in the
862
+ .\" HREF
863
+ \fBpcrepattern\fP
864
+ .\"
865
+ page. If you set PCRE_UCP, matching one of the items it affects takes much
866
+ longer. The option is available only if PCRE has been compiled with Unicode
867
+ property support.
868
+ .sp
869
+ PCRE_UNGREEDY
870
+ .sp
871
+ This option inverts the "greediness" of the quantifiers so that they are not
872
+ greedy by default, but become greedy if followed by "?". It is not compatible
873
+ with Perl. It can also be set by a (?U) option setting within the pattern.
874
+ .sp
875
+ PCRE_UTF8
876
+ .sp
877
+ This option causes PCRE to regard both the pattern and the subject as strings
878
+ of UTF-8 characters instead of single-byte strings. However, it is available
879
+ only when PCRE is built to include UTF support. If not, the use of this option
880
+ provokes an error. Details of how this option changes the behaviour of PCRE are
881
+ given in the
882
+ .\" HREF
883
+ \fBpcreunicode\fP
884
+ .\"
885
+ page.
886
+ .sp
887
+ PCRE_NO_UTF8_CHECK
888
+ .sp
889
+ When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
890
+ automatically checked. There is a discussion about the
891
+ .\" HTML <a href="pcreunicode.html#utf8strings">
892
+ .\" </a>
893
+ validity of UTF-8 strings
894
+ .\"
895
+ in the
896
+ .\" HREF
897
+ \fBpcreunicode\fP
898
+ .\"
899
+ page. If an invalid UTF-8 sequence is found, \fBpcre_compile()\fP returns an
900
+ error. If you already know that your pattern is valid, and you want to skip
901
+ this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
902
+ When it is set, the effect of passing an invalid UTF-8 string as a pattern is
903
+ undefined. It may cause your program to crash or loop. Note that this option
904
+ can also be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress
905
+ the validity checking of subject strings only. If the same string is being
906
+ matched many times, the option can be safely set for the second and subsequent
907
+ matchings to improve performance.
908
+ .
909
+ .
910
+ .SH "COMPILATION ERROR CODES"
911
+ .rs
912
+ .sp
913
+ The following table lists the error codes that may be returned by
914
+ \fBpcre_compile2()\fP, along with the error messages that may be returned by
915
+ both compiling functions. Note that error messages are always 8-bit ASCII
916
+ strings, even in 16-bit or 32-bit mode. As PCRE has developed, some error codes
917
+ have fallen out of use. To avoid confusion, they have not been re-used.
918
+ .sp
919
+ 0 no error
920
+ 1 \e at end of pattern
921
+ 2 \ec at end of pattern
922
+ 3 unrecognized character follows \e
923
+ 4 numbers out of order in {} quantifier
924
+ 5 number too big in {} quantifier
925
+ 6 missing terminating ] for character class
926
+ 7 invalid escape sequence in character class
927
+ 8 range out of order in character class
928
+ 9 nothing to repeat
929
+ 10 [this code is not in use]
930
+ 11 internal error: unexpected repeat
931
+ 12 unrecognized character after (? or (?-
932
+ 13 POSIX named classes are supported only within a class
933
+ 14 missing )
934
+ 15 reference to non-existent subpattern
935
+ 16 erroffset passed as NULL
936
+ 17 unknown option bit(s) set
937
+ 18 missing ) after comment
938
+ 19 [this code is not in use]
939
+ 20 regular expression is too large
940
+ 21 failed to get memory
941
+ 22 unmatched parentheses
942
+ 23 internal error: code overflow
943
+ 24 unrecognized character after (?<
944
+ 25 lookbehind assertion is not fixed length
945
+ 26 malformed number or name after (?(
946
+ 27 conditional group contains more than two branches
947
+ 28 assertion expected after (?(
948
+ 29 (?R or (?[+-]digits must be followed by )
949
+ 30 unknown POSIX class name
950
+ 31 POSIX collating elements are not supported
951
+ 32 this version of PCRE is compiled without UTF support
952
+ 33 [this code is not in use]
953
+ 34 character value in \ex{} or \eo{} is too large
954
+ 35 invalid condition (?(0)
955
+ 36 \eC not allowed in lookbehind assertion
956
+ 37 PCRE does not support \eL, \el, \eN{name}, \eU, or \eu
957
+ 38 number after (?C is > 255
958
+ 39 closing ) for (?C expected
959
+ 40 recursive call could loop indefinitely
960
+ 41 unrecognized character after (?P
961
+ 42 syntax error in subpattern name (missing terminator)
962
+ 43 two named subpatterns have the same name
963
+ 44 invalid UTF-8 string (specifically UTF-8)
964
+ 45 support for \eP, \ep, and \eX has not been compiled
965
+ 46 malformed \eP or \ep sequence
966
+ 47 unknown property name after \eP or \ep
967
+ 48 subpattern name is too long (maximum 32 characters)
968
+ 49 too many named subpatterns (maximum 10000)
969
+ 50 [this code is not in use]
970
+ 51 octal value is greater than \e377 in 8-bit non-UTF-8 mode
971
+ 52 internal error: overran compiling workspace
972
+ 53 internal error: previously-checked referenced subpattern
973
+ not found
974
+ 54 DEFINE group contains more than one branch
975
+ 55 repeating a DEFINE group is not allowed
976
+ 56 inconsistent NEWLINE options
977
+ 57 \eg is not followed by a braced, angle-bracketed, or quoted
978
+ name/number or by a plain number
979
+ 58 a numbered reference must not be zero
980
+ 59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
981
+ 60 (*VERB) not recognized or malformed
982
+ 61 number is too big
983
+ 62 subpattern name expected
984
+ 63 digit expected after (?+
985
+ 64 ] is an invalid data character in JavaScript compatibility mode
986
+ 65 different names for subpatterns of the same number are
987
+ not allowed
988
+ 66 (*MARK) must have an argument
989
+ 67 this version of PCRE is not compiled with Unicode property
990
+ support
991
+ 68 \ec must be followed by an ASCII character
992
+ 69 \ek is not followed by a braced, angle-bracketed, or quoted name
993
+ 70 internal error: unknown opcode in find_fixedlength()
994
+ 71 \eN is not supported in a class
995
+ 72 too many forward references
996
+ 73 disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
997
+ 74 invalid UTF-16 string (specifically UTF-16)
998
+ 75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
999
+ 76 character value in \eu.... sequence is too large
1000
+ 77 invalid UTF-32 string (specifically UTF-32)
1001
+ 78 setting UTF is disabled by the application
1002
+ 79 non-hex character in \ex{} (closing brace missing?)
1003
+ 80 non-octal character in \eo{} (closing brace missing?)
1004
+ 81 missing opening brace after \eo
1005
+ 82 parentheses are too deeply nested
1006
+ 83 invalid range in character class
1007
+ 84 group name must start with a non-digit
1008
+ 85 parentheses are too deeply nested (stack check)
1009
+ .sp
1010
+ The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
1011
+ be used if the limits were changed when PCRE was built.
1012
+ .
1013
+ .
1014
+ .\" HTML <a name="studyingapattern"></a>
1015
+ .SH "STUDYING A PATTERN"
1016
+ .rs
1017
+ .sp
1018
+ .nf
1019
+ .B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
1020
+ .B " const char **\fIerrptr\fP);"
1021
+ .fi
1022
+ .PP
1023
+ If a compiled pattern is going to be used several times, it is worth spending
1024
+ more time analyzing it in order to speed up the time taken for matching. The
1025
+ function \fBpcre_study()\fP takes a pointer to a compiled pattern as its first
1026
+ argument. If studying the pattern produces additional information that will
1027
+ help speed up matching, \fBpcre_study()\fP returns a pointer to a
1028
+ \fBpcre_extra\fP block, in which the \fIstudy_data\fP field points to the
1029
+ results of the study.
1030
+ .P
1031
+ The returned value from \fBpcre_study()\fP can be passed directly to
1032
+ \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. However, a \fBpcre_extra\fP block
1033
+ also contains other fields that can be set by the caller before the block is
1034
+ passed; these are described
1035
+ .\" HTML <a href="#extradata">
1036
+ .\" </a>
1037
+ below
1038
+ .\"
1039
+ in the section on matching a pattern.
1040
+ .P
1041
+ If studying the pattern does not produce any useful information,
1042
+ \fBpcre_study()\fP returns NULL by default. In that circumstance, if the
1043
+ calling program wants to pass any of the other fields to \fBpcre_exec()\fP or
1044
+ \fBpcre_dfa_exec()\fP, it must set up its own \fBpcre_extra\fP block. However,
1045
+ if \fBpcre_study()\fP is called with the PCRE_STUDY_EXTRA_NEEDED option, it
1046
+ returns a \fBpcre_extra\fP block even if studying did not find any additional
1047
+ information. It may still return NULL, however, if an error occurs in
1048
+ \fBpcre_study()\fP.
1049
+ .P
1050
+ The second argument of \fBpcre_study()\fP contains option bits. There are three
1051
+ further options in addition to PCRE_STUDY_EXTRA_NEEDED:
1052
+ .sp
1053
+ PCRE_STUDY_JIT_COMPILE
1054
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1055
+ PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
1056
+ .sp
1057
+ If any of these are set, and the just-in-time compiler is available, the
1058
+ pattern is further compiled into machine code that executes much faster than
1059
+ the \fBpcre_exec()\fP interpretive matching function. If the just-in-time
1060
+ compiler is not available, these options are ignored. All undefined bits in the
1061
+ \fIoptions\fP argument must be zero.
1062
+ .P
1063
+ JIT compilation is a heavyweight optimization. It can take some time for
1064
+ patterns to be analyzed, and for one-off matches and simple patterns the
1065
+ benefit of faster execution might be offset by a much slower study time.
1066
+ Not all patterns can be optimized by the JIT compiler. For those that cannot be
1067
+ handled, matching automatically falls back to the \fBpcre_exec()\fP
1068
+ interpreter. For more details, see the
1069
+ .\" HREF
1070
+ \fBpcrejit\fP
1071
+ .\"
1072
+ documentation.
1073
+ .P
1074
+ The third argument for \fBpcre_study()\fP is a pointer for an error message. If
1075
+ studying succeeds (even if no data is returned), the variable it points to is
1076
+ set to NULL. Otherwise it is set to point to a textual error message. This is a
1077
+ static string that is part of the library. You must not try to free it. You
1078
+ should test the error pointer for NULL after calling \fBpcre_study()\fP, to be
1079
+ sure that it has run successfully.
1080
+ .P
1081
+ When you are finished with a pattern, you can free the memory used for the
1082
+ study data by calling \fBpcre_free_study()\fP. This function was added to the
1083
+ API for release 8.20. For earlier versions, the memory could be freed with
1084
+ \fBpcre_free()\fP, just like the pattern itself. This will still work in cases
1085
+ where JIT optimization is not used, but it is advisable to change to the new
1086
+ function when convenient.
1087
+ .P
1088
+ This is a typical way in which \fBpcre_study()\fP is used (except that in a
1089
+ real application there should be tests for errors):
1090
+ .sp
1091
+ int rc;
1092
+ pcre *re;
1093
+ pcre_extra *sd;
1094
+ re = pcre_compile("pattern", 0, &error, &erroffset, NULL);
1095
+ sd = pcre_study(
1096
+ re, /* result of pcre_compile() */
1097
+ 0, /* no options */
1098
+ &error); /* set to NULL or points to a message */
1099
+ rc = pcre_exec( /* see below for details of pcre_exec() options */
1100
+ re, sd, "subject", 7, 0, 0, ovector, 30);
1101
+ ...
1102
+ pcre_free_study(sd);
1103
+ pcre_free(re);
1104
+ .sp
1105
+ Studying a pattern does two things: first, a lower bound for the length of
1106
+ subject string that is needed to match the pattern is computed. This does not
1107
+ mean that there are any strings of that length that match, but it does
1108
+ guarantee that no shorter strings match. The value is used to avoid wasting
1109
+ time by trying to match strings that are shorter than the lower bound. You can
1110
+ find out the value in a calling program via the \fBpcre_fullinfo()\fP function.
1111
+ .P
1112
+ Studying a pattern is also useful for non-anchored patterns that do not have a
1113
+ single fixed starting character. A bitmap of possible starting bytes is
1114
+ created. This speeds up finding a position in the subject at which to start
1115
+ matching. (In 16-bit mode, the bitmap is used for 16-bit values less than 256.
1116
+ In 32-bit mode, the bitmap is used for 32-bit values less than 256.)
1117
+ .P
1118
+ These two optimizations apply to both \fBpcre_exec()\fP and
1119
+ \fBpcre_dfa_exec()\fP, and the information is also used by the JIT compiler.
1120
+ The optimizations can be disabled by setting the PCRE_NO_START_OPTIMIZE option.
1121
+ You might want to do this if your pattern contains callouts or (*MARK) and you
1122
+ want to make use of these facilities in cases where matching fails.
1123
+ .P
1124
+ PCRE_NO_START_OPTIMIZE can be specified at either compile time or execution
1125
+ time. However, if PCRE_NO_START_OPTIMIZE is passed to \fBpcre_exec()\fP (that
1126
+ is, after any JIT compilation has happened) JIT execution is disabled. For JIT
1127
+ execution to work with PCRE_NO_START_OPTIMIZE, the option must be set at
1128
+ compile time.
1129
+ .P
1130
+ There is a longer discussion of PCRE_NO_START_OPTIMIZE
1131
+ .\" HTML <a href="#execoptions">
1132
+ .\" </a>
1133
+ below.
1134
+ .\"
1135
+ .
1136
+ .
1137
+ .\" HTML <a name="localesupport"></a>
1138
+ .SH "LOCALE SUPPORT"
1139
+ .rs
1140
+ .sp
1141
+ PCRE handles caseless matching, and determines whether characters are letters,
1142
+ digits, or whatever, by reference to a set of tables, indexed by character
1143
+ code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
1144
+ applies only to characters with code points less than 256. By default,
1145
+ higher-valued code points never match escapes such as \ew or \ed. However, if
1146
+ PCRE is built with Unicode property support, all characters can be tested with
1147
+ \ep and \eP, or, alternatively, the PCRE_UCP option can be set when a pattern
1148
+ is compiled; this causes \ew and friends to use Unicode property support
1149
+ instead of the built-in tables.
1150
+ .P
1151
+ The use of locales with Unicode is discouraged. If you are handling characters
1152
+ with code points greater than 128, you should either use Unicode support, or
1153
+ use locales, but not try to mix the two.
1154
+ .P
1155
+ PCRE contains an internal set of tables that are used when the final argument
1156
+ of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
1157
+ Normally, the internal tables recognize only ASCII characters. However, when
1158
+ PCRE is built, it is possible to cause the internal tables to be rebuilt in the
1159
+ default "C" locale of the local system, which may cause them to be different.
1160
+ .P
1161
+ The internal tables can always be overridden by tables supplied by the
1162
+ application that calls PCRE. These may be created in a different locale from
1163
+ the default. As more and more applications change to using Unicode, the need
1164
+ for this locale support is expected to die away.
1165
+ .P
1166
+ External tables are built by calling the \fBpcre_maketables()\fP function,
1167
+ which has no arguments, in the relevant locale. The result can then be passed
1168
+ to \fBpcre_compile()\fP as often as necessary. For example, to build and use
1169
+ tables that are appropriate for the French locale (where accented characters
1170
+ with values greater than 128 are treated as letters), the following code could
1171
+ be used:
1172
+ .sp
1173
+ setlocale(LC_CTYPE, "fr_FR");
1174
+ tables = pcre_maketables();
1175
+ re = pcre_compile(..., tables);
1176
+ .sp
1177
+ The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
1178
+ are using Windows, the name for the French locale is "french".
1179
+ .P
1180
+ When \fBpcre_maketables()\fP runs, the tables are built in memory that is
1181
+ obtained via \fBpcre_malloc\fP. It is the caller's responsibility to ensure
1182
+ that the memory containing the tables remains available for as long as it is
1183
+ needed.
1184
+ .P
1185
+ The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled
1186
+ pattern, and the same tables are used via this pointer by \fBpcre_study()\fP
1187
+ and also by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP. Thus, for any single
1188
+ pattern, compilation, studying and matching all happen in the same locale, but
1189
+ different patterns can be processed in different locales.
1190
+ .P
1191
+ It is possible to pass a table pointer or NULL (indicating the use of the
1192
+ internal tables) to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP (see the
1193
+ discussion below in the section on matching a pattern). This facility is
1194
+ provided for use with pre-compiled patterns that have been saved and reloaded.
1195
+ Character tables are not saved with patterns, so if a non-standard table was
1196
+ used at compile time, it must be provided again when the reloaded pattern is
1197
+ matched. Attempting to use this facility to match a pattern in a different
1198
+ locale from the one in which it was compiled is likely to lead to anomalous
1199
+ (usually incorrect) results.
1200
+ .
1201
+ .
1202
+ .\" HTML <a name="infoaboutpattern"></a>
1203
+ .SH "INFORMATION ABOUT A PATTERN"
1204
+ .rs
1205
+ .sp
1206
+ .nf
1207
+ .B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
1208
+ .B " int \fIwhat\fP, void *\fIwhere\fP);"
1209
+ .fi
1210
+ .PP
1211
+ The \fBpcre_fullinfo()\fP function returns information about a compiled
1212
+ pattern. It replaces the \fBpcre_info()\fP function, which was removed from the
1213
+ library at version 8.30, after more than 10 years of obsolescence.
1214
+ .P
1215
+ The first argument for \fBpcre_fullinfo()\fP is a pointer to the compiled
1216
+ pattern. The second argument is the result of \fBpcre_study()\fP, or NULL if
1217
+ the pattern was not studied. The third argument specifies which piece of
1218
+ information is required, and the fourth argument is a pointer to a variable
1219
+ to receive the data. The yield of the function is zero for success, or one of
1220
+ the following negative numbers:
1221
+ .sp
1222
+ PCRE_ERROR_NULL the argument \fIcode\fP was NULL
1223
+ the argument \fIwhere\fP was NULL
1224
+ PCRE_ERROR_BADMAGIC the "magic number" was not found
1225
+ PCRE_ERROR_BADENDIANNESS the pattern was compiled with different
1226
+ endianness
1227
+ PCRE_ERROR_BADOPTION the value of \fIwhat\fP was invalid
1228
+ PCRE_ERROR_UNSET the requested field is not set
1229
+ .sp
1230
+ The "magic number" is placed at the start of each compiled pattern as a simple
1231
+ check against passing an arbitrary memory pointer. The endianness error can
1232
+ occur if a compiled pattern is saved and reloaded on a different host. Here is
1233
+ a typical call of \fBpcre_fullinfo()\fP, to obtain the length of the compiled
1234
+ pattern:
1235
+ .sp
1236
+ int rc;
1237
+ size_t length;
1238
+ rc = pcre_fullinfo(
1239
+ re, /* result of pcre_compile() */
1240
+ sd, /* result of pcre_study(), or NULL */
1241
+ PCRE_INFO_SIZE, /* what is required */
1242
+ &length); /* where to put the data */
1243
+ .sp
1244
+ The possible values for the third argument are defined in \fBpcre.h\fP, and are
1245
+ as follows:
1246
+ .sp
1247
+ PCRE_INFO_BACKREFMAX
1248
+ .sp
1249
+ Return the number of the highest back reference in the pattern. The fourth
1250
+ argument should point to an \fBint\fP variable. Zero is returned if there are
1251
+ no back references.
1252
+ .sp
1253
+ PCRE_INFO_CAPTURECOUNT
1254
+ .sp
1255
+ Return the number of capturing subpatterns in the pattern. The fourth argument
1256
+ should point to an \fBint\fP variable.
1257
+ .sp
1258
+ PCRE_INFO_DEFAULT_TABLES
1259
+ .sp
1260
+ Return a pointer to the internal default character tables within PCRE. The
1261
+ fourth argument should point to an \fBunsigned char *\fP variable. This
1262
+ information call is provided for internal use by the \fBpcre_study()\fP
1263
+ function. External callers can cause PCRE to use its internal tables by passing
1264
+ a NULL table pointer.
1265
+ .sp
1266
+ PCRE_INFO_FIRSTBYTE (deprecated)
1267
+ .sp
1268
+ Return information about the first data unit of any matched string, for a
1269
+ non-anchored pattern. The name of this option refers to the 8-bit library,
1270
+ where data units are bytes. The fourth argument should point to an \fBint\fP
1271
+ variable. Negative values are used for special cases. However, this means that
1272
+ when the 32-bit library is in non-UTF-32 mode, the full 32-bit range of
1273
+ characters cannot be returned. For this reason, this value is deprecated; use
1274
+ PCRE_INFO_FIRSTCHARACTERFLAGS and PCRE_INFO_FIRSTCHARACTER instead.
1275
+ .P
1276
+ If there is a fixed first value, for example, the letter "c" from a pattern
1277
+ such as (cat|cow|coyote), its value is returned. In the 8-bit library, the
1278
+ value is always less than 256. In the 16-bit library the value can be up to
1279
+ 0xffff. In the 32-bit library the value can be up to 0x10ffff.
1280
+ .P
1281
+ If there is no fixed first value, and if either
1282
+ .sp
1283
+ (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
1284
+ starts with "^", or
1285
+ .sp
1286
+ (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
1287
+ (if it were set, the pattern would be anchored),
1288
+ .sp
1289
+ -1 is returned, indicating that the pattern matches only at the start of a
1290
+ subject string or after any newline within the string. Otherwise -2 is
1291
+ returned. For anchored patterns, -2 is returned.
1292
+ .sp
1293
+ PCRE_INFO_FIRSTCHARACTER
1294
+ .sp
1295
+ Return the value of the first data unit (non-UTF character) of any matched
1296
+ string in the situation where PCRE_INFO_FIRSTCHARACTERFLAGS returns 1;
1297
+ otherwise return 0. The fourth argument should point to a \fBuint32_t\fP
1298
+ variable.
1299
+ .P
1300
+ In the 8-bit library, the value is always less than 256. In the 16-bit library
1301
+ the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
1302
+ can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
1303
+ .sp
1304
+ PCRE_INFO_FIRSTCHARACTERFLAGS
1305
+ .sp
1306
+ Return information about the first data unit of any matched string, for a
1307
+ non-anchored pattern. The fourth argument should point to an \fBint\fP
1308
+ variable.
1309
+ .P
1310
+ If there is a fixed first value, for example, the letter "c" from a pattern
1311
+ such as (cat|cow|coyote), 1 is returned, and the character value can be
1312
+ retrieved using PCRE_INFO_FIRSTCHARACTER. If there is no fixed first value, and
1313
+ if either
1314
+ .sp
1315
+ (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
1316
+ starts with "^", or
1317
+ .sp
1318
+ (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
1319
+ (if it were set, the pattern would be anchored),
1320
+ .sp
1321
+ 2 is returned, indicating that the pattern matches only at the start of a
1322
+ subject string or after any newline within the string. Otherwise 0 is
1323
+ returned. For anchored patterns, 0 is returned.
1324
+ .sp
1325
+ PCRE_INFO_FIRSTTABLE
1326
+ .sp
1327
+ If the pattern was studied, and this resulted in the construction of a 256-bit
1328
+ table indicating a fixed set of values for the first data unit in any matching
1329
+ string, a pointer to the table is returned. Otherwise NULL is returned. The
1330
+ fourth argument should point to an \fBunsigned char *\fP variable.
1331
+ .sp
1332
+ PCRE_INFO_HASCRORLF
1333
+ .sp
1334
+ Return 1 if the pattern contains any explicit matches for CR or LF characters,
1335
+ otherwise 0. The fourth argument should point to an \fBint\fP variable. An
1336
+ explicit match is either a literal CR or LF character, or \er or \en.
1337
+ .sp
1338
+ PCRE_INFO_JCHANGED
1339
+ .sp
1340
+ Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise
1341
+ 0. The fourth argument should point to an \fBint\fP variable. (?J) and
1342
+ (?-J) set and unset the local PCRE_DUPNAMES option, respectively.
1343
+ .sp
1344
+ PCRE_INFO_JIT
1345
+ .sp
1346
+ Return 1 if the pattern was studied with one of the JIT options, and
1347
+ just-in-time compiling was successful. The fourth argument should point to an
1348
+ \fBint\fP variable. A return value of 0 means that JIT support is not available
1349
+ in this version of PCRE, or that the pattern was not studied with a JIT option,
1350
+ or that the JIT compiler could not handle this particular pattern. See the
1351
+ .\" HREF
1352
+ \fBpcrejit\fP
1353
+ .\"
1354
+ documentation for details of what can and cannot be handled.
1355
+ .sp
1356
+ PCRE_INFO_JITSIZE
1357
+ .sp
1358
+ If the pattern was successfully studied with a JIT option, return the size of
1359
+ the JIT compiled code, otherwise return zero. The fourth argument should point
1360
+ to a \fBsize_t\fP variable.
1361
+ .sp
1362
+ PCRE_INFO_LASTLITERAL
1363
+ .sp
1364
+ Return the value of the rightmost literal data unit that must exist in any
1365
+ matched string, other than at its start, if such a value has been recorded. The
1366
+ fourth argument should point to an \fBint\fP variable. If there is no such
1367
+ value, -1 is returned. For anchored patterns, a last literal value is recorded
1368
+ only if it follows something of variable length. For example, for the pattern
1369
+ /^a\ed+z\ed+/ the returned value is "z", but for /^a\edz\ed/ the returned value
1370
+ is -1.
1371
+ .P
1372
+ Since for the 32-bit library using the non-UTF-32 mode, this function is unable
1373
+ to return the full 32-bit range of characters, this value is deprecated;
1374
+ instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
1375
+ be used.
1376
+ .sp
1377
+ PCRE_INFO_MATCH_EMPTY
1378
+ .sp
1379
+ Return 1 if the pattern can match an empty string, otherwise 0. The fourth
1380
+ argument should point to an \fBint\fP variable.
1381
+ .sp
1382
+ PCRE_INFO_MATCHLIMIT
1383
+ .sp
1384
+ If the pattern set a match limit by including an item of the form
1385
+ (*LIMIT_MATCH=nnnn) at the start, the value is returned. The fourth argument
1386
+ should point to an unsigned 32-bit integer. If no such value has been set, the
1387
+ call to \fBpcre_fullinfo()\fP returns the error PCRE_ERROR_UNSET.
1388
+ .sp
1389
+ PCRE_INFO_MAXLOOKBEHIND
1390
+ .sp
1391
+ Return the number of characters (NB not data units) in the longest lookbehind
1392
+ assertion in the pattern. This information is useful when doing multi-segment
1393
+ matching using the partial matching facilities. Note that the simple assertions
1394
+ \eb and \eB require a one-character lookbehind. \eA also registers a
1395
+ one-character lookbehind, though it does not actually inspect the previous
1396
+ character. This is to ensure that at least one character from the old segment
1397
+ is retained when a new segment is processed. Otherwise, if there are no
1398
+ lookbehinds in the pattern, \eA might match incorrectly at the start of a new
1399
+ segment.
1400
+ .sp
1401
+ PCRE_INFO_MINLENGTH
1402
+ .sp
1403
+ If the pattern was studied and a minimum length for matching subject strings
1404
+ was computed, its value is returned. Otherwise the returned value is -1. The
1405
+ value is a number of characters, which in UTF mode may be different from the
1406
+ number of data units. The fourth argument should point to an \fBint\fP
1407
+ variable. A non-negative value is a lower bound to the length of any matching
1408
+ string. There may not be any strings of that length that do actually match, but
1409
+ every string that does match is at least that long.
1410
+ .sp
1411
+ PCRE_INFO_NAMECOUNT
1412
+ PCRE_INFO_NAMEENTRYSIZE
1413
+ PCRE_INFO_NAMETABLE
1414
+ .sp
1415
+ PCRE supports the use of named as well as numbered capturing parentheses. The
1416
+ names are just an additional way of identifying the parentheses, which still
1417
+ acquire numbers. Several convenience functions such as
1418
+ \fBpcre_get_named_substring()\fP are provided for extracting captured
1419
+ substrings by name. It is also possible to extract the data directly, by first
1420
+ converting the name to a number in order to access the correct pointers in the
1421
+ output vector (described with \fBpcre_exec()\fP below). To do the conversion,
1422
+ you need to use the name-to-number map, which is described by these three
1423
+ values.
1424
+ .P
1425
+ The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
1426
+ the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
1427
+ entry; both of these return an \fBint\fP value. The entry size depends on the
1428
+ length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
1429
+ entry of the table. This is a pointer to \fBchar\fP in the 8-bit library, where
1430
+ the first two bytes of each entry are the number of the capturing parenthesis,
1431
+ most significant byte first. In the 16-bit library, the pointer points to
1432
+ 16-bit data units, the first of which contains the parenthesis number. In the
1433
+ 32-bit library, the pointer points to 32-bit data units, the first of which
1434
+ contains the parenthesis number. The rest of the entry is the corresponding
1435
+ name, zero terminated.
1436
+ .P
1437
+ The names are in alphabetical order. If (?| is used to create multiple groups
1438
+ with the same number, as described in the
1439
+ .\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
1440
+ .\" </a>
1441
+ section on duplicate subpattern numbers
1442
+ .\"
1443
+ in the
1444
+ .\" HREF
1445
+ \fBpcrepattern\fP
1446
+ .\"
1447
+ page, the groups may be given the same name, but there is only one entry in the
1448
+ table. Different names for groups of the same number are not permitted.
1449
+ Duplicate names for subpatterns with different numbers are permitted,
1450
+ but only if PCRE_DUPNAMES is set. They appear in the table in the order in
1451
+ which they were found in the pattern. In the absence of (?| this is the order
1452
+ of increasing number; when (?| is used this is not necessarily the case because
1453
+ later subpatterns may have lower numbers.
1454
+ .P
1455
+ As a simple example of the name/number table, consider the following pattern
1456
+ after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white
1457
+ space - including newlines - is ignored):
1458
+ .sp
1459
+ .\" JOIN
1460
+ (?<date> (?<year>(\ed\ed)?\ed\ed) -
1461
+ (?<month>\ed\ed) - (?<day>\ed\ed) )
1462
+ .sp
1463
+ There are four named subpatterns, so the table has four entries, and each entry
1464
+ in the table is eight bytes long. The table is as follows, with non-printing
1465
+ bytes shown in hexadecimal, and undefined bytes shown as ??:
1466
+ .sp
1467
+ 00 01 d a t e 00 ??
1468
+ 00 05 d a y 00 ?? ??
1469
+ 00 04 m o n t h 00
1470
+ 00 02 y e a r 00 ??
1471
+ .sp
1472
+ When writing code to extract data from named subpatterns using the
1473
+ name-to-number map, remember that the length of the entries is likely to be
1474
+ different for each compiled pattern.
1475
+ .sp
1476
+ PCRE_INFO_OKPARTIAL
1477
+ .sp
1478
+ Return 1 if the pattern can be used for partial matching with
1479
+ \fBpcre_exec()\fP, otherwise 0. The fourth argument should point to an
1480
+ \fBint\fP variable. From release 8.00, this always returns 1, because the
1481
+ restrictions that previously applied to partial matching have been lifted. The
1482
+ .\" HREF
1483
+ \fBpcrepartial\fP
1484
+ .\"
1485
+ documentation gives details of partial matching.
1486
+ .sp
1487
+ PCRE_INFO_OPTIONS
1488
+ .sp
1489
+ Return a copy of the options with which the pattern was compiled. The fourth
1490
+ argument should point to an \fBunsigned long int\fP variable. These option bits
1491
+ are those specified in the call to \fBpcre_compile()\fP, modified by any
1492
+ top-level option settings at the start of the pattern itself. In other words,
1493
+ they are the options that will be in force when matching starts. For example,
1494
+ if the pattern /(?im)abc(?-i)d/ is compiled with the PCRE_EXTENDED option, the
1495
+ result is PCRE_CASELESS, PCRE_MULTILINE, and PCRE_EXTENDED.
1496
+ .P
1497
+ A pattern is automatically anchored by PCRE if all of its top-level
1498
+ alternatives begin with one of the following:
1499
+ .sp
1500
+ ^ unless PCRE_MULTILINE is set
1501
+ \eA always
1502
+ \eG always
1503
+ .\" JOIN
1504
+ .* if PCRE_DOTALL is set and there are no back
1505
+ references to the subpattern in which .* appears
1506
+ .sp
1507
+ For such patterns, the PCRE_ANCHORED bit is set in the options returned by
1508
+ \fBpcre_fullinfo()\fP.
1509
+ .sp
1510
+ PCRE_INFO_RECURSIONLIMIT
1511
+ .sp
1512
+ If the pattern set a recursion limit by including an item of the form
1513
+ (*LIMIT_RECURSION=nnnn) at the start, the value is returned. The fourth
1514
+ argument should point to an unsigned 32-bit integer. If no such value has been
1515
+ set, the call to \fBpcre_fullinfo()\fP returns the error PCRE_ERROR_UNSET.
1516
+ .sp
1517
+ PCRE_INFO_SIZE
1518
+ .sp
1519
+ Return the size of the compiled pattern in bytes (for all three libraries). The
1520
+ fourth argument should point to a \fBsize_t\fP variable. This value does not
1521
+ include the size of the \fBpcre\fP structure that is returned by
1522
+ \fBpcre_compile()\fP. The value that is passed as the argument to
1523
+ \fBpcre_malloc()\fP when \fBpcre_compile()\fP is getting memory in which to
1524
+ place the compiled data is the value returned by this option plus the size of
1525
+ the \fBpcre\fP structure. Studying a compiled pattern, with or without JIT,
1526
+ does not alter the value returned by this option.
1527
+ .sp
1528
+ PCRE_INFO_STUDYSIZE
1529
+ .sp
1530
+ Return the size in bytes (for all three libraries) of the data block pointed to
1531
+ by the \fIstudy_data\fP field in a \fBpcre_extra\fP block. If \fBpcre_extra\fP
1532
+ is NULL, or there is no study data, zero is returned. The fourth argument
1533
+ should point to a \fBsize_t\fP variable. The \fIstudy_data\fP field is set by
1534
+ \fBpcre_study()\fP to record information that will speed up matching (see the
1535
+ section entitled
1536
+ .\" HTML <a href="#studyingapattern">
1537
+ .\" </a>
1538
+ "Studying a pattern"
1539
+ .\"
1540
+ above). The format of the \fIstudy_data\fP block is private, but its length
1541
+ is made available via this option so that it can be saved and restored (see the
1542
+ .\" HREF
1543
+ \fBpcreprecompile\fP
1544
+ .\"
1545
+ documentation for details).
1546
+ .sp
1547
+ PCRE_INFO_REQUIREDCHARFLAGS
1548
+ .sp
1549
+ Returns 1 if there is a rightmost literal data unit that must exist in any
1550
+ matched string, other than at its start. The fourth argument should point to
1551
+ an \fBint\fP variable. If there is no such value, 0 is returned. If returning
1552
+ 1, the character value itself can be retrieved using PCRE_INFO_REQUIREDCHAR.
1553
+ .P
1554
+ For anchored patterns, a last literal value is recorded only if it follows
1555
+ something of variable length. For example, for the pattern /^a\ed+z\ed+/ the
1556
+ returned value is 1 (with "z" returned from PCRE_INFO_REQUIREDCHAR), but for
1557
+ /^a\edz\ed/ the returned value is 0.
1558
+ .sp
1559
+ PCRE_INFO_REQUIREDCHAR
1560
+ .sp
1561
+ Return the value of the rightmost literal data unit that must exist in any
1562
+ matched string, other than at its start, if such a value has been recorded. The
1563
+ fourth argument should point to a \fBuint32_t\fP variable. If there is no such
1564
+ value, 0 is returned.
1565
+ .
1566
+ .
1567
+ .SH "REFERENCE COUNTS"
1568
+ .rs
1569
+ .sp
1570
+ .B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
1571
+ .PP
1572
+ The \fBpcre_refcount()\fP function is used to maintain a reference count in the
1573
+ data block that contains a compiled pattern. It is provided for the benefit of
1574
+ applications that operate in an object-oriented manner, where different parts
1575
+ of the application may be using the same compiled pattern, but you want to free
1576
+ the block when they are all done.
1577
+ .P
1578
+ When a pattern is compiled, the reference count field is initialized to zero.
1579
+ It is changed only by calling this function, whose action is to add the
1580
+ \fIadjust\fP value (which may be positive or negative) to it. The yield of the
1581
+ function is the new value. However, the value of the count is constrained to
1582
+ lie between 0 and 65535, inclusive. If the new value is outside these limits,
1583
+ it is forced to the appropriate limit value.
1584
+ .P
1585
+ Except when it is zero, the reference count is not correctly preserved if a
1586
+ pattern is compiled on one host and then transferred to a host whose byte-order
1587
+ is different. (This seems a highly unlikely scenario.)
1588
+ .
1589
+ .
1590
+ .SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
1591
+ .rs
1592
+ .sp
1593
+ .nf
1594
+ .B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
1595
+ .B " const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
1596
+ .B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
1597
+ .fi
1598
+ .P
1599
+ The function \fBpcre_exec()\fP is called to match a subject string against a
1600
+ compiled pattern, which is passed in the \fIcode\fP argument. If the
1601
+ pattern was studied, the result of the study should be passed in the
1602
+ \fIextra\fP argument. You can call \fBpcre_exec()\fP with the same \fIcode\fP
1603
+ and \fIextra\fP arguments as many times as you like, in order to match
1604
+ different subject strings with the same pattern.
1605
+ .P
1606
+ This function is the main matching facility of the library, and it operates in
1607
+ a Perl-like manner. For specialist use there is also an alternative matching
1608
+ function, which is described
1609
+ .\" HTML <a href="#dfamatch">
1610
+ .\" </a>
1611
+ below
1612
+ .\"
1613
+ in the section about the \fBpcre_dfa_exec()\fP function.
1614
+ .P
1615
+ In most applications, the pattern will have been compiled (and optionally
1616
+ studied) in the same process that calls \fBpcre_exec()\fP. However, it is
1617
+ possible to save compiled patterns and study data, and then use them later
1618
+ in different processes, possibly even on different hosts. For a discussion
1619
+ about this, see the
1620
+ .\" HREF
1621
+ \fBpcreprecompile\fP
1622
+ .\"
1623
+ documentation.
1624
+ .P
1625
+ Here is an example of a simple call to \fBpcre_exec()\fP:
1626
+ .sp
1627
+ int rc;
1628
+ int ovector[30];
1629
+ rc = pcre_exec(
1630
+ re, /* result of pcre_compile() */
1631
+ NULL, /* we didn't study the pattern */
1632
+ "some string", /* the subject string */
1633
+ 11, /* the length of the subject string */
1634
+ 0, /* start at offset 0 in the subject */
1635
+ 0, /* default options */
1636
+ ovector, /* vector of integers for substring information */
1637
+ 30); /* number of elements (NOT size in bytes) */
1638
+ .
1639
+ .
1640
+ .\" HTML <a name="extradata"></a>
1641
+ .SS "Extra data for \fBpcre_exec()\fR"
1642
+ .rs
1643
+ .sp
1644
+ If the \fIextra\fP argument is not NULL, it must point to a \fBpcre_extra\fP
1645
+ data block. The \fBpcre_study()\fP function returns such a block (when it
1646
+ doesn't return NULL), but you can also create one for yourself, and pass
1647
+ additional information in it. The \fBpcre_extra\fP block contains the following
1648
+ fields (not necessarily in this order):
1649
+ .sp
1650
+ unsigned long int \fIflags\fP;
1651
+ void *\fIstudy_data\fP;
1652
+ void *\fIexecutable_jit\fP;
1653
+ unsigned long int \fImatch_limit\fP;
1654
+ unsigned long int \fImatch_limit_recursion\fP;
1655
+ void *\fIcallout_data\fP;
1656
+ const unsigned char *\fItables\fP;
1657
+ unsigned char **\fImark\fP;
1658
+ .sp
1659
+ In the 16-bit version of this structure, the \fImark\fP field has type
1660
+ "PCRE_UCHAR16 **".
1661
+ .sp
1662
+ In the 32-bit version of this structure, the \fImark\fP field has type
1663
+ "PCRE_UCHAR32 **".
1664
+ .P
1665
+ The \fIflags\fP field is used to specify which of the other fields are set. The
1666
+ flag bits are:
1667
+ .sp
1668
+ PCRE_EXTRA_CALLOUT_DATA
1669
+ PCRE_EXTRA_EXECUTABLE_JIT
1670
+ PCRE_EXTRA_MARK
1671
+ PCRE_EXTRA_MATCH_LIMIT
1672
+ PCRE_EXTRA_MATCH_LIMIT_RECURSION
1673
+ PCRE_EXTRA_STUDY_DATA
1674
+ PCRE_EXTRA_TABLES
1675
+ .sp
1676
+ Other flag bits should be set to zero. The \fIstudy_data\fP field and sometimes
1677
+ the \fIexecutable_jit\fP field are set in the \fBpcre_extra\fP block that is
1678
+ returned by \fBpcre_study()\fP, together with the appropriate flag bits. You
1679
+ should not set these yourself, but you may add to the block by setting other
1680
+ fields and their corresponding flag bits.
1681
+ .P
1682
+ The \fImatch_limit\fP field provides a means of preventing PCRE from using up a
1683
+ vast amount of resources when running patterns that are not going to match,
1684
+ but which have a very large number of possibilities in their search trees. The
1685
+ classic example is a pattern that uses nested unlimited repeats.
1686
+ .P
1687
+ Internally, \fBpcre_exec()\fP uses a function called \fBmatch()\fP, which it
1688
+ calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
1689
+ imposed on the number of times this function is called during a match, which
1690
+ has the effect of limiting the amount of backtracking that can take place. For
1691
+ patterns that are not anchored, the count restarts from zero for each position
1692
+ in the subject string.
1693
+ .P
1694
+ When \fBpcre_exec()\fP is called with a pattern that was successfully studied
1695
+ with a JIT option, the way that the matching is executed is entirely different.
1696
+ However, there is still the possibility of runaway matching that goes on for a
1697
+ very long time, and so the \fImatch_limit\fP value is also used in this case
1698
+ (but in a different way) to limit how long the matching can continue.
1699
+ .P
1700
+ The default value for the limit can be set when PCRE is built; the default
1701
+ default is 10 million, which handles all but the most extreme cases. You can
1702
+ override the default by supplying \fBpcre_exec()\fP with a \fBpcre_extra\fP
1703
+ block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in
1704
+ the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns
1705
+ PCRE_ERROR_MATCHLIMIT.
1706
+ .P
1707
+ A value for the match limit may also be supplied by an item at the start of a
1708
+ pattern of the form
1709
+ .sp
1710
+ (*LIMIT_MATCH=d)
1711
+ .sp
1712
+ where d is a decimal number. However, such a setting is ignored unless d is
1713
+ less than the limit set by the caller of \fBpcre_exec()\fP or, if no such limit
1714
+ is set, less than the default.
1715
+ .P
1716
+ The \fImatch_limit_recursion\fP field is similar to \fImatch_limit\fP, but
1717
+ instead of limiting the total number of times that \fBmatch()\fP is called, it
1718
+ limits the depth of recursion. The recursion depth is a smaller number than the
1719
+ total number of calls, because not all calls to \fBmatch()\fP are recursive.
1720
+ This limit is of use only if it is set smaller than \fImatch_limit\fP.
1721
+ .P
1722
+ Limiting the recursion depth limits the amount of machine stack that can be
1723
+ used, or, when PCRE has been compiled to use memory on the heap instead of the
1724
+ stack, the amount of heap memory that can be used. This limit is not relevant,
1725
+ and is ignored, when matching is done using JIT compiled code.
1726
+ .P
1727
+ The default value for \fImatch_limit_recursion\fP can be set when PCRE is
1728
+ built; the default default is the same value as the default for
1729
+ \fImatch_limit\fP. You can override the default by supplying \fBpcre_exec()\fP
1730
+ with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and
1731
+ PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit
1732
+ is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT.
1733
+ .P
1734
+ A value for the recursion limit may also be supplied by an item at the start of
1735
+ a pattern of the form
1736
+ .sp
1737
+ (*LIMIT_RECURSION=d)
1738
+ .sp
1739
+ where d is a decimal number. However, such a setting is ignored unless d is
1740
+ less than the limit set by the caller of \fBpcre_exec()\fP or, if no such limit
1741
+ is set, less than the default.
1742
+ .P
1743
+ The \fIcallout_data\fP field is used in conjunction with the "callout" feature,
1744
+ and is described in the
1745
+ .\" HREF
1746
+ \fBpcrecallout\fP
1747
+ .\"
1748
+ documentation.
1749
+ .P
1750
+ The \fItables\fP field is provided for use with patterns that have been
1751
+ pre-compiled using custom character tables, saved to disc or elsewhere, and
1752
+ then reloaded, because the tables that were used to compile a pattern are not
1753
+ saved with it. See the
1754
+ .\" HREF
1755
+ \fBpcreprecompile\fP
1756
+ .\"
1757
+ documentation for a discussion of saving compiled patterns for later use. If
1758
+ NULL is passed using this mechanism, it forces PCRE's internal tables to be
1759
+ used.
1760
+ .P
1761
+ \fBWarning:\fP The tables that \fBpcre_exec()\fP uses must be the same as those
1762
+ that were used when the pattern was compiled. If this is not the case, the
1763
+ behaviour of \fBpcre_exec()\fP is undefined. Therefore, when a pattern is
1764
+ compiled and matched in the same process, this field should never be set. In
1765
+ this (the most common) case, the correct table pointer is automatically passed
1766
+ with the compiled pattern from \fBpcre_compile()\fP to \fBpcre_exec()\fP.
1767
+ .P
1768
+ If PCRE_EXTRA_MARK is set in the \fIflags\fP field, the \fImark\fP field must
1769
+ be set to point to a suitable variable. If the pattern contains any
1770
+ backtracking control verbs such as (*MARK:NAME), and the execution ends up with
1771
+ a name to pass back, a pointer to the name string (zero terminated) is placed
1772
+ in the variable pointed to by the \fImark\fP field. The names are within the
1773
+ compiled pattern; if you wish to retain such a name you must copy it before
1774
+ freeing the memory of a compiled pattern. If there is no name to pass back, the
1775
+ variable pointed to by the \fImark\fP field is set to NULL. For details of the
1776
+ backtracking control verbs, see the section entitled
1777
+ .\" HTML <a href="pcrepattern#backtrackcontrol">
1778
+ .\" </a>
1779
+ "Backtracking control"
1780
+ .\"
1781
+ in the
1782
+ .\" HREF
1783
+ \fBpcrepattern\fP
1784
+ .\"
1785
+ documentation.
1786
+ .
1787
+ .
1788
+ .\" HTML <a name="execoptions"></a>
1789
+ .SS "Option bits for \fBpcre_exec()\fP"
1790
+ .rs
1791
+ .sp
1792
+ The unused bits of the \fIoptions\fP argument for \fBpcre_exec()\fP must be
1793
+ zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
1794
+ PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
1795
+ PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, and
1796
+ PCRE_PARTIAL_SOFT.
1797
+ .P
1798
+ If the pattern was successfully studied with one of the just-in-time (JIT)
1799
+ compile options, the only supported options for JIT execution are
1800
+ PCRE_NO_UTF8_CHECK, PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY,
1801
+ PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. If an
1802
+ unsupported option is used, JIT execution is disabled and the normal
1803
+ interpretive code in \fBpcre_exec()\fP is run.
1804
+ .sp
1805
+ PCRE_ANCHORED
1806
+ .sp
1807
+ The PCRE_ANCHORED option limits \fBpcre_exec()\fP to matching at the first
1808
+ matching position. If a pattern was compiled with PCRE_ANCHORED, or turned out
1809
+ to be anchored by virtue of its contents, it cannot be made unanchored at
1810
+ matching time.
1811
+ .sp
1812
+ PCRE_BSR_ANYCRLF
1813
+ PCRE_BSR_UNICODE
1814
+ .sp
1815
+ These options (which are mutually exclusive) control what the \eR escape
1816
+ sequence matches. The choice is either to match only CR, LF, or CRLF, or to
1817
+ match any Unicode newline sequence. These options override the choice that was
1818
+ made or defaulted when the pattern was compiled.
1819
+ .sp
1820
+ PCRE_NEWLINE_CR
1821
+ PCRE_NEWLINE_LF
1822
+ PCRE_NEWLINE_CRLF
1823
+ PCRE_NEWLINE_ANYCRLF
1824
+ PCRE_NEWLINE_ANY
1825
+ .sp
1826
+ These options override the newline definition that was chosen or defaulted when
1827
+ the pattern was compiled. For details, see the description of
1828
+ \fBpcre_compile()\fP above. During matching, the newline choice affects the
1829
+ behaviour of the dot, circumflex, and dollar metacharacters. It may also alter
1830
+ the way the match position is advanced after a match failure for an unanchored
1831
+ pattern.
1832
+ .P
1833
+ When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF, or PCRE_NEWLINE_ANY is set, and a
1834
+ match attempt for an unanchored pattern fails when the current position is at a
1835
+ CRLF sequence, and the pattern contains no explicit matches for CR or LF
1836
+ characters, the match position is advanced by two characters instead of one, in
1837
+ other words, to after the CRLF.
1838
+ .P
1839
+ The above rule is a compromise that makes the most common cases work as
1840
+ expected. For example, if the pattern is .+A (and the PCRE_DOTALL option is not
1841
+ set), it does not match the string "\er\enA" because, after failing at the
1842
+ start, it skips both the CR and the LF before retrying. However, the pattern
1843
+ [\er\en]A does match that string, because it contains an explicit CR or LF
1844
+ reference, and so advances only by one character after the first failure.
1845
+ .P
1846
+ An explicit match for CR or LF is either a literal appearance of one of those
1847
+ characters, or one of the \er or \en escape sequences. Implicit matches such as
1848
+ [^X] do not count, nor does \es (which includes CR and LF in the characters
1849
+ that it matches).
1850
+ .P
1851
+ Notwithstanding the above, anomalous effects may still occur when CRLF is a
1852
+ valid newline sequence and explicit \er or \en escapes appear in the pattern.
1853
+ .sp
1854
+ PCRE_NOTBOL
1855
+ .sp
1856
+ This option specifies that the first character of the subject string is not the
1857
+ beginning of a line, so the circumflex metacharacter should not match before
1858
+ it. Setting this without PCRE_MULTILINE (at compile time) causes circumflex
1859
+ never to match. This option affects only the behaviour of the circumflex
1860
+ metacharacter. It does not affect \eA.
1861
+ .sp
1862
+ PCRE_NOTEOL
1863
+ .sp
1864
+ This option specifies that the end of the subject string is not the end of a
1865
+ line, so the dollar metacharacter should not match it nor (except in multiline
1866
+ mode) a newline immediately before it. Setting this without PCRE_MULTILINE (at
1867
+ compile time) causes dollar never to match. This option affects only the
1868
+ behaviour of the dollar metacharacter. It does not affect \eZ or \ez.
1869
+ .sp
1870
+ PCRE_NOTEMPTY
1871
+ .sp
1872
+ An empty string is not considered to be a valid match if this option is set. If
1873
+ there are alternatives in the pattern, they are tried. If all the alternatives
1874
+ match the empty string, the entire match fails. For example, if the pattern
1875
+ .sp
1876
+ a?b?
1877
+ .sp
1878
+ is applied to a string not beginning with "a" or "b", it matches an empty
1879
+ string at the start of the subject. With PCRE_NOTEMPTY set, this match is not
1880
+ valid, so PCRE searches further into the string for occurrences of "a" or "b".
1881
+ .sp
1882
+ PCRE_NOTEMPTY_ATSTART
1883
+ .sp
1884
+ This is like PCRE_NOTEMPTY, except that an empty string match that is not at
1885
+ the start of the subject is permitted. If the pattern is anchored, such a match
1886
+ can occur only if the pattern contains \eK.
1887
+ .P
1888
+ Perl has no direct equivalent of PCRE_NOTEMPTY or PCRE_NOTEMPTY_ATSTART, but it
1889
+ does make a special case of a pattern match of the empty string within its
1890
+ \fBsplit()\fP function, and when using the /g modifier. It is possible to
1891
+ emulate Perl's behaviour after matching a null string by first trying the match
1892
+ again at the same offset with PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED, and then
1893
+ if that fails, by advancing the starting offset (see below) and trying an
1894
+ ordinary match again. There is some code that demonstrates how to do this in
1895
+ the
1896
+ .\" HREF
1897
+ \fBpcredemo\fP
1898
+ .\"
1899
+ sample program. In the most general case, you have to check to see if the
1900
+ newline convention recognizes CRLF as a newline, and if so, and the current
1901
+ character is CR followed by LF, advance the starting offset by two characters
1902
+ instead of one.
1903
+ .sp
1904
+ PCRE_NO_START_OPTIMIZE
1905
+ .sp
1906
+ There are a number of optimizations that \fBpcre_exec()\fP uses at the start of
1907
+ a match, in order to speed up the process. For example, if it is known that an
1908
+ unanchored match must start with a specific character, it searches the subject
1909
+ for that character, and fails immediately if it cannot find it, without
1910
+ actually running the main matching function. This means that a special item
1911
+ such as (*COMMIT) at the start of a pattern is not considered until after a
1912
+ suitable starting point for the match has been found. Also, when callouts or
1913
+ (*MARK) items are in use, these "start-up" optimizations can cause them to be
1914
+ skipped if the pattern is never actually used. The start-up optimizations are
1915
+ in effect a pre-scan of the subject that takes place before the pattern is run.
1916
+ .P
1917
+ The PCRE_NO_START_OPTIMIZE option disables the start-up optimizations, possibly
1918
+ causing performance to suffer, but ensuring that in cases where the result is
1919
+ "no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)
1920
+ are considered at every possible starting position in the subject string. If
1921
+ PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching
1922
+ time. The use of PCRE_NO_START_OPTIMIZE at matching time (that is, passing it
1923
+ to \fBpcre_exec()\fP) disables JIT execution; in this situation, matching is
1924
+ always done interpretively.
1925
+ .P
1926
+ Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.
1927
+ Consider the pattern
1928
+ .sp
1929
+ (*COMMIT)ABC
1930
+ .sp
1931
+ When this is compiled, PCRE records the fact that a match must start with the
1932
+ character "A". Suppose the subject string is "DEFABC". The start-up
1933
+ optimization scans along the subject, finds "A" and runs the first match
1934
+ attempt from there. The (*COMMIT) item means that the pattern must match the
1935
+ current starting position, which in this case, it does. However, if the same
1936
+ match is run with PCRE_NO_START_OPTIMIZE set, the initial scan along the
1937
+ subject string does not happen. The first match attempt is run starting from
1938
+ "D" and when this fails, (*COMMIT) prevents any further matches being tried, so
1939
+ the overall result is "no match". If the pattern is studied, more start-up
1940
+ optimizations may be used. For example, a minimum length for the subject may be
1941
+ recorded. Consider the pattern
1942
+ .sp
1943
+ (*MARK:A)(X|Y)
1944
+ .sp
1945
+ The minimum length for a match is one character. If the subject is "ABC", there
1946
+ will be attempts to match "ABC", "BC", "C", and then finally an empty string.
1947
+ If the pattern is studied, the final attempt does not take place, because PCRE
1948
+ knows that the subject is too short, and so the (*MARK) is never encountered.
1949
+ In this case, studying the pattern does not affect the overall match result,
1950
+ which is still "no match", but it does affect the auxiliary information that is
1951
+ returned.
1952
+ .sp
1953
+ PCRE_NO_UTF8_CHECK
1954
+ .sp
1955
+ When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
1956
+ string is automatically checked when \fBpcre_exec()\fP is subsequently called.
1957
+ The entire string is checked before any other processing takes place. The value
1958
+ of \fIstartoffset\fP is also checked to ensure that it points to the start of a
1959
+ UTF-8 character. There is a discussion about the
1960
+ .\" HTML <a href="pcreunicode.html#utf8strings">
1961
+ .\" </a>
1962
+ validity of UTF-8 strings
1963
+ .\"
1964
+ in the
1965
+ .\" HREF
1966
+ \fBpcreunicode\fP
1967
+ .\"
1968
+ page. If an invalid sequence of bytes is found, \fBpcre_exec()\fP returns the
1969
+ error PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is a
1970
+ truncated character at the end of the subject, PCRE_ERROR_SHORTUTF8. In both
1971
+ cases, information about the precise nature of the error may also be returned
1972
+ (see the descriptions of these errors in the section entitled \fIError return
1973
+ values from\fP \fBpcre_exec()\fP
1974
+ .\" HTML <a href="#errorlist">
1975
+ .\" </a>
1976
+ below).
1977
+ .\"
1978
+ If \fIstartoffset\fP contains a value that does not point to the start of a
1979
+ UTF-8 character (or to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is
1980
+ returned.
1981
+ .P
1982
+ If you already know that your subject is valid, and you want to skip these
1983
+ checks for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
1984
+ calling \fBpcre_exec()\fP. You might want to do this for the second and
1985
+ subsequent calls to \fBpcre_exec()\fP if you are making repeated calls to find
1986
+ all the matches in a single subject string. However, you should be sure that
1987
+ the value of \fIstartoffset\fP points to the start of a character (or the end
1988
+ of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
1989
+ invalid string as a subject or an invalid value of \fIstartoffset\fP is
1990
+ undefined. Your program may crash or loop.
1991
+ .sp
1992
+ PCRE_PARTIAL_HARD
1993
+ PCRE_PARTIAL_SOFT
1994
+ .sp
1995
+ These options turn on the partial matching feature. For backwards
1996
+ compatibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial match
1997
+ occurs if the end of the subject string is reached successfully, but there are
1998
+ not enough subject characters to complete the match. If this happens when
1999
+ PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set, matching continues by
2000
+ testing any remaining alternatives. Only if no complete match can be found is
2001
+ PCRE_ERROR_PARTIAL returned instead of PCRE_ERROR_NOMATCH. In other words,
2002
+ PCRE_PARTIAL_SOFT says that the caller is prepared to handle a partial match,
2003
+ but only if no complete match can be found.
2004
+ .P
2005
+ If PCRE_PARTIAL_HARD is set, it overrides PCRE_PARTIAL_SOFT. In this case, if a
2006
+ partial match is found, \fBpcre_exec()\fP immediately returns
2007
+ PCRE_ERROR_PARTIAL, without considering any other alternatives. In other words,
2008
+ when PCRE_PARTIAL_HARD is set, a partial match is considered to be more
2009
+ important than an alternative complete match.
2010
+ .P
2011
+ In both cases, the portion of the string that was inspected when the partial
2012
+ match was found is set as the first matching string. There is a more detailed
2013
+ discussion of partial and multi-segment matching, with examples, in the
2014
+ .\" HREF
2015
+ \fBpcrepartial\fP
2016
+ .\"
2017
+ documentation.
2018
+ .
2019
+ .
2020
+ .SS "The string to be matched by \fBpcre_exec()\fP"
2021
+ .rs
2022
+ .sp
2023
+ The subject string is passed to \fBpcre_exec()\fP as a pointer in
2024
+ \fIsubject\fP, a length in \fIlength\fP, and a starting offset in
2025
+ \fIstartoffset\fP. The units for \fIlength\fP and \fIstartoffset\fP are bytes
2026
+ for the 8-bit library, 16-bit data items for the 16-bit library, and 32-bit
2027
+ data items for the 32-bit library.
2028
+ .P
2029
+ If \fIstartoffset\fP is negative or greater than the length of the subject,
2030
+ \fBpcre_exec()\fP returns PCRE_ERROR_BADOFFSET. When the starting offset is
2031
+ zero, the search for a match starts at the beginning of the subject, and this
2032
+ is by far the most common case. In UTF-8 or UTF-16 mode, the offset must point
2033
+ to the start of a character, or the end of the subject (in UTF-32 mode, one
2034
+ data unit equals one character, so all offsets are valid). Unlike the pattern
2035
+ string, the subject may contain binary zeroes.
2036
+ .P
2037
+ A non-zero starting offset is useful when searching for another match in the
2038
+ same subject by calling \fBpcre_exec()\fP again after a previous success.
2039
+ Setting \fIstartoffset\fP differs from just passing over a shortened string and
2040
+ setting PCRE_NOTBOL in the case of a pattern that begins with any kind of
2041
+ lookbehind. For example, consider the pattern
2042
+ .sp
2043
+ \eBiss\eB
2044
+ .sp
2045
+ which finds occurrences of "iss" in the middle of words. (\eB matches only if
2046
+ the current position in the subject is not a word boundary.) When applied to
2047
+ the string "Mississippi" the first call to \fBpcre_exec()\fP finds the first
2048
+ occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the
2049
+ subject, namely "issippi", it does not match, because \eB is always false at the
2050
+ start of the subject, which is deemed to be a word boundary. However, if
2051
+ \fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP
2052
+ set to 4, it finds the second occurrence of "iss" because it is able to look
2053
+ behind the starting point to discover that it is preceded by a letter.
2054
+ .P
2055
+ Finding all the matches in a subject is tricky when the pattern can match an
2056
+ empty string. It is possible to emulate Perl's /g behaviour by first trying the
2057
+ match again at the same offset, with the PCRE_NOTEMPTY_ATSTART and
2058
+ PCRE_ANCHORED options, and then if that fails, advancing the starting offset
2059
+ and trying an ordinary match again. There is some code that demonstrates how to
2060
+ do this in the
2061
+ .\" HREF
2062
+ \fBpcredemo\fP
2063
+ .\"
2064
+ sample program. In the most general case, you have to check to see if the
2065
+ newline convention recognizes CRLF as a newline, and if so, and the current
2066
+ character is CR followed by LF, advance the starting offset by two characters
2067
+ instead of one.
2068
+ .P
2069
+ If a non-zero starting offset is passed when the pattern is anchored, one
2070
+ attempt to match at the given offset is made. This can only succeed if the
2071
+ pattern does not require the match to be at the start of the subject.
2072
+ .
2073
+ .
2074
+ .SS "How \fBpcre_exec()\fP returns captured substrings"
2075
+ .rs
2076
+ .sp
2077
+ In general, a pattern matches a certain portion of the subject, and in
2078
+ addition, further substrings from the subject may be picked out by parts of the
2079
+ pattern. Following the usage in Jeffrey Friedl's book, this is called
2080
+ "capturing" in what follows, and the phrase "capturing subpattern" is used for
2081
+ a fragment of a pattern that picks out a substring. PCRE supports several other
2082
+ kinds of parenthesized subpattern that do not cause substrings to be captured.
2083
+ .P
2084
+ Captured substrings are returned to the caller via a vector of integers whose
2085
+ address is passed in \fIovector\fP. The number of elements in the vector is
2086
+ passed in \fIovecsize\fP, which must be a non-negative number. \fBNote\fP: this
2087
+ argument is NOT the size of \fIovector\fP in bytes.
2088
+ .P
2089
+ The first two-thirds of the vector is used to pass back captured substrings,
2090
+ each substring using a pair of integers. The remaining third of the vector is
2091
+ used as workspace by \fBpcre_exec()\fP while matching capturing subpatterns,
2092
+ and is not available for passing back information. The number passed in
2093
+ \fIovecsize\fP should always be a multiple of three. If it is not, it is
2094
+ rounded down.
2095
+ .P
2096
+ When a match is successful, information about captured substrings is returned
2097
+ in pairs of integers, starting at the beginning of \fIovector\fP, and
2098
+ continuing up to two-thirds of its length at the most. The first element of
2099
+ each pair is set to the offset of the first character in a substring, and the
2100
+ second is set to the offset of the first character after the end of a
2101
+ substring. These values are always data unit offsets, even in UTF mode. They
2102
+ are byte offsets in the 8-bit library, 16-bit data item offsets in the 16-bit
2103
+ library, and 32-bit data item offsets in the 32-bit library. \fBNote\fP: they
2104
+ are not character counts.
2105
+ .P
2106
+ The first pair of integers, \fIovector[0]\fP and \fIovector[1]\fP, identify the
2107
+ portion of the subject string matched by the entire pattern. The next pair is
2108
+ used for the first capturing subpattern, and so on. The value returned by
2109
+ \fBpcre_exec()\fP is one more than the highest numbered pair that has been set.
2110
+ For example, if two substrings have been captured, the returned value is 3. If
2111
+ there are no capturing subpatterns, the return value from a successful match is
2112
+ 1, indicating that just the first pair of offsets has been set.
2113
+ .P
2114
+ If a capturing subpattern is matched repeatedly, it is the last portion of the
2115
+ string that it matched that is returned.
2116
+ .P
2117
+ If the vector is too small to hold all the captured substring offsets, it is
2118
+ used as far as possible (up to two-thirds of its length), and the function
2119
+ returns a value of zero. If neither the actual string matched nor any captured
2120
+ substrings are of interest, \fBpcre_exec()\fP may be called with \fIovector\fP
2121
+ passed as NULL and \fIovecsize\fP as zero. However, if the pattern contains
2122
+ back references and the \fIovector\fP is not big enough to remember the related
2123
+ substrings, PCRE has to get additional memory for use during matching. Thus it
2124
+ is usually advisable to supply an \fIovector\fP of reasonable size.
2125
+ .P
2126
+ There are some cases where zero is returned (indicating vector overflow) when
2127
+ in fact the vector is exactly the right size for the final match. For example,
2128
+ consider the pattern
2129
+ .sp
2130
+ (a)(?:(b)c|bd)
2131
+ .sp
2132
+ If a vector of 6 elements (allowing for only 1 captured substring) is given
2133
+ with subject string "abd", \fBpcre_exec()\fP will try to set the second
2134
+ captured string, thereby recording a vector overflow, before failing to match
2135
+ "c" and backing up to try the second alternative. The zero return, however,
2136
+ does correctly indicate that the maximum number of slots (namely 2) have been
2137
+ filled. In similar cases where there is temporary overflow, but the final
2138
+ number of used slots is actually less than the maximum, a non-zero value is
2139
+ returned.
2140
+ .P
2141
+ The \fBpcre_fullinfo()\fP function can be used to find out how many capturing
2142
+ subpatterns there are in a compiled pattern. The smallest size for
2143
+ \fIovector\fP that will allow for \fIn\fP captured substrings, in addition to
2144
+ the offsets of the substring matched by the whole pattern, is (\fIn\fP+1)*3.
2145
+ .P
2146
+ It is possible for capturing subpattern number \fIn+1\fP to match some part of
2147
+ the subject when subpattern \fIn\fP has not been used at all. For example, if
2148
+ the string "abc" is matched against the pattern (a|(z))(bc) the return from the
2149
+ function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
2150
+ happens, both values in the offset pairs corresponding to unused subpatterns
2151
+ are set to -1.
2152
+ .P
2153
+ Offset values that correspond to unused subpatterns at the end of the
2154
+ expression are also set to -1. For example, if the string "abc" is matched
2155
+ against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched. The
2156
+ return from the function is 2, because the highest used capturing subpattern
2157
+ number is 1, and the offsets for the second and third capturing subpatterns
2158
+ (assuming the vector is large enough, of course) are set to -1.
2159
+ .P
2160
+ \fBNote\fP: Elements in the first two-thirds of \fIovector\fP that do not
2161
+ correspond to capturing parentheses in the pattern are never changed. That is,
2162
+ if a pattern contains \fIn\fP capturing parentheses, no more than
2163
+ \fIovector[0]\fP to \fIovector[2n+1]\fP are set by \fBpcre_exec()\fP. The other
2164
+ elements (in the first two-thirds) retain whatever values they previously had.
2165
+ .P
2166
+ Some convenience functions are provided for extracting the captured substrings
2167
+ as separate strings. These are described below.
2168
+ .
2169
+ .
2170
+ .\" HTML <a name="errorlist"></a>
2171
+ .SS "Error return values from \fBpcre_exec()\fP"
2172
+ .rs
2173
+ .sp
2174
+ If \fBpcre_exec()\fP fails, it returns a negative number. The following are
2175
+ defined in the header file:
2176
+ .sp
2177
+ PCRE_ERROR_NOMATCH (-1)
2178
+ .sp
2179
+ The subject string did not match the pattern.
2180
+ .sp
2181
+ PCRE_ERROR_NULL (-2)
2182
+ .sp
2183
+ Either \fIcode\fP or \fIsubject\fP was passed as NULL, or \fIovector\fP was
2184
+ NULL and \fIovecsize\fP was not zero.
2185
+ .sp
2186
+ PCRE_ERROR_BADOPTION (-3)
2187
+ .sp
2188
+ An unrecognized bit was set in the \fIoptions\fP argument.
2189
+ .sp
2190
+ PCRE_ERROR_BADMAGIC (-4)
2191
+ .sp
2192
+ PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch
2193
+ the case when it is passed a junk pointer and to detect when a pattern that was
2194
+ compiled in an environment of one endianness is run in an environment with the
2195
+ other endianness. This is the error that PCRE gives when the magic number is
2196
+ not present.
2197
+ .sp
2198
+ PCRE_ERROR_UNKNOWN_OPCODE (-5)
2199
+ .sp
2200
+ While running the pattern match, an unknown item was encountered in the
2201
+ compiled pattern. This error could be caused by a bug in PCRE or by overwriting
2202
+ of the compiled pattern.
2203
+ .sp
2204
+ PCRE_ERROR_NOMEMORY (-6)
2205
+ .sp
2206
+ If a pattern contains back references, but the \fIovector\fP that is passed to
2207
+ \fBpcre_exec()\fP is not big enough to remember the referenced substrings, PCRE
2208
+ gets a block of memory at the start of matching to use for this purpose. If the
2209
+ call via \fBpcre_malloc()\fP fails, this error is given. The memory is
2210
+ automatically freed at the end of matching.
2211
+ .P
2212
+ This error is also given if \fBpcre_stack_malloc()\fP fails in
2213
+ \fBpcre_exec()\fP. This can happen only when PCRE has been compiled with
2214
+ \fB--disable-stack-for-recursion\fP.
2215
+ .sp
2216
+ PCRE_ERROR_NOSUBSTRING (-7)
2217
+ .sp
2218
+ This error is used by the \fBpcre_copy_substring()\fP,
2219
+ \fBpcre_get_substring()\fP, and \fBpcre_get_substring_list()\fP functions (see
2220
+ below). It is never returned by \fBpcre_exec()\fP.
2221
+ .sp
2222
+ PCRE_ERROR_MATCHLIMIT (-8)
2223
+ .sp
2224
+ The backtracking limit, as specified by the \fImatch_limit\fP field in a
2225
+ \fBpcre_extra\fP structure (or defaulted) was reached. See the description
2226
+ above.
2227
+ .sp
2228
+ PCRE_ERROR_CALLOUT (-9)
2229
+ .sp
2230
+ This error is never generated by \fBpcre_exec()\fP itself. It is provided for
2231
+ use by callout functions that want to yield a distinctive error code. See the
2232
+ .\" HREF
2233
+ \fBpcrecallout\fP
2234
+ .\"
2235
+ documentation for details.
2236
+ .sp
2237
+ PCRE_ERROR_BADUTF8 (-10)
2238
+ .sp
2239
+ A string that contains an invalid UTF-8 byte sequence was passed as a subject,
2240
+ and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector
2241
+ (\fIovecsize\fP) is at least 2, the byte offset to the start of the invalid
2242
+ UTF-8 character is placed in the first element, and a reason code is placed in
2243
+ the second element. The reason codes are listed in the
2244
+ .\" HTML <a href="#badutf8reasons">
2245
+ .\" </a>
2246
+ following section.
2247
+ .\"
2248
+ For backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a
2249
+ truncated UTF-8 character at the end of the subject (reason codes 1 to 5),
2250
+ PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8.
2251
+ .sp
2252
+ PCRE_ERROR_BADUTF8_OFFSET (-11)
2253
+ .sp
2254
+ The UTF-8 byte sequence that was passed as a subject was checked and found to
2255
+ be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of
2256
+ \fIstartoffset\fP did not point to the beginning of a UTF-8 character or the
2257
+ end of the subject.
2258
+ .sp
2259
+ PCRE_ERROR_PARTIAL (-12)
2260
+ .sp
2261
+ The subject string did not match, but it did match partially. See the
2262
+ .\" HREF
2263
+ \fBpcrepartial\fP
2264
+ .\"
2265
+ documentation for details of partial matching.
2266
+ .sp
2267
+ PCRE_ERROR_BADPARTIAL (-13)
2268
+ .sp
2269
+ This code is no longer in use. It was formerly returned when the PCRE_PARTIAL
2270
+ option was used with a compiled pattern containing items that were not
2271
+ supported for partial matching. From release 8.00 onwards, there are no
2272
+ restrictions on partial matching.
2273
+ .sp
2274
+ PCRE_ERROR_INTERNAL (-14)
2275
+ .sp
2276
+ An unexpected internal error has occurred. This error could be caused by a bug
2277
+ in PCRE or by overwriting of the compiled pattern.
2278
+ .sp
2279
+ PCRE_ERROR_BADCOUNT (-15)
2280
+ .sp
2281
+ This error is given if the value of the \fIovecsize\fP argument is negative.
2282
+ .sp
2283
+ PCRE_ERROR_RECURSIONLIMIT (-21)
2284
+ .sp
2285
+ The internal recursion limit, as specified by the \fImatch_limit_recursion\fP
2286
+ field in a \fBpcre_extra\fP structure (or defaulted) was reached. See the
2287
+ description above.
2288
+ .sp
2289
+ PCRE_ERROR_BADNEWLINE (-23)
2290
+ .sp
2291
+ An invalid combination of PCRE_NEWLINE_\fIxxx\fP options was given.
2292
+ .sp
2293
+ PCRE_ERROR_BADOFFSET (-24)
2294
+ .sp
2295
+ The value of \fIstartoffset\fP was negative or greater than the length of the
2296
+ subject, that is, the value in \fIlength\fP.
2297
+ .sp
2298
+ PCRE_ERROR_SHORTUTF8 (-25)
2299
+ .sp
2300
+ This error is returned instead of PCRE_ERROR_BADUTF8 when the subject string
2301
+ ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set.
2302
+ Information about the failure is returned as for PCRE_ERROR_BADUTF8. It is in
2303
+ fact sufficient to detect this case, but this special error code for
2304
+ PCRE_PARTIAL_HARD precedes the implementation of returned information; it is
2305
+ retained for backwards compatibility.
2306
+ .sp
2307
+ PCRE_ERROR_RECURSELOOP (-26)
2308
+ .sp
2309
+ This error is returned when \fBpcre_exec()\fP detects a recursion loop within
2310
+ the pattern. Specifically, it means that either the whole pattern or a
2311
+ subpattern has been called recursively for the second time at the same position
2312
+ in the subject string. Some simple patterns that might do this are detected and
2313
+ faulted at compile time, but more complicated cases, in particular mutual
2314
+ recursions between two different subpatterns, cannot be detected until run
2315
+ time.
2316
+ .sp
2317
+ PCRE_ERROR_JIT_STACKLIMIT (-27)
2318
+ .sp
2319
+ This error is returned when a pattern that was successfully studied using a
2320
+ JIT compile option is being matched, but the memory available for the
2321
+ just-in-time processing stack is not large enough. See the
2322
+ .\" HREF
2323
+ \fBpcrejit\fP
2324
+ .\"
2325
+ documentation for more details.
2326
+ .sp
2327
+ PCRE_ERROR_BADMODE (-28)
2328
+ .sp
2329
+ This error is given if a pattern that was compiled by the 8-bit library is
2330
+ passed to a 16-bit or 32-bit library function, or vice versa.
2331
+ .sp
2332
+ PCRE_ERROR_BADENDIANNESS (-29)
2333
+ .sp
2334
+ This error is given if a pattern that was compiled and saved is reloaded on a
2335
+ host with different endianness. The utility function
2336
+ \fBpcre_pattern_to_host_byte_order()\fP can be used to convert such a pattern
2337
+ so that it runs on the new host.
2338
+ .sp
2339
+ PCRE_ERROR_JIT_BADOPTION (-31)
2340
+ .sp
2341
+ This error is returned when a pattern that was successfully studied using a JIT
2342
+ compile option is being matched, but the matching mode (partial or complete
2343
+ match) does not correspond to any JIT compilation mode. When the JIT fast path
2344
+ function is used, this error may be also given for invalid options. See the
2345
+ .\" HREF
2346
+ \fBpcrejit\fP
2347
+ .\"
2348
+ documentation for more details.
2349
+ .sp
2350
+ PCRE_ERROR_BADLENGTH (-32)
2351
+ .sp
2352
+ This error is given if \fBpcre_exec()\fP is called with a negative value for
2353
+ the \fIlength\fP argument.
2354
+ .P
2355
+ Error numbers -16 to -20, -22, and -30 are not used by \fBpcre_exec()\fP.
2356
+ .
2357
+ .
2358
+ .\" HTML <a name="badutf8reasons"></a>
2359
+ .SS "Reason codes for invalid UTF-8 strings"
2360
+ .rs
2361
+ .sp
2362
+ This section applies only to the 8-bit library. The corresponding information
2363
+ for the 16-bit and 32-bit libraries is given in the
2364
+ .\" HREF
2365
+ \fBpcre16\fP
2366
+ .\"
2367
+ and
2368
+ .\" HREF
2369
+ \fBpcre32\fP
2370
+ .\"
2371
+ pages.
2372
+ .P
2373
+ When \fBpcre_exec()\fP returns either PCRE_ERROR_BADUTF8 or
2374
+ PCRE_ERROR_SHORTUTF8, and the size of the output vector (\fIovecsize\fP) is at
2375
+ least 2, the offset of the start of the invalid UTF-8 character is placed in
2376
+ the first output vector element (\fIovector[0]\fP) and a reason code is placed
2377
+ in the second element (\fIovector[1]\fP). The reason codes are given names in
2378
+ the \fBpcre.h\fP header file:
2379
+ .sp
2380
+ PCRE_UTF8_ERR1
2381
+ PCRE_UTF8_ERR2
2382
+ PCRE_UTF8_ERR3
2383
+ PCRE_UTF8_ERR4
2384
+ PCRE_UTF8_ERR5
2385
+ .sp
2386
+ The string ends with a truncated UTF-8 character; the code specifies how many
2387
+ bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
2388
+ no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
2389
+ allows for up to 6 bytes, and this is checked first; hence the possibility of
2390
+ 4 or 5 missing bytes.
2391
+ .sp
2392
+ PCRE_UTF8_ERR6
2393
+ PCRE_UTF8_ERR7
2394
+ PCRE_UTF8_ERR8
2395
+ PCRE_UTF8_ERR9
2396
+ PCRE_UTF8_ERR10
2397
+ .sp
2398
+ The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
2399
+ character do not have the binary value 0b10 (that is, either the most
2400
+ significant bit is 0, or the next bit is 1).
2401
+ .sp
2402
+ PCRE_UTF8_ERR11
2403
+ PCRE_UTF8_ERR12
2404
+ .sp
2405
+ A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
2406
+ these code points are excluded by RFC 3629.
2407
+ .sp
2408
+ PCRE_UTF8_ERR13
2409
+ .sp
2410
+ A 4-byte character has a value greater than 0x10ffff; these code points are
2411
+ excluded by RFC 3629.
2412
+ .sp
2413
+ PCRE_UTF8_ERR14
2414
+ .sp
2415
+ A 3-byte character has a value in the range 0xd800 to 0xdfff; these
2416
+ code points are reserved by RFC 3629 for use with UTF-16, and so are excluded
2417
+ from UTF-8.
2418
+ .sp
2419
+ PCRE_UTF8_ERR15
2420
+ PCRE_UTF8_ERR16
2421
+ PCRE_UTF8_ERR17
2422
+ PCRE_UTF8_ERR18
2423
+ PCRE_UTF8_ERR19
2424
+ .sp
2425
+ A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
2426
+ value that can be represented by fewer bytes, which is invalid. For example,
2427
+ the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
2428
+ one byte.
2429
+ .sp
2430
+ PCRE_UTF8_ERR20
2431
+ .sp
2432
+ The two most significant bits of the first byte of a character have the binary
2433
+ value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
2434
+ byte can only validly occur as the second or subsequent byte of a multi-byte
2435
+ character.
2436
+ .sp
2437
+ PCRE_UTF8_ERR21
2438
+ .sp
2439
+ The first byte of a character has the value 0xfe or 0xff. These values can
2440
+ never occur in a valid UTF-8 string.
2441
+ .sp
2442
+ PCRE_UTF8_ERR22
2443
+ .sp
2444
+ This error code was formerly used when the presence of a so-called
2445
+ "non-character" caused an error. Unicode corrigendum #9 makes it clear that
2446
+ such characters should not cause a string to be rejected, and so this code is
2447
+ no longer in use and is never returned.
2448
+ .
2449
+ .
2450
+ .SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
2451
+ .rs
2452
+ .sp
2453
+ .nf
2454
+ .B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
2455
+ .B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
2456
+ .B " int \fIbuffersize\fP);"
2457
+ .sp
2458
+ .B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
2459
+ .B " int \fIstringcount\fP, int \fIstringnumber\fP,"
2460
+ .B " const char **\fIstringptr\fP);"
2461
+ .sp
2462
+ .B int pcre_get_substring_list(const char *\fIsubject\fP,
2463
+ .B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
2464
+ .fi
2465
+ .PP
2466
+ Captured substrings can be accessed directly by using the offsets returned by
2467
+ \fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions
2468
+ \fBpcre_copy_substring()\fP, \fBpcre_get_substring()\fP, and
2469
+ \fBpcre_get_substring_list()\fP are provided for extracting captured substrings
2470
+ as new, separate, zero-terminated strings. These functions identify substrings
2471
+ by number. The next section describes functions for extracting named
2472
+ substrings.
2473
+ .P
2474
+ A substring that contains a binary zero is correctly extracted and has a
2475
+ further zero added on the end, but the result is not, of course, a C string.
2476
+ However, you can process such a string by referring to the length that is
2477
+ returned by \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP.
2478
+ Unfortunately, the interface to \fBpcre_get_substring_list()\fP is not adequate
2479
+ for handling strings containing binary zeros, because the end of the final
2480
+ string is not independently indicated.
2481
+ .P
2482
+ The first three arguments are the same for all three of these functions:
2483
+ \fIsubject\fP is the subject string that has just been successfully matched,
2484
+ \fIovector\fP is a pointer to the vector of integer offsets that was passed to
2485
+ \fBpcre_exec()\fP, and \fIstringcount\fP is the number of substrings that were
2486
+ captured by the match, including the substring that matched the entire regular
2487
+ expression. This is the value returned by \fBpcre_exec()\fP if it is greater
2488
+ than zero. If \fBpcre_exec()\fP returned zero, indicating that it ran out of
2489
+ space in \fIovector\fP, the value passed as \fIstringcount\fP should be the
2490
+ number of elements in the vector divided by three.
2491
+ .P
2492
+ The functions \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP
2493
+ extract a single substring, whose number is given as \fIstringnumber\fP. A
2494
+ value of zero extracts the substring that matched the entire pattern, whereas
2495
+ higher values extract the captured substrings. For \fBpcre_copy_substring()\fP,
2496
+ the string is placed in \fIbuffer\fP, whose length is given by
2497
+ \fIbuffersize\fP, while for \fBpcre_get_substring()\fP a new block of memory is
2498
+ obtained via \fBpcre_malloc\fP, and its address is returned via
2499
+ \fIstringptr\fP. The yield of the function is the length of the string, not
2500
+ including the terminating zero, or one of these error codes:
2501
+ .sp
2502
+ PCRE_ERROR_NOMEMORY (-6)
2503
+ .sp
2504
+ The buffer was too small for \fBpcre_copy_substring()\fP, or the attempt to get
2505
+ memory failed for \fBpcre_get_substring()\fP.
2506
+ .sp
2507
+ PCRE_ERROR_NOSUBSTRING (-7)
2508
+ .sp
2509
+ There is no substring whose number is \fIstringnumber\fP.
2510
+ .P
2511
+ The \fBpcre_get_substring_list()\fP function extracts all available substrings
2512
+ and builds a list of pointers to them. All this is done in a single block of
2513
+ memory that is obtained via \fBpcre_malloc\fP. The address of the memory block
2514
+ is returned via \fIlistptr\fP, which is also the start of the list of string
2515
+ pointers. The end of the list is marked by a NULL pointer. The yield of the
2516
+ function is zero if all went well, or the error code
2517
+ .sp
2518
+ PCRE_ERROR_NOMEMORY (-6)
2519
+ .sp
2520
+ if the attempt to get the memory block failed.
2521
+ .P
2522
+ When any of these functions encounter a substring that is unset, which can
2523
+ happen when capturing subpattern number \fIn+1\fP matches some part of the
2524
+ subject, but subpattern \fIn\fP has not been used at all, they return an empty
2525
+ string. This can be distinguished from a genuine zero-length substring by
2526
+ inspecting the appropriate offset in \fIovector\fP, which is negative for unset
2527
+ substrings.
2528
+ .P
2529
+ The two convenience functions \fBpcre_free_substring()\fP and
2530
+ \fBpcre_free_substring_list()\fP can be used to free the memory returned by
2531
+ a previous call of \fBpcre_get_substring()\fP or
2532
+ \fBpcre_get_substring_list()\fP, respectively. They do nothing more than call
2533
+ the function pointed to by \fBpcre_free\fP, which of course could be called
2534
+ directly from a C program. However, PCRE is used in some situations where it is
2535
+ linked via a special interface to another programming language that cannot use
2536
+ \fBpcre_free\fP directly; it is for these cases that the functions are
2537
+ provided.
2538
+ .
2539
+ .
2540
+ .SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME"
2541
+ .rs
2542
+ .sp
2543
+ .nf
2544
+ .B int pcre_get_stringnumber(const pcre *\fIcode\fP,
2545
+ .B " const char *\fIname\fP);"
2546
+ .sp
2547
+ .B int pcre_copy_named_substring(const pcre *\fIcode\fP,
2548
+ .B " const char *\fIsubject\fP, int *\fIovector\fP,"
2549
+ .B " int \fIstringcount\fP, const char *\fIstringname\fP,"
2550
+ .B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
2551
+ .sp
2552
+ .B int pcre_get_named_substring(const pcre *\fIcode\fP,
2553
+ .B " const char *\fIsubject\fP, int *\fIovector\fP,"
2554
+ .B " int \fIstringcount\fP, const char *\fIstringname\fP,"
2555
+ .B " const char **\fIstringptr\fP);"
2556
+ .fi
2557
+ .PP
2558
+ To extract a substring by name, you first have to find the associated number.
2559
+ For example, for this pattern
2560
+ .sp
2561
+ (a+)b(?<xxx>\ed+)...
2562
+ .sp
2563
+ the number of the subpattern called "xxx" is 2. If the name is known to be
2564
+ unique (PCRE_DUPNAMES was not set), you can find the number from the name by
2565
+ calling \fBpcre_get_stringnumber()\fP. The first argument is the compiled
2566
+ pattern, and the second is the name. The yield of the function is the
2567
+ subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no subpattern of
2568
+ that name.
2569
+ .P
2570
+ Given the number, you can extract the substring directly, or use one of the
2571
+ functions described in the previous section. For convenience, there are also
2572
+ two functions that do the whole job.
2573
+ .P
2574
+ Most of the arguments of \fBpcre_copy_named_substring()\fP and
2575
+ \fBpcre_get_named_substring()\fP are the same as those for the similarly named
2576
+ functions that extract by number. As these are described in the previous
2577
+ section, they are not re-described here. There are just two differences:
2578
+ .P
2579
+ First, instead of a substring number, a substring name is given. Second, there
2580
+ is an extra argument, given at the start, which is a pointer to the compiled
2581
+ pattern. This is needed in order to gain access to the name-to-number
2582
+ translation table.
2583
+ .P
2584
+ These functions call \fBpcre_get_stringnumber()\fP, and if it succeeds, they
2585
+ then call \fBpcre_copy_substring()\fP or \fBpcre_get_substring()\fP, as
2586
+ appropriate. \fBNOTE:\fP If PCRE_DUPNAMES is set and there are duplicate names,
2587
+ the behaviour may not be what you want (see the next section).
2588
+ .P
2589
+ \fBWarning:\fP If the pattern uses the (?| feature to set up multiple
2590
+ subpatterns with the same number, as described in the
2591
+ .\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
2592
+ .\" </a>
2593
+ section on duplicate subpattern numbers
2594
+ .\"
2595
+ in the
2596
+ .\" HREF
2597
+ \fBpcrepattern\fP
2598
+ .\"
2599
+ page, you cannot use names to distinguish the different subpatterns, because
2600
+ names are not included in the compiled code. The matching process uses only
2601
+ numbers. For this reason, the use of different names for subpatterns of the
2602
+ same number causes an error at compile time.
2603
+ .
2604
+ .
2605
+ .SH "DUPLICATE SUBPATTERN NAMES"
2606
+ .rs
2607
+ .sp
2608
+ .nf
2609
+ .B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
2610
+ .B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
2611
+ .fi
2612
+ .PP
2613
+ When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
2614
+ are not required to be unique. (Duplicate names are always allowed for
2615
+ subpatterns with the same number, created by using the (?| feature. Indeed, if
2616
+ such subpatterns are named, they are required to use the same names.)
2617
+ .P
2618
+ Normally, patterns with duplicate names are such that in any one match, only
2619
+ one of the named subpatterns participates. An example is shown in the
2620
+ .\" HREF
2621
+ \fBpcrepattern\fP
2622
+ .\"
2623
+ documentation.
2624
+ .P
2625
+ When duplicates are present, \fBpcre_copy_named_substring()\fP and
2626
+ \fBpcre_get_named_substring()\fP return the first substring corresponding to
2627
+ the given name that is set. If none are set, PCRE_ERROR_NOSUBSTRING (-7) is
2628
+ returned; no data is returned. The \fBpcre_get_stringnumber()\fP function
2629
+ returns one of the numbers that are associated with the name, but it is not
2630
+ defined which it is.
2631
+ .P
2632
+ If you want to get full details of all captured substrings for a given name,
2633
+ you must use the \fBpcre_get_stringtable_entries()\fP function. The first
2634
+ argument is the compiled pattern, and the second is the name. The third and
2635
+ fourth are pointers to variables which are updated by the function. After it
2636
+ has run, they point to the first and last entries in the name-to-number table
2637
+ for the given name. The function itself returns the length of each entry, or
2638
+ PCRE_ERROR_NOSUBSTRING (-7) if there are none. The format of the table is
2639
+ described in the section entitled \fIInformation about a pattern\fP
2640
+ .\" HTML <a href="#infoaboutpattern">
2641
+ .\" </a>
2642
+ above.
2643
+ .\"
2644
+ Given all the relevant entries for the name, you can extract each of their
2645
+ numbers, and hence the captured data, if any.
2646
+ .
2647
+ .
2648
+ .SH "FINDING ALL POSSIBLE MATCHES"
2649
+ .rs
2650
+ .sp
2651
+ The traditional matching function uses a similar algorithm to Perl, which stops
2652
+ when it finds the first match, starting at a given point in the subject. If you
2653
+ want to find all possible matches, or the longest possible match, consider
2654
+ using the alternative matching function (see below) instead. If you cannot use
2655
+ the alternative function, but still need to find all possible matches, you
2656
+ can kludge it up by making use of the callout facility, which is described in
2657
+ the
2658
+ .\" HREF
2659
+ \fBpcrecallout\fP
2660
+ .\"
2661
+ documentation.
2662
+ .P
2663
+ What you have to do is to insert a callout right at the end of the pattern.
2664
+ When your callout function is called, extract and save the current matched
2665
+ substring. Then return 1, which forces \fBpcre_exec()\fP to backtrack and try
2666
+ other alternatives. Ultimately, when it runs out of matches, \fBpcre_exec()\fP
2667
+ will yield PCRE_ERROR_NOMATCH.
2668
+ .
2669
+ .
2670
+ .SH "OBTAINING AN ESTIMATE OF STACK USAGE"
2671
+ .rs
2672
+ .sp
2673
+ Matching certain patterns using \fBpcre_exec()\fP can use a lot of process
2674
+ stack, which in certain environments can be rather limited in size. Some users
2675
+ find it helpful to have an estimate of the amount of stack that is used by
2676
+ \fBpcre_exec()\fP, to help them set recursion limits, as described in the
2677
+ .\" HREF
2678
+ \fBpcrestack\fP
2679
+ .\"
2680
+ documentation. The estimate that is output by \fBpcretest\fP when called with
2681
+ the \fB-m\fP and \fB-C\fP options is obtained by calling \fBpcre_exec\fP with
2682
+ the values NULL, NULL, NULL, -999, and -999 for its first five arguments.
2683
+ .P
2684
+ Normally, if its first argument is NULL, \fBpcre_exec()\fP immediately returns
2685
+ the negative error code PCRE_ERROR_NULL, but with this special combination of
2686
+ arguments, it returns instead a negative number whose absolute value is the
2687
+ approximate stack frame size in bytes. (A negative number is used so that it is
2688
+ clear that no match has happened.) The value is approximate because in some
2689
+ cases, recursive calls to \fBpcre_exec()\fP occur when there are one or two
2690
+ additional variables on the stack.
2691
+ .P
2692
+ If PCRE has been compiled to use the heap instead of the stack for recursion,
2693
+ the value returned is the size of each block that is obtained from the heap.
2694
+ .
2695
+ .
2696
+ .\" HTML <a name="dfamatch"></a>
2697
+ .SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION"
2698
+ .rs
2699
+ .sp
2700
+ .nf
2701
+ .B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
2702
+ .B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
2703
+ .B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
2704
+ .B " int *\fIworkspace\fP, int \fIwscount\fP);"
2705
+ .fi
2706
+ .P
2707
+ The function \fBpcre_dfa_exec()\fP is called to match a subject string against
2708
+ a compiled pattern, using a matching algorithm that scans the subject string
2709
+ just once, and does not backtrack. This has different characteristics to the
2710
+ normal algorithm, and is not compatible with Perl. Some of the features of PCRE
2711
+ patterns are not supported. Nevertheless, there are times when this kind of
2712
+ matching can be useful. For a discussion of the two matching algorithms, and a
2713
+ list of features that \fBpcre_dfa_exec()\fP does not support, see the
2714
+ .\" HREF
2715
+ \fBpcrematching\fP
2716
+ .\"
2717
+ documentation.
2718
+ .P
2719
+ The arguments for the \fBpcre_dfa_exec()\fP function are the same as for
2720
+ \fBpcre_exec()\fP, plus two extras. The \fIovector\fP argument is used in a
2721
+ different way, and this is described below. The other common arguments are used
2722
+ in the same way as for \fBpcre_exec()\fP, so their description is not repeated
2723
+ here.
2724
+ .P
2725
+ The two additional arguments provide workspace for the function. The workspace
2726
+ vector should contain at least 20 elements. It is used for keeping track of
2727
+ multiple paths through the pattern tree. More workspace will be needed for
2728
+ patterns and subjects where there are a lot of potential matches.
2729
+ .P
2730
+ Here is an example of a simple call to \fBpcre_dfa_exec()\fP:
2731
+ .sp
2732
+ int rc;
2733
+ int ovector[10];
2734
+ int wspace[20];
2735
+ rc = pcre_dfa_exec(
2736
+ re, /* result of pcre_compile() */
2737
+ NULL, /* we didn't study the pattern */
2738
+ "some string", /* the subject string */
2739
+ 11, /* the length of the subject string */
2740
+ 0, /* start at offset 0 in the subject */
2741
+ 0, /* default options */
2742
+ ovector, /* vector of integers for substring information */
2743
+ 10, /* number of elements (NOT size in bytes) */
2744
+ wspace, /* working space vector */
2745
+ 20); /* number of elements (NOT size in bytes) */
2746
+ .
2747
+ .SS "Option bits for \fBpcre_dfa_exec()\fP"
2748
+ .rs
2749
+ .sp
2750
+ The unused bits of the \fIoptions\fP argument for \fBpcre_dfa_exec()\fP must be
2751
+ zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
2752
+ PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
2753
+ PCRE_NO_UTF8_CHECK, PCRE_BSR_ANYCRLF, PCRE_BSR_UNICODE, PCRE_NO_START_OPTIMIZE,
2754
+ PCRE_PARTIAL_HARD, PCRE_PARTIAL_SOFT, PCRE_DFA_SHORTEST, and PCRE_DFA_RESTART.
2755
+ All but the last four of these are exactly the same as for \fBpcre_exec()\fP,
2756
+ so their description is not repeated here.
2757
+ .sp
2758
+ PCRE_PARTIAL_HARD
2759
+ PCRE_PARTIAL_SOFT
2760
+ .sp
2761
+ These have the same general effect as they do for \fBpcre_exec()\fP, but the
2762
+ details are slightly different. When PCRE_PARTIAL_HARD is set for
2763
+ \fBpcre_dfa_exec()\fP, it returns PCRE_ERROR_PARTIAL if the end of the subject
2764
+ is reached and there is still at least one matching possibility that requires
2765
+ additional characters. This happens even if some complete matches have also
2766
+ been found. When PCRE_PARTIAL_SOFT is set, the return code PCRE_ERROR_NOMATCH
2767
+ is converted into PCRE_ERROR_PARTIAL if the end of the subject is reached,
2768
+ there have been no complete matches, but there is still at least one matching
2769
+ possibility. The portion of the string that was inspected when the longest
2770
+ partial match was found is set as the first matching string in both cases.
2771
+ There is a more detailed discussion of partial and multi-segment matching, with
2772
+ examples, in the
2773
+ .\" HREF
2774
+ \fBpcrepartial\fP
2775
+ .\"
2776
+ documentation.
2777
+ .sp
2778
+ PCRE_DFA_SHORTEST
2779
+ .sp
2780
+ Setting the PCRE_DFA_SHORTEST option causes the matching algorithm to stop as
2781
+ soon as it has found one match. Because of the way the alternative algorithm
2782
+ works, this is necessarily the shortest possible match at the first possible
2783
+ matching point in the subject string.
2784
+ .sp
2785
+ PCRE_DFA_RESTART
2786
+ .sp
2787
+ When \fBpcre_dfa_exec()\fP returns a partial match, it is possible to call it
2788
+ again, with additional subject characters, and have it continue with the same
2789
+ match. The PCRE_DFA_RESTART option requests this action; when it is set, the
2790
+ \fIworkspace\fP and \fIwscount\fP arguments must reference the same vector as
2791
+ before because data about the match so far is left in them after a partial
2792
+ match. There is more discussion of this facility in the
2793
+ .\" HREF
2794
+ \fBpcrepartial\fP
2795
+ .\"
2796
+ documentation.
2797
+ .
2798
+ .
2799
+ .SS "Successful returns from \fBpcre_dfa_exec()\fP"
2800
+ .rs
2801
+ .sp
2802
+ When \fBpcre_dfa_exec()\fP succeeds, it may have matched more than one
2803
+ substring in the subject. Note, however, that all the matches from one run of
2804
+ the function start at the same point in the subject. The shorter matches are
2805
+ all initial substrings of the longer matches. For example, if the pattern
2806
+ .sp
2807
+ <.*>
2808
+ .sp
2809
+ is matched against the string
2810
+ .sp
2811
+ This is <something> <something else> <something further> no more
2812
+ .sp
2813
+ the three matched strings are
2814
+ .sp
2815
+ <something>
2816
+ <something> <something else>
2817
+ <something> <something else> <something further>
2818
+ .sp
2819
+ On success, the yield of the function is a number greater than zero, which is
2820
+ the number of matched substrings. The substrings themselves are returned in
2821
+ \fIovector\fP. Each string uses two elements; the first is the offset to the
2822
+ start, and the second is the offset to the end. In fact, all the strings have
2823
+ the same start offset. (Space could have been saved by giving this only once,
2824
+ but it was decided to retain some compatibility with the way \fBpcre_exec()\fP
2825
+ returns data, even though the meaning of the strings is different.)
2826
+ .P
2827
+ The strings are returned in reverse order of length; that is, the longest
2828
+ matching string is given first. If there were too many matches to fit into
2829
+ \fIovector\fP, the yield of the function is zero, and the vector is filled with
2830
+ the longest matches. Unlike \fBpcre_exec()\fP, \fBpcre_dfa_exec()\fP can use
2831
+ the entire \fIovector\fP for returning matched strings.
2832
+ .P
2833
+ NOTE: PCRE's "auto-possessification" optimization usually applies to character
2834
+ repeats at the end of a pattern (as well as internally). For example, the
2835
+ pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point
2836
+ even considering the possibility of backtracking into the repeated digits. For
2837
+ DFA matching, this means that only one possible match is found. If you really
2838
+ do want multiple matches in such cases, either use an ungreedy repeat
2839
+ ("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
2840
+ .
2841
+ .
2842
+ .SS "Error returns from \fBpcre_dfa_exec()\fP"
2843
+ .rs
2844
+ .sp
2845
+ The \fBpcre_dfa_exec()\fP function returns a negative number when it fails.
2846
+ Many of the errors are the same as for \fBpcre_exec()\fP, and these are
2847
+ described
2848
+ .\" HTML <a href="#errorlist">
2849
+ .\" </a>
2850
+ above.
2851
+ .\"
2852
+ There are in addition the following errors that are specific to
2853
+ \fBpcre_dfa_exec()\fP:
2854
+ .sp
2855
+ PCRE_ERROR_DFA_UITEM (-16)
2856
+ .sp
2857
+ This return is given if \fBpcre_dfa_exec()\fP encounters an item in the pattern
2858
+ that it does not support, for instance, the use of \eC or a back reference.
2859
+ .sp
2860
+ PCRE_ERROR_DFA_UCOND (-17)
2861
+ .sp
2862
+ This return is given if \fBpcre_dfa_exec()\fP encounters a condition item that
2863
+ uses a back reference for the condition, or a test for recursion in a specific
2864
+ group. These are not supported.
2865
+ .sp
2866
+ PCRE_ERROR_DFA_UMLIMIT (-18)
2867
+ .sp
2868
+ This return is given if \fBpcre_dfa_exec()\fP is called with an \fIextra\fP
2869
+ block that contains a setting of the \fImatch_limit\fP or
2870
+ \fImatch_limit_recursion\fP fields. This is not supported (these fields are
2871
+ meaningless for DFA matching).
2872
+ .sp
2873
+ PCRE_ERROR_DFA_WSSIZE (-19)
2874
+ .sp
2875
+ This return is given if \fBpcre_dfa_exec()\fP runs out of space in the
2876
+ \fIworkspace\fP vector.
2877
+ .sp
2878
+ PCRE_ERROR_DFA_RECURSE (-20)
2879
+ .sp
2880
+ When a recursive subpattern is processed, the matching function calls itself
2881
+ recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This
2882
+ error is given if the output vector is not large enough. This should be
2883
+ extremely rare, as a vector of size 1000 is used.
2884
+ .sp
2885
+ PCRE_ERROR_DFA_BADRESTART (-30)
2886
+ .sp
2887
+ When \fBpcre_dfa_exec()\fP is called with the \fBPCRE_DFA_RESTART\fP option,
2888
+ some plausibility checks are made on the contents of the workspace, which
2889
+ should contain data about the previous partial match. If any of these checks
2890
+ fail, this error is given.
2891
+ .
2892
+ .
2893
+ .SH "SEE ALSO"
2894
+ .rs
2895
+ .sp
2896
+ \fBpcre16\fP(3), \fBpcre32\fP(3), \fBpcrebuild\fP(3), \fBpcrecallout\fP(3),
2897
+ \fBpcrecpp\fP(3), \fBpcrematching\fP(3), \fBpcrepartial\fP(3),
2898
+ \fBpcreposix\fP(3), \fBpcreprecompile\fP(3), \fBpcresample\fP(3),
2899
+ \fBpcrestack\fP(3).
2900
+ .
2901
+ .
2902
+ .SH AUTHOR
2903
+ .rs
2904
+ .sp
2905
+ .nf
2906
+ Philip Hazel
2907
+ University Computing Service
2908
+ Cambridge CB2 3QH, England.
2909
+ .fi
2910
+ .
2911
+ .
2912
+ .SH REVISION
2913
+ .rs
2914
+ .sp
2915
+ .nf
2916
+ Last updated: 18 December 2015
2917
+ Copyright (c) 1997-2015 University of Cambridge.
2918
+ .fi