rroonga 6.0.7-x86-mingw32 → 6.0.9-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (797) hide show
  1. checksums.yaml +4 -4
  2. data/doc/text/cross-compile.md +24 -23
  3. data/doc/text/news.md +10 -0
  4. data/ext/groonga/rb-grn-database.c +33 -0
  5. data/ext/groonga/rb-grn-id.c +19 -0
  6. data/ext/groonga/rb-grn-table.c +3 -1
  7. data/ext/groonga/rb-grn.h +1 -1
  8. data/lib/2.1/groonga.so +0 -0
  9. data/lib/2.2/groonga.so +0 -0
  10. data/lib/2.3/groonga.so +0 -0
  11. data/rroonga-build.rb +3 -3
  12. data/test/test-database.rb +21 -1
  13. data/test/test-id.rb +16 -0
  14. data/vendor/local/bin/grndb.exe +0 -0
  15. data/vendor/local/bin/groonga-benchmark.exe +0 -0
  16. data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
  17. data/vendor/local/bin/groonga.exe +0 -0
  18. data/vendor/local/bin/libgroonga-0.dll +0 -0
  19. data/vendor/local/bin/libmecab-2.dll +0 -0
  20. data/vendor/local/bin/libmsgpackc.dll +0 -0
  21. data/vendor/local/bin/libonig-5.dll +0 -0
  22. data/vendor/local/bin/libpcre-1.dll +0 -0
  23. data/vendor/local/bin/libpcrecpp-0.dll +0 -0
  24. data/vendor/local/bin/libpcreposix-0.dll +0 -0
  25. data/vendor/local/bin/lz4.exe +0 -0
  26. data/vendor/local/bin/lz4c.exe +0 -0
  27. data/vendor/local/bin/lz4cat +0 -0
  28. data/vendor/local/bin/mecab.exe +0 -0
  29. data/vendor/local/bin/pcre-config +133 -0
  30. data/vendor/local/bin/pcregrep.exe +0 -0
  31. data/vendor/local/bin/pcretest.exe +0 -0
  32. data/vendor/local/bin/zlib1.dll +0 -0
  33. data/vendor/local/include/groonga/groonga/db.h +22 -0
  34. data/vendor/local/include/groonga/groonga/groonga.h +21 -1
  35. data/vendor/local/include/groonga/groonga/id.h +1 -0
  36. data/vendor/local/include/pcre.h +677 -0
  37. data/vendor/local/include/pcre_scanner.h +172 -0
  38. data/vendor/local/include/pcre_stringpiece.h +180 -0
  39. data/vendor/local/include/pcrecpp.h +710 -0
  40. data/vendor/local/include/pcrecpparg.h +174 -0
  41. data/vendor/local/include/pcreposix.h +146 -0
  42. data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
  43. data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
  44. data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
  45. data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
  46. data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
  47. data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
  48. data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
  49. data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
  50. data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
  51. data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
  52. data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
  53. data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
  54. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
  55. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
  56. data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
  57. data/vendor/local/lib/groonga/plugins/sharding/logical_table_remove.rb +253 -23
  58. data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
  59. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
  60. data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
  61. data/vendor/local/lib/groonga/plugins/table/table.a +0 -0
  62. data/vendor/local/lib/groonga/plugins/table/table.dll +0 -0
  63. data/vendor/local/lib/groonga/plugins/table/table.dll.a +0 -0
  64. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
  65. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
  66. data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
  67. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
  68. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
  69. data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
  70. data/vendor/local/lib/groonga/scripts/ruby/context/rc.rb +12 -4
  71. data/vendor/local/lib/groonga/scripts/ruby/context.rb +19 -0
  72. data/vendor/local/lib/groonga/scripts/ruby/database.rb +36 -18
  73. data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +13 -10
  74. data/vendor/local/lib/libgroonga.a +0 -0
  75. data/vendor/local/lib/libgroonga.dll.a +0 -0
  76. data/vendor/local/lib/liblz4.a +0 -0
  77. data/vendor/local/lib/liblz4.dll +0 -0
  78. data/vendor/local/lib/liblz4.dll.1 +0 -0
  79. data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
  80. data/vendor/local/lib/libmecab.a +0 -0
  81. data/vendor/local/lib/libmecab.dll.a +0 -0
  82. data/vendor/local/lib/libmsgpackc.a +0 -0
  83. data/vendor/local/lib/libmsgpackc.dll.a +0 -0
  84. data/vendor/local/lib/libonig.a +0 -0
  85. data/vendor/local/lib/libonig.dll.a +0 -0
  86. data/vendor/local/lib/libpcre.a +0 -0
  87. data/vendor/local/lib/libpcre.dll.a +0 -0
  88. data/vendor/local/lib/libpcre.la +41 -0
  89. data/vendor/local/lib/libpcrecpp.a +0 -0
  90. data/vendor/local/lib/libpcrecpp.dll.a +0 -0
  91. data/vendor/local/lib/libpcrecpp.la +41 -0
  92. data/vendor/local/lib/libpcreposix.a +0 -0
  93. data/vendor/local/lib/libpcreposix.dll.a +0 -0
  94. data/vendor/local/lib/libpcreposix.la +41 -0
  95. data/vendor/local/lib/libz.a +0 -0
  96. data/vendor/local/lib/libz.dll.a +0 -0
  97. data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
  98. data/vendor/local/lib/pkgconfig/libpcre.pc +13 -0
  99. data/vendor/local/lib/pkgconfig/libpcrecpp.pc +12 -0
  100. data/vendor/local/lib/pkgconfig/libpcreposix.pc +13 -0
  101. data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
  102. data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
  103. data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
  104. data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
  105. data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
  106. data/vendor/local/sbin/groonga-httpd.exe +0 -0
  107. data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
  108. data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +3 -3
  109. data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +3 -3
  110. data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +3 -3
  111. data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +3 -3
  112. data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +3 -3
  113. data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +3 -3
  114. data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +3 -3
  115. data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +9 -9
  116. data/vendor/local/share/doc/groonga/en/html/_sources/limitations.txt +24 -5
  117. data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +156 -4
  118. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_acquire.txt +1 -1
  119. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_release.txt +1 -1
  120. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +86 -0
  121. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_list.txt +23 -11
  122. data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_copy.txt +64 -0
  123. data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +88 -45
  124. data/vendor/local/share/doc/groonga/en/html/characteristic.html +5 -5
  125. data/vendor/local/share/doc/groonga/en/html/client.html +5 -5
  126. data/vendor/local/share/doc/groonga/en/html/community.html +5 -5
  127. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +5 -5
  128. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +5 -5
  129. data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +5 -5
  130. data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +5 -5
  131. data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +5 -5
  132. data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +5 -5
  133. data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +5 -5
  134. data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +5 -5
  135. data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +5 -5
  136. data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +5 -5
  137. data/vendor/local/share/doc/groonga/en/html/contribution/development.html +5 -5
  138. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +5 -5
  139. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +5 -5
  140. data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +5 -5
  141. data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +5 -5
  142. data/vendor/local/share/doc/groonga/en/html/contribution/report.html +5 -5
  143. data/vendor/local/share/doc/groonga/en/html/contribution.html +5 -5
  144. data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +5 -5
  145. data/vendor/local/share/doc/groonga/en/html/development.html +5 -5
  146. data/vendor/local/share/doc/groonga/en/html/genindex.html +5 -5
  147. data/vendor/local/share/doc/groonga/en/html/index.html +15 -14
  148. data/vendor/local/share/doc/groonga/en/html/install/centos.html +8 -8
  149. data/vendor/local/share/doc/groonga/en/html/install/debian.html +8 -8
  150. data/vendor/local/share/doc/groonga/en/html/install/fedora.html +8 -8
  151. data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +8 -8
  152. data/vendor/local/share/doc/groonga/en/html/install/others.html +8 -8
  153. data/vendor/local/share/doc/groonga/en/html/install/solaris.html +8 -8
  154. data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +8 -8
  155. data/vendor/local/share/doc/groonga/en/html/install/windows.html +14 -14
  156. data/vendor/local/share/doc/groonga/en/html/install.html +5 -5
  157. data/vendor/local/share/doc/groonga/en/html/limitations.html +28 -9
  158. data/vendor/local/share/doc/groonga/en/html/news/0.x.html +5 -5
  159. data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +5 -5
  160. data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +5 -5
  161. data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +5 -5
  162. data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +5 -5
  163. data/vendor/local/share/doc/groonga/en/html/news/2.x.html +5 -5
  164. data/vendor/local/share/doc/groonga/en/html/news/3.x.html +5 -5
  165. data/vendor/local/share/doc/groonga/en/html/news/4.x.html +5 -5
  166. data/vendor/local/share/doc/groonga/en/html/news/5.x.html +5 -5
  167. data/vendor/local/share/doc/groonga/en/html/news/senna.html +5 -5
  168. data/vendor/local/share/doc/groonga/en/html/news.html +196 -61
  169. data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
  170. data/vendor/local/share/doc/groonga/en/html/reference/alias.html +5 -5
  171. data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +5 -5
  172. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +5 -5
  173. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +5 -5
  174. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +5 -5
  175. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +5 -5
  176. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +5 -5
  177. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +5 -5
  178. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +5 -5
  179. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +5 -5
  180. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +5 -5
  181. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +5 -5
  182. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +5 -5
  183. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +5 -5
  184. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +5 -5
  185. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +5 -5
  186. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +5 -5
  187. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +5 -5
  188. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +5 -5
  189. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +5 -5
  190. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +5 -5
  191. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +5 -5
  192. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +5 -5
  193. data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +5 -5
  194. data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +5 -5
  195. data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +5 -5
  196. data/vendor/local/share/doc/groonga/en/html/reference/api.html +5 -5
  197. data/vendor/local/share/doc/groonga/en/html/reference/cast.html +5 -5
  198. data/vendor/local/share/doc/groonga/en/html/reference/column.html +5 -5
  199. data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +5 -5
  200. data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +5 -5
  201. data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +5 -5
  202. data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +5 -5
  203. data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +5 -5
  204. data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +5 -5
  205. data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +5 -5
  206. data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +5 -5
  207. data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +5 -5
  208. data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +5 -5
  209. data/vendor/local/share/doc/groonga/en/html/reference/command.html +15 -14
  210. data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +5 -5
  211. data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +5 -5
  212. data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +5 -5
  213. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +5 -5
  214. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +5 -5
  215. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +5 -5
  216. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +5 -5
  217. data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +5 -5
  218. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +5 -5
  219. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +5 -5
  220. data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +5 -5
  221. data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +5 -5
  222. data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +5 -5
  223. data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +5 -5
  224. data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +5 -5
  225. data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +5 -5
  226. data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +5 -5
  227. data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +5 -5
  228. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +6 -6
  229. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +5 -5
  230. data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +6 -6
  231. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +5 -5
  232. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +5 -5
  233. data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +5 -5
  234. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +5 -5
  235. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +5 -5
  236. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +5 -5
  237. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +5 -5
  238. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +5 -5
  239. data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +98 -8
  240. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +5 -5
  241. data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +5 -5
  242. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +5 -5
  243. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +5 -5
  244. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +32 -18
  245. data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +5 -5
  246. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +5 -5
  247. data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +5 -5
  248. data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +5 -5
  249. data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +5 -5
  250. data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +5 -5
  251. data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +5 -5
  252. data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +5 -5
  253. data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +5 -5
  254. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +5 -5
  255. data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +5 -5
  256. data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +5 -5
  257. data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +5 -5
  258. data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +5 -5
  259. data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +5 -5
  260. data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +10 -10
  261. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +200 -0
  262. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +52 -52
  263. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +25 -25
  264. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +41 -41
  265. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +31 -31
  266. data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +41 -41
  267. data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +31 -31
  268. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +43 -43
  269. data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +25 -25
  270. data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +25 -25
  271. data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +5 -5
  272. data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +5 -5
  273. data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +5 -5
  274. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +5 -5
  275. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +5 -5
  276. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +5 -5
  277. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +5 -5
  278. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +5 -5
  279. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +5 -5
  280. data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +5 -5
  281. data/vendor/local/share/doc/groonga/en/html/reference/executables.html +5 -5
  282. data/vendor/local/share/doc/groonga/en/html/reference/function.html +5 -5
  283. data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +5 -5
  284. data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +5 -5
  285. data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +5 -5
  286. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +5 -5
  287. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +5 -5
  288. data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +5 -5
  289. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +5 -5
  290. data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +5 -5
  291. data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +5 -5
  292. data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +5 -5
  293. data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +5 -5
  294. data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +5 -5
  295. data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +5 -5
  296. data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +5 -5
  297. data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +5 -5
  298. data/vendor/local/share/doc/groonga/en/html/reference/functions/record_number.html +5 -5
  299. data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +5 -5
  300. data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +5 -5
  301. data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +5 -5
  302. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +5 -5
  303. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +5 -5
  304. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +5 -5
  305. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +5 -5
  306. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +5 -5
  307. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +5 -5
  308. data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +5 -5
  309. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +5 -5
  310. data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +5 -5
  311. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +5 -5
  312. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +5 -5
  313. data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +5 -5
  314. data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +5 -5
  315. data/vendor/local/share/doc/groonga/en/html/reference/log.html +5 -5
  316. data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +5 -5
  317. data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +5 -5
  318. data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +5 -5
  319. data/vendor/local/share/doc/groonga/en/html/reference/operations.html +5 -5
  320. data/vendor/local/share/doc/groonga/en/html/reference/output.html +5 -5
  321. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +5 -5
  322. data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +5 -5
  323. data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +5 -5
  324. data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +5 -5
  325. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +5 -5
  326. data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +5 -5
  327. data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +5 -5
  328. data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +5 -5
  329. data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +5 -5
  330. data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +5 -5
  331. data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +5 -5
  332. data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +5 -5
  333. data/vendor/local/share/doc/groonga/en/html/reference/tables.html +41 -34
  334. data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +5 -5
  335. data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +5 -5
  336. data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +5 -5
  337. data/vendor/local/share/doc/groonga/en/html/reference/types.html +9 -9
  338. data/vendor/local/share/doc/groonga/en/html/reference.html +15 -14
  339. data/vendor/local/share/doc/groonga/en/html/search.html +5 -5
  340. data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
  341. data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +5 -5
  342. data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +5 -5
  343. data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +5 -5
  344. data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +5 -5
  345. data/vendor/local/share/doc/groonga/en/html/server/http.html +5 -5
  346. data/vendor/local/share/doc/groonga/en/html/server/memcached.html +5 -5
  347. data/vendor/local/share/doc/groonga/en/html/server/package.html +5 -5
  348. data/vendor/local/share/doc/groonga/en/html/server.html +5 -5
  349. data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +5 -5
  350. data/vendor/local/share/doc/groonga/en/html/spec/search.html +5 -5
  351. data/vendor/local/share/doc/groonga/en/html/spec.html +5 -5
  352. data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +5 -5
  353. data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +5 -5
  354. data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +5 -5
  355. data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +5 -5
  356. data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +5 -5
  357. data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +5 -5
  358. data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +5 -5
  359. data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +5 -5
  360. data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +5 -5
  361. data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +5 -5
  362. data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +5 -5
  363. data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +5 -5
  364. data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +5 -5
  365. data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +5 -5
  366. data/vendor/local/share/doc/groonga/en/html/tutorial.html +5 -5
  367. data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
  368. data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +3 -3
  369. data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +3 -3
  370. data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +3 -3
  371. data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +3 -3
  372. data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +3 -3
  373. data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +3 -3
  374. data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +3 -3
  375. data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +9 -9
  376. data/vendor/local/share/doc/groonga/ja/html/_sources/limitations.txt +24 -5
  377. data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +156 -4
  378. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_acquire.txt +1 -1
  379. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_release.txt +1 -1
  380. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +86 -0
  381. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_list.txt +23 -11
  382. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_copy.txt +64 -0
  383. data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +88 -45
  384. data/vendor/local/share/doc/groonga/ja/html/characteristic.html +5 -5
  385. data/vendor/local/share/doc/groonga/ja/html/client.html +5 -5
  386. data/vendor/local/share/doc/groonga/ja/html/community.html +5 -5
  387. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +5 -5
  388. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +5 -5
  389. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +5 -5
  390. data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +5 -5
  391. data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +5 -5
  392. data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +5 -5
  393. data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +5 -5
  394. data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +5 -5
  395. data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +5 -5
  396. data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +5 -5
  397. data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +5 -5
  398. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +5 -5
  399. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +5 -5
  400. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +5 -5
  401. data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +5 -5
  402. data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +5 -5
  403. data/vendor/local/share/doc/groonga/ja/html/contribution.html +5 -5
  404. data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +5 -5
  405. data/vendor/local/share/doc/groonga/ja/html/development.html +5 -5
  406. data/vendor/local/share/doc/groonga/ja/html/genindex.html +5 -5
  407. data/vendor/local/share/doc/groonga/ja/html/index.html +15 -14
  408. data/vendor/local/share/doc/groonga/ja/html/install/centos.html +8 -8
  409. data/vendor/local/share/doc/groonga/ja/html/install/debian.html +8 -8
  410. data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +8 -8
  411. data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +8 -8
  412. data/vendor/local/share/doc/groonga/ja/html/install/others.html +8 -8
  413. data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +8 -8
  414. data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +8 -8
  415. data/vendor/local/share/doc/groonga/ja/html/install/windows.html +14 -14
  416. data/vendor/local/share/doc/groonga/ja/html/install.html +5 -5
  417. data/vendor/local/share/doc/groonga/ja/html/limitations.html +21 -8
  418. data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +5 -5
  419. data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +5 -5
  420. data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +5 -5
  421. data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +5 -5
  422. data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +5 -5
  423. data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +5 -5
  424. data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +5 -5
  425. data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +5 -5
  426. data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +5 -5
  427. data/vendor/local/share/doc/groonga/ja/html/news/senna.html +5 -5
  428. data/vendor/local/share/doc/groonga/ja/html/news.html +185 -61
  429. data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
  430. data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +5 -5
  431. data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +5 -5
  432. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +5 -5
  433. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +5 -5
  434. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +5 -5
  435. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +5 -5
  436. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +5 -5
  437. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +5 -5
  438. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +5 -5
  439. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +5 -5
  440. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +5 -5
  441. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +5 -5
  442. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +5 -5
  443. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +5 -5
  444. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +5 -5
  445. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +5 -5
  446. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +5 -5
  447. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +5 -5
  448. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +5 -5
  449. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +5 -5
  450. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +5 -5
  451. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +5 -5
  452. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +5 -5
  453. data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +5 -5
  454. data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +5 -5
  455. data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +5 -5
  456. data/vendor/local/share/doc/groonga/ja/html/reference/api.html +5 -5
  457. data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +5 -5
  458. data/vendor/local/share/doc/groonga/ja/html/reference/column.html +5 -5
  459. data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +5 -5
  460. data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +5 -5
  461. data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +5 -5
  462. data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +5 -5
  463. data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +5 -5
  464. data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +5 -5
  465. data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +5 -5
  466. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +5 -5
  467. data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +5 -5
  468. data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +5 -5
  469. data/vendor/local/share/doc/groonga/ja/html/reference/command.html +15 -14
  470. data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +5 -5
  471. data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +5 -5
  472. data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +5 -5
  473. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +5 -5
  474. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +5 -5
  475. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +5 -5
  476. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +5 -5
  477. data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +5 -5
  478. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +5 -5
  479. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +5 -5
  480. data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +5 -5
  481. data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +5 -5
  482. data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +5 -5
  483. data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +5 -5
  484. data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +5 -5
  485. data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +5 -5
  486. data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +5 -5
  487. data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +5 -5
  488. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +6 -6
  489. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +5 -5
  490. data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +6 -6
  491. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +5 -5
  492. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +5 -5
  493. data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +5 -5
  494. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +5 -5
  495. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +5 -5
  496. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +5 -5
  497. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +5 -5
  498. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +5 -5
  499. data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +88 -8
  500. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +5 -5
  501. data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +5 -5
  502. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +5 -5
  503. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +5 -5
  504. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +103 -103
  505. data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +6 -6
  506. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +5 -5
  507. data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +5 -5
  508. data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +5 -5
  509. data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +5 -5
  510. data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +5 -5
  511. data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +5 -5
  512. data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +5 -5
  513. data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +5 -5
  514. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +5 -5
  515. data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +5 -5
  516. data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +5 -5
  517. data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +5 -5
  518. data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +5 -5
  519. data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +5 -5
  520. data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +10 -10
  521. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +201 -0
  522. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +52 -52
  523. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +25 -25
  524. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +41 -41
  525. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +31 -31
  526. data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +41 -41
  527. data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +31 -31
  528. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +43 -43
  529. data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +25 -25
  530. data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +25 -25
  531. data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +5 -5
  532. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +5 -5
  533. data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +5 -5
  534. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +5 -5
  535. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +5 -5
  536. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +5 -5
  537. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +5 -5
  538. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +6 -6
  539. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +5 -5
  540. data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +5 -5
  541. data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +5 -5
  542. data/vendor/local/share/doc/groonga/ja/html/reference/function.html +5 -5
  543. data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +5 -5
  544. data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +5 -5
  545. data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +5 -5
  546. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +5 -5
  547. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +5 -5
  548. data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +5 -5
  549. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +5 -5
  550. data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +5 -5
  551. data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +5 -5
  552. data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +5 -5
  553. data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +5 -5
  554. data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +5 -5
  555. data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +5 -5
  556. data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +5 -5
  557. data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +5 -5
  558. data/vendor/local/share/doc/groonga/ja/html/reference/functions/record_number.html +5 -5
  559. data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +5 -5
  560. data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +5 -5
  561. data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +5 -5
  562. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +5 -5
  563. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +5 -5
  564. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +5 -5
  565. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +5 -5
  566. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +5 -5
  567. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +5 -5
  568. data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +5 -5
  569. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +5 -5
  570. data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +5 -5
  571. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +5 -5
  572. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +5 -5
  573. data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +5 -5
  574. data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +5 -5
  575. data/vendor/local/share/doc/groonga/ja/html/reference/log.html +5 -5
  576. data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +5 -5
  577. data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +5 -5
  578. data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +5 -5
  579. data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +5 -5
  580. data/vendor/local/share/doc/groonga/ja/html/reference/output.html +5 -5
  581. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +5 -5
  582. data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +5 -5
  583. data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +5 -5
  584. data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +5 -5
  585. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +5 -5
  586. data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +5 -5
  587. data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +5 -5
  588. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +5 -5
  589. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +5 -5
  590. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +5 -5
  591. data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +5 -5
  592. data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +5 -5
  593. data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +33 -10
  594. data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +5 -5
  595. data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +5 -5
  596. data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +5 -5
  597. data/vendor/local/share/doc/groonga/ja/html/reference/types.html +9 -9
  598. data/vendor/local/share/doc/groonga/ja/html/reference.html +15 -14
  599. data/vendor/local/share/doc/groonga/ja/html/search.html +5 -5
  600. data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
  601. data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +5 -5
  602. data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +5 -5
  603. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +5 -5
  604. data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +5 -5
  605. data/vendor/local/share/doc/groonga/ja/html/server/http.html +5 -5
  606. data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +5 -5
  607. data/vendor/local/share/doc/groonga/ja/html/server/package.html +5 -5
  608. data/vendor/local/share/doc/groonga/ja/html/server.html +5 -5
  609. data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +5 -5
  610. data/vendor/local/share/doc/groonga/ja/html/spec/search.html +5 -5
  611. data/vendor/local/share/doc/groonga/ja/html/spec.html +5 -5
  612. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +5 -5
  613. data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +5 -5
  614. data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +5 -5
  615. data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +5 -5
  616. data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +5 -5
  617. data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +5 -5
  618. data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +5 -5
  619. data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +5 -5
  620. data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +5 -5
  621. data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +5 -5
  622. data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +5 -5
  623. data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +5 -5
  624. data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +5 -5
  625. data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +5 -5
  626. data/vendor/local/share/doc/groonga/ja/html/tutorial.html +5 -5
  627. data/vendor/local/share/doc/pcre/AUTHORS +45 -0
  628. data/vendor/local/share/doc/pcre/COPYING +5 -0
  629. data/vendor/local/share/doc/pcre/ChangeLog +6010 -0
  630. data/vendor/local/share/doc/pcre/LICENCE +93 -0
  631. data/vendor/local/share/doc/pcre/NEWS +725 -0
  632. data/vendor/local/share/doc/pcre/README +1002 -0
  633. data/vendor/local/share/doc/pcre/html/NON-AUTOTOOLS-BUILD.txt +772 -0
  634. data/vendor/local/share/doc/pcre/html/README.txt +1002 -0
  635. data/vendor/local/share/doc/pcre/html/index.html +185 -0
  636. data/vendor/local/share/doc/pcre/html/pcre-config.html +109 -0
  637. data/vendor/local/share/doc/pcre/html/pcre.html +224 -0
  638. data/vendor/local/share/doc/pcre/html/pcre16.html +384 -0
  639. data/vendor/local/share/doc/pcre/html/pcre32.html +382 -0
  640. data/vendor/local/share/doc/pcre/html/pcre_assign_jit_stack.html +76 -0
  641. data/vendor/local/share/doc/pcre/html/pcre_compile.html +111 -0
  642. data/vendor/local/share/doc/pcre/html/pcre_compile2.html +115 -0
  643. data/vendor/local/share/doc/pcre/html/pcre_config.html +94 -0
  644. data/vendor/local/share/doc/pcre/html/pcre_copy_named_substring.html +65 -0
  645. data/vendor/local/share/doc/pcre/html/pcre_copy_substring.html +61 -0
  646. data/vendor/local/share/doc/pcre/html/pcre_dfa_exec.html +129 -0
  647. data/vendor/local/share/doc/pcre/html/pcre_exec.html +111 -0
  648. data/vendor/local/share/doc/pcre/html/pcre_free_study.html +46 -0
  649. data/vendor/local/share/doc/pcre/html/pcre_free_substring.html +46 -0
  650. data/vendor/local/share/doc/pcre/html/pcre_free_substring_list.html +46 -0
  651. data/vendor/local/share/doc/pcre/html/pcre_fullinfo.html +118 -0
  652. data/vendor/local/share/doc/pcre/html/pcre_get_named_substring.html +68 -0
  653. data/vendor/local/share/doc/pcre/html/pcre_get_stringnumber.html +57 -0
  654. data/vendor/local/share/doc/pcre/html/pcre_get_stringtable_entries.html +60 -0
  655. data/vendor/local/share/doc/pcre/html/pcre_get_substring.html +64 -0
  656. data/vendor/local/share/doc/pcre/html/pcre_get_substring_list.html +61 -0
  657. data/vendor/local/share/doc/pcre/html/pcre_jit_exec.html +108 -0
  658. data/vendor/local/share/doc/pcre/html/pcre_jit_stack_alloc.html +55 -0
  659. data/vendor/local/share/doc/pcre/html/pcre_jit_stack_free.html +48 -0
  660. data/vendor/local/share/doc/pcre/html/pcre_maketables.html +48 -0
  661. data/vendor/local/share/doc/pcre/html/pcre_pattern_to_host_byte_order.html +58 -0
  662. data/vendor/local/share/doc/pcre/html/pcre_refcount.html +51 -0
  663. data/vendor/local/share/doc/pcre/html/pcre_study.html +68 -0
  664. data/vendor/local/share/doc/pcre/html/pcre_utf16_to_host_byte_order.html +57 -0
  665. data/vendor/local/share/doc/pcre/html/pcre_utf32_to_host_byte_order.html +57 -0
  666. data/vendor/local/share/doc/pcre/html/pcre_version.html +46 -0
  667. data/vendor/local/share/doc/pcre/html/pcreapi.html +2921 -0
  668. data/vendor/local/share/doc/pcre/html/pcrebuild.html +534 -0
  669. data/vendor/local/share/doc/pcre/html/pcrecallout.html +286 -0
  670. data/vendor/local/share/doc/pcre/html/pcrecompat.html +235 -0
  671. data/vendor/local/share/doc/pcre/html/pcrecpp.html +368 -0
  672. data/vendor/local/share/doc/pcre/html/pcredemo.html +426 -0
  673. data/vendor/local/share/doc/pcre/html/pcregrep.html +759 -0
  674. data/vendor/local/share/doc/pcre/html/pcrejit.html +452 -0
  675. data/vendor/local/share/doc/pcre/html/pcrelimits.html +90 -0
  676. data/vendor/local/share/doc/pcre/html/pcrematching.html +242 -0
  677. data/vendor/local/share/doc/pcre/html/pcrepartial.html +509 -0
  678. data/vendor/local/share/doc/pcre/html/pcrepattern.html +3273 -0
  679. data/vendor/local/share/doc/pcre/html/pcreperform.html +195 -0
  680. data/vendor/local/share/doc/pcre/html/pcreposix.html +290 -0
  681. data/vendor/local/share/doc/pcre/html/pcreprecompile.html +163 -0
  682. data/vendor/local/share/doc/pcre/html/pcresample.html +110 -0
  683. data/vendor/local/share/doc/pcre/html/pcrestack.html +225 -0
  684. data/vendor/local/share/doc/pcre/html/pcresyntax.html +561 -0
  685. data/vendor/local/share/doc/pcre/html/pcretest.html +1158 -0
  686. data/vendor/local/share/doc/pcre/html/pcreunicode.html +262 -0
  687. data/vendor/local/share/doc/pcre/pcre-config.txt +86 -0
  688. data/vendor/local/share/doc/pcre/pcre.txt +10454 -0
  689. data/vendor/local/share/doc/pcre/pcregrep.txt +741 -0
  690. data/vendor/local/share/doc/pcre/pcretest.txt +1087 -0
  691. data/vendor/local/share/groonga/html/admin.old/js/groonga-admin.ja.js +11 -6
  692. data/vendor/local/share/groonga/html/admin.old/js/groonga-admin.js +11 -6
  693. data/vendor/local/share/license/pcre/LICENCE +93 -0
  694. data/vendor/local/share/man/man1/pcre-config.1 +92 -0
  695. data/vendor/local/share/man/man1/pcregrep.1 +683 -0
  696. data/vendor/local/share/man/man1/pcretest.1 +1156 -0
  697. data/vendor/local/share/man/man3/pcre.3 +230 -0
  698. data/vendor/local/share/man/man3/pcre16.3 +371 -0
  699. data/vendor/local/share/man/man3/pcre16_assign_jit_stack.3 +59 -0
  700. data/vendor/local/share/man/man3/pcre16_compile.3 +96 -0
  701. data/vendor/local/share/man/man3/pcre16_compile2.3 +101 -0
  702. data/vendor/local/share/man/man3/pcre16_config.3 +79 -0
  703. data/vendor/local/share/man/man3/pcre16_copy_named_substring.3 +51 -0
  704. data/vendor/local/share/man/man3/pcre16_copy_substring.3 +47 -0
  705. data/vendor/local/share/man/man3/pcre16_dfa_exec.3 +118 -0
  706. data/vendor/local/share/man/man3/pcre16_exec.3 +99 -0
  707. data/vendor/local/share/man/man3/pcre16_free_study.3 +31 -0
  708. data/vendor/local/share/man/man3/pcre16_free_substring.3 +31 -0
  709. data/vendor/local/share/man/man3/pcre16_free_substring_list.3 +31 -0
  710. data/vendor/local/share/man/man3/pcre16_fullinfo.3 +103 -0
  711. data/vendor/local/share/man/man3/pcre16_get_named_substring.3 +54 -0
  712. data/vendor/local/share/man/man3/pcre16_get_stringnumber.3 +43 -0
  713. data/vendor/local/share/man/man3/pcre16_get_stringtable_entries.3 +46 -0
  714. data/vendor/local/share/man/man3/pcre16_get_substring.3 +50 -0
  715. data/vendor/local/share/man/man3/pcre16_get_substring_list.3 +47 -0
  716. data/vendor/local/share/man/man3/pcre16_jit_exec.3 +96 -0
  717. data/vendor/local/share/man/man3/pcre16_jit_stack_alloc.3 +43 -0
  718. data/vendor/local/share/man/man3/pcre16_jit_stack_free.3 +35 -0
  719. data/vendor/local/share/man/man3/pcre16_maketables.3 +33 -0
  720. data/vendor/local/share/man/man3/pcre16_pattern_to_host_byte_order.3 +44 -0
  721. data/vendor/local/share/man/man3/pcre16_refcount.3 +36 -0
  722. data/vendor/local/share/man/man3/pcre16_study.3 +54 -0
  723. data/vendor/local/share/man/man3/pcre16_utf16_to_host_byte_order.3 +45 -0
  724. data/vendor/local/share/man/man3/pcre16_version.3 +31 -0
  725. data/vendor/local/share/man/man3/pcre32.3 +369 -0
  726. data/vendor/local/share/man/man3/pcre32_assign_jit_stack.3 +59 -0
  727. data/vendor/local/share/man/man3/pcre32_compile.3 +96 -0
  728. data/vendor/local/share/man/man3/pcre32_compile2.3 +101 -0
  729. data/vendor/local/share/man/man3/pcre32_config.3 +79 -0
  730. data/vendor/local/share/man/man3/pcre32_copy_named_substring.3 +51 -0
  731. data/vendor/local/share/man/man3/pcre32_copy_substring.3 +47 -0
  732. data/vendor/local/share/man/man3/pcre32_dfa_exec.3 +118 -0
  733. data/vendor/local/share/man/man3/pcre32_exec.3 +99 -0
  734. data/vendor/local/share/man/man3/pcre32_free_study.3 +31 -0
  735. data/vendor/local/share/man/man3/pcre32_free_substring.3 +31 -0
  736. data/vendor/local/share/man/man3/pcre32_free_substring_list.3 +31 -0
  737. data/vendor/local/share/man/man3/pcre32_fullinfo.3 +103 -0
  738. data/vendor/local/share/man/man3/pcre32_get_named_substring.3 +54 -0
  739. data/vendor/local/share/man/man3/pcre32_get_stringnumber.3 +43 -0
  740. data/vendor/local/share/man/man3/pcre32_get_stringtable_entries.3 +46 -0
  741. data/vendor/local/share/man/man3/pcre32_get_substring.3 +50 -0
  742. data/vendor/local/share/man/man3/pcre32_get_substring_list.3 +47 -0
  743. data/vendor/local/share/man/man3/pcre32_jit_exec.3 +96 -0
  744. data/vendor/local/share/man/man3/pcre32_jit_stack_alloc.3 +43 -0
  745. data/vendor/local/share/man/man3/pcre32_jit_stack_free.3 +35 -0
  746. data/vendor/local/share/man/man3/pcre32_maketables.3 +33 -0
  747. data/vendor/local/share/man/man3/pcre32_pattern_to_host_byte_order.3 +44 -0
  748. data/vendor/local/share/man/man3/pcre32_refcount.3 +36 -0
  749. data/vendor/local/share/man/man3/pcre32_study.3 +54 -0
  750. data/vendor/local/share/man/man3/pcre32_utf32_to_host_byte_order.3 +45 -0
  751. data/vendor/local/share/man/man3/pcre32_version.3 +31 -0
  752. data/vendor/local/share/man/man3/pcre_assign_jit_stack.3 +59 -0
  753. data/vendor/local/share/man/man3/pcre_compile.3 +96 -0
  754. data/vendor/local/share/man/man3/pcre_compile2.3 +101 -0
  755. data/vendor/local/share/man/man3/pcre_config.3 +79 -0
  756. data/vendor/local/share/man/man3/pcre_copy_named_substring.3 +51 -0
  757. data/vendor/local/share/man/man3/pcre_copy_substring.3 +47 -0
  758. data/vendor/local/share/man/man3/pcre_dfa_exec.3 +118 -0
  759. data/vendor/local/share/man/man3/pcre_exec.3 +99 -0
  760. data/vendor/local/share/man/man3/pcre_free_study.3 +31 -0
  761. data/vendor/local/share/man/man3/pcre_free_substring.3 +31 -0
  762. data/vendor/local/share/man/man3/pcre_free_substring_list.3 +31 -0
  763. data/vendor/local/share/man/man3/pcre_fullinfo.3 +103 -0
  764. data/vendor/local/share/man/man3/pcre_get_named_substring.3 +54 -0
  765. data/vendor/local/share/man/man3/pcre_get_stringnumber.3 +43 -0
  766. data/vendor/local/share/man/man3/pcre_get_stringtable_entries.3 +46 -0
  767. data/vendor/local/share/man/man3/pcre_get_substring.3 +50 -0
  768. data/vendor/local/share/man/man3/pcre_get_substring_list.3 +47 -0
  769. data/vendor/local/share/man/man3/pcre_jit_exec.3 +96 -0
  770. data/vendor/local/share/man/man3/pcre_jit_stack_alloc.3 +43 -0
  771. data/vendor/local/share/man/man3/pcre_jit_stack_free.3 +35 -0
  772. data/vendor/local/share/man/man3/pcre_maketables.3 +33 -0
  773. data/vendor/local/share/man/man3/pcre_pattern_to_host_byte_order.3 +44 -0
  774. data/vendor/local/share/man/man3/pcre_refcount.3 +36 -0
  775. data/vendor/local/share/man/man3/pcre_study.3 +54 -0
  776. data/vendor/local/share/man/man3/pcre_utf16_to_host_byte_order.3 +45 -0
  777. data/vendor/local/share/man/man3/pcre_utf32_to_host_byte_order.3 +45 -0
  778. data/vendor/local/share/man/man3/pcre_version.3 +31 -0
  779. data/vendor/local/share/man/man3/pcreapi.3 +2918 -0
  780. data/vendor/local/share/man/man3/pcrebuild.3 +550 -0
  781. data/vendor/local/share/man/man3/pcrecallout.3 +255 -0
  782. data/vendor/local/share/man/man3/pcrecompat.3 +200 -0
  783. data/vendor/local/share/man/man3/pcrecpp.3 +348 -0
  784. data/vendor/local/share/man/man3/pcredemo.3 +424 -0
  785. data/vendor/local/share/man/man3/pcrejit.3 +431 -0
  786. data/vendor/local/share/man/man3/pcrelimits.3 +71 -0
  787. data/vendor/local/share/man/man3/pcrematching.3 +214 -0
  788. data/vendor/local/share/man/man3/pcrepartial.3 +476 -0
  789. data/vendor/local/share/man/man3/pcrepattern.3 +3301 -0
  790. data/vendor/local/share/man/man3/pcreperform.3 +177 -0
  791. data/vendor/local/share/man/man3/pcreposix.3 +267 -0
  792. data/vendor/local/share/man/man3/pcreprecompile.3 +155 -0
  793. data/vendor/local/share/man/man3/pcresample.3 +99 -0
  794. data/vendor/local/share/man/man3/pcrestack.3 +215 -0
  795. data/vendor/local/share/man/man3/pcresyntax.3 +540 -0
  796. data/vendor/local/share/man/man3/pcreunicode.3 +249 -0
  797. metadata +255 -59
@@ -0,0 +1,2921 @@
1
+ <html>
2
+ <head>
3
+ <title>pcreapi specification</title>
4
+ </head>
5
+ <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
6
+ <h1>pcreapi man page</h1>
7
+ <p>
8
+ Return to the <a href="index.html">PCRE index page</a>.
9
+ </p>
10
+ <p>
11
+ This page is part of the PCRE HTML documentation. It was generated automatically
12
+ from the original man page. If there is any nonsense in it, please consult the
13
+ man page, in case the conversion went wrong.
14
+ <br>
15
+ <ul>
16
+ <li><a name="TOC1" href="#SEC1">PCRE NATIVE API BASIC FUNCTIONS</a>
17
+ <li><a name="TOC2" href="#SEC2">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a>
18
+ <li><a name="TOC3" href="#SEC3">PCRE NATIVE API AUXILIARY FUNCTIONS</a>
19
+ <li><a name="TOC4" href="#SEC4">PCRE NATIVE API INDIRECTED FUNCTIONS</a>
20
+ <li><a name="TOC5" href="#SEC5">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
21
+ <li><a name="TOC6" href="#SEC6">PCRE API OVERVIEW</a>
22
+ <li><a name="TOC7" href="#SEC7">NEWLINES</a>
23
+ <li><a name="TOC8" href="#SEC8">MULTITHREADING</a>
24
+ <li><a name="TOC9" href="#SEC9">SAVING PRECOMPILED PATTERNS FOR LATER USE</a>
25
+ <li><a name="TOC10" href="#SEC10">CHECKING BUILD-TIME OPTIONS</a>
26
+ <li><a name="TOC11" href="#SEC11">COMPILING A PATTERN</a>
27
+ <li><a name="TOC12" href="#SEC12">COMPILATION ERROR CODES</a>
28
+ <li><a name="TOC13" href="#SEC13">STUDYING A PATTERN</a>
29
+ <li><a name="TOC14" href="#SEC14">LOCALE SUPPORT</a>
30
+ <li><a name="TOC15" href="#SEC15">INFORMATION ABOUT A PATTERN</a>
31
+ <li><a name="TOC16" href="#SEC16">REFERENCE COUNTS</a>
32
+ <li><a name="TOC17" href="#SEC17">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
33
+ <li><a name="TOC18" href="#SEC18">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
34
+ <li><a name="TOC19" href="#SEC19">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
35
+ <li><a name="TOC20" href="#SEC20">DUPLICATE SUBPATTERN NAMES</a>
36
+ <li><a name="TOC21" href="#SEC21">FINDING ALL POSSIBLE MATCHES</a>
37
+ <li><a name="TOC22" href="#SEC22">OBTAINING AN ESTIMATE OF STACK USAGE</a>
38
+ <li><a name="TOC23" href="#SEC23">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
39
+ <li><a name="TOC24" href="#SEC24">SEE ALSO</a>
40
+ <li><a name="TOC25" href="#SEC25">AUTHOR</a>
41
+ <li><a name="TOC26" href="#SEC26">REVISION</a>
42
+ </ul>
43
+ <P>
44
+ <b>#include &#60;pcre.h&#62;</b>
45
+ </P>
46
+ <br><a name="SEC1" href="#TOC1">PCRE NATIVE API BASIC FUNCTIONS</a><br>
47
+ <P>
48
+ <b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
49
+ <b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
50
+ <b> const unsigned char *<i>tableptr</i>);</b>
51
+ <br>
52
+ <br>
53
+ <b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
54
+ <b> int *<i>errorcodeptr</i>,</b>
55
+ <b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
56
+ <b> const unsigned char *<i>tableptr</i>);</b>
57
+ <br>
58
+ <br>
59
+ <b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
60
+ <b> const char **<i>errptr</i>);</b>
61
+ <br>
62
+ <br>
63
+ <b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
64
+ <br>
65
+ <br>
66
+ <b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
67
+ <b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
68
+ <b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
69
+ <br>
70
+ <br>
71
+ <b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
72
+ <b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
73
+ <b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
74
+ <b> int *<i>workspace</i>, int <i>wscount</i>);</b>
75
+ </P>
76
+ <br><a name="SEC2" href="#TOC1">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a><br>
77
+ <P>
78
+ <b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
79
+ <b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
80
+ <b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
81
+ <b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
82
+ <br>
83
+ <br>
84
+ <b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
85
+ <b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
86
+ <b> int <i>buffersize</i>);</b>
87
+ <br>
88
+ <br>
89
+ <b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
90
+ <b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
91
+ <b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
92
+ <b> const char **<i>stringptr</i>);</b>
93
+ <br>
94
+ <br>
95
+ <b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
96
+ <b> const char *<i>name</i>);</b>
97
+ <br>
98
+ <br>
99
+ <b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
100
+ <b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
101
+ <br>
102
+ <br>
103
+ <b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
104
+ <b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
105
+ <b> const char **<i>stringptr</i>);</b>
106
+ <br>
107
+ <br>
108
+ <b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
109
+ <b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
110
+ <br>
111
+ <br>
112
+ <b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
113
+ <br>
114
+ <br>
115
+ <b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
116
+ </P>
117
+ <br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
118
+ <P>
119
+ <b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
120
+ <b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
121
+ <b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
122
+ <b> pcre_jit_stack *<i>jstack</i>);</b>
123
+ <br>
124
+ <br>
125
+ <b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
126
+ <br>
127
+ <br>
128
+ <b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
129
+ <br>
130
+ <br>
131
+ <b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
132
+ <b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
133
+ <br>
134
+ <br>
135
+ <b>const unsigned char *pcre_maketables(void);</b>
136
+ <br>
137
+ <br>
138
+ <b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
139
+ <b> int <i>what</i>, void *<i>where</i>);</b>
140
+ <br>
141
+ <br>
142
+ <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
143
+ <br>
144
+ <br>
145
+ <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
146
+ <br>
147
+ <br>
148
+ <b>const char *pcre_version(void);</b>
149
+ <br>
150
+ <br>
151
+ <b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
152
+ <b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
153
+ </P>
154
+ <br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
155
+ <P>
156
+ <b>void *(*pcre_malloc)(size_t);</b>
157
+ <br>
158
+ <br>
159
+ <b>void (*pcre_free)(void *);</b>
160
+ <br>
161
+ <br>
162
+ <b>void *(*pcre_stack_malloc)(size_t);</b>
163
+ <br>
164
+ <br>
165
+ <b>void (*pcre_stack_free)(void *);</b>
166
+ <br>
167
+ <br>
168
+ <b>int (*pcre_callout)(pcre_callout_block *);</b>
169
+ <br>
170
+ <br>
171
+ <b>int (*pcre_stack_guard)(void);</b>
172
+ </P>
173
+ <br><a name="SEC5" href="#TOC1">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
174
+ <P>
175
+ As well as support for 8-bit character strings, PCRE also supports 16-bit
176
+ strings (from release 8.30) and 32-bit strings (from release 8.32), by means of
177
+ two additional libraries. They can be built as well as, or instead of, the
178
+ 8-bit library. To avoid too much complication, this document describes the
179
+ 8-bit versions of the functions, with only occasional references to the 16-bit
180
+ and 32-bit libraries.
181
+ </P>
182
+ <P>
183
+ The 16-bit and 32-bit functions operate in the same way as their 8-bit
184
+ counterparts; they just use different data types for their arguments and
185
+ results, and their names start with <b>pcre16_</b> or <b>pcre32_</b> instead of
186
+ <b>pcre_</b>. For every option that has UTF8 in its name (for example,
187
+ PCRE_UTF8), there are corresponding 16-bit and 32-bit names with UTF8 replaced
188
+ by UTF16 or UTF32, respectively. This facility is in fact just cosmetic; the
189
+ 16-bit and 32-bit option names define the same bit values.
190
+ </P>
191
+ <P>
192
+ References to bytes and UTF-8 in this document should be read as references to
193
+ 16-bit data units and UTF-16 when using the 16-bit library, or 32-bit data
194
+ units and UTF-32 when using the 32-bit library, unless specified otherwise.
195
+ More details of the specific differences for the 16-bit and 32-bit libraries
196
+ are given in the
197
+ <a href="pcre16.html"><b>pcre16</b></a>
198
+ and
199
+ <a href="pcre32.html"><b>pcre32</b></a>
200
+ pages.
201
+ </P>
202
+ <br><a name="SEC6" href="#TOC1">PCRE API OVERVIEW</a><br>
203
+ <P>
204
+ PCRE has its own native API, which is described in this document. There are
205
+ also some wrapper functions (for the 8-bit library only) that correspond to the
206
+ POSIX regular expression API, but they do not give access to all the
207
+ functionality. They are described in the
208
+ <a href="pcreposix.html"><b>pcreposix</b></a>
209
+ documentation. Both of these APIs define a set of C function calls. A C++
210
+ wrapper (again for the 8-bit library only) is also distributed with PCRE. It is
211
+ documented in the
212
+ <a href="pcrecpp.html"><b>pcrecpp</b></a>
213
+ page.
214
+ </P>
215
+ <P>
216
+ The native API C function prototypes are defined in the header file
217
+ <b>pcre.h</b>, and on Unix-like systems the (8-bit) library itself is called
218
+ <b>libpcre</b>. It can normally be accessed by adding <b>-lpcre</b> to the
219
+ command for linking an application that uses PCRE. The header file defines the
220
+ macros PCRE_MAJOR and PCRE_MINOR to contain the major and minor release numbers
221
+ for the library. Applications can use these to include support for different
222
+ releases of PCRE.
223
+ </P>
224
+ <P>
225
+ In a Windows environment, if you want to statically link an application program
226
+ against a non-dll <b>pcre.a</b> file, you must define PCRE_STATIC before
227
+ including <b>pcre.h</b> or <b>pcrecpp.h</b>, because otherwise the
228
+ <b>pcre_malloc()</b> and <b>pcre_free()</b> exported functions will be declared
229
+ <b>__declspec(dllimport)</b>, with unwanted results.
230
+ </P>
231
+ <P>
232
+ The functions <b>pcre_compile()</b>, <b>pcre_compile2()</b>, <b>pcre_study()</b>,
233
+ and <b>pcre_exec()</b> are used for compiling and matching regular expressions
234
+ in a Perl-compatible manner. A sample program that demonstrates the simplest
235
+ way of using them is provided in the file called <i>pcredemo.c</i> in the PCRE
236
+ source distribution. A listing of this program is given in the
237
+ <a href="pcredemo.html"><b>pcredemo</b></a>
238
+ documentation, and the
239
+ <a href="pcresample.html"><b>pcresample</b></a>
240
+ documentation describes how to compile and run it.
241
+ </P>
242
+ <P>
243
+ Just-in-time compiler support is an optional feature of PCRE that can be built
244
+ in appropriate hardware environments. It greatly speeds up the matching
245
+ performance of many patterns. Simple programs can easily request that it be
246
+ used if available, by setting an option that is ignored when it is not
247
+ relevant. More complicated programs might need to make use of the functions
248
+ <b>pcre_jit_stack_alloc()</b>, <b>pcre_jit_stack_free()</b>, and
249
+ <b>pcre_assign_jit_stack()</b> in order to control the JIT code's memory usage.
250
+ </P>
251
+ <P>
252
+ From release 8.32 there is also a direct interface for JIT execution, which
253
+ gives improved performance. The JIT-specific functions are discussed in the
254
+ <a href="pcrejit.html"><b>pcrejit</b></a>
255
+ documentation.
256
+ </P>
257
+ <P>
258
+ A second matching function, <b>pcre_dfa_exec()</b>, which is not
259
+ Perl-compatible, is also provided. This uses a different algorithm for the
260
+ matching. The alternative algorithm finds all possible matches (at a given
261
+ point in the subject), and scans the subject just once (unless there are
262
+ lookbehind assertions). However, this algorithm does not return captured
263
+ substrings. A description of the two matching algorithms and their advantages
264
+ and disadvantages is given in the
265
+ <a href="pcrematching.html"><b>pcrematching</b></a>
266
+ documentation.
267
+ </P>
268
+ <P>
269
+ In addition to the main compiling and matching functions, there are convenience
270
+ functions for extracting captured substrings from a subject string that is
271
+ matched by <b>pcre_exec()</b>. They are:
272
+ <pre>
273
+ <b>pcre_copy_substring()</b>
274
+ <b>pcre_copy_named_substring()</b>
275
+ <b>pcre_get_substring()</b>
276
+ <b>pcre_get_named_substring()</b>
277
+ <b>pcre_get_substring_list()</b>
278
+ <b>pcre_get_stringnumber()</b>
279
+ <b>pcre_get_stringtable_entries()</b>
280
+ </pre>
281
+ <b>pcre_free_substring()</b> and <b>pcre_free_substring_list()</b> are also
282
+ provided, to free the memory used for extracted strings.
283
+ </P>
284
+ <P>
285
+ The function <b>pcre_maketables()</b> is used to build a set of character tables
286
+ in the current locale for passing to <b>pcre_compile()</b>, <b>pcre_exec()</b>,
287
+ or <b>pcre_dfa_exec()</b>. This is an optional facility that is provided for
288
+ specialist use. Most commonly, no special tables are passed, in which case
289
+ internal tables that are generated when PCRE is built are used.
290
+ </P>
291
+ <P>
292
+ The function <b>pcre_fullinfo()</b> is used to find out information about a
293
+ compiled pattern. The function <b>pcre_version()</b> returns a pointer to a
294
+ string containing the version of PCRE and its date of release.
295
+ </P>
296
+ <P>
297
+ The function <b>pcre_refcount()</b> maintains a reference count in a data block
298
+ containing a compiled pattern. This is provided for the benefit of
299
+ object-oriented applications.
300
+ </P>
301
+ <P>
302
+ The global variables <b>pcre_malloc</b> and <b>pcre_free</b> initially contain
303
+ the entry points of the standard <b>malloc()</b> and <b>free()</b> functions,
304
+ respectively. PCRE calls the memory management functions via these variables,
305
+ so a calling program can replace them if it wishes to intercept the calls. This
306
+ should be done before calling any PCRE functions.
307
+ </P>
308
+ <P>
309
+ The global variables <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> are also
310
+ indirections to memory management functions. These special functions are used
311
+ only when PCRE is compiled to use the heap for remembering data, instead of
312
+ recursive function calls, when running the <b>pcre_exec()</b> function. See the
313
+ <a href="pcrebuild.html"><b>pcrebuild</b></a>
314
+ documentation for details of how to do this. It is a non-standard way of
315
+ building PCRE, for use in environments that have limited stacks. Because of the
316
+ greater use of memory management, it runs more slowly. Separate functions are
317
+ provided so that special-purpose external code can be used for this case. When
318
+ used, these functions always allocate memory blocks of the same size. There is
319
+ a discussion about PCRE's stack usage in the
320
+ <a href="pcrestack.html"><b>pcrestack</b></a>
321
+ documentation.
322
+ </P>
323
+ <P>
324
+ The global variable <b>pcre_callout</b> initially contains NULL. It can be set
325
+ by the caller to a "callout" function, which PCRE will then call at specified
326
+ points during a matching operation. Details are given in the
327
+ <a href="pcrecallout.html"><b>pcrecallout</b></a>
328
+ documentation.
329
+ </P>
330
+ <P>
331
+ The global variable <b>pcre_stack_guard</b> initially contains NULL. It can be
332
+ set by the caller to a function that is called by PCRE whenever it starts
333
+ to compile a parenthesized part of a pattern. When parentheses are nested, PCRE
334
+ uses recursive function calls, which use up the system stack. This function is
335
+ provided so that applications with restricted stacks can force a compilation
336
+ error if the stack runs out. The function should return zero if all is well, or
337
+ non-zero to force an error.
338
+ <a name="newlines"></a></P>
339
+ <br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
340
+ <P>
341
+ PCRE supports five different conventions for indicating line breaks in
342
+ strings: a single CR (carriage return) character, a single LF (linefeed)
343
+ character, the two-character sequence CRLF, any of the three preceding, or any
344
+ Unicode newline sequence. The Unicode newline sequences are the three just
345
+ mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed,
346
+ U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
347
+ (paragraph separator, U+2029).
348
+ </P>
349
+ <P>
350
+ Each of the first three conventions is used by at least one operating system as
351
+ its standard newline sequence. When PCRE is built, a default can be specified.
352
+ If none is specified, the default is LF, which is the Unix standard. When PCRE is run, the
353
+ default can be overridden, either when a pattern is compiled, or when it is
354
+ matched.
355
+ </P>
356
+ <P>
357
+ At compile time, the newline convention can be specified by the <i>options</i>
358
+ argument of <b>pcre_compile()</b>, or it can be specified by special text at the
359
+ start of the pattern itself; this overrides any other settings. See the
360
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
361
+ page for details of the special character sequences.
362
+ </P>
363
+ <P>
364
+ In the PCRE documentation the word "newline" is used to mean "the character or
365
+ pair of characters that indicate a line break". The choice of newline
366
+ convention affects the handling of the dot, circumflex, and dollar
367
+ metacharacters, the handling of #-comments in /x mode, and, when CRLF is a
368
+ recognized line ending sequence, the match position advancement for a
369
+ non-anchored pattern. There is more detail about this in the
370
+ <a href="#execoptions">section on <b>pcre_exec()</b> options</a>
371
+ below.
372
+ </P>
373
+ <P>
374
+ The choice of newline convention does not affect the interpretation of
375
+ the \n or \r escape sequences, nor does it affect what \R matches, which is
376
+ controlled in a similar way, but by separate options.
377
+ </P>
378
+ <br><a name="SEC8" href="#TOC1">MULTITHREADING</a><br>
379
+ <P>
380
+ The PCRE functions can be used in multi-threading applications, with the
381
+ proviso that the memory management functions pointed to by <b>pcre_malloc</b>,
382
+ <b>pcre_free</b>, <b>pcre_stack_malloc</b>, and <b>pcre_stack_free</b>, and the
383
+ callout and stack-checking functions pointed to by <b>pcre_callout</b> and
384
+ <b>pcre_stack_guard</b>, are shared by all threads.
385
+ </P>
386
+ <P>
387
+ The compiled form of a regular expression is not altered during matching, so
388
+ the same compiled pattern can safely be used by several threads at once.
389
+ </P>
390
+ <P>
391
+ If the just-in-time optimization feature is being used, it needs separate
392
+ memory stack areas for each thread. See the
393
+ <a href="pcrejit.html"><b>pcrejit</b></a>
394
+ documentation for more details.
395
+ </P>
396
+ <br><a name="SEC9" href="#TOC1">SAVING PRECOMPILED PATTERNS FOR LATER USE</a><br>
397
+ <P>
398
+ The compiled form of a regular expression can be saved and re-used at a later
399
+ time, possibly by a different program, and even on a host other than the one on
400
+ which it was compiled. Details are given in the
401
+ <a href="pcreprecompile.html"><b>pcreprecompile</b></a>
402
+ documentation, which includes a description of the
403
+ <b>pcre_pattern_to_host_byte_order()</b> function. However, compiling a regular
404
+ expression with one version of PCRE for use with a different version is not
405
+ guaranteed to work and may cause crashes.
406
+ </P>
407
+ <br><a name="SEC10" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
408
+ <P>
409
+ <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
410
+ </P>
411
+ <P>
412
+ The function <b>pcre_config()</b> makes it possible for a PCRE client to
413
+ discover which optional features have been compiled into the PCRE library. The
414
+ <a href="pcrebuild.html"><b>pcrebuild</b></a>
415
+ documentation has more details about these optional features.
416
+ </P>
417
+ <P>
418
+ The first argument for <b>pcre_config()</b> is an integer, specifying which
419
+ information is required; the second argument is a pointer to a variable into
420
+ which the information is placed. The returned value is zero on success, or the
421
+ negative error code PCRE_ERROR_BADOPTION if the value in the first argument is
422
+ not recognized. The following information is available:
423
+ <pre>
424
+ PCRE_CONFIG_UTF8
425
+ </pre>
426
+ The output is an integer that is set to one if UTF-8 support is available;
427
+ otherwise it is set to zero. This value should normally be given to the 8-bit
428
+ version of this function, <b>pcre_config()</b>. If it is given to the 16-bit
429
+ or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION.
430
+ <pre>
431
+ PCRE_CONFIG_UTF16
432
+ </pre>
433
+ The output is an integer that is set to one if UTF-16 support is available;
434
+ otherwise it is set to zero. This value should normally be given to the 16-bit
435
+ version of this function, <b>pcre16_config()</b>. If it is given to the 8-bit
436
+ or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION.
437
+ <pre>
438
+ PCRE_CONFIG_UTF32
439
+ </pre>
440
+ The output is an integer that is set to one if UTF-32 support is available;
441
+ otherwise it is set to zero. This value should normally be given to the 32-bit
442
+ version of this function, <b>pcre32_config()</b>. If it is given to the 8-bit
443
+ or 16-bit version of this function, the result is PCRE_ERROR_BADOPTION.
444
+ <pre>
445
+ PCRE_CONFIG_UNICODE_PROPERTIES
446
+ </pre>
447
+ The output is an integer that is set to one if support for Unicode character
448
+ properties is available; otherwise it is set to zero.
449
+ <pre>
450
+ PCRE_CONFIG_JIT
451
+ </pre>
452
+ The output is an integer that is set to one if support for just-in-time
453
+ compiling is available; otherwise it is set to zero.
454
+ <pre>
455
+ PCRE_CONFIG_JITTARGET
456
+ </pre>
457
+ The output is a pointer to a zero-terminated "const char *" string. If JIT
458
+ support is available, the string contains the name of the architecture for
459
+ which the JIT compiler is configured, for example "x86 32bit (little endian +
460
+ unaligned)". If JIT support is not available, the result is NULL.
461
+ <pre>
462
+ PCRE_CONFIG_NEWLINE
463
+ </pre>
464
+ The output is an integer whose value specifies the default character sequence
465
+ that is recognized as meaning "newline". The values that are supported in
466
+ ASCII/Unicode environments are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for
467
+ ANYCRLF, and -1 for ANY. In EBCDIC environments, CR, ANYCRLF, and ANY yield the
468
+ same values. However, the value for LF is normally 21, though some EBCDIC
469
+ environments use 37. The corresponding values for CRLF are 3349 and 3365. The
470
+ default should normally correspond to the standard sequence for your operating
471
+ system.
472
+ <pre>
473
+ PCRE_CONFIG_BSR
474
+ </pre>
475
+ The output is an integer whose value indicates what character sequences the \R
476
+ escape sequence matches by default. A value of 0 means that \R matches any
477
+ Unicode line ending sequence; a value of 1 means that \R matches only CR, LF,
478
+ or CRLF. The default can be overridden when a pattern is compiled or matched.
479
+ <pre>
480
+ PCRE_CONFIG_LINK_SIZE
481
+ </pre>
482
+ The output is an integer that contains the number of bytes used for internal
483
+ linkage in compiled regular expressions. For the 8-bit library, the value can
484
+ be 2, 3, or 4. For the 16-bit library, the value is either 2 or 4 and is still
485
+ a number of bytes. For the 32-bit library, the value is either 2 or 4 and is
486
+ still a number of bytes. The default value of 2 is sufficient for all but the
487
+ most massive patterns, since it allows the compiled pattern to be up to 64K in
488
+ size. Larger values allow larger regular expressions to be compiled, at the
489
+ expense of slower matching.
490
+ <pre>
491
+ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
492
+ </pre>
493
+ The output is an integer that contains the threshold above which the POSIX
494
+ interface uses <b>malloc()</b> for output vectors. Further details are given in
495
+ the
496
+ <a href="pcreposix.html"><b>pcreposix</b></a>
497
+ documentation.
498
+ <pre>
499
+ PCRE_CONFIG_PARENS_LIMIT
500
+ </pre>
501
+ The output is a long integer that gives the maximum depth of nesting of
502
+ parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
503
+ of system stack used when a pattern is compiled. It is specified when PCRE is
504
+ built; the default is 250. This limit does not take into account the stack that
505
+ may already be used by the calling application. For finer control over
506
+ compilation stack usage, you can set a pointer to an external checking function
507
+ in <b>pcre_stack_guard</b>.
508
+ <pre>
509
+ PCRE_CONFIG_MATCH_LIMIT
510
+ </pre>
511
+ The output is a long integer that gives the default limit for the number of
512
+ internal matching function calls in a <b>pcre_exec()</b> execution. Further
513
+ details are given with <b>pcre_exec()</b> below.
514
+ <pre>
515
+ PCRE_CONFIG_MATCH_LIMIT_RECURSION
516
+ </pre>
517
+ The output is a long integer that gives the default limit for the depth of
518
+ recursion when calling the internal matching function in a <b>pcre_exec()</b>
519
+ execution. Further details are given with <b>pcre_exec()</b> below.
520
+ <pre>
521
+ PCRE_CONFIG_STACKRECURSE
522
+ </pre>
523
+ The output is an integer that is set to one if internal recursion when running
524
+ <b>pcre_exec()</b> is implemented by recursive function calls that use the stack
525
+ to remember their state. This is the usual way that PCRE is compiled. The
526
+ output is zero if PCRE was compiled to use blocks of data on the heap instead
527
+ of recursive function calls. In this case, <b>pcre_stack_malloc</b> and
528
+ <b>pcre_stack_free</b> are called to manage memory blocks on the heap, thus
529
+ avoiding the use of the stack.
530
+ </P>
531
+ <br><a name="SEC11" href="#TOC1">COMPILING A PATTERN</a><br>
532
+ <P>
533
+ <b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
534
+ <b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
535
+ <b> const unsigned char *<i>tableptr</i>);</b>
536
+ <br>
537
+ <br>
538
+ <b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
539
+ <b> int *<i>errorcodeptr</i>,</b>
540
+ <b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
541
+ <b> const unsigned char *<i>tableptr</i>);</b>
542
+ </P>
543
+ <P>
544
+ Either of the functions <b>pcre_compile()</b> or <b>pcre_compile2()</b> can be
545
+ called to compile a pattern into an internal form. The only difference between
546
+ the two interfaces is that <b>pcre_compile2()</b> has an additional argument,
547
+ <i>errorcodeptr</i>, via which a numerical error code can be returned. To avoid
548
+ too much repetition, we refer just to <b>pcre_compile()</b> below, but the
549
+ information applies equally to <b>pcre_compile2()</b>.
550
+ </P>
551
+ <P>
552
+ The pattern is a C string terminated by a binary zero, and is passed in the
553
+ <i>pattern</i> argument. A pointer to a single block of memory that is obtained
554
+ via <b>pcre_malloc</b> is returned. This contains the compiled code and related
555
+ data. The <b>pcre</b> type is defined for the returned block; this is a typedef
556
+ for a structure whose contents are not externally defined. It is up to the
557
+ caller to free the memory (via <b>pcre_free</b>) when it is no longer required.
558
+ </P>
559
+ <P>
560
+ Although the compiled code of a PCRE regex is relocatable, that is, it does not
561
+ depend on memory location, the complete <b>pcre</b> data block is not
562
+ fully relocatable, because it may contain a copy of the <i>tableptr</i>
563
+ argument, which is an address (see below).
564
+ </P>
565
+ <P>
566
+ The <i>options</i> argument contains various bit settings that affect the
567
+ compilation. It should be zero if no options are required. The available
568
+ options are described below. Some of them (in particular, those that are
569
+ compatible with Perl, but some others as well) can also be set and unset from
570
+ within the pattern (see the detailed description in the
571
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
572
+ documentation). For those options that can be different in different parts of
573
+ the pattern, the contents of the <i>options</i> argument specify their
574
+ settings at the start of compilation and execution. The PCRE_ANCHORED,
575
+ PCRE_BSR_<i>xxx</i>, PCRE_NEWLINE_<i>xxx</i>, PCRE_NO_UTF8_CHECK, and
576
+ PCRE_NO_START_OPTIMIZE options can be set at the time of matching as well as at
577
+ compile time.
578
+ </P>
579
+ <P>
580
+ If <i>errptr</i> is NULL, <b>pcre_compile()</b> returns NULL immediately.
581
+ Otherwise, if compilation of a pattern fails, <b>pcre_compile()</b> returns
582
+ NULL, and sets the variable pointed to by <i>errptr</i> to point to a textual
583
+ error message. This is a static string that is part of the library. You must
584
+ not try to free it. Normally, the offset from the start of the pattern to the
585
+ data unit that was being processed when the error was discovered is placed in
586
+ the variable pointed to by <i>erroffset</i>, which must not be NULL (if it is,
587
+ an immediate error is given). However, for an invalid UTF-8 or UTF-16 string,
588
+ the offset is that of the first data unit of the failing character.
589
+ </P>
590
+ <P>
591
+ Some errors are not detected until the whole pattern has been scanned; in these
592
+ cases, the offset passed back is the length of the pattern. Note that the
593
+ offset is in data units, not characters, even in a UTF mode. It may sometimes
594
+ point into the middle of a UTF-8 or UTF-16 character.
595
+ </P>
596
+ <P>
597
+ If <b>pcre_compile2()</b> is used instead of <b>pcre_compile()</b>, and the
598
+ <i>errorcodeptr</i> argument is not NULL, a non-zero error code number is
599
+ returned via this argument in the event of an error. This is in addition to the
600
+ textual error message. Error codes and messages are listed below.
601
+ </P>
602
+ <P>
603
+ If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default set of
604
+ character tables that are built when PCRE is compiled, using the default C
605
+ locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
606
+ call to <b>pcre_maketables()</b>. This value is stored with the compiled
607
+ pattern, and used again by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> when the
608
+ pattern is matched. For more discussion, see the section on locale support
609
+ below.
610
+ </P>
611
+ <P>
612
+ This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
613
+ <pre>
614
+ pcre *re;
615
+ const char *error;
616
+ int erroffset;
617
+ re = pcre_compile(
618
+ "^A.*Z", /* the pattern */
619
+ 0, /* default options */
620
+ &amp;error, /* for error message */
621
+ &amp;erroffset, /* for error offset */
622
+ NULL); /* use default character tables */
623
+ </pre>
624
+ The following names for option bits are defined in the <b>pcre.h</b> header
625
+ file:
626
+ <pre>
627
+ PCRE_ANCHORED
628
+ </pre>
629
+ If this bit is set, the pattern is forced to be "anchored", that is, it is
630
+ constrained to match only at the first matching point in the string that is
631
+ being searched (the "subject string"). This effect can also be achieved by
632
+ appropriate constructs in the pattern itself, which is the only way to do it in
633
+ Perl.
634
+ <pre>
635
+ PCRE_AUTO_CALLOUT
636
+ </pre>
637
+ If this bit is set, <b>pcre_compile()</b> automatically inserts callout items,
638
+ all with number 255, before each pattern item. For discussion of the callout
639
+ facility, see the
640
+ <a href="pcrecallout.html"><b>pcrecallout</b></a>
641
+ documentation.
642
+ <pre>
643
+ PCRE_BSR_ANYCRLF
644
+ PCRE_BSR_UNICODE
645
+ </pre>
646
+ These options (which are mutually exclusive) control what the \R escape
647
+ sequence matches. The choice is either to match only CR, LF, or CRLF, or to
648
+ match any Unicode newline sequence. The default is specified when PCRE is
649
+ built. It can be overridden from within the pattern, or by setting an option
650
+ when a compiled pattern is matched.
651
+ <pre>
652
+ PCRE_CASELESS
653
+ </pre>
654
+ If this bit is set, letters in the pattern match both upper and lower case
655
+ letters. It is equivalent to Perl's /i option, and it can be changed within a
656
+ pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the
657
+ concept of case for characters whose values are less than 128, so caseless
658
+ matching is always possible. For characters with higher values, the concept of
659
+ case is supported if PCRE is compiled with Unicode property support, but not
660
+ otherwise. If you want to use caseless matching for characters 128 and above,
661
+ you must ensure that PCRE is compiled with Unicode property support as well as
662
+ with UTF-8 support.
663
+ <pre>
664
+ PCRE_DOLLAR_ENDONLY
665
+ </pre>
666
+ If this bit is set, a dollar metacharacter in the pattern matches only at the
667
+ end of the subject string. Without this option, a dollar also matches
668
+ immediately before a newline at the end of the string (but not before any other
669
+ newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
670
+ There is no equivalent to this option in Perl, and no way to set it within a
671
+ pattern.
672
+ <pre>
673
+ PCRE_DOTALL
674
+ </pre>
675
+ If this bit is set, a dot metacharacter in the pattern matches a character of
676
+ any value, including one that indicates a newline. However, it only ever
677
+ matches one character, even if newlines are coded as CRLF. Without this option,
678
+ a dot does not match when the current position is at a newline. This option is
679
+ equivalent to Perl's /s option, and it can be changed within a pattern by a
680
+ (?s) option setting. A negative class such as [^a] always matches newline
681
+ characters, independent of the setting of this option.
682
+ <pre>
683
+ PCRE_DUPNAMES
684
+ </pre>
685
+ If this bit is set, names used to identify capturing subpatterns need not be
686
+ unique. This can be helpful for certain types of pattern when it is known that
687
+ only one instance of the named subpattern can ever be matched. There are more
688
+ details of named subpatterns below; see also the
689
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
690
+ documentation.
691
+ <pre>
692
+ PCRE_EXTENDED
693
+ </pre>
694
+ If this bit is set, most white space characters in the pattern are totally
695
+ ignored except when escaped or inside a character class. However, white space
696
+ is not allowed within sequences such as (?&#62; that introduce various
697
+ parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
698
+ However, ignorable white space is permitted between an item and a following
699
+ quantifier and between a quantifier and a following + that indicates
700
+ possessiveness.
701
+ </P>
702
+ <P>
703
+ White space formerly did not include the VT character (code 11), because Perl
704
+ did not treat this character as white space. However, Perl changed at release
705
+ 5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
706
+ </P>
707
+ <P>
708
+ PCRE_EXTENDED also causes characters between an unescaped # outside a character
709
+ class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
710
+ equivalent to Perl's /x option, and it can be changed within a pattern by a
711
+ (?x) option setting.
712
+ </P>
713
+ <P>
714
+ Which characters are interpreted as newlines is controlled by the options
715
+ passed to <b>pcre_compile()</b> or by a special sequence at the start of the
716
+ pattern, as described in the section entitled
717
+ <a href="pcrepattern.html#newlines">"Newline conventions"</a>
718
+ in the <b>pcrepattern</b> documentation. Note that the end of this type of
719
+ comment is a literal newline sequence in the pattern; escape sequences that
720
+ happen to represent a newline do not count.
721
+ </P>
722
+ <P>
723
+ This option makes it possible to include comments inside complicated patterns.
724
+ Note, however, that this applies only to data characters. White space characters
725
+ may never appear within special character sequences in a pattern, for example
726
+ within the sequence (?( that introduces a conditional subpattern.
727
+ <pre>
728
+ PCRE_EXTRA
729
+ </pre>
730
+ This option was invented in order to turn on additional functionality of PCRE
731
+ that is incompatible with Perl, but it is currently of very little use. When
732
+ set, any backslash in a pattern that is followed by a letter that has no
733
+ special meaning causes an error, thus reserving these combinations for future
734
+ expansion. By default, as in Perl, a backslash followed by a letter with no
735
+ special meaning is treated as a literal. (Perl can, however, be persuaded to
736
+ give an error for this, by running it with the -w option.) There are at present
737
+ no other features controlled by this option. It can also be set by a (?X)
738
+ option setting within a pattern.
739
+ <pre>
740
+ PCRE_FIRSTLINE
741
+ </pre>
742
+ If this option is set, an unanchored pattern is required to match before or at
743
+ the first newline in the subject string, though the matched text may continue
744
+ over the newline.
745
+ <pre>
746
+ PCRE_JAVASCRIPT_COMPAT
747
+ </pre>
748
+ If this option is set, PCRE's behaviour is changed in some ways so that it is
749
+ compatible with JavaScript rather than Perl. The changes are as follows:
750
+ </P>
751
+ <P>
752
+ (1) A lone closing square bracket in a pattern causes a compile-time error,
753
+ because this is illegal in JavaScript (by default it is treated as a data
754
+ character). Thus, the pattern AB]CD becomes illegal when this option is set.
755
+ </P>
756
+ <P>
757
+ (2) At run time, a back reference to an unset subpattern group matches an empty
758
+ string (by default this causes the current matching alternative to fail). A
759
+ pattern such as (\1)(a) succeeds when this option is set (assuming it can find
760
+ an "a" in the subject), whereas it fails by default, for Perl compatibility.
761
+ </P>
762
+ <P>
763
+ (3) \U matches an upper case "U" character; by default \U causes a compile
764
+ time error (Perl uses \U to upper case subsequent characters).
765
+ </P>
766
+ <P>
767
+ (4) \u matches a lower case "u" character unless it is followed by four
768
+ hexadecimal digits, in which case the hexadecimal number defines the code point
769
+ to match. By default, \u causes a compile time error (Perl uses it to upper
770
+ case the following character).
771
+ </P>
772
+ <P>
773
+ (5) \x matches a lower case "x" character unless it is followed by two
774
+ hexadecimal digits, in which case the hexadecimal number defines the code point
775
+ to match. By default, as in Perl, a hexadecimal number is always expected after
776
+ \x, but it may have zero, one, or two digits (so, for example, \xz matches a
777
+ binary zero character followed by z).
778
+ <pre>
779
+ PCRE_MULTILINE
780
+ </pre>
781
+ By default, for the purposes of matching "start of line" and "end of line",
782
+ PCRE treats the subject string as consisting of a single line of characters,
783
+ even if it actually contains newlines. The "start of line" metacharacter (^)
784
+ matches only at the start of the string, and the "end of line" metacharacter
785
+ ($) matches only at the end of the string, or before a terminating newline
786
+ (except when PCRE_DOLLAR_ENDONLY is set). Note, however, that unless
787
+ PCRE_DOTALL is set, the "any character" metacharacter (.) does not match at a
788
+ newline. This behaviour (for ^, $, and dot) is the same as Perl.
789
+ </P>
790
+ <P>
791
+ When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
792
+ match immediately following or immediately before internal newlines in the
793
+ subject string, respectively, as well as at the very start and end. This is
794
+ equivalent to Perl's /m option, and it can be changed within a pattern by a
795
+ (?m) option setting. If there are no newlines in a subject string, or no
796
+ occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
797
+ <pre>
798
+ PCRE_NEVER_UTF
799
+ </pre>
800
+ This option locks out interpretation of the pattern as UTF-8 (or UTF-16 or
801
+ UTF-32 in the 16-bit and 32-bit libraries). In particular, it prevents the
802
+ creator of the pattern from switching to UTF interpretation by starting the
803
+ pattern with (*UTF). This may be useful in applications that process patterns
804
+ from external sources. The combination of PCRE_UTF8 and PCRE_NEVER_UTF also
805
+ causes an error.
806
+ <pre>
807
+ PCRE_NEWLINE_CR
808
+ PCRE_NEWLINE_LF
809
+ PCRE_NEWLINE_CRLF
810
+ PCRE_NEWLINE_ANYCRLF
811
+ PCRE_NEWLINE_ANY
812
+ </pre>
813
+ These options override the default newline definition that was chosen when PCRE
814
+ was built. Setting the first or the second specifies that a newline is
815
+ indicated by a single character (CR or LF, respectively). Setting
816
+ PCRE_NEWLINE_CRLF specifies that a newline is indicated by the two-character
817
+ CRLF sequence. Setting PCRE_NEWLINE_ANYCRLF specifies that any of the three
818
+ preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies
819
+ that any Unicode newline sequence should be recognized.
820
+ </P>
821
+ <P>
822
+ In an ASCII/Unicode environment, the Unicode newline sequences are the three
823
+ just mentioned, plus the single characters VT (vertical tab, U+000B), FF (form
824
+ feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
825
+ (paragraph separator, U+2029). For the 8-bit library, the last two are
826
+ recognized only in UTF-8 mode.
827
+ </P>
828
+ <P>
829
+ When PCRE is compiled to run in an EBCDIC (mainframe) environment, the code for
830
+ CR is 0x0d, the same as ASCII. However, the character code for LF is normally
831
+ 0x15, though in some EBCDIC environments 0x25 is used. Whichever of these is
832
+ not LF is made to correspond to Unicode's NEL character. EBCDIC codes are all
833
+ less than 256. For more details, see the
834
+ <a href="pcrebuild.html"><b>pcrebuild</b></a>
835
+ documentation.
836
+ </P>
837
+ <P>
838
+ The newline setting in the options word uses three bits that are treated
839
+ as a number, giving eight possibilities. Currently only six are used (default
840
+ plus the five values above). This means that if you set more than one newline
841
+ option, the combination may or may not be sensible. For example,
842
+ PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to PCRE_NEWLINE_CRLF, but
843
+ other combinations may yield unused numbers and cause an error.
844
+ </P>
845
+ <P>
846
+ The only time that a line break in a pattern is specially recognized when
847
+ compiling is when PCRE_EXTENDED is set. CR and LF are white space characters,
848
+ and so are ignored in this mode. Also, an unescaped # outside a character class
849
+ indicates a comment that lasts until after the next line break sequence. In
850
+ other circumstances, line break sequences in patterns are treated as literal
851
+ data.
852
+ </P>
853
+ <P>
854
+ The newline option that is set at compile time becomes the default that is used
855
+ for <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, but it can be overridden.
856
+ <pre>
857
+ PCRE_NO_AUTO_CAPTURE
858
+ </pre>
859
+ If this option is set, it disables the use of numbered capturing parentheses in
860
+ the pattern. Any opening parenthesis that is not followed by ? behaves as if it
861
+ were followed by ?: but named parentheses can still be used for capturing (and
862
+ they acquire numbers in the usual way). There is no equivalent of this option
863
+ in Perl.
864
+ <pre>
865
+ PCRE_NO_AUTO_POSSESS
866
+ </pre>
867
+ If this option is set, it disables "auto-possessification". This is an
868
+ optimization that, for example, turns a+b into a++b in order to avoid
869
+ backtracks into a+ that can never be successful. However, if callouts are in
870
+ use, auto-possessification means that some of them are never taken. You can set
871
+ this option if you want the matching functions to do a full unoptimized search
872
+ and run all the callouts, but it is mainly provided for testing purposes.
873
+ <pre>
874
+ PCRE_NO_START_OPTIMIZE
875
+ </pre>
876
+ This is an option that acts at matching time; that is, it is really an option
877
+ for <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. If it is set at compile time,
878
+ it is remembered with the compiled pattern and assumed at matching time. This
879
+ is necessary if you want to use JIT execution, because the JIT compiler needs
880
+ to know whether or not this option is set. For details see the discussion of
881
+ PCRE_NO_START_OPTIMIZE
882
+ <a href="#execoptions">below.</a>
883
+ <pre>
884
+ PCRE_UCP
885
+ </pre>
886
+ This option changes the way PCRE processes \B, \b, \D, \d, \S, \s, \W,
887
+ \w, and some of the POSIX character classes. By default, only ASCII characters
888
+ are recognized, but if PCRE_UCP is set, Unicode properties are used instead to
889
+ classify characters. More details are given in the section on
890
+ <a href="pcrepattern.html#genericchartypes">generic character types</a>
891
+ in the
892
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
893
+ page. If you set PCRE_UCP, matching one of the items it affects takes much
894
+ longer. The option is available only if PCRE has been compiled with Unicode
895
+ property support.
896
+ <pre>
897
+ PCRE_UNGREEDY
898
+ </pre>
899
+ This option inverts the "greediness" of the quantifiers so that they are not
900
+ greedy by default, but become greedy if followed by "?". It is not compatible
901
+ with Perl. It can also be set by a (?U) option setting within the pattern.
902
+ <pre>
903
+ PCRE_UTF8
904
+ </pre>
905
+ This option causes PCRE to regard both the pattern and the subject as strings
906
+ of UTF-8 characters instead of single-byte strings. However, it is available
907
+ only when PCRE is built to include UTF support. If not, the use of this option
908
+ provokes an error. Details of how this option changes the behaviour of PCRE are
909
+ given in the
910
+ <a href="pcreunicode.html"><b>pcreunicode</b></a>
911
+ page.
912
+ <pre>
913
+ PCRE_NO_UTF8_CHECK
914
+ </pre>
915
+ When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
916
+ automatically checked. There is a discussion about the
917
+ <a href="pcreunicode.html#utf8strings">validity of UTF-8 strings</a>
918
+ in the
919
+ <a href="pcreunicode.html"><b>pcreunicode</b></a>
920
+ page. If an invalid UTF-8 sequence is found, <b>pcre_compile()</b> returns an
921
+ error. If you already know that your pattern is valid, and you want to skip
922
+ this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
923
+ When it is set, the effect of passing an invalid UTF-8 string as a pattern is
924
+ undefined. It may cause your program to crash or loop. Note that this option
925
+ can also be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress
926
+ the validity checking of subject strings only. If the same string is being
927
+ matched many times, the option can be safely set for the second and subsequent
928
+ matchings to improve performance.
929
+ </P>
930
+ <br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
931
+ <P>
932
+ The following table lists the error codes that may be returned by
933
+ <b>pcre_compile2()</b>, along with the error messages that may be returned by
934
+ both compiling functions. Note that error messages are always 8-bit ASCII
935
+ strings, even in 16-bit or 32-bit mode. As PCRE has developed, some error codes
936
+ have fallen out of use. To avoid confusion, they have not been re-used.
937
+ <pre>
938
+ 0 no error
939
+ 1 \ at end of pattern
940
+ 2 \c at end of pattern
941
+ 3 unrecognized character follows \
942
+ 4 numbers out of order in {} quantifier
943
+ 5 number too big in {} quantifier
944
+ 6 missing terminating ] for character class
945
+ 7 invalid escape sequence in character class
946
+ 8 range out of order in character class
947
+ 9 nothing to repeat
948
+ 10 [this code is not in use]
949
+ 11 internal error: unexpected repeat
950
+ 12 unrecognized character after (? or (?-
951
+ 13 POSIX named classes are supported only within a class
952
+ 14 missing )
953
+ 15 reference to non-existent subpattern
954
+ 16 erroffset passed as NULL
955
+ 17 unknown option bit(s) set
956
+ 18 missing ) after comment
957
+ 19 [this code is not in use]
958
+ 20 regular expression is too large
959
+ 21 failed to get memory
960
+ 22 unmatched parentheses
961
+ 23 internal error: code overflow
962
+ 24 unrecognized character after (?&#60;
963
+ 25 lookbehind assertion is not fixed length
964
+ 26 malformed number or name after (?(
965
+ 27 conditional group contains more than two branches
966
+ 28 assertion expected after (?(
967
+ 29 (?R or (?[+-]digits must be followed by )
968
+ 30 unknown POSIX class name
969
+ 31 POSIX collating elements are not supported
970
+ 32 this version of PCRE is compiled without UTF support
971
+ 33 [this code is not in use]
972
+ 34 character value in \x{} or \o{} is too large
973
+ 35 invalid condition (?(0)
974
+ 36 \C not allowed in lookbehind assertion
975
+ 37 PCRE does not support \L, \l, \N{name}, \U, or \u
976
+ 38 number after (?C is &#62; 255
977
+ 39 closing ) for (?C expected
978
+ 40 recursive call could loop indefinitely
979
+ 41 unrecognized character after (?P
980
+ 42 syntax error in subpattern name (missing terminator)
981
+ 43 two named subpatterns have the same name
982
+ 44 invalid UTF-8 string (specifically UTF-8)
983
+ 45 support for \P, \p, and \X has not been compiled
984
+ 46 malformed \P or \p sequence
985
+ 47 unknown property name after \P or \p
986
+ 48 subpattern name is too long (maximum 32 characters)
987
+ 49 too many named subpatterns (maximum 10000)
988
+ 50 [this code is not in use]
989
+ 51 octal value is greater than \377 in 8-bit non-UTF-8 mode
990
+ 52 internal error: overran compiling workspace
991
+ 53 internal error: previously-checked referenced subpattern
992
+ not found
993
+ 54 DEFINE group contains more than one branch
994
+ 55 repeating a DEFINE group is not allowed
995
+ 56 inconsistent NEWLINE options
996
+ 57 \g is not followed by a braced, angle-bracketed, or quoted
997
+ name/number or by a plain number
998
+ 58 a numbered reference must not be zero
999
+ 59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
1000
+ 60 (*VERB) not recognized or malformed
1001
+ 61 number is too big
1002
+ 62 subpattern name expected
1003
+ 63 digit expected after (?+
1004
+ 64 ] is an invalid data character in JavaScript compatibility mode
1005
+ 65 different names for subpatterns of the same number are
1006
+ not allowed
1007
+ 66 (*MARK) must have an argument
1008
+ 67 this version of PCRE is not compiled with Unicode property
1009
+ support
1010
+ 68 \c must be followed by an ASCII character
1011
+ 69 \k is not followed by a braced, angle-bracketed, or quoted name
1012
+ 70 internal error: unknown opcode in find_fixedlength()
1013
+ 71 \N is not supported in a class
1014
+ 72 too many forward references
1015
+ 73 disallowed Unicode code point (&#62;= 0xd800 && &#60;= 0xdfff)
1016
+ 74 invalid UTF-16 string (specifically UTF-16)
1017
+ 75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
1018
+ 76 character value in \u.... sequence is too large
1019
+ 77 invalid UTF-32 string (specifically UTF-32)
1020
+ 78 setting UTF is disabled by the application
1021
+ 79 non-hex character in \x{} (closing brace missing?)
1022
+ 80 non-octal character in \o{} (closing brace missing?)
1023
+ 81 missing opening brace after \o
1024
+ 82 parentheses are too deeply nested
1025
+ 83 invalid range in character class
1026
+ 84 group name must start with a non-digit
1027
+ 85 parentheses are too deeply nested (stack check)
1028
+ </pre>
1029
+ The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
1030
+ be used if the limits were changed when PCRE was built.
1031
+ <a name="studyingapattern"></a></P>
1032
+ <br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
1033
+ <P>
1034
+ <b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
1035
+ <b> const char **<i>errptr</i>);</b>
1036
+ </P>
1037
+ <P>
1038
+ If a compiled pattern is going to be used several times, it is worth spending
1039
+ more time analyzing it in order to speed up the time taken for matching. The
1040
+ function <b>pcre_study()</b> takes a pointer to a compiled pattern as its first
1041
+ argument. If studying the pattern produces additional information that will
1042
+ help speed up matching, <b>pcre_study()</b> returns a pointer to a
1043
+ <b>pcre_extra</b> block, in which the <i>study_data</i> field points to the
1044
+ results of the study.
1045
+ </P>
1046
+ <P>
1047
+ The returned value from <b>pcre_study()</b> can be passed directly to
1048
+ <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. However, a <b>pcre_extra</b> block
1049
+ also contains other fields that can be set by the caller before the block is
1050
+ passed; these are described
1051
+ <a href="#extradata">below</a>
1052
+ in the section on matching a pattern.
1053
+ </P>
1054
+ <P>
1055
+ If studying the pattern does not produce any useful information,
1056
+ <b>pcre_study()</b> returns NULL by default. In that circumstance, if the
1057
+ calling program wants to pass any of the other fields to <b>pcre_exec()</b> or
1058
+ <b>pcre_dfa_exec()</b>, it must set up its own <b>pcre_extra</b> block. However,
1059
+ if <b>pcre_study()</b> is called with the PCRE_STUDY_EXTRA_NEEDED option, it
1060
+ returns a <b>pcre_extra</b> block even if studying did not find any additional
1061
+ information. It may still return NULL, however, if an error occurs in
1062
+ <b>pcre_study()</b>.
1063
+ </P>
1064
+ <P>
1065
+ The second argument of <b>pcre_study()</b> contains option bits. There are three
1066
+ further options in addition to PCRE_STUDY_EXTRA_NEEDED:
1067
+ <pre>
1068
+ PCRE_STUDY_JIT_COMPILE
1069
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1070
+ PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
1071
+ </pre>
1072
+ If any of these are set, and the just-in-time compiler is available, the
1073
+ pattern is further compiled into machine code that executes much faster than
1074
+ the <b>pcre_exec()</b> interpretive matching function. If the just-in-time
1075
+ compiler is not available, these options are ignored. All undefined bits in the
1076
+ <i>options</i> argument must be zero.
1077
+ </P>
1078
+ <P>
1079
+ JIT compilation is a heavyweight optimization. It can take some time for
1080
+ patterns to be analyzed, and for one-off matches and simple patterns the
1081
+ benefit of faster execution might be offset by a much slower study time.
1082
+ Not all patterns can be optimized by the JIT compiler. For those that cannot be
1083
+ handled, matching automatically falls back to the <b>pcre_exec()</b>
1084
+ interpreter. For more details, see the
1085
+ <a href="pcrejit.html"><b>pcrejit</b></a>
1086
+ documentation.
1087
+ </P>
1088
+ <P>
1089
+ The third argument for <b>pcre_study()</b> is a pointer for an error message. If
1090
+ studying succeeds (even if no data is returned), the variable it points to is
1091
+ set to NULL. Otherwise it is set to point to a textual error message. This is a
1092
+ static string that is part of the library. You must not try to free it. You
1093
+ should test the error pointer for NULL after calling <b>pcre_study()</b>, to be
1094
+ sure that it has run successfully.
1095
+ </P>
1096
+ <P>
1097
+ When you are finished with a pattern, you can free the memory used for the
1098
+ study data by calling <b>pcre_free_study()</b>. This function was added to the
1099
+ API for release 8.20. For earlier versions, the memory could be freed with
1100
+ <b>pcre_free()</b>, just like the pattern itself. This will still work in cases
1101
+ where JIT optimization is not used, but it is advisable to change to the new
1102
+ function when convenient.
1103
+ </P>
1104
+ <P>
1105
+ This is a typical way in which <b>pcre_study()</b> is used (except that in a
1106
+ real application there should be tests for errors):
1107
+ <pre>
1108
+ int rc;
1109
+ pcre *re;
1110
+ pcre_extra *sd;
1111
+ re = pcre_compile("pattern", 0, &error, &erroroffset, NULL);
1112
+ sd = pcre_study(
1113
+ re, /* result of pcre_compile() */
1114
+ 0, /* no options */
1115
+ &error); /* set to NULL or points to a message */
1116
+ rc = pcre_exec( /* see below for details of pcre_exec() options */
1117
+ re, sd, "subject", 7, 0, 0, ovector, 30);
1118
+ ...
1119
+ pcre_free_study(sd);
1120
+ pcre_free(re);
1121
+ </pre>
1122
+ Studying a pattern does two things: first, a lower bound for the length of
1123
+ subject string that is needed to match the pattern is computed. This does not
1124
+ mean that there are any strings of that length that match, but it does
1125
+ guarantee that no shorter strings match. The value is used to avoid wasting
1126
+ time by trying to match strings that are shorter than the lower bound. You can
1127
+ find out the value in a calling program via the <b>pcre_fullinfo()</b> function.
1128
+ </P>
1129
+ <P>
1130
+ Studying a pattern is also useful for non-anchored patterns that do not have a
1131
+ single fixed starting character. A bitmap of possible starting bytes is
1132
+ created. This speeds up finding a position in the subject at which to start
1133
+ matching. (In 16-bit mode, the bitmap is used for 16-bit values less than 256.
1134
+ In 32-bit mode, the bitmap is used for 32-bit values less than 256.)
1135
+ </P>
1136
+ <P>
1137
+ These two optimizations apply to both <b>pcre_exec()</b> and
1138
+ <b>pcre_dfa_exec()</b>, and the information is also used by the JIT compiler.
1139
+ The optimizations can be disabled by setting the PCRE_NO_START_OPTIMIZE option.
1140
+ You might want to do this if your pattern contains callouts or (*MARK) and you
1141
+ want to make use of these facilities in cases where matching fails.
1142
+ </P>
1143
+ <P>
1144
+ PCRE_NO_START_OPTIMIZE can be specified at either compile time or execution
1145
+ time. However, if PCRE_NO_START_OPTIMIZE is passed to <b>pcre_exec()</b>, (that
1146
+ is, after any JIT compilation has happened) JIT execution is disabled. For JIT
1147
+ execution to work with PCRE_NO_START_OPTIMIZE, the option must be set at
1148
+ compile time.
1149
+ </P>
1150
+ <P>
1151
+ There is a longer discussion of PCRE_NO_START_OPTIMIZE
1152
+ <a href="#execoptions">below.</a>
1153
+ <a name="localesupport"></a></P>
1154
+ <br><a name="SEC14" href="#TOC1">LOCALE SUPPORT</a><br>
1155
+ <P>
1156
+ PCRE handles caseless matching, and determines whether characters are letters,
1157
+ digits, or whatever, by reference to a set of tables, indexed by character
1158
+ code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
1159
+ applies only to characters with code points less than 256. By default,
1160
+ higher-valued code points never match escapes such as \w or \d. However, if
1161
+ PCRE is built with Unicode property support, all characters can be tested with
1162
+ \p and \P, or, alternatively, the PCRE_UCP option can be set when a pattern
1163
+ is compiled; this causes \w and friends to use Unicode property support
1164
+ instead of the built-in tables.
1165
+ </P>
1166
+ <P>
1167
+ The use of locales with Unicode is discouraged. If you are handling characters
1168
+ with code points greater than 128, you should either use Unicode support, or
1169
+ use locales, but not try to mix the two.
1170
+ </P>
1171
+ <P>
1172
+ PCRE contains an internal set of tables that are used when the final argument
1173
+ of <b>pcre_compile()</b> is NULL. These are sufficient for many applications.
1174
+ Normally, the internal tables recognize only ASCII characters. However, when
1175
+ PCRE is built, it is possible to cause the internal tables to be rebuilt in the
1176
+ default "C" locale of the local system, which may cause them to be different.
1177
+ </P>
1178
+ <P>
1179
+ The internal tables can always be overridden by tables supplied by the
1180
+ application that calls PCRE. These may be created in a different locale from
1181
+ the default. As more and more applications change to using Unicode, the need
1182
+ for this locale support is expected to die away.
1183
+ </P>
1184
+ <P>
1185
+ External tables are built by calling the <b>pcre_maketables()</b> function,
1186
+ which has no arguments, in the relevant locale. The result can then be passed
1187
+ to <b>pcre_compile()</b> as often as necessary. For example, to build and use
1188
+ tables that are appropriate for the French locale (where accented characters
1189
+ with values greater than 128 are treated as letters), the following code could
1190
+ be used:
1191
+ <pre>
1192
+ setlocale(LC_CTYPE, "fr_FR");
1193
+ tables = pcre_maketables();
1194
+ re = pcre_compile(..., tables);
1195
+ </pre>
1196
+ The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
1197
+ are using Windows, the name for the French locale is "french".
1198
+ </P>
1199
+ <P>
1200
+ When <b>pcre_maketables()</b> runs, the tables are built in memory that is
1201
+ obtained via <b>pcre_malloc</b>. It is the caller's responsibility to ensure
1202
+ that the memory containing the tables remains available for as long as it is
1203
+ needed.
1204
+ </P>
1205
+ <P>
1206
+ The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
1207
+ pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
1208
+ and also by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>. Thus, for any single
1209
+ pattern, compilation, studying and matching all happen in the same locale, but
1210
+ different patterns can be processed in different locales.
1211
+ </P>
1212
+ <P>
1213
+ It is possible to pass a table pointer or NULL (indicating the use of the
1214
+ internal tables) to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> (see the
1215
+ discussion below in the section on matching a pattern). This facility is
1216
+ provided for use with pre-compiled patterns that have been saved and reloaded.
1217
+ Character tables are not saved with patterns, so if a non-standard table was
1218
+ used at compile time, it must be provided again when the reloaded pattern is
1219
+ matched. Attempting to use this facility to match a pattern in a different
1220
+ locale from the one in which it was compiled is likely to lead to anomalous
1221
+ (usually incorrect) results.
1222
+ <a name="infoaboutpattern"></a></P>
1223
+ <br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
1224
+ <P>
1225
+ <b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
1226
+ <b> int <i>what</i>, void *<i>where</i>);</b>
1227
+ </P>
1228
+ <P>
1229
+ The <b>pcre_fullinfo()</b> function returns information about a compiled
1230
+ pattern. It replaces the <b>pcre_info()</b> function, which was removed from the
1231
+ library at version 8.30, after more than 10 years of obsolescence.
1232
+ </P>
1233
+ <P>
1234
+ The first argument for <b>pcre_fullinfo()</b> is a pointer to the compiled
1235
+ pattern. The second argument is the result of <b>pcre_study()</b>, or NULL if
1236
+ the pattern was not studied. The third argument specifies which piece of
1237
+ information is required, and the fourth argument is a pointer to a variable
1238
+ to receive the data. The yield of the function is zero for success, or one of
1239
+ the following negative numbers:
1240
+ <pre>
1241
+ PCRE_ERROR_NULL the argument <i>code</i> was NULL
1242
+ the argument <i>where</i> was NULL
1243
+ PCRE_ERROR_BADMAGIC the "magic number" was not found
1244
+ PCRE_ERROR_BADENDIANNESS the pattern was compiled with different
1245
+ endianness
1246
+ PCRE_ERROR_BADOPTION the value of <i>what</i> was invalid
1247
+ PCRE_ERROR_UNSET the requested field is not set
1248
+ </pre>
1249
+ The "magic number" is placed at the start of each compiled pattern as a simple
1250
+ check against passing an arbitrary memory pointer. The endianness error can
1251
+ occur if a compiled pattern is saved and reloaded on a different host. Here is
1252
+ a typical call of <b>pcre_fullinfo()</b>, to obtain the length of the compiled
1253
+ pattern:
1254
+ <pre>
1255
+ int rc;
1256
+ size_t length;
1257
+ rc = pcre_fullinfo(
1258
+ re, /* result of pcre_compile() */
1259
+ sd, /* result of pcre_study(), or NULL */
1260
+ PCRE_INFO_SIZE, /* what is required */
1261
+ &length); /* where to put the data */
1262
+ </pre>
1263
+ The possible values for the third argument are defined in <b>pcre.h</b>, and are
1264
+ as follows:
1265
+ <pre>
1266
+ PCRE_INFO_BACKREFMAX
1267
+ </pre>
1268
+ Return the number of the highest back reference in the pattern. The fourth
1269
+ argument should point to an <b>int</b> variable. Zero is returned if there are
1270
+ no back references.
1271
+ <pre>
1272
+ PCRE_INFO_CAPTURECOUNT
1273
+ </pre>
1274
+ Return the number of capturing subpatterns in the pattern. The fourth argument
1275
+ should point to an <b>int</b> variable.
1276
+ <pre>
1277
+ PCRE_INFO_DEFAULT_TABLES
1278
+ </pre>
1279
+ Return a pointer to the internal default character tables within PCRE. The
1280
+ fourth argument should point to an <b>unsigned char *</b> variable. This
1281
+ information call is provided for internal use by the <b>pcre_study()</b>
1282
+ function. External callers can cause PCRE to use its internal tables by passing
1283
+ a NULL table pointer.
1284
+ <pre>
1285
+ PCRE_INFO_FIRSTBYTE (deprecated)
1286
+ </pre>
1287
+ Return information about the first data unit of any matched string, for a
1288
+ non-anchored pattern. The name of this option refers to the 8-bit library,
1289
+ where data units are bytes. The fourth argument should point to an <b>int</b>
1290
+ variable. Negative values are used for special cases. However, this means that
1291
+ when the 32-bit library is in non-UTF-32 mode, the full 32-bit range of
1292
+ characters cannot be returned. For this reason, this value is deprecated; use
1293
+ PCRE_INFO_FIRSTCHARACTERFLAGS and PCRE_INFO_FIRSTCHARACTER instead.
1294
+ </P>
1295
+ <P>
1296
+ If there is a fixed first value, for example, the letter "c" from a pattern
1297
+ such as (cat|cow|coyote), its value is returned. In the 8-bit library, the
1298
+ value is always less than 256. In the 16-bit library the value can be up to
1299
+ 0xffff. In the 32-bit library the value can be up to 0x10ffff.
1300
+ </P>
1301
+ <P>
1302
+ If there is no fixed first value, and if either
1303
+ <br>
1304
+ <br>
1305
+ (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
1306
+ starts with "^", or
1307
+ <br>
1308
+ <br>
1309
+ (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
1310
+ (if it were set, the pattern would be anchored),
1311
+ <br>
1312
+ <br>
1313
+ -1 is returned, indicating that the pattern matches only at the start of a
1314
+ subject string or after any newline within the string. Otherwise -2 is
1315
+ returned. For anchored patterns, -2 is returned.
1316
+ <pre>
1317
+ PCRE_INFO_FIRSTCHARACTER
1318
+ </pre>
1319
+ Return the value of the first data unit (non-UTF character) of any matched
1320
+ string in the situation where PCRE_INFO_FIRSTCHARACTERFLAGS returns 1;
1321
+ otherwise return 0. The fourth argument should point to a <b>uint32_t</b>
1322
+ variable.
1323
+ </P>
1324
+ <P>
1325
+ In the 8-bit library, the value is always less than 256. In the 16-bit library
1326
+ the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
1327
+ can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
1328
+ <pre>
1329
+ PCRE_INFO_FIRSTCHARACTERFLAGS
1330
+ </pre>
1331
+ Return information about the first data unit of any matched string, for a
1332
+ non-anchored pattern. The fourth argument should point to an <b>int</b>
1333
+ variable.
1334
+ </P>
1335
+ <P>
1336
+ If there is a fixed first value, for example, the letter "c" from a pattern
1337
+ such as (cat|cow|coyote), 1 is returned, and the character value can be
1338
+ retrieved using PCRE_INFO_FIRSTCHARACTER. If there is no fixed first value, and
1339
+ if either
1340
+ <br>
1341
+ <br>
1342
+ (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
1343
+ starts with "^", or
1344
+ <br>
1345
+ <br>
1346
+ (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
1347
+ (if it were set, the pattern would be anchored),
1348
+ <br>
1349
+ <br>
1350
+ 2 is returned, indicating that the pattern matches only at the start of a
1351
+ subject string or after any newline within the string. Otherwise 0 is
1352
+ returned. For anchored patterns, 0 is returned.
1353
+ <pre>
1354
+ PCRE_INFO_FIRSTTABLE
1355
+ </pre>
1356
+ If the pattern was studied, and this resulted in the construction of a 256-bit
1357
+ table indicating a fixed set of values for the first data unit in any matching
1358
+ string, a pointer to the table is returned. Otherwise NULL is returned. The
1359
+ fourth argument should point to an <b>unsigned char *</b> variable.
1360
+ <pre>
1361
+ PCRE_INFO_HASCRORLF
1362
+ </pre>
1363
+ Return 1 if the pattern contains any explicit matches for CR or LF characters,
1364
+ otherwise 0. The fourth argument should point to an <b>int</b> variable. An
1365
+ explicit match is either a literal CR or LF character, or \r or \n.
1366
+ <pre>
1367
+ PCRE_INFO_JCHANGED
1368
+ </pre>
1369
+ Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise
1370
+ 0. The fourth argument should point to an <b>int</b> variable. (?J) and
1371
+ (?-J) set and unset the local PCRE_DUPNAMES option, respectively.
1372
+ <pre>
1373
+ PCRE_INFO_JIT
1374
+ </pre>
1375
+ Return 1 if the pattern was studied with one of the JIT options, and
1376
+ just-in-time compiling was successful. The fourth argument should point to an
1377
+ <b>int</b> variable. A return value of 0 means that JIT support is not available
1378
+ in this version of PCRE, or that the pattern was not studied with a JIT option,
1379
+ or that the JIT compiler could not handle this particular pattern. See the
1380
+ <a href="pcrejit.html"><b>pcrejit</b></a>
1381
+ documentation for details of what can and cannot be handled.
1382
+ <pre>
1383
+ PCRE_INFO_JITSIZE
1384
+ </pre>
1385
+ If the pattern was successfully studied with a JIT option, return the size of
1386
+ the JIT compiled code, otherwise return zero. The fourth argument should point
1387
+ to a <b>size_t</b> variable.
1388
+ <pre>
1389
+ PCRE_INFO_LASTLITERAL
1390
+ </pre>
1391
+ Return the value of the rightmost literal data unit that must exist in any
1392
+ matched string, other than at its start, if such a value has been recorded. The
1393
+ fourth argument should point to an <b>int</b> variable. If there is no such
1394
+ value, -1 is returned. For anchored patterns, a last literal value is recorded
1395
+ only if it follows something of variable length. For example, for the pattern
1396
+ /^a\d+z\d+/ the returned value is "z", but for /^a\dz\d/ the returned value
1397
+ is -1.
1398
+ </P>
1399
+ <P>
1400
+ Since for the 32-bit library using the non-UTF-32 mode, this function is unable
1401
+ to return the full 32-bit range of characters, this value is deprecated;
1402
+ instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
1403
+ be used.
1404
+ <pre>
1405
+ PCRE_INFO_MATCH_EMPTY
1406
+ </pre>
1407
+ Return 1 if the pattern can match an empty string, otherwise 0. The fourth
1408
+ argument should point to an <b>int</b> variable.
1409
+ <pre>
1410
+ PCRE_INFO_MATCHLIMIT
1411
+ </pre>
1412
+ If the pattern set a match limit by including an item of the form
1413
+ (*LIMIT_MATCH=nnnn) at the start, the value is returned. The fourth argument
1414
+ should point to an unsigned 32-bit integer. If no such value has been set, the
1415
+ call to <b>pcre_fullinfo()</b> returns the error PCRE_ERROR_UNSET.
1416
+ <pre>
1417
+ PCRE_INFO_MAXLOOKBEHIND
1418
+ </pre>
1419
+ Return the number of characters (NB not data units) in the longest lookbehind
1420
+ assertion in the pattern. This information is useful when doing multi-segment
1421
+ matching using the partial matching facilities. Note that the simple assertions
1422
+ \b and \B require a one-character lookbehind. \A also registers a
1423
+ one-character lookbehind, though it does not actually inspect the previous
1424
+ character. This is to ensure that at least one character from the old segment
1425
+ is retained when a new segment is processed. Otherwise, if there are no
1426
+ lookbehinds in the pattern, \A might match incorrectly at the start of a new
1427
+ segment.
1428
+ <pre>
1429
+ PCRE_INFO_MINLENGTH
1430
+ </pre>
1431
+ If the pattern was studied and a minimum length for matching subject strings
1432
+ was computed, its value is returned. Otherwise the returned value is -1. The
1433
+ value is a number of characters, which in UTF mode may be different from the
1434
+ number of data units. The fourth argument should point to an <b>int</b>
1435
+ variable. A non-negative value is a lower bound to the length of any matching
1436
+ string. There may not be any strings of that length that do actually match, but
1437
+ every string that does match is at least that long.
1438
+ <pre>
1439
+ PCRE_INFO_NAMECOUNT
1440
+ PCRE_INFO_NAMEENTRYSIZE
1441
+ PCRE_INFO_NAMETABLE
1442
+ </pre>
1443
+ PCRE supports the use of named as well as numbered capturing parentheses. The
1444
+ names are just an additional way of identifying the parentheses, which still
1445
+ acquire numbers. Several convenience functions such as
1446
+ <b>pcre_get_named_substring()</b> are provided for extracting captured
1447
+ substrings by name. It is also possible to extract the data directly, by first
1448
+ converting the name to a number in order to access the correct pointers in the
1449
+ output vector (described with <b>pcre_exec()</b> below). To do the conversion,
1450
+ you need to use the name-to-number map, which is described by these three
1451
+ values.
1452
+ </P>
1453
+ <P>
1454
+ The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
1455
+ the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
1456
+ entry; both of these return an <b>int</b> value. The entry size depends on the
1457
+ length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
1458
+ entry of the table. This is a pointer to <b>char</b> in the 8-bit library, where
1459
+ the first two bytes of each entry are the number of the capturing parenthesis,
1460
+ most significant byte first. In the 16-bit library, the pointer points to
1461
+ 16-bit data units, the first of which contains the parenthesis number. In the
1462
+ 32-bit library, the pointer points to 32-bit data units, the first of which
1463
+ contains the parenthesis number. The rest of the entry is the corresponding
1464
+ name, zero terminated.
1465
+ </P>
1466
+ <P>
1467
+ The names are in alphabetical order. If (?| is used to create multiple groups
1468
+ with the same number, as described in the
1469
+ <a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
1470
+ in the
1471
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
1472
+ page, the groups may be given the same name, but there is only one entry in the
1473
+ table. Different names for groups of the same number are not permitted.
1474
+ Duplicate names for subpatterns with different numbers are permitted,
1475
+ but only if PCRE_DUPNAMES is set. They appear in the table in the order in
1476
+ which they were found in the pattern. In the absence of (?| this is the order
1477
+ of increasing number; when (?| is used this is not necessarily the case because
1478
+ later subpatterns may have lower numbers.
1479
+ </P>
1480
+ <P>
1481
+ As a simple example of the name/number table, consider the following pattern
1482
+ after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white
1483
+ space - including newlines - is ignored):
1484
+ <pre>
1485
+ (?&#60;date&#62; (?&#60;year&#62;(\d\d)?\d\d) - (?&#60;month&#62;\d\d) - (?&#60;day&#62;\d\d) )
1486
+ </pre>
1487
+ There are four named subpatterns, so the table has four entries, and each entry
1488
+ in the table is eight bytes long. The table is as follows, with non-printing
1489
+ bytes shown in hexadecimal, and undefined bytes shown as ??:
1490
+ <pre>
1491
+ 00 01 d a t e 00 ??
1492
+ 00 05 d a y 00 ?? ??
1493
+ 00 04 m o n t h 00
1494
+ 00 02 y e a r 00 ??
1495
+ </pre>
1496
+ When writing code to extract data from named subpatterns using the
1497
+ name-to-number map, remember that the length of the entries is likely to be
1498
+ different for each compiled pattern.
1499
+ <pre>
1500
+ PCRE_INFO_OKPARTIAL
1501
+ </pre>
1502
+ Return 1 if the pattern can be used for partial matching with
1503
+ <b>pcre_exec()</b>, otherwise 0. The fourth argument should point to an
1504
+ <b>int</b> variable. From release 8.00, this always returns 1, because the
1505
+ restrictions that previously applied to partial matching have been lifted. The
1506
+ <a href="pcrepartial.html"><b>pcrepartial</b></a>
1507
+ documentation gives details of partial matching.
1508
+ <pre>
1509
+ PCRE_INFO_OPTIONS
1510
+ </pre>
1511
+ Return a copy of the options with which the pattern was compiled. The fourth
1512
+ argument should point to an <b>unsigned long int</b> variable. These option bits
1513
+ are those specified in the call to <b>pcre_compile()</b>, modified by any
1514
+ top-level option settings at the start of the pattern itself. In other words,
1515
+ they are the options that will be in force when matching starts. For example,
1516
+ if the pattern /(?im)abc(?-i)d/ is compiled with the PCRE_EXTENDED option, the
1517
+ result is PCRE_CASELESS, PCRE_MULTILINE, and PCRE_EXTENDED.
1518
+ </P>
1519
+ <P>
1520
+ A pattern is automatically anchored by PCRE if all of its top-level
1521
+ alternatives begin with one of the following:
1522
+ <pre>
1523
+ ^ unless PCRE_MULTILINE is set
1524
+ \A always
1525
+ \G always
1526
+ .* if PCRE_DOTALL is set and there are no back references to the subpattern in which .* appears
1527
+ </pre>
1528
+ For such patterns, the PCRE_ANCHORED bit is set in the options returned by
1529
+ <b>pcre_fullinfo()</b>.
1530
+ <pre>
1531
+ PCRE_INFO_RECURSIONLIMIT
1532
+ </pre>
1533
+ If the pattern set a recursion limit by including an item of the form
1534
+ (*LIMIT_RECURSION=nnnn) at the start, the value is returned. The fourth
1535
+ argument should point to an unsigned 32-bit integer. If no such value has been
1536
+ set, the call to <b>pcre_fullinfo()</b> returns the error PCRE_ERROR_UNSET.
1537
+ <pre>
1538
+ PCRE_INFO_SIZE
1539
+ </pre>
1540
+ Return the size of the compiled pattern in bytes (for all three libraries). The
1541
+ fourth argument should point to a <b>size_t</b> variable. This value does not
1542
+ include the size of the <b>pcre</b> structure that is returned by
1543
+ <b>pcre_compile()</b>. The value that is passed as the argument to
1544
+ <b>pcre_malloc()</b> when <b>pcre_compile()</b> is getting memory in which to
1545
+ place the compiled data is the value returned by this option plus the size of
1546
+ the <b>pcre</b> structure. Studying a compiled pattern, with or without JIT,
1547
+ does not alter the value returned by this option.
1548
+ <pre>
1549
+ PCRE_INFO_STUDYSIZE
1550
+ </pre>
1551
+ Return the size in bytes (for all three libraries) of the data block pointed to
1552
+ by the <i>study_data</i> field in a <b>pcre_extra</b> block. If <b>pcre_extra</b>
1553
+ is NULL, or there is no study data, zero is returned. The fourth argument
1554
+ should point to a <b>size_t</b> variable. The <i>study_data</i> field is set by
1555
+ <b>pcre_study()</b> to record information that will speed up matching (see the
1556
+ section entitled
1557
+ <a href="#studyingapattern">"Studying a pattern"</a>
1558
+ above). The format of the <i>study_data</i> block is private, but its length
1559
+ is made available via this option so that it can be saved and restored (see the
1560
+ <a href="pcreprecompile.html"><b>pcreprecompile</b></a>
1561
+ documentation for details).
1562
+ <pre>
1563
+ PCRE_INFO_REQUIREDCHARFLAGS
1564
+ </pre>
1565
+ Returns 1 if there is a rightmost literal data unit that must exist in any
1566
+ matched string, other than at its start. The fourth argument should point to
1567
+ an <b>int</b> variable. If there is no such value, 0 is returned. If returning
1568
+ 1, the character value itself can be retrieved using PCRE_INFO_REQUIREDCHAR.
1569
+ </P>
1570
+ <P>
1571
+ For anchored patterns, a last literal value is recorded only if it follows
1572
+ something of variable length. For example, for the pattern /^a\d+z\d+/ the
1573
+ returned value is 1 (with "z" returned from PCRE_INFO_REQUIREDCHAR), but for
1574
+ /^a\dz\d/ the returned value is 0.
1575
+ <pre>
1576
+ PCRE_INFO_REQUIREDCHAR
1577
+ </pre>
1578
+ Return the value of the rightmost literal data unit that must exist in any
1579
+ matched string, other than at its start, if such a value has been recorded. The
1580
+ fourth argument should point to a <b>uint32_t</b> variable. If there is no such
1581
+ value, 0 is returned.
1582
+ </P>
1583
+ <br><a name="SEC16" href="#TOC1">REFERENCE COUNTS</a><br>
1584
+ <P>
1585
+ <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
1586
+ </P>
1587
+ <P>
1588
+ The <b>pcre_refcount()</b> function is used to maintain a reference count in the
1589
+ data block that contains a compiled pattern. It is provided for the benefit of
1590
+ applications that operate in an object-oriented manner, where different parts
1591
+ of the application may be using the same compiled pattern, but you want to free
1592
+ the block when they are all done.
1593
+ </P>
1594
+ <P>
1595
+ When a pattern is compiled, the reference count field is initialized to zero.
1596
+ It is changed only by calling this function, whose action is to add the
1597
+ <i>adjust</i> value (which may be positive or negative) to it. The yield of the
1598
+ function is the new value. However, the value of the count is constrained to
1599
+ lie between 0 and 65535, inclusive. If the new value is outside these limits,
1600
+ it is forced to the appropriate limit value.
1601
+ </P>
1602
+ <P>
1603
+ Except when it is zero, the reference count is not correctly preserved if a
1604
+ pattern is compiled on one host and then transferred to a host whose byte-order
1605
+ is different. (This seems a highly unlikely scenario.)
1606
+ </P>
1607
+ <br><a name="SEC17" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
1608
+ <P>
1609
+ <b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
1610
+ <b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
1611
+ <b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
1612
+ </P>
1613
+ <P>
1614
+ The function <b>pcre_exec()</b> is called to match a subject string against a
1615
+ compiled pattern, which is passed in the <i>code</i> argument. If the
1616
+ pattern was studied, the result of the study should be passed in the
1617
+ <i>extra</i> argument. You can call <b>pcre_exec()</b> with the same <i>code</i>
1618
+ and <i>extra</i> arguments as many times as you like, in order to match
1619
+ different subject strings with the same pattern.
1620
+ </P>
1621
+ <P>
1622
+ This function is the main matching facility of the library, and it operates in
1623
+ a Perl-like manner. For specialist use there is also an alternative matching
1624
+ function, which is described
1625
+ <a href="#dfamatch">below</a>
1626
+ in the section about the <b>pcre_dfa_exec()</b> function.
1627
+ </P>
1628
+ <P>
1629
+ In most applications, the pattern will have been compiled (and optionally
1630
+ studied) in the same process that calls <b>pcre_exec()</b>. However, it is
1631
+ possible to save compiled patterns and study data, and then use them later
1632
+ in different processes, possibly even on different hosts. For a discussion
1633
+ about this, see the
1634
+ <a href="pcreprecompile.html"><b>pcreprecompile</b></a>
1635
+ documentation.
1636
+ </P>
1637
+ <P>
1638
+ Here is an example of a simple call to <b>pcre_exec()</b>:
1639
+ <pre>
1640
+ int rc;
1641
+ int ovector[30];
1642
+ rc = pcre_exec(
1643
+ re, /* result of pcre_compile() */
1644
+ NULL, /* we didn't study the pattern */
1645
+ "some string", /* the subject string */
1646
+ 11, /* the length of the subject string */
1647
+ 0, /* start at offset 0 in the subject */
1648
+ 0, /* default options */
1649
+ ovector, /* vector of integers for substring information */
1650
+ 30); /* number of elements (NOT size in bytes) */
1651
+ <a name="extradata"></a></PRE>
1652
+ </P>
1653
+ <br><b>
1654
+ Extra data for <b>pcre_exec()</b>
1655
+ </b><br>
1656
+ <P>
1657
+ If the <i>extra</i> argument is not NULL, it must point to a <b>pcre_extra</b>
1658
+ data block. The <b>pcre_study()</b> function returns such a block (when it
1659
+ doesn't return NULL), but you can also create one for yourself, and pass
1660
+ additional information in it. The <b>pcre_extra</b> block contains the following
1661
+ fields (not necessarily in this order):
1662
+ <pre>
1663
+ unsigned long int <i>flags</i>;
1664
+ void *<i>study_data</i>;
1665
+ void *<i>executable_jit</i>;
1666
+ unsigned long int <i>match_limit</i>;
1667
+ unsigned long int <i>match_limit_recursion</i>;
1668
+ void *<i>callout_data</i>;
1669
+ const unsigned char *<i>tables</i>;
1670
+ unsigned char **<i>mark</i>;
1671
+ </pre>
1672
+ In the 16-bit version of this structure, the <i>mark</i> field has type
1673
+ "PCRE_UCHAR16 **".
1674
+ <br>
1675
+ <br>
1676
+ In the 32-bit version of this structure, the <i>mark</i> field has type
1677
+ "PCRE_UCHAR32 **".
1678
+ </P>
1679
+ <P>
1680
+ The <i>flags</i> field is used to specify which of the other fields are set. The
1681
+ flag bits are:
1682
+ <pre>
1683
+ PCRE_EXTRA_CALLOUT_DATA
1684
+ PCRE_EXTRA_EXECUTABLE_JIT
1685
+ PCRE_EXTRA_MARK
1686
+ PCRE_EXTRA_MATCH_LIMIT
1687
+ PCRE_EXTRA_MATCH_LIMIT_RECURSION
1688
+ PCRE_EXTRA_STUDY_DATA
1689
+ PCRE_EXTRA_TABLES
1690
+ </pre>
1691
+ Other flag bits should be set to zero. The <i>study_data</i> field and sometimes
1692
+ the <i>executable_jit</i> field are set in the <b>pcre_extra</b> block that is
1693
+ returned by <b>pcre_study()</b>, together with the appropriate flag bits. You
1694
+ should not set these yourself, but you may add to the block by setting other
1695
+ fields and their corresponding flag bits.
1696
+ </P>
1697
+ <P>
1698
+ The <i>match_limit</i> field provides a means of preventing PCRE from using up a
1699
+ vast amount of resources when running patterns that are not going to match,
1700
+ but which have a very large number of possibilities in their search trees. The
1701
+ classic example is a pattern that uses nested unlimited repeats.
1702
+ </P>
1703
+ <P>
1704
+ Internally, <b>pcre_exec()</b> uses a function called <b>match()</b>, which it
1705
+ calls repeatedly (sometimes recursively). The limit set by <i>match_limit</i> is
1706
+ imposed on the number of times this function is called during a match, which
1707
+ has the effect of limiting the amount of backtracking that can take place. For
1708
+ patterns that are not anchored, the count restarts from zero for each position
1709
+ in the subject string.
1710
+ </P>
1711
+ <P>
1712
+ When <b>pcre_exec()</b> is called with a pattern that was successfully studied
1713
+ with a JIT option, the way that the matching is executed is entirely different.
1714
+ However, there is still the possibility of runaway matching that goes on for a
1715
+ very long time, and so the <i>match_limit</i> value is also used in this case
1716
+ (but in a different way) to limit how long the matching can continue.
1717
+ </P>
1718
+ <P>
1719
+ The default value for the limit can be set when PCRE is built; the default
1720
+ default is 10 million, which handles all but the most extreme cases. You can
1721
+ override the default by supplying <b>pcre_exec()</b> with a <b>pcre_extra</b>
1722
+ block in which <i>match_limit</i> is set, and PCRE_EXTRA_MATCH_LIMIT is set in
1723
+ the <i>flags</i> field. If the limit is exceeded, <b>pcre_exec()</b> returns
1724
+ PCRE_ERROR_MATCHLIMIT.
1725
+ </P>
1726
+ <P>
1727
+ A value for the match limit may also be supplied by an item at the start of a
1728
+ pattern of the form
1729
+ <pre>
1730
+ (*LIMIT_MATCH=d)
1731
+ </pre>
1732
+ where d is a decimal number. However, such a setting is ignored unless d is
1733
+ less than the limit set by the caller of <b>pcre_exec()</b> or, if no such limit
1734
+ is set, less than the default.
1735
+ </P>
1736
+ <P>
1737
+ The <i>match_limit_recursion</i> field is similar to <i>match_limit</i>, but
1738
+ instead of limiting the total number of times that <b>match()</b> is called, it
1739
+ limits the depth of recursion. The recursion depth is a smaller number than the
1740
+ total number of calls, because not all calls to <b>match()</b> are recursive.
1741
+ This limit is of use only if it is set smaller than <i>match_limit</i>.
1742
+ </P>
1743
+ <P>
1744
+ Limiting the recursion depth limits the amount of machine stack that can be
1745
+ used, or, when PCRE has been compiled to use memory on the heap instead of the
1746
+ stack, the amount of heap memory that can be used. This limit is not relevant,
1747
+ and is ignored, when matching is done using JIT compiled code.
1748
+ </P>
1749
+ <P>
1750
+ The default value for <i>match_limit_recursion</i> can be set when PCRE is
1751
+ built; the default default is the same value as the default for
1752
+ <i>match_limit</i>. You can override the default by supplying <b>pcre_exec()</b>
1753
+ with a <b>pcre_extra</b> block in which <i>match_limit_recursion</i> is set, and
1754
+ PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the <i>flags</i> field. If the limit
1755
+ is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
1756
+ </P>
1757
+ <P>
1758
+ A value for the recursion limit may also be supplied by an item at the start of
1759
+ a pattern of the form
1760
+ <pre>
1761
+ (*LIMIT_RECURSION=d)
1762
+ </pre>
1763
+ where d is a decimal number. However, such a setting is ignored unless d is
1764
+ less than the limit set by the caller of <b>pcre_exec()</b> or, if no such limit
1765
+ is set, less than the default.
1766
+ </P>
1767
+ <P>
1768
+ The <i>callout_data</i> field is used in conjunction with the "callout" feature,
1769
+ and is described in the
1770
+ <a href="pcrecallout.html"><b>pcrecallout</b></a>
1771
+ documentation.
1772
+ </P>
1773
+ <P>
1774
+ The <i>tables</i> field is provided for use with patterns that have been
1775
+ pre-compiled using custom character tables, saved to disc or elsewhere, and
1776
+ then reloaded, because the tables that were used to compile a pattern are not
1777
+ saved with it. See the
1778
+ <a href="pcreprecompile.html"><b>pcreprecompile</b></a>
1779
+ documentation for a discussion of saving compiled patterns for later use. If
1780
+ NULL is passed using this mechanism, it forces PCRE's internal tables to be
1781
+ used.
1782
+ </P>
1783
+ <P>
1784
+ <b>Warning:</b> The tables that <b>pcre_exec()</b> uses must be the same as those
1785
+ that were used when the pattern was compiled. If this is not the case, the
1786
+ behaviour of <b>pcre_exec()</b> is undefined. Therefore, when a pattern is
1787
+ compiled and matched in the same process, this field should never be set. In
1788
+ this (the most common) case, the correct table pointer is automatically passed
1789
+ with the compiled pattern from <b>pcre_compile()</b> to <b>pcre_exec()</b>.
1790
+ </P>
1791
+ <P>
1792
+ If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
1793
+ be set to point to a suitable variable. If the pattern contains any
1794
+ backtracking control verbs such as (*MARK:NAME), and the execution ends up with
1795
+ a name to pass back, a pointer to the name string (zero terminated) is placed
1796
+ in the variable pointed to by the <i>mark</i> field. The names are within the
1797
+ compiled pattern; if you wish to retain such a name you must copy it before
1798
+ freeing the memory of a compiled pattern. If there is no name to pass back, the
1799
+ variable pointed to by the <i>mark</i> field is set to NULL. For details of the
1800
+ backtracking control verbs, see the section entitled
1801
+ <a href="pcrepattern.html#backtrackcontrol">"Backtracking control"</a>
1802
+ in the
1803
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
1804
+ documentation.
1805
+ <a name="execoptions"></a></P>
1806
+ <br><b>
1807
+ Option bits for <b>pcre_exec()</b>
1808
+ </b><br>
1809
+ <P>
1810
+ The unused bits of the <i>options</i> argument for <b>pcre_exec()</b> must be
1811
+ zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_<i>xxx</i>,
1812
+ PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
1813
+ PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, and
1814
+ PCRE_PARTIAL_SOFT.
1815
+ </P>
1816
+ <P>
1817
+ If the pattern was successfully studied with one of the just-in-time (JIT)
1818
+ compile options, the only supported options for JIT execution are
1819
+ PCRE_NO_UTF8_CHECK, PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY,
1820
+ PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. If an
1821
+ unsupported option is used, JIT execution is disabled and the normal
1822
+ interpretive code in <b>pcre_exec()</b> is run.
1823
+ <pre>
1824
+ PCRE_ANCHORED
1825
+ </pre>
1826
+ The PCRE_ANCHORED option limits <b>pcre_exec()</b> to matching at the first
1827
+ matching position. If a pattern was compiled with PCRE_ANCHORED, or turned out
1828
+ to be anchored by virtue of its contents, it cannot be made unachored at
1829
+ matching time.
1830
+ <pre>
1831
+ PCRE_BSR_ANYCRLF
1832
+ PCRE_BSR_UNICODE
1833
+ </pre>
1834
+ These options (which are mutually exclusive) control what the \R escape
1835
+ sequence matches. The choice is either to match only CR, LF, or CRLF, or to
1836
+ match any Unicode newline sequence. These options override the choice that was
1837
+ made or defaulted when the pattern was compiled.
1838
+ <pre>
1839
+ PCRE_NEWLINE_CR
1840
+ PCRE_NEWLINE_LF
1841
+ PCRE_NEWLINE_CRLF
1842
+ PCRE_NEWLINE_ANYCRLF
1843
+ PCRE_NEWLINE_ANY
1844
+ </pre>
1845
+ These options override the newline definition that was chosen or defaulted when
1846
+ the pattern was compiled. For details, see the description of
1847
+ <b>pcre_compile()</b> above. During matching, the newline choice affects the
1848
+ behaviour of the dot, circumflex, and dollar metacharacters. It may also alter
1849
+ the way the match position is advanced after a match failure for an unanchored
1850
+ pattern.
1851
+ </P>
1852
+ <P>
1853
+ When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF, or PCRE_NEWLINE_ANY is set, and a
1854
+ match attempt for an unanchored pattern fails when the current position is at a
1855
+ CRLF sequence, and the pattern contains no explicit matches for CR or LF
1856
+ characters, the match position is advanced by two characters instead of one, in
1857
+ other words, to after the CRLF.
1858
+ </P>
1859
+ <P>
1860
+ The above rule is a compromise that makes the most common cases work as
1861
+ expected. For example, if the pattern is .+A (and the PCRE_DOTALL option is not
1862
+ set), it does not match the string "\r\nA" because, after failing at the
1863
+ start, it skips both the CR and the LF before retrying. However, the pattern
1864
+ [\r\n]A does match that string, because it contains an explicit CR or LF
1865
+ reference, and so advances only by one character after the first failure.
1866
+ </P>
1867
+ <P>
1868
+ An explicit match for CR or LF is either a literal appearance of one of those
1869
+ characters, or one of the \r or \n escape sequences. Implicit matches such as
1870
+ [^X] do not count, nor does \s (which includes CR and LF in the characters
1871
+ that it matches).
1872
+ </P>
1873
+ <P>
1874
+ Notwithstanding the above, anomalous effects may still occur when CRLF is a
1875
+ valid newline sequence and explicit \r or \n escapes appear in the pattern.
1876
+ <pre>
1877
+ PCRE_NOTBOL
1878
+ </pre>
1879
+ This option specifies that the first character of the subject string is not the
1880
+ beginning of a line, so the circumflex metacharacter should not match before
1881
+ it. Setting this without PCRE_MULTILINE (at compile time) causes circumflex
1882
+ never to match. This option affects only the behaviour of the circumflex
1883
+ metacharacter. It does not affect \A.
1884
+ <pre>
1885
+ PCRE_NOTEOL
1886
+ </pre>
1887
+ This option specifies that the end of the subject string is not the end of a
1888
+ line, so the dollar metacharacter should not match it nor (except in multiline
1889
+ mode) a newline immediately before it. Setting this without PCRE_MULTILINE (at
1890
+ compile time) causes dollar never to match. This option affects only the
1891
+ behaviour of the dollar metacharacter. It does not affect \Z or \z.
1892
+ <pre>
1893
+ PCRE_NOTEMPTY
1894
+ </pre>
1895
+ An empty string is not considered to be a valid match if this option is set. If
1896
+ there are alternatives in the pattern, they are tried. If all the alternatives
1897
+ match the empty string, the entire match fails. For example, if the pattern
1898
+ <pre>
1899
+ a?b?
1900
+ </pre>
1901
+ is applied to a string not beginning with "a" or "b", it matches an empty
1902
+ string at the start of the subject. With PCRE_NOTEMPTY set, this match is not
1903
+ valid, so PCRE searches further into the string for occurrences of "a" or "b".
1904
+ <pre>
1905
+ PCRE_NOTEMPTY_ATSTART
1906
+ </pre>
1907
+ This is like PCRE_NOTEMPTY, except that an empty string match that is not at
1908
+ the start of the subject is permitted. If the pattern is anchored, such a match
1909
+ can occur only if the pattern contains \K.
1910
+ </P>
1911
+ <P>
1912
+ Perl has no direct equivalent of PCRE_NOTEMPTY or PCRE_NOTEMPTY_ATSTART, but it
1913
+ does make a special case of a pattern match of the empty string within its
1914
+ <b>split()</b> function, and when using the /g modifier. It is possible to
1915
+ emulate Perl's behaviour after matching a null string by first trying the match
1916
+ again at the same offset with PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED, and then
1917
+ if that fails, by advancing the starting offset (see below) and trying an
1918
+ ordinary match again. There is some code that demonstrates how to do this in
1919
+ the
1920
+ <a href="pcredemo.html"><b>pcredemo</b></a>
1921
+ sample program. In the most general case, you have to check to see if the
1922
+ newline convention recognizes CRLF as a newline, and if so, and the current
1923
+ character is CR followed by LF, advance the starting offset by two characters
1924
+ instead of one.
1925
+ <pre>
1926
+ PCRE_NO_START_OPTIMIZE
1927
+ </pre>
1928
+ There are a number of optimizations that <b>pcre_exec()</b> uses at the start of
1929
+ a match, in order to speed up the process. For example, if it is known that an
1930
+ unanchored match must start with a specific character, it searches the subject
1931
+ for that character, and fails immediately if it cannot find it, without
1932
+ actually running the main matching function. This means that a special item
1933
+ such as (*COMMIT) at the start of a pattern is not considered until after a
1934
+ suitable starting point for the match has been found. Also, when callouts or
1935
+ (*MARK) items are in use, these "start-up" optimizations can cause them to be
1936
+ skipped if the pattern is never actually used. The start-up optimizations are
1937
+ in effect a pre-scan of the subject that takes place before the pattern is run.
1938
+ </P>
1939
+ <P>
1940
+ The PCRE_NO_START_OPTIMIZE option disables the start-up optimizations, possibly
1941
+ causing performance to suffer, but ensuring that in cases where the result is
1942
+ "no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)
1943
+ are considered at every possible starting position in the subject string. If
1944
+ PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching
1945
+ time. The use of PCRE_NO_START_OPTIMIZE at matching time (that is, passing it
1946
+ to <b>pcre_exec()</b>) disables JIT execution; in this situation, matching is
1947
+ always done interpretively.
1948
+ </P>
1949
+ <P>
1950
+ Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.
1951
+ Consider the pattern
1952
+ <pre>
1953
+ (*COMMIT)ABC
1954
+ </pre>
1955
+ When this is compiled, PCRE records the fact that a match must start with the
1956
+ character "A". Suppose the subject string is "DEFABC". The start-up
1957
+ optimization scans along the subject, finds "A" and runs the first match
1958
+ attempt from there. The (*COMMIT) item means that the pattern must match the
1959
+ current starting position, which in this case, it does. However, if the same
1960
+ match is run with PCRE_NO_START_OPTIMIZE set, the initial scan along the
1961
+ subject string does not happen. The first match attempt is run starting from
1962
+ "D" and when this fails, (*COMMIT) prevents any further matches being tried, so
1963
+ the overall result is "no match". If the pattern is studied, more start-up
1964
+ optimizations may be used. For example, a minimum length for the subject may be
1965
+ recorded. Consider the pattern
1966
+ <pre>
1967
+ (*MARK:A)(X|Y)
1968
+ </pre>
1969
+ The minimum length for a match is one character. If the subject is "ABC", there
1970
+ will be attempts to match "ABC", "BC", "C", and then finally an empty string.
1971
+ If the pattern is studied, the final attempt does not take place, because PCRE
1972
+ knows that the subject is too short, and so the (*MARK) is never encountered.
1973
+ In this case, studying the pattern does not affect the overall match result,
1974
+ which is still "no match", but it does affect the auxiliary information that is
1975
+ returned.
1976
+ <pre>
1977
+ PCRE_NO_UTF8_CHECK
1978
+ </pre>
1979
+ When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
1980
+ string is automatically checked when <b>pcre_exec()</b> is subsequently called.
1981
+ The entire string is checked before any other processing takes place. The value
1982
+ of <i>startoffset</i> is also checked to ensure that it points to the start of a
1983
+ UTF-8 character. There is a discussion about the
1984
+ <a href="pcreunicode.html#utf8strings">validity of UTF-8 strings</a>
1985
+ in the
1986
+ <a href="pcreunicode.html"><b>pcreunicode</b></a>
1987
+ page. If an invalid sequence of bytes is found, <b>pcre_exec()</b> returns the
1988
+ error PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is a
1989
+ truncated character at the end of the subject, PCRE_ERROR_SHORTUTF8. In both
1990
+ cases, information about the precise nature of the error may also be returned
1991
+ (see the descriptions of these errors in the section entitled <i>Error return
1992
+ values from</i> <b>pcre_exec()</b>
1993
+ <a href="#errorlist">below).</a>
1994
+ If <i>startoffset</i> contains a value that does not point to the start of a
1995
+ UTF-8 character (or to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is
1996
+ returned.
1997
+ </P>
1998
+ <P>
1999
+ If you already know that your subject is valid, and you want to skip these
2000
+ checks for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
2001
+ calling <b>pcre_exec()</b>. You might want to do this for the second and
2002
+ subsequent calls to <b>pcre_exec()</b> if you are making repeated calls to find
2003
+ all the matches in a single subject string. However, you should be sure that
2004
+ the value of <i>startoffset</i> points to the start of a character (or the end
2005
+ of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
2006
+ invalid string as a subject or an invalid value of <i>startoffset</i> is
2007
+ undefined. Your program may crash or loop.
2008
+ <pre>
2009
+ PCRE_PARTIAL_HARD
2010
+ PCRE_PARTIAL_SOFT
2011
+ </pre>
2012
+ These options turn on the partial matching feature. For backwards
2013
+ compatibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial match
2014
+ occurs if the end of the subject string is reached successfully, but there are
2015
+ not enough subject characters to complete the match. If this happens when
2016
+ PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set, matching continues by
2017
+ testing any remaining alternatives. Only if no complete match can be found is
2018
+ PCRE_ERROR_PARTIAL returned instead of PCRE_ERROR_NOMATCH. In other words,
2019
+ PCRE_PARTIAL_SOFT says that the caller is prepared to handle a partial match,
2020
+ but only if no complete match can be found.
2021
+ </P>
2022
+ <P>
2023
+ If PCRE_PARTIAL_HARD is set, it overrides PCRE_PARTIAL_SOFT. In this case, if a
2024
+ partial match is found, <b>pcre_exec()</b> immediately returns
2025
+ PCRE_ERROR_PARTIAL, without considering any other alternatives. In other words,
2026
+ when PCRE_PARTIAL_HARD is set, a partial match is considered to be more
2027
+ important than an alternative complete match.
2028
+ </P>
2029
+ <P>
2030
+ In both cases, the portion of the string that was inspected when the partial
2031
+ match was found is set as the first matching string. There is a more detailed
2032
+ discussion of partial and multi-segment matching, with examples, in the
2033
+ <a href="pcrepartial.html"><b>pcrepartial</b></a>
2034
+ documentation.
2035
+ </P>
2036
+ <br><b>
2037
+ The string to be matched by <b>pcre_exec()</b>
2038
+ </b><br>
2039
+ <P>
2040
+ The subject string is passed to <b>pcre_exec()</b> as a pointer in
2041
+ <i>subject</i>, a length in <i>length</i>, and a starting offset in
2042
+ <i>startoffset</i>. The units for <i>length</i> and <i>startoffset</i> are bytes
2043
+ for the 8-bit library, 16-bit data items for the 16-bit library, and 32-bit
2044
+ data items for the 32-bit library.
2045
+ </P>
2046
+ <P>
2047
+ If <i>startoffset</i> is negative or greater than the length of the subject,
2048
+ <b>pcre_exec()</b> returns PCRE_ERROR_BADOFFSET. When the starting offset is
2049
+ zero, the search for a match starts at the beginning of the subject, and this
2050
+ is by far the most common case. In UTF-8 or UTF-16 mode, the offset must point
2051
+ to the start of a character, or the end of the subject (in UTF-32 mode, one
2052
+ data unit equals one character, so all offsets are valid). Unlike the pattern
2053
+ string, the subject may contain binary zeroes.
2054
+ </P>
2055
+ <P>
2056
+ A non-zero starting offset is useful when searching for another match in the
2057
+ same subject by calling <b>pcre_exec()</b> again after a previous success.
2058
+ Setting <i>startoffset</i> differs from just passing over a shortened string and
2059
+ setting PCRE_NOTBOL in the case of a pattern that begins with any kind of
2060
+ lookbehind. For example, consider the pattern
2061
+ <pre>
2062
+ \Biss\B
2063
+ </pre>
2064
+ which finds occurrences of "iss" in the middle of words. (\B matches only if
2065
+ the current position in the subject is not a word boundary.) When applied to
2066
+ the string "Mississippi" the first call to <b>pcre_exec()</b> finds the first
2067
+ occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
2068
+ subject, namely "issippi", it does not match, because \B is always false at the
2069
+ start of the subject, which is deemed to be a word boundary. However, if
2070
+ <b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
2071
+ set to 4, it finds the second occurrence of "iss" because it is able to look
2072
+ behind the starting point to discover that it is preceded by a letter.
2073
+ </P>
2074
+ <P>
2075
+ Finding all the matches in a subject is tricky when the pattern can match an
2076
+ empty string. It is possible to emulate Perl's /g behaviour by first trying the
2077
+ match again at the same offset, with the PCRE_NOTEMPTY_ATSTART and
2078
+ PCRE_ANCHORED options, and then if that fails, advancing the starting offset
2079
+ and trying an ordinary match again. There is some code that demonstrates how to
2080
+ do this in the
2081
+ <a href="pcredemo.html"><b>pcredemo</b></a>
2082
+ sample program. In the most general case, you have to check to see if the
2083
+ newline convention recognizes CRLF as a newline, and if so, and the current
2084
+ character is CR followed by LF, advance the starting offset by two characters
2085
+ instead of one.
2086
+ </P>
2087
+ <P>
2088
+ If a non-zero starting offset is passed when the pattern is anchored, one
2089
+ attempt to match at the given offset is made. This can only succeed if the
2090
+ pattern does not require the match to be at the start of the subject.
2091
+ </P>
2092
+ <br><b>
2093
+ How <b>pcre_exec()</b> returns captured substrings
2094
+ </b><br>
2095
+ <P>
2096
+ In general, a pattern matches a certain portion of the subject, and in
2097
+ addition, further substrings from the subject may be picked out by parts of the
2098
+ pattern. Following the usage in Jeffrey Friedl's book, this is called
2099
+ "capturing" in what follows, and the phrase "capturing subpattern" is used for
2100
+ a fragment of a pattern that picks out a substring. PCRE supports several other
2101
+ kinds of parenthesized subpattern that do not cause substrings to be captured.
2102
+ </P>
2103
+ <P>
2104
+ Captured substrings are returned to the caller via a vector of integers whose
2105
+ address is passed in <i>ovector</i>. The number of elements in the vector is
2106
+ passed in <i>ovecsize</i>, which must be a non-negative number. <b>Note</b>: this
2107
+ argument is NOT the size of <i>ovector</i> in bytes.
2108
+ </P>
2109
+ <P>
2110
+ The first two-thirds of the vector is used to pass back captured substrings,
2111
+ each substring using a pair of integers. The remaining third of the vector is
2112
+ used as workspace by <b>pcre_exec()</b> while matching capturing subpatterns,
2113
+ and is not available for passing back information. The number passed in
2114
+ <i>ovecsize</i> should always be a multiple of three. If it is not, it is
2115
+ rounded down.
2116
+ </P>
2117
+ <P>
2118
+ When a match is successful, information about captured substrings is returned
2119
+ in pairs of integers, starting at the beginning of <i>ovector</i>, and
2120
+ continuing up to two-thirds of its length at the most. The first element of
2121
+ each pair is set to the offset of the first character in a substring, and the
2122
+ second is set to the offset of the first character after the end of a
2123
+ substring. These values are always data unit offsets, even in UTF mode. They
2124
+ are byte offsets in the 8-bit library, 16-bit data item offsets in the 16-bit
2125
+ library, and 32-bit data item offsets in the 32-bit library. <b>Note</b>: they
2126
+ are not character counts.
2127
+ </P>
2128
+ <P>
2129
+ The first pair of integers, <i>ovector[0]</i> and <i>ovector[1]</i>, identify the
2130
+ portion of the subject string matched by the entire pattern. The next pair is
2131
+ used for the first capturing subpattern, and so on. The value returned by
2132
+ <b>pcre_exec()</b> is one more than the highest numbered pair that has been set.
2133
+ For example, if two substrings have been captured, the returned value is 3. If
2134
+ there are no capturing subpatterns, the return value from a successful match is
2135
+ 1, indicating that just the first pair of offsets has been set.
2136
+ </P>
2137
+ <P>
2138
+ If a capturing subpattern is matched repeatedly, it is the last portion of the
2139
+ string that it matched that is returned.
2140
+ </P>
2141
+ <P>
2142
+ If the vector is too small to hold all the captured substring offsets, it is
2143
+ used as far as possible (up to two-thirds of its length), and the function
2144
+ returns a value of zero. If neither the actual string matched nor any captured
2145
+ substrings are of interest, <b>pcre_exec()</b> may be called with <i>ovector</i>
2146
+ passed as NULL and <i>ovecsize</i> as zero. However, if the pattern contains
2147
+ back references and the <i>ovector</i> is not big enough to remember the related
2148
+ substrings, PCRE has to get additional memory for use during matching. Thus it
2149
+ is usually advisable to supply an <i>ovector</i> of reasonable size.
2150
+ </P>
2151
+ <P>
2152
+ There are some cases where zero is returned (indicating vector overflow) when
2153
+ in fact the vector is exactly the right size for the final match. For example,
2154
+ consider the pattern
2155
+ <pre>
2156
+ (a)(?:(b)c|bd)
2157
+ </pre>
2158
+ If a vector of 6 elements (allowing for only 1 captured substring) is given
2159
+ with subject string "abd", <b>pcre_exec()</b> will try to set the second
2160
+ captured string, thereby recording a vector overflow, before failing to match
2161
+ "c" and backing up to try the second alternative. The zero return, however,
2162
+ does correctly indicate that the maximum number of slots (namely 2) have been
2163
+ filled. In similar cases where there is temporary overflow, but the final
2164
+ number of used slots is actually less than the maximum, a non-zero value is
2165
+ returned.
2166
+ </P>
2167
+ <P>
2168
+ The <b>pcre_fullinfo()</b> function can be used to find out how many capturing
2169
+ subpatterns there are in a compiled pattern. The smallest size for
2170
+ <i>ovector</i> that will allow for <i>n</i> captured substrings, in addition to
2171
+ the offsets of the substring matched by the whole pattern, is (<i>n</i>+1)*3.
2172
+ </P>
2173
+ <P>
2174
+ It is possible for capturing subpattern number <i>n+1</i> to match some part of
2175
+ the subject when subpattern <i>n</i> has not been used at all. For example, if
2176
+ the string "abc" is matched against the pattern (a|(z))(bc) the return from the
2177
+ function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
2178
+ happens, both values in the offset pairs corresponding to unused subpatterns
2179
+ are set to -1.
2180
+ </P>
2181
+ <P>
2182
+ Offset values that correspond to unused subpatterns at the end of the
2183
+ expression are also set to -1. For example, if the string "abc" is matched
2184
+ against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched. The
2185
+ return from the function is 2, because the highest used capturing subpattern
2186
+ number is 1, and the offsets for the second and third capturing subpatterns
2187
+ (assuming the vector is large enough, of course) are set to -1.
2188
+ </P>
2189
+ <P>
2190
+ <b>Note</b>: Elements in the first two-thirds of <i>ovector</i> that do not
2191
+ correspond to capturing parentheses in the pattern are never changed. That is,
2192
+ if a pattern contains <i>n</i> capturing parentheses, no more than
2193
+ <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by <b>pcre_exec()</b>. The other
2194
+ elements (in the first two-thirds) retain whatever values they previously had.
2195
+ </P>
2196
+ <P>
2197
+ Some convenience functions are provided for extracting the captured substrings
2198
+ as separate strings. These are described below.
2199
+ <a name="errorlist"></a></P>
2200
+ <br><b>
2201
+ Error return values from <b>pcre_exec()</b>
2202
+ </b><br>
2203
+ <P>
2204
+ If <b>pcre_exec()</b> fails, it returns a negative number. The following are
2205
+ defined in the header file:
2206
+ <pre>
2207
+ PCRE_ERROR_NOMATCH (-1)
2208
+ </pre>
2209
+ The subject string did not match the pattern.
2210
+ <pre>
2211
+ PCRE_ERROR_NULL (-2)
2212
+ </pre>
2213
+ Either <i>code</i> or <i>subject</i> was passed as NULL, or <i>ovector</i> was
2214
+ NULL and <i>ovecsize</i> was not zero.
2215
+ <pre>
2216
+ PCRE_ERROR_BADOPTION (-3)
2217
+ </pre>
2218
+ An unrecognized bit was set in the <i>options</i> argument.
2219
+ <pre>
2220
+ PCRE_ERROR_BADMAGIC (-4)
2221
+ </pre>
2222
+ PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch
2223
+ the case when it is passed a junk pointer and to detect when a pattern that was
2224
+ compiled in an environment of one endianness is run in an environment with the
2225
+ other endianness. This is the error that PCRE gives when the magic number is
2226
+ not present.
2227
+ <pre>
2228
+ PCRE_ERROR_UNKNOWN_OPCODE (-5)
2229
+ </pre>
2230
+ While running the pattern match, an unknown item was encountered in the
2231
+ compiled pattern. This error could be caused by a bug in PCRE or by overwriting
2232
+ of the compiled pattern.
2233
+ <pre>
2234
+ PCRE_ERROR_NOMEMORY (-6)
2235
+ </pre>
2236
+ If a pattern contains back references, but the <i>ovector</i> that is passed to
2237
+ <b>pcre_exec()</b> is not big enough to remember the referenced substrings, PCRE
2238
+ gets a block of memory at the start of matching to use for this purpose. If the
2239
+ call via <b>pcre_malloc()</b> fails, this error is given. The memory is
2240
+ automatically freed at the end of matching.
2241
+ </P>
2242
+ <P>
2243
+ This error is also given if <b>pcre_stack_malloc()</b> fails in
2244
+ <b>pcre_exec()</b>. This can happen only when PCRE has been compiled with
2245
+ <b>--disable-stack-for-recursion</b>.
2246
+ <pre>
2247
+ PCRE_ERROR_NOSUBSTRING (-7)
2248
+ </pre>
2249
+ This error is used by the <b>pcre_copy_substring()</b>,
2250
+ <b>pcre_get_substring()</b>, and <b>pcre_get_substring_list()</b> functions (see
2251
+ below). It is never returned by <b>pcre_exec()</b>.
2252
+ <pre>
2253
+ PCRE_ERROR_MATCHLIMIT (-8)
2254
+ </pre>
2255
+ The backtracking limit, as specified by the <i>match_limit</i> field in a
2256
+ <b>pcre_extra</b> structure (or defaulted) was reached. See the description
2257
+ above.
2258
+ <pre>
2259
+ PCRE_ERROR_CALLOUT (-9)
2260
+ </pre>
2261
+ This error is never generated by <b>pcre_exec()</b> itself. It is provided for
2262
+ use by callout functions that want to yield a distinctive error code. See the
2263
+ <a href="pcrecallout.html"><b>pcrecallout</b></a>
2264
+ documentation for details.
2265
+ <pre>
2266
+ PCRE_ERROR_BADUTF8 (-10)
2267
+ </pre>
2268
+ A string that contains an invalid UTF-8 byte sequence was passed as a subject,
2269
+ and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector
2270
+ (<i>ovecsize</i>) is at least 2, the byte offset to the start of the invalid
2271
+ UTF-8 character is placed in the first element, and a reason code is placed in
2272
+ the second element. The reason codes are listed in the
2273
+ <a href="#badutf8reasons">following section.</a>
2274
+ For backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a
2275
+ truncated UTF-8 character at the end of the subject (reason codes 1 to 5),
2276
+ PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8.
2277
+ <pre>
2278
+ PCRE_ERROR_BADUTF8_OFFSET (-11)
2279
+ </pre>
2280
+ The UTF-8 byte sequence that was passed as a subject was checked and found to
2281
+ be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of
2282
+ <i>startoffset</i> did not point to the beginning of a UTF-8 character or the
2283
+ end of the subject.
2284
+ <pre>
2285
+ PCRE_ERROR_PARTIAL (-12)
2286
+ </pre>
2287
+ The subject string did not match, but it did match partially. See the
2288
+ <a href="pcrepartial.html"><b>pcrepartial</b></a>
2289
+ documentation for details of partial matching.
2290
+ <pre>
2291
+ PCRE_ERROR_BADPARTIAL (-13)
2292
+ </pre>
2293
+ This code is no longer in use. It was formerly returned when the PCRE_PARTIAL
2294
+ option was used with a compiled pattern containing items that were not
2295
+ supported for partial matching. From release 8.00 onwards, there are no
2296
+ restrictions on partial matching.
2297
+ <pre>
2298
+ PCRE_ERROR_INTERNAL (-14)
2299
+ </pre>
2300
+ An unexpected internal error has occurred. This error could be caused by a bug
2301
+ in PCRE or by overwriting of the compiled pattern.
2302
+ <pre>
2303
+ PCRE_ERROR_BADCOUNT (-15)
2304
+ </pre>
2305
+ This error is given if the value of the <i>ovecsize</i> argument is negative.
2306
+ <pre>
2307
+ PCRE_ERROR_RECURSIONLIMIT (-21)
2308
+ </pre>
2309
+ The internal recursion limit, as specified by the <i>match_limit_recursion</i>
2310
+ field in a <b>pcre_extra</b> structure (or defaulted) was reached. See the
2311
+ description above.
2312
+ <pre>
2313
+ PCRE_ERROR_BADNEWLINE (-23)
2314
+ </pre>
2315
+ An invalid combination of PCRE_NEWLINE_<i>xxx</i> options was given.
2316
+ <pre>
2317
+ PCRE_ERROR_BADOFFSET (-24)
2318
+ </pre>
2319
+ The value of <i>startoffset</i> was negative or greater than the length of the
2320
+ subject, that is, the value in <i>length</i>.
2321
+ <pre>
2322
+ PCRE_ERROR_SHORTUTF8 (-25)
2323
+ </pre>
2324
+ This error is returned instead of PCRE_ERROR_BADUTF8 when the subject string
2325
+ ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set.
2326
+ Information about the failure is returned as for PCRE_ERROR_BADUTF8. It is in
2327
+ fact sufficient to detect this case, but this special error code for
2328
+ PCRE_PARTIAL_HARD precedes the implementation of returned information; it is
2329
+ retained for backwards compatibility.
2330
+ <pre>
2331
+ PCRE_ERROR_RECURSELOOP (-26)
2332
+ </pre>
2333
+ This error is returned when <b>pcre_exec()</b> detects a recursion loop within
2334
+ the pattern. Specifically, it means that either the whole pattern or a
2335
+ subpattern has been called recursively for the second time at the same position
2336
+ in the subject string. Some simple patterns that might do this are detected and
2337
+ faulted at compile time, but more complicated cases, in particular mutual
2338
+ recursions between two different subpatterns, cannot be detected until run
2339
+ time.
2340
+ <pre>
2341
+ PCRE_ERROR_JIT_STACKLIMIT (-27)
2342
+ </pre>
2343
+ This error is returned when a pattern that was successfully studied using a
2344
+ JIT compile option is being matched, but the memory available for the
2345
+ just-in-time processing stack is not large enough. See the
2346
+ <a href="pcrejit.html"><b>pcrejit</b></a>
2347
+ documentation for more details.
2348
+ <pre>
2349
+ PCRE_ERROR_BADMODE (-28)
2350
+ </pre>
2351
+ This error is given if a pattern that was compiled by the 8-bit library is
2352
+ passed to a 16-bit or 32-bit library function, or vice versa.
2353
+ <pre>
2354
+ PCRE_ERROR_BADENDIANNESS (-29)
2355
+ </pre>
2356
+ This error is given if a pattern that was compiled and saved is reloaded on a
2357
+ host with different endianness. The utility function
2358
+ <b>pcre_pattern_to_host_byte_order()</b> can be used to convert such a pattern
2359
+ so that it runs on the new host.
2360
+ <pre>
2361
+ PCRE_ERROR_JIT_BADOPTION (-31)
2362
+ </pre>
2363
+ This error is returned when a pattern that was successfully studied using a JIT
2364
+ compile option is being matched, but the matching mode (partial or complete
2365
+ match) does not correspond to any JIT compilation mode. When the JIT fast path
2366
+ function is used, this error may be also given for invalid options. See the
2367
+ <a href="pcrejit.html"><b>pcrejit</b></a>
2368
+ documentation for more details.
2369
+ <pre>
2370
+ PCRE_ERROR_BADLENGTH (-32)
2371
+ </pre>
2372
+ This error is given if <b>pcre_exec()</b> is called with a negative value for
2373
+ the <i>length</i> argument.
2374
+ </P>
2375
+ <P>
2376
+ Error numbers -16 to -20, -22, and -30 are not used by <b>pcre_exec()</b>.
2377
+ <a name="badutf8reasons"></a></P>
2378
+ <br><b>
2379
+ Reason codes for invalid UTF-8 strings
2380
+ </b><br>
2381
+ <P>
2382
+ This section applies only to the 8-bit library. The corresponding information
2383
+ for the 16-bit and 32-bit libraries is given in the
2384
+ <a href="pcre16.html"><b>pcre16</b></a>
2385
+ and
2386
+ <a href="pcre32.html"><b>pcre32</b></a>
2387
+ pages.
2388
+ </P>
2389
+ <P>
2390
+ When <b>pcre_exec()</b> returns either PCRE_ERROR_BADUTF8 or
2391
+ PCRE_ERROR_SHORTUTF8, and the size of the output vector (<i>ovecsize</i>) is at
2392
+ least 2, the offset of the start of the invalid UTF-8 character is placed in
2393
+ the first output vector element (<i>ovector[0]</i>) and a reason code is placed
2394
+ in the second element (<i>ovector[1]</i>). The reason codes are given names in
2395
+ the <b>pcre.h</b> header file:
2396
+ <pre>
2397
+ PCRE_UTF8_ERR1
2398
+ PCRE_UTF8_ERR2
2399
+ PCRE_UTF8_ERR3
2400
+ PCRE_UTF8_ERR4
2401
+ PCRE_UTF8_ERR5
2402
+ </pre>
2403
+ The string ends with a truncated UTF-8 character; the code specifies how many
2404
+ bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
2405
+ no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
2406
+ allows for up to 6 bytes, and this is checked first; hence the possibility of
2407
+ 4 or 5 missing bytes.
2408
+ <pre>
2409
+ PCRE_UTF8_ERR6
2410
+ PCRE_UTF8_ERR7
2411
+ PCRE_UTF8_ERR8
2412
+ PCRE_UTF8_ERR9
2413
+ PCRE_UTF8_ERR10
2414
+ </pre>
2415
+ The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
2416
+ character do not have the binary value 0b10 (that is, either the most
2417
+ significant bit is 0, or the next bit is 1).
2418
+ <pre>
2419
+ PCRE_UTF8_ERR11
2420
+ PCRE_UTF8_ERR12
2421
+ </pre>
2422
+ A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
2423
+ these code points are excluded by RFC 3629.
2424
+ <pre>
2425
+ PCRE_UTF8_ERR13
2426
+ </pre>
2427
+ A 4-byte character has a value greater than 0x10ffff; these code points are
2428
+ excluded by RFC 3629.
2429
+ <pre>
2430
+ PCRE_UTF8_ERR14
2431
+ </pre>
2432
+ A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
2433
+ code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
2434
+ from UTF-8.
2435
+ <pre>
2436
+ PCRE_UTF8_ERR15
2437
+ PCRE_UTF8_ERR16
2438
+ PCRE_UTF8_ERR17
2439
+ PCRE_UTF8_ERR18
2440
+ PCRE_UTF8_ERR19
2441
+ </pre>
2442
+ A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
2443
+ value that can be represented by fewer bytes, which is invalid. For example,
2444
+ the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
2445
+ one byte.
2446
+ <pre>
2447
+ PCRE_UTF8_ERR20
2448
+ </pre>
2449
+ The two most significant bits of the first byte of a character have the binary
2450
+ value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
2451
+ byte can only validly occur as the second or subsequent byte of a multi-byte
2452
+ character.
2453
+ <pre>
2454
+ PCRE_UTF8_ERR21
2455
+ </pre>
2456
+ The first byte of a character has the value 0xfe or 0xff. These values can
2457
+ never occur in a valid UTF-8 string.
2458
+ <pre>
2459
+ PCRE_UTF8_ERR22
2460
+ </pre>
2461
+ This error code was formerly used when the presence of a so-called
2462
+ "non-character" caused an error. Unicode corrigendum #9 makes it clear that
2463
+ such characters should not cause a string to be rejected, and so this code is
2464
+ no longer in use and is never returned.
2465
+ </P>
2466
+ <br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
2467
+ <P>
2468
+ <b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
2469
+ <b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
2470
+ <b> int <i>buffersize</i>);</b>
2471
+ <br>
2472
+ <br>
2473
+ <b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
2474
+ <b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
2475
+ <b> const char **<i>stringptr</i>);</b>
2476
+ <br>
2477
+ <br>
2478
+ <b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
2479
+ <b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
2480
+ </P>
2481
+ <P>
2482
+ Captured substrings can be accessed directly by using the offsets returned by
2483
+ <b>pcre_exec()</b> in <i>ovector</i>. For convenience, the functions
2484
+ <b>pcre_copy_substring()</b>, <b>pcre_get_substring()</b>, and
2485
+ <b>pcre_get_substring_list()</b> are provided for extracting captured substrings
2486
+ as new, separate, zero-terminated strings. These functions identify substrings
2487
+ by number. The next section describes functions for extracting named
2488
+ substrings.
2489
+ </P>
2490
+ <P>
2491
+ A substring that contains a binary zero is correctly extracted and has a
2492
+ further zero added on the end, but the result is not, of course, a C string.
2493
+ However, you can process such a string by referring to the length that is
2494
+ returned by <b>pcre_copy_substring()</b> and <b>pcre_get_substring()</b>.
2495
+ Unfortunately, the interface to <b>pcre_get_substring_list()</b> is not adequate
2496
+ for handling strings containing binary zeros, because the end of the final
2497
+ string is not independently indicated.
2498
+ </P>
2499
+ <P>
2500
+ The first three arguments are the same for all three of these functions:
2501
+ <i>subject</i> is the subject string that has just been successfully matched,
2502
+ <i>ovector</i> is a pointer to the vector of integer offsets that was passed to
2503
+ <b>pcre_exec()</b>, and <i>stringcount</i> is the number of substrings that were
2504
+ captured by the match, including the substring that matched the entire regular
2505
+ expression. This is the value returned by <b>pcre_exec()</b> if it is greater
2506
+ than zero. If <b>pcre_exec()</b> returned zero, indicating that it ran out of
2507
+ space in <i>ovector</i>, the value passed as <i>stringcount</i> should be the
2508
+ number of elements in the vector divided by three.
2509
+ </P>
2510
+ <P>
2511
+ The functions <b>pcre_copy_substring()</b> and <b>pcre_get_substring()</b>
2512
+ extract a single substring, whose number is given as <i>stringnumber</i>. A
2513
+ value of zero extracts the substring that matched the entire pattern, whereas
2514
+ higher values extract the captured substrings. For <b>pcre_copy_substring()</b>,
2515
+ the string is placed in <i>buffer</i>, whose length is given by
2516
+ <i>buffersize</i>, while for <b>pcre_get_substring()</b> a new block of memory is
2517
+ obtained via <b>pcre_malloc</b>, and its address is returned via
2518
+ <i>stringptr</i>. The yield of the function is the length of the string, not
2519
+ including the terminating zero, or one of these error codes:
2520
+ <pre>
2521
+ PCRE_ERROR_NOMEMORY (-6)
2522
+ </pre>
2523
+ The buffer was too small for <b>pcre_copy_substring()</b>, or the attempt to get
2524
+ memory failed for <b>pcre_get_substring()</b>.
2525
+ <pre>
2526
+ PCRE_ERROR_NOSUBSTRING (-7)
2527
+ </pre>
2528
+ There is no substring whose number is <i>stringnumber</i>.
2529
+ </P>
2530
+ <P>
2531
+ The <b>pcre_get_substring_list()</b> function extracts all available substrings
2532
+ and builds a list of pointers to them. All this is done in a single block of
2533
+ memory that is obtained via <b>pcre_malloc</b>. The address of the memory block
2534
+ is returned via <i>listptr</i>, which is also the start of the list of string
2535
+ pointers. The end of the list is marked by a NULL pointer. The yield of the
2536
+ function is zero if all went well, or the error code
2537
+ <pre>
2538
+ PCRE_ERROR_NOMEMORY (-6)
2539
+ </pre>
2540
+ if the attempt to get the memory block failed.
2541
+ </P>
2542
+ <P>
2543
+ When any of these functions encounter a substring that is unset, which can
2544
+ happen when capturing subpattern number <i>n+1</i> matches some part of the
2545
+ subject, but subpattern <i>n</i> has not been used at all, they return an empty
2546
+ string. This can be distinguished from a genuine zero-length substring by
2547
+ inspecting the appropriate offset in <i>ovector</i>, which is negative for unset
2548
+ substrings.
2549
+ </P>
2550
+ <P>
2551
+ The two convenience functions <b>pcre_free_substring()</b> and
2552
+ <b>pcre_free_substring_list()</b> can be used to free the memory returned by
2553
+ a previous call of <b>pcre_get_substring()</b> or
2554
+ <b>pcre_get_substring_list()</b>, respectively. They do nothing more than call
2555
+ the function pointed to by <b>pcre_free</b>, which of course could be called
2556
+ directly from a C program. However, PCRE is used in some situations where it is
2557
+ linked via a special interface to another programming language that cannot use
2558
+ <b>pcre_free</b> directly; it is for these cases that the functions are
2559
+ provided.
2560
+ </P>
2561
+ <br><a name="SEC19" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
2562
+ <P>
2563
+ <b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
2564
+ <b> const char *<i>name</i>);</b>
2565
+ <br>
2566
+ <br>
2567
+ <b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
2568
+ <b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
2569
+ <b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
2570
+ <b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
2571
+ <br>
2572
+ <br>
2573
+ <b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
2574
+ <b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
2575
+ <b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
2576
+ <b> const char **<i>stringptr</i>);</b>
2577
+ </P>
2578
+ <P>
2579
+ To extract a substring by name, you first have to find the associated number.
2580
+ For example, for this pattern
2581
+ <pre>
2582
+ (a+)b(?&#60;xxx&#62;\d+)...
2583
+ </pre>
2584
+ the number of the subpattern called "xxx" is 2. If the name is known to be
2585
+ unique (PCRE_DUPNAMES was not set), you can find the number from the name by
2586
+ calling <b>pcre_get_stringnumber()</b>. The first argument is the compiled
2587
+ pattern, and the second is the name. The yield of the function is the
2588
+ subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no subpattern of
2589
+ that name.
2590
+ </P>
2591
+ <P>
2592
+ Given the number, you can extract the substring directly, or use one of the
2593
+ functions described in the previous section. For convenience, there are also
2594
+ two functions that do the whole job.
2595
+ </P>
2596
+ <P>
2597
+ Most of the arguments of <b>pcre_copy_named_substring()</b> and
2598
+ <b>pcre_get_named_substring()</b> are the same as those for the similarly named
2599
+ functions that extract by number. As these are described in the previous
2600
+ section, they are not re-described here. There are just two differences:
2601
+ </P>
2602
+ <P>
2603
+ First, instead of a substring number, a substring name is given. Second, there
2604
+ is an extra argument, given at the start, which is a pointer to the compiled
2605
+ pattern. This is needed in order to gain access to the name-to-number
2606
+ translation table.
2607
+ </P>
2608
+ <P>
2609
+ These functions call <b>pcre_get_stringnumber()</b>, and if it succeeds, they
2610
+ then call <b>pcre_copy_substring()</b> or <b>pcre_get_substring()</b>, as
2611
+ appropriate. <b>NOTE:</b> If PCRE_DUPNAMES is set and there are duplicate names,
2612
+ the behaviour may not be what you want (see the next section).
2613
+ </P>
2614
+ <P>
2615
+ <b>Warning:</b> If the pattern uses the (?| feature to set up multiple
2616
+ subpatterns with the same number, as described in the
2617
+ <a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
2618
+ in the
2619
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
2620
+ page, you cannot use names to distinguish the different subpatterns, because
2621
+ names are not included in the compiled code. The matching process uses only
2622
+ numbers. For this reason, the use of different names for subpatterns of the
2623
+ same number causes an error at compile time.
2624
+ </P>
2625
+ <br><a name="SEC20" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
2626
+ <P>
2627
+ <b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
2628
+ <b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
2629
+ </P>
2630
+ <P>
2631
+ When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
2632
+ are not required to be unique. (Duplicate names are always allowed for
2633
+ subpatterns with the same number, created by using the (?| feature. Indeed, if
2634
+ such subpatterns are named, they are required to use the same names.)
2635
+ </P>
2636
+ <P>
2637
+ Normally, patterns with duplicate names are such that in any one match, only
2638
+ one of the named subpatterns participates. An example is shown in the
2639
+ <a href="pcrepattern.html"><b>pcrepattern</b></a>
2640
+ documentation.
2641
+ </P>
2642
+ <P>
2643
+ When duplicates are present, <b>pcre_copy_named_substring()</b> and
2644
+ <b>pcre_get_named_substring()</b> return the first substring corresponding to
2645
+ the given name that is set. If none are set, PCRE_ERROR_NOSUBSTRING (-7) is
2646
+ returned; no data is returned. The <b>pcre_get_stringnumber()</b> function
2647
+ returns one of the numbers that are associated with the name, but it is not
2648
+ defined which it is.
2649
+ </P>
2650
+ <P>
2651
+ If you want to get full details of all captured substrings for a given name,
2652
+ you must use the <b>pcre_get_stringtable_entries()</b> function. The first
2653
+ argument is the compiled pattern, and the second is the name. The third and
2654
+ fourth are pointers to variables which are updated by the function. After it
2655
+ has run, they point to the first and last entries in the name-to-number table
2656
+ for the given name. The function itself returns the length of each entry, or
2657
+ PCRE_ERROR_NOSUBSTRING (-7) if there are none. The format of the table is
2658
+ described in the section entitled <i>Information about a pattern</i>
2659
+ <a href="#infoaboutpattern">above.</a>
2660
+ Given all the relevant entries for the name, you can extract each of their
2661
+ numbers, and hence the captured data, if any.
2662
+ </P>
2663
+ <br><a name="SEC21" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>
2664
+ <P>
2665
+ The traditional matching function uses a similar algorithm to Perl, which stops
2666
+ when it finds the first match, starting at a given point in the subject. If you
2667
+ want to find all possible matches, or the longest possible match, consider
2668
+ using the alternative matching function (see below) instead. If you cannot use
2669
+ the alternative function, but still need to find all possible matches, you
2670
+ can kludge it up by making use of the callout facility, which is described in
2671
+ the
2672
+ <a href="pcrecallout.html"><b>pcrecallout</b></a>
2673
+ documentation.
2674
+ </P>
2675
+ <P>
2676
+ What you have to do is to insert a callout right at the end of the pattern.
2677
+ When your callout function is called, extract and save the current matched
2678
+ substring. Then return 1, which forces <b>pcre_exec()</b> to backtrack and try
2679
+ other alternatives. Ultimately, when it runs out of matches, <b>pcre_exec()</b>
2680
+ will yield PCRE_ERROR_NOMATCH.
2681
+ </P>
2682
+ <br><a name="SEC22" href="#TOC1">OBTAINING AN ESTIMATE OF STACK USAGE</a><br>
2683
+ <P>
2684
+ Matching certain patterns using <b>pcre_exec()</b> can use a lot of process
2685
+ stack, which in certain environments can be rather limited in size. Some users
2686
+ find it helpful to have an estimate of the amount of stack that is used by
2687
+ <b>pcre_exec()</b>, to help them set recursion limits, as described in the
2688
+ <a href="pcrestack.html"><b>pcrestack</b></a>
2689
+ documentation. The estimate that is output by <b>pcretest</b> when called with
2690
+ the <b>-m</b> and <b>-C</b> options is obtained by calling <b>pcre_exec</b> with
2691
+ the values NULL, NULL, NULL, -999, and -999 for its first five arguments.
2692
+ </P>
2693
+ <P>
2694
+ Normally, if its first argument is NULL, <b>pcre_exec()</b> immediately returns
2695
+ the negative error code PCRE_ERROR_NULL, but with this special combination of
2696
+ arguments, it returns instead a negative number whose absolute value is the
2697
+ approximate stack frame size in bytes. (A negative number is used so that it is
2698
+ clear that no match has happened.) The value is approximate because in some
2699
+ cases, recursive calls to <b>pcre_exec()</b> occur when there are one or two
2700
+ additional variables on the stack.
2701
+ </P>
2702
+ <P>
2703
+ If PCRE has been compiled to use the heap instead of the stack for recursion,
2704
+ the value returned is the size of each block that is obtained from the heap.
2705
+ <a name="dfamatch"></a></P>
2706
+ <br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
2707
+ <P>
2708
+ <b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
2709
+ <b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
2710
+ <b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
2711
+ <b> int *<i>workspace</i>, int <i>wscount</i>);</b>
2712
+ </P>
2713
+ <P>
2714
+ The function <b>pcre_dfa_exec()</b> is called to match a subject string against
2715
+ a compiled pattern, using a matching algorithm that scans the subject string
2716
+ just once, and does not backtrack. This has different characteristics to the
2717
+ normal algorithm, and is not compatible with Perl. Some of the features of PCRE
2718
+ patterns are not supported. Nevertheless, there are times when this kind of
2719
+ matching can be useful. For a discussion of the two matching algorithms, and a
2720
+ list of features that <b>pcre_dfa_exec()</b> does not support, see the
2721
+ <a href="pcrematching.html"><b>pcrematching</b></a>
2722
+ documentation.
2723
+ </P>
2724
+ <P>
2725
+ The arguments for the <b>pcre_dfa_exec()</b> function are the same as for
2726
+ <b>pcre_exec()</b>, plus two extras. The <i>ovector</i> argument is used in a
2727
+ different way, and this is described below. The other common arguments are used
2728
+ in the same way as for <b>pcre_exec()</b>, so their description is not repeated
2729
+ here.
2730
+ </P>
2731
+ <P>
2732
+ The two additional arguments provide workspace for the function. The workspace
2733
+ vector should contain at least 20 elements. It is used for keeping track of
2734
+ multiple paths through the pattern tree. More workspace will be needed for
2735
+ patterns and subjects where there are a lot of potential matches.
2736
+ </P>
2737
+ <P>
2738
+ Here is an example of a simple call to <b>pcre_dfa_exec()</b>:
2739
+ <pre>
2740
+ int rc;
2741
+ int ovector[10];
2742
+ int wspace[20];
2743
+ rc = pcre_dfa_exec(
2744
+ re, /* result of pcre_compile() */
2745
+ NULL, /* we didn't study the pattern */
2746
+ "some string", /* the subject string */
2747
+ 11, /* the length of the subject string */
2748
+ 0, /* start at offset 0 in the subject */
2749
+ 0, /* default options */
2750
+ ovector, /* vector of integers for substring information */
2751
+ 10, /* number of elements (NOT size in bytes) */
2752
+ wspace, /* working space vector */
2753
+ 20); /* number of elements (NOT size in bytes) */
2754
+ </PRE>
2755
+ </P>
2756
+ <br><b>
2757
+ Option bits for <b>pcre_dfa_exec()</b>
2758
+ </b><br>
2759
+ <P>
2760
+ The unused bits of the <i>options</i> argument for <b>pcre_dfa_exec()</b> must be
2761
+ zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_<i>xxx</i>,
2762
+ PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
2763
+ PCRE_NO_UTF8_CHECK, PCRE_BSR_ANYCRLF, PCRE_BSR_UNICODE, PCRE_NO_START_OPTIMIZE,
2764
+ PCRE_PARTIAL_HARD, PCRE_PARTIAL_SOFT, PCRE_DFA_SHORTEST, and PCRE_DFA_RESTART.
2765
+ All but the last four of these are exactly the same as for <b>pcre_exec()</b>,
2766
+ so their description is not repeated here.
2767
+ <pre>
2768
+ PCRE_PARTIAL_HARD
2769
+ PCRE_PARTIAL_SOFT
2770
+ </pre>
2771
+ These have the same general effect as they do for <b>pcre_exec()</b>, but the
2772
+ details are slightly different. When PCRE_PARTIAL_HARD is set for
2773
+ <b>pcre_dfa_exec()</b>, it returns PCRE_ERROR_PARTIAL if the end of the subject
2774
+ is reached and there is still at least one matching possibility that requires
2775
+ additional characters. This happens even if some complete matches have also
2776
+ been found. When PCRE_PARTIAL_SOFT is set, the return code PCRE_ERROR_NOMATCH
2777
+ is converted into PCRE_ERROR_PARTIAL if the end of the subject is reached,
2778
+ there have been no complete matches, but there is still at least one matching
2779
+ possibility. The portion of the string that was inspected when the longest
2780
+ partial match was found is set as the first matching string in both cases.
2781
+ There is a more detailed discussion of partial and multi-segment matching, with
2782
+ examples, in the
2783
+ <a href="pcrepartial.html"><b>pcrepartial</b></a>
2784
+ documentation.
2785
+ <pre>
2786
+ PCRE_DFA_SHORTEST
2787
+ </pre>
2788
+ Setting the PCRE_DFA_SHORTEST option causes the matching algorithm to stop as
2789
+ soon as it has found one match. Because of the way the alternative algorithm
2790
+ works, this is necessarily the shortest possible match at the first possible
2791
+ matching point in the subject string.
2792
+ <pre>
2793
+ PCRE_DFA_RESTART
2794
+ </pre>
2795
+ When <b>pcre_dfa_exec()</b> returns a partial match, it is possible to call it
2796
+ again, with additional subject characters, and have it continue with the same
2797
+ match. The PCRE_DFA_RESTART option requests this action; when it is set, the
2798
+ <i>workspace</i> and <i>wscount</i> arguments must reference the same vector as
2799
+ before because data about the match so far is left in them after a partial
2800
+ match. There is more discussion of this facility in the
2801
+ <a href="pcrepartial.html"><b>pcrepartial</b></a>
2802
+ documentation.
2803
+ </P>
2804
+ <br><b>
2805
+ Successful returns from <b>pcre_dfa_exec()</b>
2806
+ </b><br>
2807
+ <P>
2808
+ When <b>pcre_dfa_exec()</b> succeeds, it may have matched more than one
2809
+ substring in the subject. Note, however, that all the matches from one run of
2810
+ the function start at the same point in the subject. The shorter matches are
2811
+ all initial substrings of the longer matches. For example, if the pattern
2812
+ <pre>
2813
+ &#60;.*&#62;
2814
+ </pre>
2815
+ is matched against the string
2816
+ <pre>
2817
+ This is &#60;something&#62; &#60;something else&#62; &#60;something further&#62; no more
2818
+ </pre>
2819
+ the three matched strings are
2820
+ <pre>
2821
+ &#60;something&#62;
2822
+ &#60;something&#62; &#60;something else&#62;
2823
+ &#60;something&#62; &#60;something else&#62; &#60;something further&#62;
2824
+ </pre>
2825
+ On success, the yield of the function is a number greater than zero, which is
2826
+ the number of matched substrings. The substrings themselves are returned in
2827
+ <i>ovector</i>. Each string uses two elements; the first is the offset to the
2828
+ start, and the second is the offset to the end. In fact, all the strings have
2829
+ the same start offset. (Space could have been saved by giving this only once,
2830
+ but it was decided to retain some compatibility with the way <b>pcre_exec()</b>
2831
+ returns data, even though the meaning of the strings is different.)
2832
+ </P>
2833
+ <P>
2834
+ The strings are returned in reverse order of length; that is, the longest
2835
+ matching string is given first. If there were too many matches to fit into
2836
+ <i>ovector</i>, the yield of the function is zero, and the vector is filled with
2837
+ the longest matches. Unlike <b>pcre_exec()</b>, <b>pcre_dfa_exec()</b> can use
2838
+ the entire <i>ovector</i> for returning matched strings.
2839
+ </P>
2840
+ <P>
2841
+ NOTE: PCRE's "auto-possessification" optimization usually applies to character
2842
+ repeats at the end of a pattern (as well as internally). For example, the
2843
+ pattern "a\d+" is compiled as if it were "a\d++" because there is no point
2844
+ even considering the possibility of backtracking into the repeated digits. For
2845
+ DFA matching, this means that only one possible match is found. If you really
2846
+ do want multiple matches in such cases, either use an ungreedy repeat
2847
+ ("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
2848
+ </P>
2849
+ <br><b>
2850
+ Error returns from <b>pcre_dfa_exec()</b>
2851
+ </b><br>
2852
+ <P>
2853
+ The <b>pcre_dfa_exec()</b> function returns a negative number when it fails.
2854
+ Many of the errors are the same as for <b>pcre_exec()</b>, and these are
2855
+ described
2856
+ <a href="#errorlist">above.</a>
2857
+ There are in addition the following errors that are specific to
2858
+ <b>pcre_dfa_exec()</b>:
2859
+ <pre>
2860
+ PCRE_ERROR_DFA_UITEM (-16)
2861
+ </pre>
2862
+ This return is given if <b>pcre_dfa_exec()</b> encounters an item in the pattern
2863
+ that it does not support, for instance, the use of \C or a back reference.
2864
+ <pre>
2865
+ PCRE_ERROR_DFA_UCOND (-17)
2866
+ </pre>
2867
+ This return is given if <b>pcre_dfa_exec()</b> encounters a condition item that
2868
+ uses a back reference for the condition, or a test for recursion in a specific
2869
+ group. These are not supported.
2870
+ <pre>
2871
+ PCRE_ERROR_DFA_UMLIMIT (-18)
2872
+ </pre>
2873
+ This return is given if <b>pcre_dfa_exec()</b> is called with an <i>extra</i>
2874
+ block that contains a setting of the <i>match_limit</i> or
2875
+ <i>match_limit_recursion</i> fields. This is not supported (these fields are
2876
+ meaningless for DFA matching).
2877
+ <pre>
2878
+ PCRE_ERROR_DFA_WSSIZE (-19)
2879
+ </pre>
2880
+ This return is given if <b>pcre_dfa_exec()</b> runs out of space in the
2881
+ <i>workspace</i> vector.
2882
+ <pre>
2883
+ PCRE_ERROR_DFA_RECURSE (-20)
2884
+ </pre>
2885
+ When a recursive subpattern is processed, the matching function calls itself
2886
+ recursively, using private vectors for <i>ovector</i> and <i>workspace</i>. This
2887
+ error is given if the output vector is not large enough. This should be
2888
+ extremely rare, as a vector of size 1000 is used.
2889
+ <pre>
2890
+ PCRE_ERROR_DFA_BADRESTART (-30)
2891
+ </pre>
2892
+ When <b>pcre_dfa_exec()</b> is called with the <b>PCRE_DFA_RESTART</b> option,
2893
+ some plausibility checks are made on the contents of the workspace, which
2894
+ should contain data about the previous partial match. If any of these checks
2895
+ fail, this error is given.
2896
+ </P>
2897
+ <br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
2898
+ <P>
2899
+ <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcrebuild</b>(3), <b>pcrecallout</b>(3),
2900
+ <b>pcrecpp</b>(3), <b>pcrematching</b>(3), <b>pcrepartial</b>(3),
2901
+ <b>pcreposix</b>(3), <b>pcreprecompile</b>(3), <b>pcresample</b>(3),
2902
+ <b>pcrestack</b>(3).
2903
+ </P>
2904
+ <br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
2905
+ <P>
2906
+ Philip Hazel
2907
+ <br>
2908
+ University Computing Service
2909
+ <br>
2910
+ Cambridge CB2 3QH, England.
2911
+ <br>
2912
+ </P>
2913
+ <br><a name="SEC26" href="#TOC1">REVISION</a><br>
2914
+ <P>
2915
+ Last updated: 18 December 2015
2916
+ <br>
2917
+ Copyright &copy; 1997-2015 University of Cambridge.
2918
+ <br>
2919
+ <p>
2920
+ Return to the <a href="index.html">PCRE index page</a>.
2921
+ </p>