rroonga 7.0.2-x64-mingw32 → 7.1.1-x64-mingw32
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Rakefile +2 -2
 - data/doc/text/news.md +46 -7
 - data/ext/groonga/rb-grn-array.c +1 -272
 - data/ext/groonga/rb-grn-column-cache.c +240 -0
 - data/ext/groonga/rb-grn-column.c +1 -1
 - data/ext/groonga/rb-grn-context.c +28 -4
 - data/ext/groonga/rb-grn-expression.c +23 -1
 - data/ext/groonga/rb-grn-object.c +44 -1
 - data/ext/groonga/rb-grn-procedure.c +16 -1
 - data/ext/groonga/rb-grn-query-logger.c +55 -6
 - data/ext/groonga/rb-grn-table.c +170 -1
 - data/ext/groonga/rb-grn-utils.c +21 -2
 - data/ext/groonga/rb-grn.h +18 -3
 - data/ext/groonga/rb-groonga.c +2 -1
 - data/lib/2.1/groonga.so +0 -0
 - data/lib/2.2/groonga.so +0 -0
 - data/lib/2.3/groonga.so +0 -0
 - data/lib/2.4/groonga.so +0 -0
 - data/lib/2.5/groonga.so +0 -0
 - data/lib/groonga.rb +8 -5
 - data/lib/groonga/column.rb +0 -5
 - data/lib/groonga/database.rb +0 -10
 - data/lib/groonga/index-column.rb +0 -10
 - data/lib/groonga/query-logger.rb +1 -1
 - data/rroonga-build.rb +6 -6
 - data/rroonga.gemspec +1 -1
 - data/test/groonga-test-utils.rb +5 -8
 - data/test/test-array.rb +1 -131
 - data/test/test-column-cache.rb +46 -0
 - data/test/test-command-select.rb +36 -1
 - data/test/test-context.rb +1 -2
 - data/test/test-database.rb +16 -2
 - data/test/test-logger.rb +13 -1
 - data/test/test-procedure.rb +7 -1
 - data/test/test-query-logger.rb +12 -1
 - data/test/test-table-arrow.rb +193 -0
 - data/test/test-table-offset-and-limit.rb +3 -1
 - data/vendor/local/bin/cv2pdb.exe +0 -0
 - data/vendor/local/bin/generate-pdb.bat +36 -0
 - data/vendor/local/bin/grndb.exe +0 -0
 - data/vendor/local/bin/groonga-benchmark.exe +0 -0
 - data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
 - data/vendor/local/bin/groonga.exe +0 -0
 - data/vendor/local/bin/libgcc_s_seh-1.dll +0 -0
 - data/vendor/local/bin/libgroonga-0.dll +0 -0
 - data/vendor/local/bin/libmecab-2.dll +0 -0
 - data/vendor/local/bin/libmsgpackc.dll +0 -0
 - data/vendor/local/bin/libonigmo-6.dll +0 -0
 - data/vendor/local/bin/libpcre-1.dll +0 -0
 - data/vendor/local/bin/libpcrecpp-0.dll +0 -0
 - data/vendor/local/bin/libpcreposix-0.dll +0 -0
 - data/vendor/local/bin/libstdc++-6.dll +0 -0
 - data/vendor/local/bin/lz4.exe +0 -0
 - data/vendor/local/bin/lz4c.exe +0 -0
 - data/vendor/local/bin/lz4cat +0 -0
 - data/vendor/local/bin/mecab.exe +0 -0
 - data/vendor/local/bin/pcre-config +1 -1
 - data/vendor/local/bin/pcregrep.exe +0 -0
 - data/vendor/local/bin/pcretest.exe +0 -0
 - data/vendor/local/bin/zlib1.dll +0 -0
 - data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +1 -1
 - data/vendor/local/include/groonga/groonga.h +1 -0
 - data/vendor/local/include/groonga/groonga.hpp +21 -0
 - data/vendor/local/include/groonga/groonga/arrow.h +38 -0
 - data/vendor/local/include/groonga/groonga/arrow.hpp +21 -0
 - data/vendor/local/include/groonga/groonga/column.h +9 -0
 - data/vendor/local/include/groonga/groonga/expr.h +9 -1
 - data/vendor/local/include/groonga/groonga/groonga.h +19 -3
 - data/vendor/local/include/groonga/groonga/obj.h +3 -0
 - data/vendor/local/include/groonga/groonga/operator.h +2 -1
 - data/vendor/local/include/groonga/groonga/plugin.h +8 -0
 - data/vendor/local/include/groonga/groonga/portability.h +19 -1
 - data/vendor/local/include/groonga/groonga/table.h +14 -0
 - data/vendor/local/include/groonga/groonga/util.h +3 -0
 - data/vendor/local/include/groonga/groonga/window_function.h +2 -0
 - data/vendor/local/include/pcre.h +2 -2
 - data/vendor/local/include/pcre_stringpiece.h +2 -2
 - data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/index_column.la +41 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/math.la +41 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
 - data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +1 -1
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
 - data/vendor/local/lib/groonga/plugins/ruby/eval.rb +1 -1
 - data/vendor/local/lib/groonga/plugins/sharding.rb +3 -0
 - data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +152 -0
 - data/vendor/local/lib/groonga/plugins/sharding/keys_parsable.rb +12 -0
 - data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +149 -106
 - data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +11 -3
 - data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +80 -6
 - data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +43 -206
 - data/vendor/local/lib/groonga/plugins/sharding/range_expression_builder.rb +15 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
 - data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
 - data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +163 -1
 - data/vendor/local/lib/groonga/scripts/ruby/command_line_parser.rb +12 -0
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +8 -3
 - data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +1 -0
 - data/{lib/groonga/table.rb → vendor/local/lib/groonga/scripts/ruby/groonga-log.rb} +6 -12
 - data/vendor/local/lib/groonga/scripts/ruby/groonga-log/parser.rb +81 -0
 - data/vendor/local/lib/groonga/scripts/ruby/groonga-log/statistic.rb +23 -0
 - data/vendor/local/lib/groonga/scripts/ruby/groonga-log/version.rb +3 -0
 - data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +10 -0
 - data/vendor/local/lib/groonga/scripts/ruby/labeled_arguments.rb +21 -0
 - data/vendor/local/lib/groonga/scripts/ruby/logger/level.rb +8 -2
 - data/vendor/local/lib/groonga/scripts/ruby/object.rb +7 -0
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +3 -0
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +2 -0
 - data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +40 -9
 - data/vendor/local/lib/groonga/scripts/ruby/table.rb +12 -2
 - data/vendor/local/lib/libgroonga.a +0 -0
 - data/vendor/local/lib/libgroonga.dll.a +0 -0
 - data/vendor/local/lib/libgroonga.la +1 -1
 - data/vendor/local/lib/liblz4.a +0 -0
 - data/vendor/local/lib/liblz4.dll +0 -0
 - data/vendor/local/lib/liblz4.dll.1 +0 -0
 - data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
 - data/vendor/local/lib/libmecab.a +0 -0
 - data/vendor/local/lib/libmecab.dll.a +0 -0
 - data/vendor/local/lib/libmecab.la +2 -2
 - data/vendor/local/lib/libmsgpackc.a +0 -0
 - data/vendor/local/lib/libmsgpackc.dll.a +0 -0
 - data/vendor/local/lib/libonigmo.a +0 -0
 - data/vendor/local/lib/libonigmo.dll.a +0 -0
 - data/vendor/local/lib/libpcre.a +0 -0
 - data/vendor/local/lib/libpcre.dll.a +0 -0
 - data/vendor/local/lib/libpcre.la +1 -1
 - data/vendor/local/lib/libpcrecpp.a +0 -0
 - data/vendor/local/lib/libpcrecpp.dll.a +0 -0
 - data/vendor/local/lib/libpcreposix.a +0 -0
 - data/vendor/local/lib/libpcreposix.dll.a +0 -0
 - data/vendor/local/lib/libpcreposix.la +1 -1
 - data/vendor/local/lib/libz.a +0 -0
 - data/vendor/local/lib/libz.dll.a +0 -0
 - data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
 - data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
 - data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
 - data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
 - data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
 - data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
 - data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_static/basic.css +47 -19
 - data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -1
 - data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/{jquery-1.11.1.js → jquery-3.1.0.js} +4245 -4479
 - data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
 - data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +112 -5
 - data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +1 -1
 - data/vendor/local/share/doc/groonga/en/html/characteristic.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/client.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/community.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/contribution/development.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/contribution/report.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/development.html +13 -23
 - data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +15 -26
 - data/vendor/local/share/doc/groonga/en/html/genindex.html +819 -1560
 - data/vendor/local/share/doc/groonga/en/html/index.html +42 -45
 - data/vendor/local/share/doc/groonga/en/html/install.html +11 -20
 - data/vendor/local/share/doc/groonga/en/html/install/centos.html +14 -25
 - data/vendor/local/share/doc/groonga/en/html/install/debian.html +92 -28
 - data/vendor/local/share/doc/groonga/en/html/install/fedora.html +13 -24
 - data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +13 -24
 - data/vendor/local/share/doc/groonga/en/html/install/others.html +13 -24
 - data/vendor/local/share/doc/groonga/en/html/install/solaris.html +12 -23
 - data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +15 -25
 - data/vendor/local/share/doc/groonga/en/html/install/windows.html +18 -29
 - data/vendor/local/share/doc/groonga/en/html/limitations.html +13 -23
 - data/vendor/local/share/doc/groonga/en/html/news.html +650 -43
 - data/vendor/local/share/doc/groonga/en/html/news/0.x.html +15 -25
 - data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +13 -23
 - data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +12 -22
 - data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/news/2.x.html +11 -21
 - data/vendor/local/share/doc/groonga/en/html/news/3.x.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/news/4.x.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/news/5.x.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/news/6.x.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/news/senna.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/en/html/reference.html +32 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/alias.html +12 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/api.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/cast.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/column.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/command.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +10 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +11 -22
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +35 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +56 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +42 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +583 -107
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +1143 -51
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +166 -29
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +13 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +12 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +138 -143
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +14 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +95 -65
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +37 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +15 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +14 -33
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +11 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +9 -60
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +12 -23
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +15 -53
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +9 -23
 - data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/executables.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +118 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +11 -22
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +21 -32
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +17 -27
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +26 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +17 -28
 - data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +44 -55
 - data/vendor/local/share/doc/groonga/en/html/reference/function.html +29 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +56 -111
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +11 -22
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +11 -22
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +11 -22
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +15 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +237 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +30 -41
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +37 -50
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +50 -61
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +31 -42
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +36 -47
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +35 -46
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +27 -38
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +29 -40
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +32 -43
 - data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +23 -34
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/log.html +11 -22
 - data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/operations.html +13 -24
 - data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/output.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +14 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/tables.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +22 -37
 - data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/types.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +16 -26
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +14 -25
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +152 -0
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +28 -39
 - data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +23 -34
 - data/vendor/local/share/doc/groonga/en/html/search.html +9 -12
 - data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/en/html/server.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/server/http.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/server/memcached.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/server/package.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/spec.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +17 -27
 - data/vendor/local/share/doc/groonga/en/html/spec/search.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +18 -24
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +20 -31
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +188 -0
 - data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -33
 - data/vendor/local/share/doc/groonga/en/html/tutorial.html +9 -19
 - data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +12 -23
 - data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +10 -21
 - data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +21 -31
 - data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +174 -22
 - data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +10 -81
 - data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +16 -26
 - data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +9 -20
 - data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +14 -25
 - data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +47 -19
 - data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-1.11.1.js → jquery-3.1.0.js} +4245 -4479
 - data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
 - data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +112 -5
 - data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/characteristic.html +9 -19
 - data/vendor/local/share/doc/groonga/ja/html/client.html +9 -19
 - data/vendor/local/share/doc/groonga/ja/html/community.html +11 -23
 - data/vendor/local/share/doc/groonga/ja/html/contribution.html +12 -25
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +13 -28
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +12 -26
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +16 -34
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +20 -42
 - data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/development.html +13 -23
 - data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +19 -38
 - data/vendor/local/share/doc/groonga/ja/html/genindex.html +819 -1560
 - data/vendor/local/share/doc/groonga/ja/html/index.html +41 -44
 - data/vendor/local/share/doc/groonga/ja/html/install.html +11 -20
 - data/vendor/local/share/doc/groonga/ja/html/install/centos.html +18 -33
 - data/vendor/local/share/doc/groonga/ja/html/install/debian.html +84 -32
 - data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +15 -28
 - data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +13 -24
 - data/vendor/local/share/doc/groonga/ja/html/install/others.html +28 -55
 - data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +12 -23
 - data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +17 -29
 - data/vendor/local/share/doc/groonga/ja/html/install/windows.html +18 -29
 - data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -35
 - data/vendor/local/share/doc/groonga/ja/html/news.html +516 -158
 - data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +15 -25
 - data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +12 -22
 - data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +10 -20
 - data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +144 -288
 - data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +36 -72
 - data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +266 -532
 - data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +224 -441
 - data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +258 -516
 - data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +282 -562
 - data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +213 -426
 - data/vendor/local/share/doc/groonga/ja/html/news/senna.html +10 -20
 - data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference.html +32 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +14 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/api.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +10 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +15 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +10 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/column.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +15 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/command.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +10 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +12 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +11 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +31 -64
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +56 -85
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +29 -60
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +14 -30
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +15 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +55 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +45 -45
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +15 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +15 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +498 -106
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +13 -27
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +1051 -56
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +166 -56
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +33 -63
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +17 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +13 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +54 -104
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +183 -233
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +14 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +14 -30
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +10 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +10 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +136 -147
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +145 -271
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +10 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +23 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +28 -61
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +32 -65
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +9 -60
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +22 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +12 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +15 -53
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +18 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +13 -31
 - data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +11 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +113 -29
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +11 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +21 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +20 -33
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +10 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +70 -127
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +17 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +71 -109
 - data/vendor/local/share/doc/groonga/ja/html/reference/function.html +29 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +46 -102
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +14 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +11 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +11 -22
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +23 -48
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +16 -27
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +237 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +30 -41
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +37 -50
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +64 -89
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +31 -42
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +42 -59
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +38 -52
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +27 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +29 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +32 -43
 - data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +23 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +13 -28
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +18 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +17 -36
 - data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/log.html +37 -74
 - data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +13 -24
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +18 -38
 - data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/output.html +12 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +15 -32
 - data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +29 -60
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +16 -29
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +16 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +34 -70
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +22 -46
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +16 -34
 - data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +19 -40
 - data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +31 -64
 - data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -77
 - data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/types.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +16 -26
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +14 -25
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +153 -0
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +28 -39
 - data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +23 -34
 - data/vendor/local/share/doc/groonga/ja/html/search.html +9 -12
 - data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
 - data/vendor/local/share/doc/groonga/ja/html/server.html +9 -19
 - data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/server/http.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +47 -95
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/server/package.html +26 -54
 - data/vendor/local/share/doc/groonga/ja/html/spec.html +9 -19
 - data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +50 -93
 - data/vendor/local/share/doc/groonga/ja/html/spec/search.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +18 -24
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +14 -25
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +186 -0
 - data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -35
 - data/vendor/local/share/doc/groonga/ja/html/tutorial.html +9 -19
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +12 -25
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +15 -29
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +10 -21
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +29 -47
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +177 -28
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +34 -129
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +16 -26
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +9 -20
 - data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +18 -33
 - data/vendor/local/share/doc/pcre/AUTHORS +3 -3
 - data/vendor/local/share/doc/pcre/ChangeLog +94 -0
 - data/vendor/local/share/doc/pcre/LICENCE +3 -3
 - data/vendor/local/share/doc/pcre/NEWS +12 -0
 - data/vendor/local/share/doc/pcre/html/pcrecompat.html +1 -1
 - data/vendor/local/share/doc/pcre/html/pcrejit.html +52 -5
 - data/vendor/local/share/doc/pcre/html/pcrepattern.html +20 -17
 - data/vendor/local/share/doc/pcre/html/pcretest.html +7 -2
 - data/vendor/local/share/doc/pcre/pcre.txt +1103 -1055
 - data/vendor/local/share/doc/pcre/pcretest.txt +6 -2
 - data/vendor/local/share/groonga/groonga-log/README.md +44 -0
 - data/vendor/local/share/groonga/groonga-log/lgpl-2.1.txt +502 -0
 - data/vendor/local/share/groonga/mruby/LEGAL +4 -0
 - data/vendor/local/share/license/cv2pdb/LICENSE +201 -0
 - data/vendor/local/share/license/cv2pdb/README +138 -0
 - data/vendor/local/share/license/groonga-log/README.md +44 -0
 - data/vendor/local/share/license/groonga-log/lgpl-2.1.txt +502 -0
 - data/vendor/local/share/license/pcre/LICENCE +3 -3
 - data/vendor/local/share/man/man1/pcretest.1 +7 -3
 - data/vendor/local/share/man/man3/pcrecompat.3 +1 -1
 - data/vendor/local/share/man/man3/pcrejit.3 +48 -6
 - data/vendor/local/share/man/man3/pcrepattern.3 +20 -17
 - metadata +102 -559
 - data/lib/groonga/statistic-measurer.rb +0 -37
 - data/test/test-statistic-measurer.rb +0 -55
 - data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +0 -70
 - data/vendor/local/share/doc/groonga/en/html/_sources/client.txt +0 -19
 - data/vendor/local/share/doc/groonga/en/html/_sources/community.txt +0 -49
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution.txt +0 -26
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development.txt +0 -14
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build.txt +0 -19
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_autotools.txt +0 -101
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_cmake.txt +0 -94
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/windows_cmake.txt +0 -93
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/com.txt +0 -20
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/cooperation.txt +0 -75
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/query.txt +0 -214
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +0 -790
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/repository.txt +0 -16
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/test.txt +0 -120
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation/c-api.txt +0 -14
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation/i18n.txt +0 -200
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation/introduction.txt +0 -81
 - data/vendor/local/share/doc/groonga/en/html/_sources/contribution/report.txt +0 -27
 - data/vendor/local/share/doc/groonga/en/html/_sources/development.txt +0 -16
 - data/vendor/local/share/doc/groonga/en/html/_sources/development/travis-ci.txt +0 -66
 - data/vendor/local/share/doc/groonga/en/html/_sources/index.txt +0 -33
 - data/vendor/local/share/doc/groonga/en/html/_sources/install.txt +0 -28
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +0 -106
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +0 -107
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +0 -97
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +0 -66
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +0 -273
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +0 -43
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +0 -99
 - data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +0 -92
 - data/vendor/local/share/doc/groonga/en/html/_sources/limitations.txt +0 -58
 - data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +0 -315
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/0.x.txt +0 -126
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +0 -289
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/1.1.x.txt +0 -31
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/1.2.x.txt +0 -390
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/1.3.x.txt +0 -52
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/2.x.txt +0 -623
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +0 -539
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +0 -689
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/5.x.txt +0 -1250
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/6.x.txt +0 -1086
 - data/vendor/local/share/doc/groonga/en/html/_sources/news/senna.txt +0 -109
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +0 -35
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/alias.txt +0 -164
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/global_configurations.txt +0 -49
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_cache.txt +0 -114
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_column.txt +0 -198
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_command_version.txt +0 -37
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_content_type.txt +0 -39
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +0 -195
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_db.txt +0 -134
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_encoding.txt +0 -49
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_expr.txt +0 -136
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_geo.txt +0 -55
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_hook.txt +0 -67
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ii.txt +0 -35
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_index_cursor.txt +0 -44
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_info.txt +0 -56
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_match_escalation.txt +0 -39
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_obj.txt +0 -269
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_proc.txt +0 -56
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_search.txt +0 -31
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_table.txt +0 -219
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_table_cursor.txt +0 -109
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_thread.txt +0 -122
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_type.txt +0 -31
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_user_data.txt +0 -29
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +0 -54
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/plugin.txt +0 -156
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/cast.txt +0 -8
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/column.txt +0 -34
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/index.txt +0 -19
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/pseudo.txt +0 -40
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/scalar.txt +0 -19
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/vector.txt +0 -332
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command.txt +0 -23
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/command_version.txt +0 -75
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/output_format.txt +0 -228
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/pretty_print.txt +0 -45
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/request_id.txt +0 -41
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/request_timeout.txt +0 -78
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/return_code.txt +0 -117
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +0 -87
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/check.txt +0 -161
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/clearlock.txt +0 -60
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_copy.txt +0 -381
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +0 -800
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +0 -209
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_remove.txt +0 -57
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +0 -101
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/config_delete.txt +0 -95
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/config_get.txt +0 -96
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/config_set.txt +0 -96
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/database_unmap.txt +0 -85
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/define_selector.txt +0 -110
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/defrag.txt +0 -55
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +0 -122
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/dump.txt +0 -202
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/io_flush.txt +0 -266
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +0 -100
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_acquire.txt +0 -102
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +0 -90
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_release.txt +0 -98
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_level.txt +0 -87
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_put.txt +0 -65
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_reopen.txt +0 -62
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +0 -171
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_parameters.txt +0 -134
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +0 -195
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_select.txt +0 -1359
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_shard_list.txt +0 -103
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +0 -541
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +0 -155
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalizer_list.txt +0 -64
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_exist.txt +0 -95
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_inspect.txt +0 -899
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_list.txt +0 -405
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_remove.txt +0 -140
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +0 -64
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +0 -63
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/query_expand.txt +0 -38
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/quit.txt +0 -38
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/range_filter.txt +0 -28
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +0 -69
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/reindex.txt +0 -142
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +0 -134
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +0 -71
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +0 -71
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/schema.txt +0 -627
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +0 -2776
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/shutdown.txt +0 -113
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/status.txt +0 -151
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +0 -271
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_copy.txt +0 -64
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +0 -380
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_list.txt +0 -81
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_remove.txt +0 -309
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_rename.txt +0 -90
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +0 -120
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/thread_limit.txt +0 -110
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +0 -248
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenizer_list.txt +0 -63
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +0 -95
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/configuration.txt +0 -50
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables.txt +0 -14
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/grndb.txt +0 -117
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/grnslap.txt +0 -68
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-benchmark.txt +0 -287
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-httpd.txt +0 -552
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-server-http.txt +0 -57
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-suggest-create-dataset.txt +0 -63
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-suggest-httpd.txt +0 -470
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-suggest-learner.txt +0 -94
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +0 -473
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/function.txt +0 -20
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +0 -105
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/edit_distance.txt +0 -48
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/fuzzy_search.txt +0 -23
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +0 -300
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_in_circle.txt +0 -81
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_in_rectangle.txt +0 -55
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +0 -127
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +0 -105
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +0 -80
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_records.txt +0 -195
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +0 -82
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/now.txt +0 -36
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/number_classify.txt +0 -20
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/prefix_rk_search.txt +0 -158
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +0 -254
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/rand.txt +0 -43
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/snippet_html.txt +0 -114
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/string_length.txt +0 -33
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/string_substring.txt +0 -27
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +0 -137
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_day.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_hour.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_minute.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_month.txt +0 -20
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_second.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_week.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_year.txt +0 -18
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/vector_new.txt +0 -38
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/vector_size.txt +0 -76
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/vector_slice.txt +0 -27
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr.txt +0 -59
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +0 -652
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +0 -1126
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +0 -112
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/log.txt +0 -236
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +0 -133
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +0 -16
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations/geolocation_search.txt +0 -52
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations/prefix_rk_search.txt +0 -76
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/output.txt +0 -164
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders.txt +0 -12
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders/tsv.txt +0 -153
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +0 -436
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +0 -218
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +0 -136
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +0 -157
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/sharding.txt +0 -104
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest.txt +0 -17
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/completion.txt +0 -271
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/correction.txt +0 -148
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/introduction.txt +0 -96
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/suggestion.txt +0 -132
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +0 -216
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/token_filters.txt +0 -120
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +0 -517
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/tuning.txt +0 -177
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/types.txt +0 -170
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_function.txt +0 -22
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_functions/record_number.txt +0 -28
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_functions/window_record_number.txt +0 -25
 - data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_functions/window_sum.txt +0 -25
 - data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +0 -15
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/gqtp.txt +0 -48
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/http.txt +0 -25
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/http/comparison.txt +0 -298
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/http/groonga-httpd.txt +0 -8
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/http/groonga.txt +0 -8
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +0 -16
 - data/vendor/local/share/doc/groonga/en/html/_sources/server/package.txt +0 -209
 - data/vendor/local/share/doc/groonga/en/html/_sources/spec.txt +0 -13
 - data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +0 -280
 - data/vendor/local/share/doc/groonga/en/html/_sources/spec/search.txt +0 -115
 - data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting.txt +0 -13
 - data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +0 -135
 - data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/mmap_cannot_allocate_memory.txt +0 -45
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial.txt +0 -22
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/data.txt +0 -173
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/drilldown.txt +0 -130
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/index.txt +0 -123
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/introduction.txt +0 -294
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/lexicon.txt +0 -12
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/match_columns.txt +0 -234
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/micro_blog.txt +0 -539
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -64
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/patricia_trie.txt +0 -58
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/query_expansion.txt +0 -69
 - data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/search.txt +0 -123
 - data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +0 -70
 - data/vendor/local/share/doc/groonga/ja/html/_sources/client.txt +0 -19
 - data/vendor/local/share/doc/groonga/ja/html/_sources/community.txt +0 -49
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution.txt +0 -26
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development.txt +0 -14
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build.txt +0 -19
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_autotools.txt +0 -101
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_cmake.txt +0 -94
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/windows_cmake.txt +0 -93
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/com.txt +0 -20
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/cooperation.txt +0 -75
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/query.txt +0 -214
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +0 -790
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/repository.txt +0 -16
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/test.txt +0 -120
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation/c-api.txt +0 -14
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation/i18n.txt +0 -200
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation/introduction.txt +0 -81
 - data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/report.txt +0 -27
 - data/vendor/local/share/doc/groonga/ja/html/_sources/development.txt +0 -16
 - data/vendor/local/share/doc/groonga/ja/html/_sources/development/travis-ci.txt +0 -66
 - data/vendor/local/share/doc/groonga/ja/html/_sources/index.txt +0 -33
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install.txt +0 -28
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +0 -106
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +0 -107
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +0 -97
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +0 -66
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +0 -273
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +0 -43
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +0 -99
 - data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +0 -92
 - data/vendor/local/share/doc/groonga/ja/html/_sources/limitations.txt +0 -58
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +0 -315
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/0.x.txt +0 -126
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +0 -289
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.1.x.txt +0 -31
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.2.x.txt +0 -390
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.3.x.txt +0 -52
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/2.x.txt +0 -623
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +0 -539
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +0 -689
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/5.x.txt +0 -1250
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/6.x.txt +0 -1086
 - data/vendor/local/share/doc/groonga/ja/html/_sources/news/senna.txt +0 -109
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +0 -35
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/alias.txt +0 -164
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/global_configurations.txt +0 -49
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_cache.txt +0 -114
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_column.txt +0 -198
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_command_version.txt +0 -37
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_content_type.txt +0 -39
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +0 -195
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_db.txt +0 -134
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_encoding.txt +0 -49
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_expr.txt +0 -136
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_geo.txt +0 -55
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_hook.txt +0 -67
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ii.txt +0 -35
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_index_cursor.txt +0 -44
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_info.txt +0 -56
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_match_escalation.txt +0 -39
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_obj.txt +0 -269
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_proc.txt +0 -56
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_search.txt +0 -31
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_table.txt +0 -219
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_table_cursor.txt +0 -109
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_thread.txt +0 -122
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_type.txt +0 -31
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_user_data.txt +0 -29
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +0 -54
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/plugin.txt +0 -156
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/cast.txt +0 -8
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/column.txt +0 -34
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/index.txt +0 -19
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/pseudo.txt +0 -40
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/scalar.txt +0 -19
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/vector.txt +0 -332
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command.txt +0 -23
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/command_version.txt +0 -75
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/output_format.txt +0 -228
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/pretty_print.txt +0 -45
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/request_id.txt +0 -41
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/request_timeout.txt +0 -78
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/return_code.txt +0 -117
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +0 -87
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/check.txt +0 -161
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/clearlock.txt +0 -60
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_copy.txt +0 -381
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +0 -800
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +0 -209
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_remove.txt +0 -57
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +0 -101
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/config_delete.txt +0 -95
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/config_get.txt +0 -96
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/config_set.txt +0 -96
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/database_unmap.txt +0 -85
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/define_selector.txt +0 -110
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/defrag.txt +0 -55
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +0 -122
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/dump.txt +0 -202
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/io_flush.txt +0 -266
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +0 -100
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_acquire.txt +0 -102
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +0 -90
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_release.txt +0 -98
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_level.txt +0 -87
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_put.txt +0 -65
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_reopen.txt +0 -62
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +0 -171
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_parameters.txt +0 -134
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +0 -195
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_select.txt +0 -1359
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_shard_list.txt +0 -103
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +0 -541
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +0 -155
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalizer_list.txt +0 -64
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_exist.txt +0 -95
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_inspect.txt +0 -899
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_list.txt +0 -405
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_remove.txt +0 -140
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +0 -64
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +0 -63
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/query_expand.txt +0 -38
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/quit.txt +0 -38
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/range_filter.txt +0 -28
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +0 -69
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/reindex.txt +0 -142
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +0 -134
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +0 -71
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +0 -71
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/schema.txt +0 -627
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +0 -2776
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/shutdown.txt +0 -113
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/status.txt +0 -151
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +0 -271
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_copy.txt +0 -64
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +0 -380
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_list.txt +0 -81
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_remove.txt +0 -309
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_rename.txt +0 -90
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +0 -120
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/thread_limit.txt +0 -110
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +0 -248
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenizer_list.txt +0 -63
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +0 -95
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/configuration.txt +0 -50
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables.txt +0 -14
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/grndb.txt +0 -117
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/grnslap.txt +0 -68
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-benchmark.txt +0 -287
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-httpd.txt +0 -552
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-server-http.txt +0 -57
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-suggest-create-dataset.txt +0 -63
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-suggest-httpd.txt +0 -470
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-suggest-learner.txt +0 -94
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +0 -473
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/function.txt +0 -20
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +0 -105
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/edit_distance.txt +0 -48
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/fuzzy_search.txt +0 -23
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +0 -300
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_in_circle.txt +0 -81
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_in_rectangle.txt +0 -55
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +0 -127
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +0 -105
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +0 -80
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_records.txt +0 -195
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +0 -82
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/now.txt +0 -36
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/number_classify.txt +0 -20
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/prefix_rk_search.txt +0 -158
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +0 -254
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/rand.txt +0 -43
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/snippet_html.txt +0 -114
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/string_length.txt +0 -33
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/string_substring.txt +0 -27
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +0 -137
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_day.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_hour.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_minute.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_month.txt +0 -20
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_second.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_week.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_year.txt +0 -18
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/vector_new.txt +0 -38
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/vector_size.txt +0 -76
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/vector_slice.txt +0 -27
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr.txt +0 -59
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +0 -652
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +0 -1126
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +0 -112
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/log.txt +0 -236
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +0 -133
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +0 -16
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations/geolocation_search.txt +0 -52
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations/prefix_rk_search.txt +0 -76
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/output.txt +0 -164
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders.txt +0 -12
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders/tsv.txt +0 -153
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +0 -436
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +0 -218
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +0 -136
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +0 -157
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/sharding.txt +0 -104
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest.txt +0 -17
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/completion.txt +0 -271
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/correction.txt +0 -148
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/introduction.txt +0 -96
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/suggestion.txt +0 -132
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +0 -216
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/token_filters.txt +0 -120
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +0 -517
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tuning.txt +0 -177
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/types.txt +0 -170
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_function.txt +0 -22
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_functions/record_number.txt +0 -28
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_functions/window_record_number.txt +0 -25
 - data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_functions/window_sum.txt +0 -25
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +0 -15
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/gqtp.txt +0 -48
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/http.txt +0 -25
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/http/comparison.txt +0 -298
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/http/groonga-httpd.txt +0 -8
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/http/groonga.txt +0 -8
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +0 -16
 - data/vendor/local/share/doc/groonga/ja/html/_sources/server/package.txt +0 -209
 - data/vendor/local/share/doc/groonga/ja/html/_sources/spec.txt +0 -13
 - data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +0 -280
 - data/vendor/local/share/doc/groonga/ja/html/_sources/spec/search.txt +0 -115
 - data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting.txt +0 -13
 - data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +0 -135
 - data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/mmap_cannot_allocate_memory.txt +0 -45
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial.txt +0 -22
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/data.txt +0 -173
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/drilldown.txt +0 -130
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/index.txt +0 -123
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/introduction.txt +0 -294
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/lexicon.txt +0 -12
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/match_columns.txt +0 -234
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/micro_blog.txt +0 -539
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -64
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/patricia_trie.txt +0 -58
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/query_expansion.txt +0 -69
 - data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/search.txt +0 -123
 
| 
         @@ -358,24 +358,24 @@ When PCRE is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t 
     | 
|
| 
       358 
358 
     | 
    
         
             
            generate the appropriate EBCDIC code values. The \c escape is processed
         
     | 
| 
       359 
359 
     | 
    
         
             
            as specified for Perl in the <b>perlebcdic</b> document. The only characters
         
     | 
| 
       360 
360 
     | 
    
         
             
            that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any
         
     | 
| 
       361 
     | 
    
         
            -
            other character provokes a compile-time error. The sequence  
     | 
| 
       362 
     | 
    
         
            -
            character code 0; the letters (in either case) encode characters 1-26 
     | 
| 
       363 
     | 
    
         
            -
            to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex 
     | 
| 
       364 
     | 
    
         
            -
             
     | 
| 
      
 361 
     | 
    
         
            +
            other character provokes a compile-time error. The sequence \c@ encodes
         
     | 
| 
      
 362 
     | 
    
         
            +
            character code 0; after \c the letters (in either case) encode characters 1-26
         
     | 
| 
      
 363 
     | 
    
         
            +
            (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex
         
     | 
| 
      
 364 
     | 
    
         
            +
            1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
         
     | 
| 
       365 
365 
     | 
    
         
             
            </P>
         
     | 
| 
       366 
366 
     | 
    
         
             
            <P>
         
     | 
| 
       367 
     | 
    
         
            -
            Thus, apart from  
     | 
| 
      
 367 
     | 
    
         
            +
            Thus, apart from \c?, these escapes generate the same character code values as
         
     | 
| 
       368 
368 
     | 
    
         
             
            they do in an ASCII environment, though the meanings of the values mostly
         
     | 
| 
       369 
     | 
    
         
            -
            differ. For example, \ 
     | 
| 
      
 369 
     | 
    
         
            +
            differ. For example, \cG always generates code value 7, which is BEL in ASCII
         
     | 
| 
       370 
370 
     | 
    
         
             
            but DEL in EBCDIC.
         
     | 
| 
       371 
371 
     | 
    
         
             
            </P>
         
     | 
| 
       372 
372 
     | 
    
         
             
            <P>
         
     | 
| 
       373 
     | 
    
         
            -
            The sequence  
     | 
| 
      
 373 
     | 
    
         
            +
            The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but
         
     | 
| 
       374 
374 
     | 
    
         
             
            because 127 is not a control character in EBCDIC, Perl makes it generate the
         
     | 
| 
       375 
375 
     | 
    
         
             
            APC character. Unfortunately, there are several variants of EBCDIC. In most of
         
     | 
| 
       376 
376 
     | 
    
         
             
            them the APC character has the value 255 (hex FF), but in the one Perl calls
         
     | 
| 
       377 
377 
     | 
    
         
             
            POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC
         
     | 
| 
       378 
     | 
    
         
            -
            values, PCRE makes  
     | 
| 
      
 378 
     | 
    
         
            +
            values, PCRE makes \c? generate 95; otherwise it generates 255.
         
     | 
| 
       379 
379 
     | 
    
         
             
            </P>
         
     | 
| 
       380 
380 
     | 
    
         
             
            <P>
         
     | 
| 
       381 
381 
     | 
    
         
             
            After \0 up to two further octal digits are read. If there are fewer than two
         
     | 
| 
         @@ -1512,13 +1512,8 @@ J, U and X respectively. 
     | 
|
| 
       1512 
1512 
     | 
    
         
             
            <P>
         
     | 
| 
       1513 
1513 
     | 
    
         
             
            When one of these option changes occurs at top level (that is, not inside
         
     | 
| 
       1514 
1514 
     | 
    
         
             
            subpattern parentheses), the change applies to the remainder of the pattern
         
     | 
| 
       1515 
     | 
    
         
            -
            that follows.  
     | 
| 
       1516 
     | 
    
         
            -
             
     | 
| 
       1517 
     | 
    
         
            -
            extracted by the <b>pcre_fullinfo()</b> function).
         
     | 
| 
       1518 
     | 
    
         
            -
            </P>
         
     | 
| 
       1519 
     | 
    
         
            -
            <P>
         
     | 
| 
       1520 
     | 
    
         
            -
            An option change within a subpattern (see below for a description of
         
     | 
| 
       1521 
     | 
    
         
            -
            subpatterns) affects only that part of the subpattern that follows it, so
         
     | 
| 
      
 1515 
     | 
    
         
            +
            that follows. An option change within a subpattern (see below for a description
         
     | 
| 
      
 1516 
     | 
    
         
            +
            of subpatterns) affects only that part of the subpattern that follows it, so
         
     | 
| 
       1522 
1517 
     | 
    
         
             
            <pre>
         
     | 
| 
       1523 
1518 
     | 
    
         
             
              (a(?i)b)c
         
     | 
| 
       1524 
1519 
     | 
    
         
             
            </pre>
         
     | 
| 
         @@ -2160,6 +2155,14 @@ capturing is carried out only for positive assertions. (Perl sometimes, but not 
     | 
|
| 
       2160 
2155 
     | 
    
         
             
            always, does do capturing in negative assertions.)
         
     | 
| 
       2161 
2156 
     | 
    
         
             
            </P>
         
     | 
| 
       2162 
2157 
     | 
    
         
             
            <P>
         
     | 
| 
      
 2158 
     | 
    
         
            +
            WARNING: If a positive assertion containing one or more capturing subpatterns
         
     | 
| 
      
 2159 
     | 
    
         
            +
            succeeds, but failure to match later in the pattern causes backtracking over
         
     | 
| 
      
 2160 
     | 
    
         
            +
            this assertion, the captures within the assertion are reset only if no higher
         
     | 
| 
      
 2161 
     | 
    
         
            +
            numbered captures are already set. This is, unfortunately, a fundamental
         
     | 
| 
      
 2162 
     | 
    
         
            +
            limitation of the current implementation, and as PCRE1 is now in
         
     | 
| 
      
 2163 
     | 
    
         
            +
            maintenance-only status, it is unlikely ever to change.
         
     | 
| 
      
 2164 
     | 
    
         
            +
            </P>
         
     | 
| 
      
 2165 
     | 
    
         
            +
            <P>
         
     | 
| 
       2163 
2166 
     | 
    
         
             
            For compatibility with Perl, assertion subpatterns may be repeated; though
         
     | 
| 
       2164 
2167 
     | 
    
         
             
            it makes no sense to assert the same thing several times, the side effect of
         
     | 
| 
       2165 
2168 
     | 
    
         
             
            capturing parentheses may occasionally be useful. In practice, there are only three
         
     | 
| 
         @@ -3264,9 +3267,9 @@ Cambridge CB2 3QH, England. 
     | 
|
| 
       3264 
3267 
     | 
    
         
             
            </P>
         
     | 
| 
       3265 
3268 
     | 
    
         
             
            <br><a name="SEC30" href="#TOC1">REVISION</a><br>
         
     | 
| 
       3266 
3269 
     | 
    
         
             
            <P>
         
     | 
| 
       3267 
     | 
    
         
            -
            Last updated:  
     | 
| 
      
 3270 
     | 
    
         
            +
            Last updated: 23 October 2016
         
     | 
| 
       3268 
3271 
     | 
    
         
             
            <br>
         
     | 
| 
       3269 
     | 
    
         
            -
            Copyright © 1997- 
     | 
| 
      
 3272 
     | 
    
         
            +
            Copyright © 1997-2016 University of Cambridge.
         
     | 
| 
       3270 
3273 
     | 
    
         
             
            <br>
         
     | 
| 
       3271 
3274 
     | 
    
         
             
            <p>
         
     | 
| 
       3272 
3275 
     | 
    
         
             
            Return to the <a href="index.html">PCRE index page</a>.
         
     | 
| 
         @@ -74,6 +74,11 @@ newline as data characters. However, in some Windows environments character 26 
     | 
|
| 
       74 
74 
     | 
    
         
             
            maximum portability, therefore, it is safest to use only ASCII characters in
         
     | 
| 
       75 
75 
     | 
    
         
             
            <b>pcretest</b> input files.
         
     | 
| 
       76 
76 
     | 
    
         
             
            </P>
         
     | 
| 
      
 77 
     | 
    
         
            +
            <P>
         
     | 
| 
      
 78 
     | 
    
         
            +
            The input is processed using C's string functions, so must not
         
     | 
| 
      
 79 
     | 
    
         
            +
            contain binary zeroes, even though in Unix-like environments, <b>fgets()</b>
         
     | 
| 
      
 80 
     | 
    
         
            +
            treats any bytes other than newline as data characters.
         
     | 
| 
      
 81 
     | 
    
         
            +
            </P>
         
     | 
| 
       77 
82 
     | 
    
         
             
            <br><a name="SEC3" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
         
     | 
| 
       78 
83 
     | 
    
         
             
            <P>
         
     | 
| 
       79 
84 
     | 
    
         
             
            From release 8.30, two separate PCRE libraries can be built. The original one
         
     | 
| 
         @@ -1149,9 +1154,9 @@ Cambridge CB2 3QH, England. 
     | 
|
| 
       1149 
1154 
     | 
    
         
             
            </P>
         
     | 
| 
       1150 
1155 
     | 
    
         
             
            <br><a name="SEC17" href="#TOC1">REVISION</a><br>
         
     | 
| 
       1151 
1156 
     | 
    
         
             
            <P>
         
     | 
| 
       1152 
     | 
    
         
            -
            Last updated:  
     | 
| 
      
 1157 
     | 
    
         
            +
            Last updated: 23 February 2017
         
     | 
| 
       1153 
1158 
     | 
    
         
             
            <br>
         
     | 
| 
       1154 
     | 
    
         
            -
            Copyright © 1997- 
     | 
| 
      
 1159 
     | 
    
         
            +
            Copyright © 1997-2017 University of Cambridge.
         
     | 
| 
       1155 
1160 
     | 
    
         
             
            <br>
         
     | 
| 
       1156 
1161 
     | 
    
         
             
            <p>
         
     | 
| 
       1157 
1162 
     | 
    
         
             
            Return to the <a href="index.html">PCRE index page</a>.
         
     | 
| 
         @@ -4640,7 +4640,7 @@ DIFFERENCES BETWEEN PCRE AND PERL 
     | 
|
| 
       4640 
4640 
     | 
    
         
             
                   pattern names is not as general as Perl's. This is a consequence of the
         
     | 
| 
       4641 
4641 
     | 
    
         
             
                   fact that PCRE works internally just with numbers, using an external ta-
         
     | 
| 
       4642 
4642 
     | 
    
         
             
                   ble to translate between numbers and names. In  particular,  a  pattern
         
     | 
| 
       4643 
     | 
    
         
            -
                   such  as  (?|(?<a>A)|(?<b 
     | 
| 
      
 4643 
     | 
    
         
            +
                   such  as (?|(?<a>A)|(?<b>B)),  where the two capturing parentheses have
         
     | 
| 
       4644 
4644 
     | 
    
         
             
                   the same number but different names, is not supported,  and  causes  an
         
     | 
| 
       4645 
4645 
     | 
    
         
             
                   error  at compile time. If it were allowed, it would not be possible to
         
     | 
| 
       4646 
4646 
     | 
    
         
             
                   distinguish which parentheses matched, because both names map  to  cap-
         
     | 
| 
         @@ -5028,55 +5028,56 @@ BACKSLASH 
     | 
|
| 
       5028 
5028 
     | 
    
         
             
                   ate the appropriate EBCDIC code values. The \c escape is  processed  as
         
     | 
| 
       5029 
5029 
     | 
    
         
             
                   specified for Perl in the perlebcdic document. The only characters that
         
     | 
| 
       5030 
5030 
     | 
    
         
             
                   are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^,  _,  or  ?.
         
     | 
| 
       5031 
     | 
    
         
            -
                   Any  other  character  provokes  a 
     | 
| 
       5032 
     | 
    
         
            -
                   encodes character code 0; the letters (in either case)  encode 
     | 
| 
       5033 
     | 
    
         
            -
                    
     | 
| 
       5034 
     | 
    
         
            -
                   (hex 1B to hex 1F), and  
     | 
| 
       5035 
     | 
    
         
            -
             
     | 
| 
       5036 
     | 
    
         
            -
             
     | 
| 
       5037 
     | 
    
         
            -
                    
     | 
| 
       5038 
     | 
    
         
            -
                   values  
     | 
| 
      
 5031 
     | 
    
         
            +
                   Any  other  character  provokes  a compile-time error. The sequence \c@
         
     | 
| 
      
 5032 
     | 
    
         
            +
                   encodes character code 0; after \c the letters (in either case)  encode
         
     | 
| 
      
 5033 
     | 
    
         
            +
                   characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters
         
     | 
| 
      
 5034 
     | 
    
         
            +
                   27-31 (hex 1B to hex 1F), and \c? becomes either 255  (hex  FF)  or  95
         
     | 
| 
      
 5035 
     | 
    
         
            +
                   (hex 5F).
         
     | 
| 
      
 5036 
     | 
    
         
            +
             
     | 
| 
      
 5037 
     | 
    
         
            +
                   Thus,  apart  from  \c?, these escapes generate the same character code
         
     | 
| 
      
 5038 
     | 
    
         
            +
                   values as they do in an ASCII environment, though the meanings  of  the
         
     | 
| 
      
 5039 
     | 
    
         
            +
                   values  mostly  differ. For example, \cG always generates code value 7,
         
     | 
| 
       5039 
5040 
     | 
    
         
             
                   which is BEL in ASCII but DEL in EBCDIC.
         
     | 
| 
       5040 
5041 
     | 
    
         | 
| 
       5041 
     | 
    
         
            -
                   The 
     | 
| 
       5042 
     | 
    
         
            -
                   but 
     | 
| 
       5043 
     | 
    
         
            -
                   generate 
     | 
| 
       5044 
     | 
    
         
            -
                   of 
     | 
| 
       5045 
     | 
    
         
            -
                   FF), 
     | 
| 
       5046 
     | 
    
         
            -
                   certain 
     | 
| 
      
 5042 
     | 
    
         
            +
                   The sequence \c? generates DEL (127, hex 7F) in an  ASCII  environment,
         
     | 
| 
      
 5043 
     | 
    
         
            +
                   but  because  127  is  not a control character in EBCDIC, Perl makes it
         
     | 
| 
      
 5044 
     | 
    
         
            +
                   generate the APC character. Unfortunately, there are  several  variants
         
     | 
| 
      
 5045 
     | 
    
         
            +
                   of  EBCDIC.  In  most  of them the APC character has the value 255 (hex
         
     | 
| 
      
 5046 
     | 
    
         
            +
                   FF), but in the one Perl calls POSIX-BC its value is 95  (hex  5F).  If
         
     | 
| 
      
 5047 
     | 
    
         
            +
                   certain  other characters have POSIX-BC values, PCRE makes \c? generate
         
     | 
| 
       5047 
5048 
     | 
    
         
             
                   95; otherwise it generates 255.
         
     | 
| 
       5048 
5049 
     | 
    
         | 
| 
       5049 
     | 
    
         
            -
                   After 
     | 
| 
       5050 
     | 
    
         
            -
                   than 
     | 
| 
      
 5050 
     | 
    
         
            +
                   After \0 up to two further octal digits are read. If  there  are  fewer
         
     | 
| 
      
 5051 
     | 
    
         
            +
                   than  two  digits,  just  those  that  are  present  are used. Thus the
         
     | 
| 
       5051 
5052 
     | 
    
         
             
                   sequence \0\x\015 specifies two binary zeros followed by a CR character
         
     | 
| 
       5052 
5053 
     | 
    
         
             
                   (code value 13). Make sure you supply two digits after the initial zero
         
     | 
| 
       5053 
5054 
     | 
    
         
             
                   if the pattern character that follows is itself an octal digit.
         
     | 
| 
       5054 
5055 
     | 
    
         | 
| 
       5055 
     | 
    
         
            -
                   The 
     | 
| 
       5056 
     | 
    
         
            -
                   in 
     | 
| 
       5057 
     | 
    
         
            -
                   recent 
     | 
| 
       5058 
     | 
    
         
            -
                   points 
     | 
| 
      
 5056 
     | 
    
         
            +
                   The escape \o must be followed by a sequence of octal digits,  enclosed
         
     | 
| 
      
 5057 
     | 
    
         
            +
                   in  braces.  An  error occurs if this is not the case. This escape is a
         
     | 
| 
      
 5058 
     | 
    
         
            +
                   recent addition to Perl; it provides a way of specifying character code
         
     | 
| 
      
 5059 
     | 
    
         
            +
                   points  as  octal  numbers  greater than 0777, and it also allows octal
         
     | 
| 
       5059 
5060 
     | 
    
         
             
                   numbers and back references to be unambiguously specified.
         
     | 
| 
       5060 
5061 
     | 
    
         | 
| 
       5061 
5062 
     | 
    
         
             
                   For greater clarity and unambiguity, it is best to avoid following \ by
         
     | 
| 
       5062 
5063 
     | 
    
         
             
                   a digit greater than zero. Instead, use \o{} or \x{} to specify charac-
         
     | 
| 
       5063 
     | 
    
         
            -
                   ter 
     | 
| 
      
 5064 
     | 
    
         
            +
                   ter numbers, and \g{} to specify back references. The  following  para-
         
     | 
| 
       5064 
5065 
     | 
    
         
             
                   graphs describe the old, ambiguous syntax.
         
     | 
| 
       5065 
5066 
     | 
    
         | 
| 
       5066 
5067 
     | 
    
         
             
                   The handling of a backslash followed by a digit other than 0 is compli-
         
     | 
| 
       5067 
     | 
    
         
            -
                   cated, 
     | 
| 
      
 5068 
     | 
    
         
            +
                   cated, and Perl has changed in recent releases, causing  PCRE  also  to
         
     | 
| 
       5068 
5069 
     | 
    
         
             
                   change. Outside a character class, PCRE reads the digit and any follow-
         
     | 
| 
       5069 
     | 
    
         
            -
                   ing 
     | 
| 
       5070 
     | 
    
         
            -
                   there 
     | 
| 
       5071 
     | 
    
         
            -
                   in 
     | 
| 
       5072 
     | 
    
         
            -
                   description 
     | 
| 
      
 5070 
     | 
    
         
            +
                   ing digits as a decimal number. If the number is less  than  8,  or  if
         
     | 
| 
      
 5071 
     | 
    
         
            +
                   there  have been at least that many previous capturing left parentheses
         
     | 
| 
      
 5072 
     | 
    
         
            +
                   in the expression, the entire sequence is taken as a back reference.  A
         
     | 
| 
      
 5073 
     | 
    
         
            +
                   description  of how this works is given later, following the discussion
         
     | 
| 
       5073 
5074 
     | 
    
         
             
                   of parenthesized subpatterns.
         
     | 
| 
       5074 
5075 
     | 
    
         | 
| 
       5075 
     | 
    
         
            -
                   Inside 
     | 
| 
      
 5076 
     | 
    
         
            +
                   Inside a character class, or if  the  decimal  number  following  \  is
         
     | 
| 
       5076 
5077 
     | 
    
         
             
                   greater than 7 and there have not been that many capturing subpatterns,
         
     | 
| 
       5077 
     | 
    
         
            -
                   PCRE 
     | 
| 
      
 5078 
     | 
    
         
            +
                   PCRE handles \8 and \9 as the literal characters "8" and "9", and  oth-
         
     | 
| 
       5078 
5079 
     | 
    
         
             
                   erwise re-reads up to three octal digits following the backslash, using
         
     | 
| 
       5079 
     | 
    
         
            -
                   them 
     | 
| 
      
 5080 
     | 
    
         
            +
                   them to generate a data character.  Any  subsequent  digits  stand  for
         
     | 
| 
       5080 
5081 
     | 
    
         
             
                   themselves. For example:
         
     | 
| 
       5081 
5082 
     | 
    
         | 
| 
       5082 
5083 
     | 
    
         
             
                     \040   is another way of writing an ASCII space
         
     | 
| 
         @@ -5094,31 +5095,31 @@ BACKSLASH 
     | 
|
| 
       5094 
5095 
     | 
    
         
             
                     \81    is either a back reference, or the two
         
     | 
| 
       5095 
5096 
     | 
    
         
             
                               characters "8" and "1"
         
     | 
| 
       5096 
5097 
     | 
    
         | 
| 
       5097 
     | 
    
         
            -
                   Note 
     | 
| 
       5098 
     | 
    
         
            -
                   syntax 
     | 
| 
      
 5098 
     | 
    
         
            +
                   Note  that octal values of 100 or greater that are specified using this
         
     | 
| 
      
 5099 
     | 
    
         
            +
                   syntax must not be introduced by a leading zero, because no  more  than
         
     | 
| 
       5099 
5100 
     | 
    
         
             
                   three octal digits are ever read.
         
     | 
| 
       5100 
5101 
     | 
    
         | 
| 
       5101 
     | 
    
         
            -
                   By 
     | 
| 
       5102 
     | 
    
         
            -
                   decimal 
     | 
| 
      
 5102 
     | 
    
         
            +
                   By  default, after \x that is not followed by {, from zero to two hexa-
         
     | 
| 
      
 5103 
     | 
    
         
            +
                   decimal digits are read (letters can be in upper or  lower  case).  Any
         
     | 
| 
       5103 
5104 
     | 
    
         
             
                   number of hexadecimal digits may appear between \x{ and }. If a charac-
         
     | 
| 
       5104 
     | 
    
         
            -
                   ter 
     | 
| 
      
 5105 
     | 
    
         
            +
                   ter other than a hexadecimal digit appears between \x{  and  },  or  if
         
     | 
| 
       5105 
5106 
     | 
    
         
             
                   there is no terminating }, an error occurs.
         
     | 
| 
       5106 
5107 
     | 
    
         | 
| 
       5107 
     | 
    
         
            -
                   If 
     | 
| 
       5108 
     | 
    
         
            -
                   is 
     | 
| 
       5109 
     | 
    
         
            -
                   its. 
     | 
| 
      
 5108 
     | 
    
         
            +
                   If  the  PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x
         
     | 
| 
      
 5109 
     | 
    
         
            +
                   is as just described only when it is followed by two  hexadecimal  dig-
         
     | 
| 
      
 5110 
     | 
    
         
            +
                   its.   Otherwise,  it  matches  a  literal "x" character. In JavaScript
         
     | 
| 
       5110 
5111 
     | 
    
         
             
                   mode, support for code points greater than 256 is provided by \u, which
         
     | 
| 
       5111 
     | 
    
         
            -
                   must 
     | 
| 
      
 5112 
     | 
    
         
            +
                   must  be  followed  by  four hexadecimal digits; otherwise it matches a
         
     | 
| 
       5112 
5113 
     | 
    
         
             
                   literal "u" character.
         
     | 
| 
       5113 
5114 
     | 
    
         | 
| 
       5114 
5115 
     | 
    
         
             
                   Characters whose value is less than 256 can be defined by either of the
         
     | 
| 
       5115 
     | 
    
         
            -
                   two 
     | 
| 
      
 5116 
     | 
    
         
            +
                   two  syntaxes for \x (or by \u in JavaScript mode). There is no differ-
         
     | 
| 
       5116 
5117 
     | 
    
         
             
                   ence in the way they are handled. For example, \xdc is exactly the same
         
     | 
| 
       5117 
5118 
     | 
    
         
             
                   as \x{dc} (or \u00dc in JavaScript mode).
         
     | 
| 
       5118 
5119 
     | 
    
         | 
| 
       5119 
5120 
     | 
    
         
             
               Constraints on character values
         
     | 
| 
       5120 
5121 
     | 
    
         | 
| 
       5121 
     | 
    
         
            -
                   Characters 
     | 
| 
      
 5122 
     | 
    
         
            +
                   Characters  that  are  specified using octal or hexadecimal numbers are
         
     | 
| 
       5122 
5123 
     | 
    
         
             
                   limited to certain values, as follows:
         
     | 
| 
       5123 
5124 
     | 
    
         | 
| 
       5124 
5125 
     | 
    
         
             
                     8-bit non-UTF mode    less than 0x100
         
     | 
| 
         @@ -5128,44 +5129,44 @@ BACKSLASH 
     | 
|
| 
       5128 
5129 
     | 
    
         
             
                     32-bit non-UTF mode   less than 0x100000000
         
     | 
| 
       5129 
5130 
     | 
    
         
             
                     32-bit UTF-32 mode    less than 0x10ffff and a valid codepoint
         
     | 
| 
       5130 
5131 
     | 
    
         | 
| 
       5131 
     | 
    
         
            -
                   Invalid 
     | 
| 
      
 5132 
     | 
    
         
            +
                   Invalid Unicode codepoints are the range  0xd800  to  0xdfff  (the  so-
         
     | 
| 
       5132 
5133 
     | 
    
         
             
                   called "surrogate" codepoints), and 0xffef.
         
     | 
| 
       5133 
5134 
     | 
    
         | 
| 
       5134 
5135 
     | 
    
         
             
               Escape sequences in character classes
         
     | 
| 
       5135 
5136 
     | 
    
         | 
| 
       5136 
5137 
     | 
    
         
             
                   All the sequences that define a single character value can be used both
         
     | 
| 
       5137 
     | 
    
         
            -
                   inside 
     | 
| 
      
 5138 
     | 
    
         
            +
                   inside and outside character classes. In addition, inside  a  character
         
     | 
| 
       5138 
5139 
     | 
    
         
             
                   class, \b is interpreted as the backspace character (hex 08).
         
     | 
| 
       5139 
5140 
     | 
    
         | 
| 
       5140 
     | 
    
         
            -
                   \N 
     | 
| 
       5141 
     | 
    
         
            -
                   inside 
     | 
| 
       5142 
     | 
    
         
            -
                   they 
     | 
| 
       5143 
     | 
    
         
            -
                   default, 
     | 
| 
      
 5141 
     | 
    
         
            +
                   \N  is not allowed in a character class. \B, \R, and \X are not special
         
     | 
| 
      
 5142 
     | 
    
         
            +
                   inside a character class. Like  other  unrecognized  escape  sequences,
         
     | 
| 
      
 5143 
     | 
    
         
            +
                   they  are  treated  as  the  literal  characters  "B",  "R", and "X" by
         
     | 
| 
      
 5144 
     | 
    
         
            +
                   default, but cause an error if the PCRE_EXTRA option is set. Outside  a
         
     | 
| 
       5144 
5145 
     | 
    
         
             
                   character class, these sequences have different meanings.
         
     | 
| 
       5145 
5146 
     | 
    
         | 
| 
       5146 
5147 
     | 
    
         
             
               Unsupported escape sequences
         
     | 
| 
       5147 
5148 
     | 
    
         | 
| 
       5148 
     | 
    
         
            -
                   In 
     | 
| 
       5149 
     | 
    
         
            -
                   handler 
     | 
| 
       5150 
     | 
    
         
            -
                   default, 
     | 
| 
       5151 
     | 
    
         
            -
                   PCRE_JAVASCRIPT_COMPAT 
     | 
| 
      
 5149 
     | 
    
         
            +
                   In  Perl, the sequences \l, \L, \u, and \U are recognized by its string
         
     | 
| 
      
 5150 
     | 
    
         
            +
                   handler and used  to  modify  the  case  of  following  characters.  By
         
     | 
| 
      
 5151 
     | 
    
         
            +
                   default,  PCRE does not support these escape sequences. However, if the
         
     | 
| 
      
 5152 
     | 
    
         
            +
                   PCRE_JAVASCRIPT_COMPAT option is set, \U matches a "U"  character,  and
         
     | 
| 
       5152 
5153 
     | 
    
         
             
                   \u can be used to define a character by code point, as described in the
         
     | 
| 
       5153 
5154 
     | 
    
         
             
                   previous section.
         
     | 
| 
       5154 
5155 
     | 
    
         | 
| 
       5155 
5156 
     | 
    
         
             
               Absolute and relative back references
         
     | 
| 
       5156 
5157 
     | 
    
         | 
| 
       5157 
     | 
    
         
            -
                   The 
     | 
| 
       5158 
     | 
    
         
            -
                   ally 
     | 
| 
      
 5158 
     | 
    
         
            +
                   The sequence \g followed by an unsigned or a negative  number,  option-
         
     | 
| 
      
 5159 
     | 
    
         
            +
                   ally  enclosed  in braces, is an absolute or relative back reference. A
         
     | 
| 
       5159 
5160 
     | 
    
         
             
                   named back reference can be coded as \g{name}. Back references are dis-
         
     | 
| 
       5160 
5161 
     | 
    
         
             
                   cussed later, following the discussion of parenthesized subpatterns.
         
     | 
| 
       5161 
5162 
     | 
    
         | 
| 
       5162 
5163 
     | 
    
         
             
               Absolute and relative subroutine calls
         
     | 
| 
       5163 
5164 
     | 
    
         | 
| 
       5164 
     | 
    
         
            -
                   For 
     | 
| 
      
 5165 
     | 
    
         
            +
                   For  compatibility with Oniguruma, the non-Perl syntax \g followed by a
         
     | 
| 
       5165 
5166 
     | 
    
         
             
                   name or a number enclosed either in angle brackets or single quotes, is
         
     | 
| 
       5166 
     | 
    
         
            -
                   an 
     | 
| 
       5167 
     | 
    
         
            -
                   Details 
     | 
| 
       5168 
     | 
    
         
            -
                   \g<...> 
     | 
| 
      
 5167 
     | 
    
         
            +
                   an  alternative  syntax for referencing a subpattern as a "subroutine".
         
     | 
| 
      
 5168 
     | 
    
         
            +
                   Details are discussed later.   Note  that  \g{...}  (Perl  syntax)  and
         
     | 
| 
      
 5169 
     | 
    
         
            +
                   \g<...>  (Oniguruma  syntax)  are  not synonymous. The former is a back
         
     | 
| 
       5169 
5170 
     | 
    
         
             
                   reference; the latter is a subroutine call.
         
     | 
| 
       5170 
5171 
     | 
    
         | 
| 
       5171 
5172 
     | 
    
         
             
               Generic character types
         
     | 
| 
         @@ -5184,59 +5185,59 @@ BACKSLASH 
     | 
|
| 
       5184 
5185 
     | 
    
         
             
                     \W     any "non-word" character
         
     | 
| 
       5185 
5186 
     | 
    
         | 
| 
       5186 
5187 
     | 
    
         
             
                   There is also the single sequence \N, which matches a non-newline char-
         
     | 
| 
       5187 
     | 
    
         
            -
                   acter.   
     | 
| 
       5188 
     | 
    
         
            -
                   not 
     | 
| 
      
 5188 
     | 
    
         
            +
                   acter.   This  is the same as the "." metacharacter when PCRE_DOTALL is
         
     | 
| 
      
 5189 
     | 
    
         
            +
                   not set. Perl also uses \N to match characters by name; PCRE  does  not
         
     | 
| 
       5189 
5190 
     | 
    
         
             
                   support this.
         
     | 
| 
       5190 
5191 
     | 
    
         | 
| 
       5191 
     | 
    
         
            -
                   Each 
     | 
| 
       5192 
     | 
    
         
            -
                   plete 
     | 
| 
       5193 
     | 
    
         
            -
                   matches 
     | 
| 
       5194 
     | 
    
         
            -
                   inside 
     | 
| 
       5195 
     | 
    
         
            -
                   the 
     | 
| 
       5196 
     | 
    
         
            -
                   the 
     | 
| 
      
 5192 
     | 
    
         
            +
                   Each  pair of lower and upper case escape sequences partitions the com-
         
     | 
| 
      
 5193 
     | 
    
         
            +
                   plete set of characters into two disjoint  sets.  Any  given  character
         
     | 
| 
      
 5194 
     | 
    
         
            +
                   matches  one, and only one, of each pair. The sequences can appear both
         
     | 
| 
      
 5195 
     | 
    
         
            +
                   inside and outside character classes. They each match one character  of
         
     | 
| 
      
 5196 
     | 
    
         
            +
                   the  appropriate  type.  If the current matching point is at the end of
         
     | 
| 
      
 5197 
     | 
    
         
            +
                   the subject string, all of them fail, because there is no character  to
         
     | 
| 
       5197 
5198 
     | 
    
         
             
                   match.
         
     | 
| 
       5198 
5199 
     | 
    
         | 
| 
       5199 
     | 
    
         
            -
                   For 
     | 
| 
       5200 
     | 
    
         
            -
                   (code 
     | 
| 
       5201 
     | 
    
         
            -
                   However, 
     | 
| 
       5202 
     | 
    
         
            -
                   release 
     | 
| 
       5203 
     | 
    
         
            -
                   (11), 
     | 
| 
      
 5200 
     | 
    
         
            +
                   For  compatibility with Perl, \s did not use to match the VT character
         
     | 
| 
      
 5201 
     | 
    
         
            +
                   (code 11), which made it different from the  POSIX  "space"  class.
         
     | 
| 
      
 5202 
     | 
    
         
            +
                   However,  Perl  added  VT  at  release  5.18, and PCRE followed suit at
         
     | 
| 
      
 5203 
     | 
    
         
            +
                   release 8.34. The default \s characters are now HT  (9),  LF  (10),  VT
         
     | 
| 
      
 5204 
     | 
    
         
            +
                   (11),  FF  (12),  CR  (13),  and space (32), which are defined as white
         
     | 
| 
       5204 
5205 
     | 
    
         
             
                   space in the "C" locale. This list may vary if locale-specific matching
         
     | 
| 
       5205 
     | 
    
         
            -
                   is 
     | 
| 
       5206 
     | 
    
         
            -
                   character 
     | 
| 
      
 5206 
     | 
    
         
            +
                   is  taking place. For example, in some locales the "non-breaking space"
         
     | 
| 
      
 5207 
     | 
    
         
            +
                   character (\xA0) is recognized as white space, and  in  others  the  VT
         
     | 
| 
       5207 
5208 
     | 
    
         
             
                   character is not.
         
     | 
| 
       5208 
5209 
     | 
    
         | 
| 
       5209 
     | 
    
         
            -
                   A 
     | 
| 
       5210 
     | 
    
         
            -
                   or 
     | 
| 
       5211 
     | 
    
         
            -
                   trolled 
     | 
| 
       5212 
     | 
    
         
            -
                   specific 
     | 
| 
       5213 
     | 
    
         
            -
                   page). 
     | 
| 
       5214 
     | 
    
         
            -
                   systems, 
     | 
| 
       5215 
     | 
    
         
            -
                   are 
     | 
| 
      
 5210 
     | 
    
         
            +
                   A  "word"  character is an underscore or any character that is a letter
         
     | 
| 
      
 5211 
     | 
    
         
            +
                   or digit.  By default, the definition of letters  and  digits  is  con-
         
     | 
| 
      
 5212 
     | 
    
         
            +
                   trolled  by PCRE's low-valued character tables, and may vary if locale-
         
     | 
| 
      
 5213 
     | 
    
         
            +
                   specific matching is taking place (see "Locale support" in the  pcreapi
         
     | 
| 
      
 5214 
     | 
    
         
            +
                   page).  For  example,  in  a French locale such as "fr_FR" in Unix-like
         
     | 
| 
      
 5215 
     | 
    
         
            +
                   systems, or "french" in Windows, some character codes greater than  127
         
     | 
| 
      
 5216 
     | 
    
         
            +
                   are  used  for  accented letters, and these are then matched by \w. The
         
     | 
| 
       5216 
5217 
     | 
    
         
             
                   use of locales with Unicode is discouraged.
         
     | 
| 
       5217 
5218 
     | 
    
         | 
| 
       5218 
     | 
    
         
            -
                   By 
     | 
| 
      
 5219 
     | 
    
         
            +
                   By default, characters whose code points are  greater  than  127  never
         
     | 
| 
       5219 
5220 
     | 
    
         
             
                   match \d, \s, or \w, and always match \D, \S, and \W, although this may
         
     | 
| 
       5220 
     | 
    
         
            -
                   vary 
     | 
| 
       5221 
     | 
    
         
            -
                   is 
     | 
| 
       5222 
     | 
    
         
            -
                   from 
     | 
| 
       5223 
     | 
    
         
            -
                   sons. 
     | 
| 
       5224 
     | 
    
         
            -
                   PCRE_UCP 
     | 
| 
      
 5221 
     | 
    
         
            +
                   vary for characters in the range 128-255 when locale-specific  matching
         
     | 
| 
      
 5222 
     | 
    
         
            +
                   is  happening.   These  escape sequences retain their original meanings
         
     | 
| 
      
 5223 
     | 
    
         
            +
                   from before Unicode support was available, mainly for  efficiency  rea-
         
     | 
| 
      
 5224 
     | 
    
         
            +
                   sons.  If  PCRE  is  compiled  with  Unicode  property support, and the
         
     | 
| 
      
 5225 
     | 
    
         
            +
                   PCRE_UCP option is set, the behaviour is changed so that Unicode  prop-
         
     | 
| 
       5225 
5226 
     | 
    
         
             
                   erties are used to determine character types, as follows:
         
     | 
| 
       5226 
5227 
     | 
    
         | 
| 
       5227 
5228 
     | 
    
         
             
                     \d  any character that matches \p{Nd} (decimal digit)
         
     | 
| 
       5228 
5229 
     | 
    
         
             
                     \s  any character that matches \p{Z} or \h or \v
         
     | 
| 
       5229 
5230 
     | 
    
         
             
                     \w  any character that matches \p{L} or \p{N}, plus underscore
         
     | 
| 
       5230 
5231 
     | 
    
         | 
| 
       5231 
     | 
    
         
            -
                   The 
     | 
| 
       5232 
     | 
    
         
            -
                   \d 
     | 
| 
       5233 
     | 
    
         
            -
                   as 
     | 
| 
       5234 
     | 
    
         
            -
                   affects 
     | 
| 
      
 5232 
     | 
    
         
            +
                   The  upper case escapes match the inverse sets of characters. Note that
         
     | 
| 
      
 5233 
     | 
    
         
            +
                   \d matches only decimal digits, whereas \w matches any  Unicode  digit,
         
     | 
| 
      
 5234 
     | 
    
         
            +
                   as  well as any Unicode letter, and underscore. Note also that PCRE_UCP
         
     | 
| 
      
 5235 
     | 
    
         
            +
                   affects \b, and \B because they are defined in  terms  of  \w  and  \W.
         
     | 
| 
       5235 
5236 
     | 
    
         
             
                   Matching these sequences is noticeably slower when PCRE_UCP is set.
         
     | 
| 
       5236 
5237 
     | 
    
         | 
| 
       5237 
     | 
    
         
            -
                   The 
     | 
| 
       5238 
     | 
    
         
            -
                   at 
     | 
| 
       5239 
     | 
    
         
            -
                   ASCII 
     | 
| 
      
 5238 
     | 
    
         
            +
                   The  sequences  \h, \H, \v, and \V are features that were added to Perl
         
     | 
| 
      
 5239 
     | 
    
         
            +
                   at release 5.10. In contrast to the other sequences, which  match  only
         
     | 
| 
      
 5240 
     | 
    
         
            +
                   ASCII  characters  by  default,  these always match certain high-valued
         
     | 
| 
       5240 
5241 
     | 
    
         
             
                   code points, whether or not PCRE_UCP is set. The horizontal space char-
         
     | 
| 
       5241 
5242 
     | 
    
         
             
                   acters are:
         
     | 
| 
       5242 
5243 
     | 
    
         | 
| 
         @@ -5275,110 +5276,110 @@ BACKSLASH 
     | 
|
| 
       5275 
5276 
     | 
    
         | 
| 
       5276 
5277 
     | 
    
         
             
               Newline sequences
         
     | 
| 
       5277 
5278 
     | 
    
         | 
| 
       5278 
     | 
    
         
            -
                   Outside 
     | 
| 
       5279 
     | 
    
         
            -
                   any 
     | 
| 
      
 5279 
     | 
    
         
            +
                   Outside a character class, by default, the escape sequence  \R  matches
         
     | 
| 
      
 5280 
     | 
    
         
            +
                   any  Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent
         
     | 
| 
       5280 
5281 
     | 
    
         
             
                   to the following:
         
     | 
| 
       5281 
5282 
     | 
    
         | 
| 
       5282 
5283 
     | 
    
         
             
                     (?>\r\n|\n|\x0b|\f|\r|\x85)
         
     | 
| 
       5283 
5284 
     | 
    
         | 
| 
       5284 
     | 
    
         
            -
                   This 
     | 
| 
      
 5285 
     | 
    
         
            +
                   This is an example of an "atomic group", details  of  which  are  given
         
     | 
| 
       5285 
5286 
     | 
    
         
             
                   below.  This particular group matches either the two-character sequence
         
     | 
| 
       5286 
     | 
    
         
            -
                   CR 
     | 
| 
       5287 
     | 
    
         
            -
                   U+000A), 
     | 
| 
       5288 
     | 
    
         
            -
                   riage 
     | 
| 
      
 5287 
     | 
    
         
            +
                   CR followed by LF, or  one  of  the  single  characters  LF  (linefeed,
         
     | 
| 
      
 5288 
     | 
    
         
            +
                   U+000A),  VT  (vertical  tab, U+000B), FF (form feed, U+000C), CR (car-
         
     | 
| 
      
 5289 
     | 
    
         
            +
                   riage return, U+000D), or NEL (next line,  U+0085).  The  two-character
         
     | 
| 
       5289 
5290 
     | 
    
         
             
                   sequence is treated as a single unit that cannot be split.
         
     | 
| 
       5290 
5291 
     | 
    
         | 
| 
       5291 
     | 
    
         
            -
                   In 
     | 
| 
      
 5292 
     | 
    
         
            +
                   In  other modes, two additional characters whose codepoints are greater
         
     | 
| 
       5292 
5293 
     | 
    
         
             
                   than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa-
         
     | 
| 
       5293 
     | 
    
         
            -
                   rator, 
     | 
| 
      
 5294 
     | 
    
         
            +
                   rator,  U+2029).   Unicode character property support is not needed for
         
     | 
| 
       5294 
5295 
     | 
    
         
             
                   these characters to be recognized.
         
     | 
| 
       5295 
5296 
     | 
    
         | 
| 
       5296 
5297 
     | 
    
         
             
                   It is possible to restrict \R to match only CR, LF, or CRLF (instead of
         
     | 
| 
       5297 
     | 
    
         
            -
                   the 
     | 
| 
      
 5298 
     | 
    
         
            +
                   the  complete  set  of  Unicode  line  endings)  by  setting the option
         
     | 
| 
       5298 
5299 
     | 
    
         
             
                   PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
         
     | 
| 
       5299 
5300 
     | 
    
         
             
                   (BSR is an abbreviation for "backslash R".) This can be made the default
         
     | 
| 
       5300 
     | 
    
         
            -
                   when 
     | 
| 
       5301 
     | 
    
         
            -
                   requested 
     | 
| 
       5302 
     | 
    
         
            -
                   specify 
     | 
| 
      
 5301 
     | 
    
         
            +
                   when PCRE is built; if this is the case, the  other  behaviour  can  be
         
     | 
| 
      
 5302 
     | 
    
         
            +
                   requested  via  the  PCRE_BSR_UNICODE  option.   It is also possible to
         
     | 
| 
      
 5303 
     | 
    
         
            +
                   specify these settings by starting a pattern string  with  one  of  the
         
     | 
| 
       5303 
5304 
     | 
    
         
             
                   following sequences:
         
     | 
| 
       5304 
5305 
     | 
    
         | 
| 
       5305 
5306 
     | 
    
         
             
                     (*BSR_ANYCRLF)   CR, LF, or CRLF only
         
     | 
| 
       5306 
5307 
     | 
    
         
             
                     (*BSR_UNICODE)   any Unicode newline sequence
         
     | 
| 
       5307 
5308 
     | 
    
         | 
| 
       5308 
5309 
     | 
    
         
             
                   These override the default and the options given to the compiling func-
         
     | 
| 
       5309 
     | 
    
         
            -
                   tion, 
     | 
| 
       5310 
     | 
    
         
            -
                   matching 
     | 
| 
       5311 
     | 
    
         
            -
                   Perl-compatible, 
     | 
| 
       5312 
     | 
    
         
            -
                   and 
     | 
| 
       5313 
     | 
    
         
            -
                   present, 
     | 
| 
      
 5310 
     | 
    
         
            +
                   tion, but they can themselves be  overridden  by  options  given  to  a
         
     | 
| 
      
 5311 
     | 
    
         
            +
                   matching  function.  Note  that  these  special settings, which are not
         
     | 
| 
      
 5312 
     | 
    
         
            +
                   Perl-compatible, are recognized only at the very start  of  a  pattern,
         
     | 
| 
      
 5313 
     | 
    
         
            +
                   and  that  they  must  be  in  upper  case. If more than one of them is
         
     | 
| 
      
 5314 
     | 
    
         
            +
                   present, the last one is used. They can be combined with  a  change  of
         
     | 
| 
       5314 
5315 
     | 
    
         
             
                   newline convention; for example, a pattern can start with:
         
     | 
| 
       5315 
5316 
     | 
    
         | 
| 
       5316 
5317 
     | 
    
         
             
                     (*ANY)(*BSR_ANYCRLF)
         
     | 
| 
       5317 
5318 
     | 
    
         | 
| 
       5318 
     | 
    
         
            -
                   They 
     | 
| 
      
 5319 
     | 
    
         
            +
                   They  can also be combined with the (*UTF8), (*UTF16), (*UTF32), (*UTF)
         
     | 
| 
       5319 
5320 
     | 
    
         
             
                   or (*UCP) special sequences. Inside a character class, \R is treated as
         
     | 
| 
       5320 
     | 
    
         
            -
                   an 
     | 
| 
      
 5321 
     | 
    
         
            +
                   an  unrecognized  escape  sequence,  and  so  matches the letter "R" by
         
     | 
| 
       5321 
5322 
     | 
    
         
             
                   default, but causes an error if PCRE_EXTRA is set.
         
     | 
| 
       5322 
5323 
     | 
    
         | 
| 
       5323 
5324 
     | 
    
         
             
               Unicode character properties
         
     | 
| 
       5324 
5325 
     | 
    
         | 
| 
       5325 
5326 
     | 
    
         
             
                   When PCRE is built with Unicode character property support, three addi-
         
     | 
| 
       5326 
     | 
    
         
            -
                   tional 
     | 
| 
       5327 
     | 
    
         
            -
                   are 
     | 
| 
       5328 
     | 
    
         
            -
                   course 
     | 
| 
      
 5327 
     | 
    
         
            +
                   tional  escape sequences that match characters with specific properties
         
     | 
| 
      
 5328 
     | 
    
         
            +
                   are available.  When in 8-bit non-UTF-8 mode, these  sequences  are  of
         
     | 
| 
      
 5329 
     | 
    
         
            +
                   course  limited  to  testing  characters whose codepoints are less than
         
     | 
| 
       5329 
5330 
     | 
    
         
             
                   256, but they do work in this mode.  The extra escape sequences are:
         
     | 
| 
       5330 
5331 
     | 
    
         | 
| 
       5331 
5332 
     | 
    
         
             
                     \p{xx}   a character with the xx property
         
     | 
| 
       5332 
5333 
     | 
    
         
             
                     \P{xx}   a character without the xx property
         
     | 
| 
       5333 
5334 
     | 
    
         
             
                     \X       a Unicode extended grapheme cluster
         
     | 
| 
       5334 
5335 
     | 
    
         | 
| 
       5335 
     | 
    
         
            -
                   The 
     | 
| 
      
 5336 
     | 
    
         
            +
                   The property names represented by xx above are limited to  the  Unicode
         
     | 
| 
       5336 
5337 
     | 
    
         
             
                   script names, the general category properties, "Any", which matches any
         
     | 
| 
       5337 
     | 
    
         
            -
                   character 
     | 
| 
       5338 
     | 
    
         
            -
                   (described 
     | 
| 
       5339 
     | 
    
         
            -
                   sicalSymbols" 
     | 
| 
      
 5338 
     | 
    
         
            +
                   character  (including  newline),  and  some  special  PCRE   properties
         
     | 
| 
      
 5339 
     | 
    
         
            +
                   (described  in the next section).  Other Perl properties such as "InMu-
         
     | 
| 
      
 5340 
     | 
    
         
            +
                   sicalSymbols" are not currently supported by PCRE.  Note  that  \P{Any}
         
     | 
| 
       5340 
5341 
     | 
    
         
             
                   does not match any characters, so always causes a match failure.
         
     | 
| 
       5341 
5342 
     | 
    
         | 
| 
       5342 
5343 
     | 
    
         
             
                   Sets of Unicode characters are defined as belonging to certain scripts.
         
     | 
| 
       5343 
     | 
    
         
            -
                   A 
     | 
| 
      
 5344 
     | 
    
         
            +
                   A character from one of these sets can be matched using a script  name.
         
     | 
| 
       5344 
5345 
     | 
    
         
             
                   For example:
         
     | 
| 
       5345 
5346 
     | 
    
         | 
| 
       5346 
5347 
     | 
    
         
             
                     \p{Greek}
         
     | 
| 
       5347 
5348 
     | 
    
         
             
                     \P{Han}
         
     | 
| 
       5348 
5349 
     | 
    
         | 
| 
       5349 
     | 
    
         
            -
                   Those 
     | 
| 
      
 5350 
     | 
    
         
            +
                   Those  that are not part of an identified script are lumped together as
         
     | 
| 
       5350 
5351 
     | 
    
         
             
                   "Common". The current list of scripts is:
         
     | 
| 
       5351 
5352 
     | 
    
         | 
| 
       5352 
     | 
    
         
            -
                   Arabic, 
     | 
| 
       5353 
     | 
    
         
            -
                   Bopomofo, 
     | 
| 
      
 5353 
     | 
    
         
            +
                   Arabic, Armenian, Avestan, Balinese, Bamum, Bassa_Vah, Batak,  Bengali,
         
     | 
| 
      
 5354 
     | 
    
         
            +
                   Bopomofo,  Brahmi,  Braille, Buginese, Buhid, Canadian_Aboriginal, Car-
         
     | 
| 
       5354 
5355 
     | 
    
         
             
                   ian, Caucasian_Albanian, Chakma, Cham, Cherokee, Common, Coptic, Cunei-
         
     | 
| 
       5355 
5356 
     | 
    
         
             
                   form, Cypriot, Cyrillic, Deseret, Devanagari, Duployan, Egyptian_Hiero-
         
     | 
| 
       5356 
5357 
     | 
    
         
             
                   glyphs,  Elbasan,  Ethiopic,  Georgian,  Glagolitic,  Gothic,  Grantha,
         
     | 
| 
       5357 
     | 
    
         
            -
                   Greek, 
     | 
| 
       5358 
     | 
    
         
            -
                   Imperial_Aramaic, 
     | 
| 
       5359 
     | 
    
         
            -
                   tional_Parthian, 
     | 
| 
       5360 
     | 
    
         
            -
                   Kharoshthi, 
     | 
| 
       5361 
     | 
    
         
            -
                   ear_A, 
     | 
| 
       5362 
     | 
    
         
            -
                   Manichaean, 
     | 
| 
       5363 
     | 
    
         
            -
                   Meroitic_Hieroglyphs, 
     | 
| 
       5364 
     | 
    
         
            -
                   New_Tai_Lue, 
     | 
| 
      
 5358 
     | 
    
         
            +
                   Greek,  Gujarati,  Gurmukhi,  Han,  Hangul,  Hanunoo, Hebrew, Hiragana,
         
     | 
| 
      
 5359 
     | 
    
         
            +
                   Imperial_Aramaic,    Inherited,     Inscriptional_Pahlavi,     Inscrip-
         
     | 
| 
      
 5360 
     | 
    
         
            +
                   tional_Parthian,   Javanese,   Kaithi,   Kannada,  Katakana,  Kayah_Li,
         
     | 
| 
      
 5361 
     | 
    
         
            +
                   Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha,  Limbu,  Lin-
         
     | 
| 
      
 5362 
     | 
    
         
            +
                   ear_A,  Linear_B,  Lisu,  Lycian, Lydian, Mahajani, Malayalam, Mandaic,
         
     | 
| 
      
 5363 
     | 
    
         
            +
                   Manichaean,     Meetei_Mayek,     Mende_Kikakui,      Meroitic_Cursive,
         
     | 
| 
      
 5364 
     | 
    
         
            +
                   Meroitic_Hieroglyphs,  Miao,  Modi, Mongolian, Mro, Myanmar, Nabataean,
         
     | 
| 
      
 5365 
     | 
    
         
            +
                   New_Tai_Lue,  Nko,  Ogham,  Ol_Chiki,  Old_Italic,   Old_North_Arabian,
         
     | 
| 
       5365 
5366 
     | 
    
         
             
                   Old_Permic, Old_Persian, Old_South_Arabian, Old_Turkic, Oriya, Osmanya,
         
     | 
| 
       5366 
5367 
     | 
    
         
             
                   Pahawh_Hmong,    Palmyrene,    Pau_Cin_Hau,    Phags_Pa,    Phoenician,
         
     | 
| 
       5367 
     | 
    
         
            -
                   Psalter_Pahlavi, 
     | 
| 
       5368 
     | 
    
         
            -
                   vian, 
     | 
| 
       5369 
     | 
    
         
            -
                   Tagalog, 
     | 
| 
       5370 
     | 
    
         
            -
                   Thaana, 
     | 
| 
      
 5368 
     | 
    
         
            +
                   Psalter_Pahlavi,  Rejang,  Runic,  Samaritan, Saurashtra, Sharada, Sha-
         
     | 
| 
      
 5369 
     | 
    
         
            +
                   vian, Siddham, Sinhala, Sora_Sompeng, Sundanese, Syloti_Nagri,  Syriac,
         
     | 
| 
      
 5370 
     | 
    
         
            +
                   Tagalog,  Tagbanwa,  Tai_Le,  Tai_Tham, Tai_Viet, Takri, Tamil, Telugu,
         
     | 
| 
      
 5371 
     | 
    
         
            +
                   Thaana, Thai, Tibetan, Tifinagh, Tirhuta, Ugaritic,  Vai,  Warang_Citi,
         
     | 
| 
       5371 
5372 
     | 
    
         
             
                   Yi.
         
     | 
| 
       5372 
5373 
     | 
    
         | 
| 
       5373 
5374 
     | 
    
         
             
                   Each character has exactly one Unicode general category property, spec-
         
     | 
| 
       5374 
     | 
    
         
            -
                   ified 
     | 
| 
       5375 
     | 
    
         
            -
                   tion 
     | 
| 
       5376 
     | 
    
         
            -
                   brace 
     | 
| 
      
 5375 
     | 
    
         
            +
                   ified by a two-letter abbreviation. For compatibility with Perl,  nega-
         
     | 
| 
      
 5376 
     | 
    
         
            +
                   tion  can  be  specified  by including a circumflex between the opening
         
     | 
| 
      
 5377 
     | 
    
         
            +
                   brace and the property name.  For  example,  \p{^Lu}  is  the  same  as
         
     | 
| 
       5377 
5378 
     | 
    
         
             
                   \P{Lu}.
         
     | 
| 
       5378 
5379 
     | 
    
         | 
| 
       5379 
5380 
     | 
    
         
             
                   If only one letter is specified with \p or \P, it includes all the gen-
         
     | 
| 
       5380 
     | 
    
         
            -
                   eral 
     | 
| 
       5381 
     | 
    
         
            -
                   the 
     | 
| 
      
 5381 
     | 
    
         
            +
                   eral category properties that start with that letter. In this case,  in
         
     | 
| 
      
 5382 
     | 
    
         
            +
                   the  absence of negation, the curly brackets in the escape sequence are
         
     | 
| 
       5382 
5383 
     | 
    
         
             
                   optional; these two examples have the same effect:
         
     | 
| 
       5383 
5384 
     | 
    
         | 
| 
       5384 
5385 
     | 
    
         
             
                     \p{L}
         
     | 
| 
         @@ -5430,73 +5431,73 @@ BACKSLASH 
     | 
|
| 
       5430 
5431 
     | 
    
         
             
                     Zp    Paragraph separator
         
     | 
| 
       5431 
5432 
     | 
    
         
             
                     Zs    Space separator
         
     | 
| 
       5432 
5433 
     | 
    
         | 
| 
       5433 
     | 
    
         
            -
                   The 
     | 
| 
       5434 
     | 
    
         
            -
                   has 
     | 
| 
      
 5434 
     | 
    
         
            +
                   The special property L& is also supported: it matches a character  that
         
     | 
| 
      
 5435 
     | 
    
         
            +
                   has  the  Lu,  Ll, or Lt property, in other words, a letter that is not
         
     | 
| 
       5435 
5436 
     | 
    
         
             
                   classified as a modifier or "other".
         
     | 
| 
       5436 
5437 
     | 
    
         | 
| 
       5437 
     | 
    
         
            -
                   The 
     | 
| 
       5438 
     | 
    
         
            -
                   U+D800 
     | 
| 
       5439 
     | 
    
         
            -
                   so 
     | 
| 
      
 5438 
     | 
    
         
            +
                   The Cs (Surrogate) property applies only to  characters  in  the  range
         
     | 
| 
      
 5439 
     | 
    
         
            +
                   U+D800  to U+DFFF. Such characters are not valid in Unicode strings and
         
     | 
| 
      
 5440 
     | 
    
         
            +
                   so cannot be tested by PCRE, unless  UTF  validity  checking  has  been
         
     | 
| 
       5440 
5441 
     | 
    
         
             
                   turned    off    (see    the    discussion    of    PCRE_NO_UTF8_CHECK,
         
     | 
| 
       5441 
     | 
    
         
            -
                   PCRE_NO_UTF16_CHECK 
     | 
| 
      
 5442 
     | 
    
         
            +
                   PCRE_NO_UTF16_CHECK and PCRE_NO_UTF32_CHECK in the pcreapi page).  Perl
         
     | 
| 
       5442 
5443 
     | 
    
         
             
                   does not support the Cs property.
         
     | 
| 
       5443 
5444 
     | 
    
         | 
| 
       5444 
     | 
    
         
            -
                   The 
     | 
| 
       5445 
     | 
    
         
            -
                   \p{Letter}) 
     | 
| 
      
 5445 
     | 
    
         
            +
                   The  long  synonyms  for  property  names  that  Perl supports (such as
         
     | 
| 
      
 5446 
     | 
    
         
            +
                   \p{Letter}) are not supported by PCRE, nor is it  permitted  to  prefix
         
     | 
| 
       5446 
5447 
     | 
    
         
             
                   any of these properties with "Is".
         
     | 
| 
       5447 
5448 
     | 
    
         | 
| 
       5448 
5449 
     | 
    
         
             
                   No character that is in the Unicode table has the Cn (unassigned) prop-
         
     | 
| 
       5449 
5450 
     | 
    
         
             
                   erty.  Instead, this property is assumed for any code point that is not
         
     | 
| 
       5450 
5451 
     | 
    
         
             
                   in the Unicode table.
         
     | 
| 
       5451 
5452 
     | 
    
         | 
| 
       5452 
     | 
    
         
            -
                   Specifying 
     | 
| 
       5453 
     | 
    
         
            -
                   For 
     | 
| 
      
 5453 
     | 
    
         
            +
                   Specifying  caseless  matching  does not affect these escape sequences.
         
     | 
| 
      
 5454 
     | 
    
         
            +
                   For example, \p{Lu} always matches only upper  case  letters.  This  is
         
     | 
| 
       5454 
5455 
     | 
    
         
             
                   different from the behaviour of current versions of Perl.
         
     | 
| 
       5455 
5456 
     | 
    
         | 
| 
       5456 
     | 
    
         
            -
                   Matching 
     | 
| 
       5457 
     | 
    
         
            -
                   to 
     | 
| 
      
 5457 
     | 
    
         
            +
                   Matching  characters  by Unicode property is not fast, because PCRE has
         
     | 
| 
      
 5458 
     | 
    
         
            +
                   to do a multistage table lookup in order to find  a  character's  prop-
         
     | 
| 
       5458 
5459 
     | 
    
         
             
                   erty. That is why the traditional escape sequences such as \d and \w do
         
     | 
| 
       5459 
5460 
     | 
    
         
             
                   not use Unicode properties in PCRE by default, though you can make them
         
     | 
| 
       5460 
     | 
    
         
            -
                   do 
     | 
| 
      
 5461 
     | 
    
         
            +
                   do  so  by  setting the PCRE_UCP option or by starting the pattern with
         
     | 
| 
       5461 
5462 
     | 
    
         
             
                   (*UCP).
         
     | 
| 
       5462 
5463 
     | 
    
         | 
| 
       5463 
5464 
     | 
    
         
             
               Extended grapheme clusters
         
     | 
| 
       5464 
5465 
     | 
    
         | 
| 
       5465 
     | 
    
         
            -
                   The 
     | 
| 
      
 5466 
     | 
    
         
            +
                   The \X escape matches any number of Unicode  characters  that  form  an
         
     | 
| 
       5466 
5467 
     | 
    
         
             
                   "extended grapheme cluster", and treats the sequence as an atomic group
         
     | 
| 
       5467 
     | 
    
         
            -
                   (see 
     | 
| 
      
 5468 
     | 
    
         
            +
                   (see below).  Up to and including release 8.31, PCRE  matched  an  ear-
         
     | 
| 
       5468 
5469 
     | 
    
         
             
                   lier, simpler definition that was equivalent to
         
     | 
| 
       5469 
5470 
     | 
    
         | 
| 
       5470 
5471 
     | 
    
         
             
                     (?>\PM\pM*)
         
     | 
| 
       5471 
5472 
     | 
    
         | 
| 
       5472 
     | 
    
         
            -
                   That 
     | 
| 
       5473 
     | 
    
         
            -
                   by 
     | 
| 
       5474 
     | 
    
         
            -
                   the 
     | 
| 
      
 5473 
     | 
    
         
            +
                   That  is,  it matched a character without the "mark" property, followed
         
     | 
| 
      
 5474 
     | 
    
         
            +
                   by zero or more characters with the "mark"  property.  Characters  with
         
     | 
| 
      
 5475 
     | 
    
         
            +
                   the  "mark"  property are typically non-spacing accents that affect the
         
     | 
| 
       5475 
5476 
     | 
    
         
             
                   preceding character.
         
     | 
| 
       5476 
5477 
     | 
    
         | 
| 
       5477 
     | 
    
         
            -
                   This 
     | 
| 
       5478 
     | 
    
         
            -
                   cated 
     | 
| 
       5479 
     | 
    
         
            -
                   breaking 
     | 
| 
       5480 
     | 
    
         
            -
                   define 
     | 
| 
      
 5478 
     | 
    
         
            +
                   This simple definition was extended in Unicode to include more  compli-
         
     | 
| 
      
 5479 
     | 
    
         
            +
                   cated  kinds of composite character by giving each character a grapheme
         
     | 
| 
      
 5480 
     | 
    
         
            +
                   breaking property, and creating rules  that  use  these  properties  to
         
     | 
| 
      
 5481 
     | 
    
         
            +
                   define  the  boundaries  of  extended grapheme clusters. In releases of
         
     | 
| 
       5481 
5482 
     | 
    
         
             
                   PCRE later than 8.31, \X matches one of these clusters.
         
     | 
| 
       5482 
5483 
     | 
    
         | 
| 
       5483 
     | 
    
         
            -
                   \X 
     | 
| 
      
 5484 
     | 
    
         
            +
                   \X always matches at least one character. Then it  decides  whether  to
         
     | 
| 
       5484 
5485 
     | 
    
         
             
                   add additional characters according to the following rules for ending a
         
     | 
| 
       5485 
5486 
     | 
    
         
             
                   cluster:
         
     | 
| 
       5486 
5487 
     | 
    
         | 
| 
       5487 
5488 
     | 
    
         
             
                   1. End at the end of the subject string.
         
     | 
| 
       5488 
5489 
     | 
    
         | 
| 
       5489 
     | 
    
         
            -
                   2. 
     | 
| 
      
 5490 
     | 
    
         
            +
                   2. Do not end between CR and LF; otherwise end after any control  char-
         
     | 
| 
       5490 
5491 
     | 
    
         
             
                   acter.
         
     | 
| 
       5491 
5492 
     | 
    
         | 
| 
       5492 
     | 
    
         
            -
                   3. 
     | 
| 
       5493 
     | 
    
         
            -
                   characters 
     | 
| 
       5494 
     | 
    
         
            -
                   be 
     | 
| 
      
 5493 
     | 
    
         
            +
                   3.  Do  not  break  Hangul (a Korean script) syllable sequences. Hangul
         
     | 
| 
      
 5494 
     | 
    
         
            +
                   characters are of five types: L, V, T, LV, and LVT. An L character  may
         
     | 
| 
      
 5495 
     | 
    
         
            +
                   be  followed by an L, V, LV, or LVT character; an LV or V character may
         
     | 
| 
       5495 
5496 
     | 
    
         
             
                   be followed by a V or T character; an LVT or T character may be followed
         
     | 
| 
       5496 
5497 
     | 
    
         
             
                   only by a T character.
         
     | 
| 
       5497 
5498 
     | 
    
         | 
| 
       5498 
     | 
    
         
            -
                   4. 
     | 
| 
       5499 
     | 
    
         
            -
                   with 
     | 
| 
      
 5499 
     | 
    
         
            +
                   4.  Do not end before extending characters or spacing marks. Characters
         
     | 
| 
      
 5500 
     | 
    
         
            +
                   with the "mark" property always have  the  "extend"  grapheme  breaking
         
     | 
| 
       5500 
5501 
     | 
    
         
             
                   property.
         
     | 
| 
       5501 
5502 
     | 
    
         | 
| 
       5502 
5503 
     | 
    
         
             
                   5. Do not end after prepend characters.
         
     | 
| 
         @@ -5505,9 +5506,9 @@ BACKSLASH 
     | 
|
| 
       5505 
5506 
     | 
    
         | 
| 
       5506 
5507 
     | 
    
         
             
               PCRE's additional properties
         
     | 
| 
       5507 
5508 
     | 
    
         | 
| 
       5508 
     | 
    
         
            -
                   As 
     | 
| 
       5509 
     | 
    
         
            -
                   ports 
     | 
| 
       5510 
     | 
    
         
            -
                   sequences 
     | 
| 
      
 5509 
     | 
    
         
            +
                   As  well  as the standard Unicode properties described above, PCRE sup-
         
     | 
| 
      
 5510 
     | 
    
         
            +
                   ports four more that make it possible  to  convert  traditional  escape
         
     | 
| 
      
 5511 
     | 
    
         
            +
                   sequences  such as \w and \s to use Unicode properties. PCRE uses these
         
     | 
| 
       5511 
5512 
     | 
    
         
             
                   non-standard, non-Perl properties internally when PCRE_UCP is set. How-
         
     | 
| 
       5512 
5513 
     | 
    
         
             
                   ever, they may also be used explicitly. These properties are:
         
     | 
| 
       5513 
5514 
     | 
    
         | 
| 
         @@ -5516,54 +5517,54 @@ BACKSLASH 
     | 
|
| 
       5516 
5517 
     | 
    
         
             
                     Xsp   Any Perl space character
         
     | 
| 
       5517 
5518 
     | 
    
         
             
                     Xwd   Any Perl "word" character
         
     | 
| 
       5518 
5519 
     | 
    
         | 
| 
       5519 
     | 
    
         
            -
                   Xan 
     | 
| 
       5520 
     | 
    
         
            -
                   ber) 
     | 
| 
       5521 
     | 
    
         
            -
                   form 
     | 
| 
       5522 
     | 
    
         
            -
                   (separator) 
     | 
| 
       5523 
     | 
    
         
            -
                   tical 
     | 
| 
       5524 
     | 
    
         
            -
                   lowed 
     | 
| 
      
 5520 
     | 
    
         
            +
                   Xan  matches  characters that have either the L (letter) or the N (num-
         
     | 
| 
      
 5521 
     | 
    
         
            +
                   ber) property. Xps matches the characters tab, linefeed, vertical  tab,
         
     | 
| 
      
 5522 
     | 
    
         
            +
                   form  feed,  or carriage return, and any other character that has the Z
         
     | 
| 
      
 5523 
     | 
    
         
            +
                   (separator) property.  Xsp is the same as Xps; it used to exclude  ver-
         
     | 
| 
      
 5524 
     | 
    
         
            +
                   tical  tab,  for Perl compatibility, but Perl changed, and so PCRE fol-
         
     | 
| 
      
 5525 
     | 
    
         
            +
                   lowed at release 8.34. Xwd matches the same  characters  as  Xan,  plus
         
     | 
| 
       5525 
5526 
     | 
    
         
             
                   underscore.
         
     | 
| 
       5526 
5527 
     | 
    
         | 
| 
       5527 
     | 
    
         
            -
                   There 
     | 
| 
       5528 
     | 
    
         
            -
                   ter 
     | 
| 
       5529 
     | 
    
         
            -
                   other 
     | 
| 
       5530 
     | 
    
         
            -
                   accent), 
     | 
| 
       5531 
     | 
    
         
            -
                   equal 
     | 
| 
       5532 
     | 
    
         
            -
                   most 
     | 
| 
       5533 
     | 
    
         
            -
                   are 
     | 
| 
      
 5528 
     | 
    
         
            +
                   There  is another non-standard property, Xuc, which matches any charac-
         
     | 
| 
      
 5529 
     | 
    
         
            +
                   ter that can be represented by a Universal Character Name  in  C++  and
         
     | 
| 
      
 5530 
     | 
    
         
            +
                   other  programming  languages.  These are the characters $, @, ` (grave
         
     | 
| 
      
 5531 
     | 
    
         
            +
                   accent), and all characters with Unicode code points  greater  than  or
         
     | 
| 
      
 5532 
     | 
    
         
            +
                   equal  to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that
         
     | 
| 
      
 5533 
     | 
    
         
            +
                   most base (ASCII) characters are excluded. (Universal  Character  Names
         
     | 
| 
      
 5534 
     | 
    
         
            +
                   are  of  the  form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit.
         
     | 
| 
       5534 
5535 
     | 
    
         
             
                   Note that the Xuc property does not match these sequences but the char-
         
     | 
| 
       5535 
5536 
     | 
    
         
             
                   acters that they represent.)
         
     | 
| 
       5536 
5537 
     | 
    
         | 
| 
       5537 
5538 
     | 
    
         
             
               Resetting the match start
         
     | 
| 
       5538 
5539 
     | 
    
         | 
| 
       5539 
     | 
    
         
            -
                   The 
     | 
| 
      
 5540 
     | 
    
         
            +
                   The  escape sequence \K causes any previously matched characters not to
         
     | 
| 
       5540 
5541 
     | 
    
         
             
                   be included in the final matched sequence. For example, the pattern:
         
     | 
| 
       5541 
5542 
     | 
    
         | 
| 
       5542 
5543 
     | 
    
         
             
                     foo\Kbar
         
     | 
| 
       5543 
5544 
     | 
    
         | 
| 
       5544 
     | 
    
         
            -
                   matches 
     | 
| 
       5545 
     | 
    
         
            -
                   is 
     | 
| 
       5546 
     | 
    
         
            -
                   this 
     | 
| 
       5547 
     | 
    
         
            -
                   to 
     | 
| 
       5548 
     | 
    
         
            -
                   not 
     | 
| 
      
 5545 
     | 
    
         
            +
                   matches "foobar", but reports that it has matched "bar".  This  feature
         
     | 
| 
      
 5546 
     | 
    
         
            +
                   is  similar  to  a lookbehind assertion (described below).  However, in
         
     | 
| 
      
 5547 
     | 
    
         
            +
                   this case, the part of the subject before the real match does not  have
         
     | 
| 
      
 5548 
     | 
    
         
            +
                   to  be of fixed length, as lookbehind assertions do. The use of \K does
         
     | 
| 
      
 5549 
     | 
    
         
            +
                   not interfere with the setting of captured  substrings.   For  example,
         
     | 
| 
       5549 
5550 
     | 
    
         
             
                   when the pattern
         
     | 
| 
       5550 
5551 
     | 
    
         | 
| 
       5551 
5552 
     | 
    
         
             
                     (foo)\Kbar
         
     | 
| 
       5552 
5553 
     | 
    
         | 
| 
       5553 
5554 
     | 
    
         
             
                   matches "foobar", the first substring is still set to "foo".
         
     | 
| 
       5554 
5555 
     | 
    
         | 
| 
       5555 
     | 
    
         
            -
                   Perl 
     | 
| 
       5556 
     | 
    
         
            -
                   defined". 
     | 
| 
       5557 
     | 
    
         
            -
                   assertions, 
     | 
| 
       5558 
     | 
    
         
            -
                   pattern 
     | 
| 
      
 5556 
     | 
    
         
            +
                   Perl  documents  that  the  use  of  \K  within assertions is "not well
         
     | 
| 
      
 5557 
     | 
    
         
            +
                   defined". In PCRE, \K is acted upon  when  it  occurs  inside  positive
         
     | 
| 
      
 5558 
     | 
    
         
            +
                   assertions,  but  is  ignored  in negative assertions. Note that when a
         
     | 
| 
      
 5559 
     | 
    
         
            +
                   pattern such as (?=ab\K) matches, the reported start of the  match  can
         
     | 
| 
       5559 
5560 
     | 
    
         
             
                   be greater than the end of the match.
         
     | 
| 
       5560 
5561 
     | 
    
         | 
| 
       5561 
5562 
     | 
    
         
             
               Simple assertions
         
     | 
| 
       5562 
5563 
     | 
    
         | 
| 
       5563 
     | 
    
         
            -
                   The 
     | 
| 
       5564 
     | 
    
         
            -
                   tion 
     | 
| 
       5565 
     | 
    
         
            -
                   a 
     | 
| 
       5566 
     | 
    
         
            -
                   use 
     | 
| 
      
 5564 
     | 
    
         
            +
                   The  final use of backslash is for certain simple assertions. An asser-
         
     | 
| 
      
 5565 
     | 
    
         
            +
                   tion specifies a condition that has to be met at a particular point  in
         
     | 
| 
      
 5566 
     | 
    
         
            +
                   a  match, without consuming any characters from the subject string. The
         
     | 
| 
      
 5567 
     | 
    
         
            +
                   use of subpatterns for more complicated assertions is described  below.
         
     | 
| 
       5567 
5568 
     | 
    
         
             
                   The backslashed assertions are:
         
     | 
| 
       5568 
5569 
     | 
    
         | 
| 
       5569 
5570 
     | 
    
         
             
                     \b     matches at a word boundary
         
     | 
| 
         @@ -5574,161 +5575,161 @@ BACKSLASH 
     | 
|
| 
       5574 
5575 
     | 
    
         
             
                     \z     matches only at the end of the subject
         
     | 
| 
       5575 
5576 
     | 
    
         
             
                     \G     matches at the first matching position in the subject
         
     | 
| 
       5576 
5577 
     | 
    
         | 
| 
       5577 
     | 
    
         
            -
                   Inside 
     | 
| 
       5578 
     | 
    
         
            -
                   backspace 
     | 
| 
       5579 
     | 
    
         
            -
                   character 
     | 
| 
      
 5578 
     | 
    
         
            +
                   Inside  a  character  class, \b has a different meaning; it matches the
         
     | 
| 
      
 5579 
     | 
    
         
            +
                   backspace character. If any other of  these  assertions  appears  in  a
         
     | 
| 
      
 5580 
     | 
    
         
            +
                   character  class, by default it matches the corresponding literal char-
         
     | 
| 
       5580 
5581 
     | 
    
         
             
                   acter  (for  example,  \B  matches  the  letter  B).  However,  if  the
         
     | 
| 
       5581 
     | 
    
         
            -
                   PCRE_EXTRA 
     | 
| 
      
 5582 
     | 
    
         
            +
                   PCRE_EXTRA  option is set, an "invalid escape sequence" error is gener-
         
     | 
| 
       5582 
5583 
     | 
    
         
             
                   ated instead.
         
     | 
| 
       5583 
5584 
     | 
    
         | 
| 
       5584 
     | 
    
         
            -
                   A 
     | 
| 
       5585 
     | 
    
         
            -
                   character 
     | 
| 
       5586 
     | 
    
         
            -
                   one 
     | 
| 
       5587 
     | 
    
         
            -
                   string 
     | 
| 
       5588 
     | 
    
         
            -
                   UTF 
     | 
| 
       5589 
     | 
    
         
            -
                   PCRE_UCP 
     | 
| 
       5590 
     | 
    
         
            -
                   PCRE 
     | 
| 
       5591 
     | 
    
         
            -
                   quence. 
     | 
| 
      
 5585 
     | 
    
         
            +
                   A word boundary is a position in the subject string where  the  current
         
     | 
| 
      
 5586 
     | 
    
         
            +
                   character  and  the previous character do not both match \w or \W (i.e.
         
     | 
| 
      
 5587 
     | 
    
         
            +
                   one matches \w and the other matches \W), or the start or  end  of  the
         
     | 
| 
      
 5588 
     | 
    
         
            +
                   string  if  the  first or last character matches \w, respectively. In a
         
     | 
| 
      
 5589 
     | 
    
         
            +
                   UTF mode, the meanings of \w and \W  can  be  changed  by  setting  the
         
     | 
| 
      
 5590 
     | 
    
         
            +
                   PCRE_UCP  option. When this is done, it also affects \b and \B. Neither
         
     | 
| 
      
 5591 
     | 
    
         
            +
                   PCRE nor Perl has a separate "start of word" or "end of  word"  metase-
         
     | 
| 
      
 5592 
     | 
    
         
            +
                   quence.  However,  whatever follows \b normally determines which it is.
         
     | 
| 
       5592 
5593 
     | 
    
         
             
                   For example, the fragment \ba matches "a" at the start of a word.
         
     | 
| 
       5593 
5594 
     | 
    
         | 
| 
       5594 
     | 
    
         
            -
                   The 
     | 
| 
      
 5595 
     | 
    
         
            +
                   The \A, \Z, and \z assertions differ from  the  traditional  circumflex
         
     | 
| 
       5595 
5596 
     | 
    
         
             
                   and dollar (described in the next section) in that they only ever match
         
     | 
| 
       5596 
     | 
    
         
            -
                   at 
     | 
| 
       5597 
     | 
    
         
            -
                   set. 
     | 
| 
      
 5597 
     | 
    
         
            +
                   at the very start and end of the subject string, whatever  options  are
         
     | 
| 
      
 5598 
     | 
    
         
            +
                   set.  Thus,  they are independent of multiline mode. These three asser-
         
     | 
| 
       5598 
5599 
     | 
    
         
             
                   tions are not affected by the PCRE_NOTBOL or PCRE_NOTEOL options, which
         
     | 
| 
       5599 
     | 
    
         
            -
                   affect 
     | 
| 
       5600 
     | 
    
         
            -
                   However, 
     | 
| 
      
 5600 
     | 
    
         
            +
                   affect  only the behaviour of the circumflex and dollar metacharacters.
         
     | 
| 
      
 5601 
     | 
    
         
            +
                   However, if the startoffset argument of pcre_exec() is non-zero,  indi-
         
     | 
| 
       5601 
5602 
     | 
    
         
             
                   cating that matching is to start at a point other than the beginning of
         
     | 
| 
       5602 
     | 
    
         
            -
                   the 
     | 
| 
      
 5603 
     | 
    
         
            +
                   the subject, \A can never match. The difference between \Z  and  \z  is
         
     | 
| 
       5603 
5604 
     | 
    
         
             
                   that \Z matches before a newline at the end of the string as well as at
         
     | 
| 
       5604 
5605 
     | 
    
         
             
                   the very end, whereas \z matches only at the end.
         
     | 
| 
       5605 
5606 
     | 
    
         | 
| 
       5606 
     | 
    
         
            -
                   The 
     | 
| 
       5607 
     | 
    
         
            -
                   the 
     | 
| 
       5608 
     | 
    
         
            -
                   of 
     | 
| 
       5609 
     | 
    
         
            -
                   non-zero. 
     | 
| 
      
 5607 
     | 
    
         
            +
                   The \G assertion is true only when the current matching position is  at
         
     | 
| 
      
 5608 
     | 
    
         
            +
                   the  start point of the match, as specified by the startoffset argument
         
     | 
| 
      
 5609 
     | 
    
         
            +
                   of pcre_exec(). It differs from \A when the  value  of  startoffset  is
         
     | 
| 
      
 5610 
     | 
    
         
            +
                   non-zero.  By calling pcre_exec() multiple times with appropriate argu-
         
     | 
| 
       5610 
5611 
     | 
    
         
             
                   ments, you can mimic Perl's /g option, and it is in this kind of imple-
         
     | 
| 
       5611 
5612 
     | 
    
         
             
                   mentation where \G can be useful.
         
     | 
| 
       5612 
5613 
     | 
    
         | 
| 
       5613 
     | 
    
         
            -
                   Note, 
     | 
| 
      
 5614 
     | 
    
         
            +
                   Note,  however,  that  PCRE's interpretation of \G, as the start of the
         
     | 
| 
       5614 
5615 
     | 
    
         
             
                   current match, is subtly different from Perl's, which defines it as the
         
     | 
| 
       5615 
     | 
    
         
            -
                   end 
     | 
| 
       5616 
     | 
    
         
            -
                   previously 
     | 
| 
      
 5616 
     | 
    
         
            +
                   end  of  the  previous  match. In Perl, these can be different when the
         
     | 
| 
      
 5617 
     | 
    
         
            +
                   previously matched string was empty. Because PCRE does just  one  match
         
     | 
| 
       5617 
5618 
     | 
    
         
             
                   at a time, it cannot reproduce this behaviour.
         
     | 
| 
       5618 
5619 
     | 
    
         | 
| 
       5619 
     | 
    
         
            -
                   If 
     | 
| 
      
 5620 
     | 
    
         
            +
                   If  all  the alternatives of a pattern begin with \G, the expression is
         
     | 
| 
       5620 
5621 
     | 
    
         
             
                   anchored to the starting match position, and the "anchored" flag is set
         
     | 
| 
       5621 
5622 
     | 
    
         
             
                   in the compiled regular expression.
         
     | 
| 
       5622 
5623 
     | 
    
         | 
| 
       5623 
5624 
     | 
    
         | 
| 
       5624 
5625 
     | 
    
         
             
            CIRCUMFLEX AND DOLLAR
         
     | 
| 
       5625 
5626 
     | 
    
         | 
| 
       5626 
     | 
    
         
            -
                   The 
     | 
| 
       5627 
     | 
    
         
            -
                   That 
     | 
| 
      
 5627 
     | 
    
         
            +
                   The  circumflex  and  dollar  metacharacters are zero-width assertions.
         
     | 
| 
      
 5628 
     | 
    
         
            +
                   That is, they test for a particular condition being true  without  con-
         
     | 
| 
       5628 
5629 
     | 
    
         
             
                   suming any characters from the subject string.
         
     | 
| 
       5629 
5630 
     | 
    
         | 
| 
       5630 
5631 
     | 
    
         
             
                   Outside a character class, in the default matching mode, the circumflex
         
     | 
| 
       5631 
     | 
    
         
            -
                   character 
     | 
| 
       5632 
     | 
    
         
            -
                   point 
     | 
| 
       5633 
     | 
    
         
            -
                   ment 
     | 
| 
       5634 
     | 
    
         
            -
                   PCRE_MULTILINE 
     | 
| 
      
 5632 
     | 
    
         
            +
                   character is an assertion that is true only  if  the  current  matching
         
     | 
| 
      
 5633 
     | 
    
         
            +
                   point  is  at the start of the subject string. If the startoffset argu-
         
     | 
| 
      
 5634 
     | 
    
         
            +
                   ment of pcre_exec() is non-zero, circumflex  can  never  match  if  the
         
     | 
| 
      
 5635 
     | 
    
         
            +
                   PCRE_MULTILINE  option  is  unset. Inside a character class, circumflex
         
     | 
| 
       5635 
5636 
     | 
    
         
             
                   has an entirely different meaning (see below).
         
     | 
| 
       5636 
5637 
     | 
    
         | 
| 
       5637 
     | 
    
         
            -
                   Circumflex 
     | 
| 
       5638 
     | 
    
         
            -
                   of 
     | 
| 
       5639 
     | 
    
         
            -
                   alternative 
     | 
| 
       5640 
     | 
    
         
            -
                   branch. 
     | 
| 
       5641 
     | 
    
         
            -
                   if 
     | 
| 
       5642 
     | 
    
         
            -
                   ject, 
     | 
| 
      
 5638 
     | 
    
         
            +
                   Circumflex need not be the first character of the pattern if  a  number
         
     | 
| 
      
 5639 
     | 
    
         
            +
                   of  alternatives are involved, but it should be the first thing in each
         
     | 
| 
      
 5640 
     | 
    
         
            +
                   alternative in which it appears if the pattern is ever  to  match  that
         
     | 
| 
      
 5641 
     | 
    
         
            +
                   branch.  If all possible alternatives start with a circumflex, that is,
         
     | 
| 
      
 5642 
     | 
    
         
            +
                   if the pattern is constrained to match only at the start  of  the  sub-
         
     | 
| 
      
 5643 
     | 
    
         
            +
                   ject,  it  is  said  to be an "anchored" pattern. (There are also other
         
     | 
| 
       5643 
5644 
     | 
    
         
             
                   constructs that can cause a pattern to be anchored.)
         
     | 
| 
       5644 
5645 
     | 
    
         | 
| 
       5645 
     | 
    
         
            -
                   The 
     | 
| 
       5646 
     | 
    
         
            -
                   matching 
     | 
| 
       5647 
     | 
    
         
            -
                   before 
     | 
| 
       5648 
     | 
    
         
            -
                   that 
     | 
| 
      
 5646 
     | 
    
         
            +
                   The dollar character is an assertion that is true only if  the  current
         
     | 
| 
      
 5647 
     | 
    
         
            +
                   matching  point  is  at  the  end of the subject string, or immediately
         
     | 
| 
      
 5648 
     | 
    
         
            +
                   before a newline at the end of the string (by default). Note,  however,
         
     | 
| 
      
 5649 
     | 
    
         
            +
                   that  it  does  not  actually match the newline. Dollar need not be the
         
     | 
| 
       5649 
5650 
     | 
    
         
             
                   last character of the pattern if a number of alternatives are involved,
         
     | 
| 
       5650 
     | 
    
         
            -
                   but 
     | 
| 
      
 5651 
     | 
    
         
            +
                   but  it should be the last item in any branch in which it appears. Dol-
         
     | 
| 
       5651 
5652 
     | 
    
         
             
                   lar has no special meaning in a character class.
         
     | 
| 
       5652 
5653 
     | 
    
         | 
| 
       5653 
     | 
    
         
            -
                   The 
     | 
| 
       5654 
     | 
    
         
            -
                   very 
     | 
| 
      
 5654 
     | 
    
         
            +
                   The meaning of dollar can be changed so that it  matches  only  at  the
         
     | 
| 
      
 5655 
     | 
    
         
            +
                   very  end  of  the string, by setting the PCRE_DOLLAR_ENDONLY option at
         
     | 
| 
       5655 
5656 
     | 
    
         
             
                   compile time. This does not affect the \Z assertion.
         
     | 
| 
       5656 
5657 
     | 
    
         | 
| 
       5657 
5658 
     | 
    
         
             
                   The meanings of the circumflex and dollar characters are changed if the
         
     | 
| 
       5658 
     | 
    
         
            -
                   PCRE_MULTILINE 
     | 
| 
       5659 
     | 
    
         
            -
                   matches 
     | 
| 
       5660 
     | 
    
         
            -
                   the 
     | 
| 
       5661 
     | 
    
         
            -
                   string. 
     | 
| 
       5662 
     | 
    
         
            -
                   at 
     | 
| 
       5663 
     | 
    
         
            -
                   as 
     | 
| 
      
 5659 
     | 
    
         
            +
                   PCRE_MULTILINE  option  is  set.  When  this  is the case, a circumflex
         
     | 
| 
      
 5660 
     | 
    
         
            +
                   matches immediately after internal newlines as well as at the start  of
         
     | 
| 
      
 5661 
     | 
    
         
            +
                   the  subject  string.  It  does not match after a newline that ends the
         
     | 
| 
      
 5662 
     | 
    
         
            +
                   string. A dollar matches before any newlines in the string, as well  as
         
     | 
| 
      
 5663 
     | 
    
         
            +
                   at  the very end, when PCRE_MULTILINE is set. When newline is specified
         
     | 
| 
      
 5664 
     | 
    
         
            +
                   as the two-character sequence CRLF, isolated CR and  LF  characters  do
         
     | 
| 
       5664 
5665 
     | 
    
         
             
                   not indicate newlines.
         
     | 
| 
       5665 
5666 
     | 
    
         | 
| 
       5666 
     | 
    
         
            -
                   For 
     | 
| 
       5667 
     | 
    
         
            -
                   (where 
     | 
| 
       5668 
     | 
    
         
            -
                   Consequently, 
     | 
| 
       5669 
     | 
    
         
            -
                   all 
     | 
| 
       5670 
     | 
    
         
            -
                   match 
     | 
| 
       5671 
     | 
    
         
            -
                   pcre_exec() 
     | 
| 
      
 5667 
     | 
    
         
            +
                   For  example, the pattern /^abc$/ matches the subject string "def\nabc"
         
     | 
| 
      
 5668 
     | 
    
         
            +
                   (where \n represents a newline) in multiline mode, but  not  otherwise.
         
     | 
| 
      
 5669 
     | 
    
         
            +
                   Consequently,  patterns  that  are anchored in single line mode because
         
     | 
| 
      
 5670 
     | 
    
         
            +
                   all branches start with ^ are not anchored in  multiline  mode,  and  a
         
     | 
| 
      
 5671 
     | 
    
         
            +
                   match  for  circumflex  is  possible  when  the startoffset argument of
         
     | 
| 
      
 5672 
     | 
    
         
            +
                   pcre_exec() is non-zero. The PCRE_DOLLAR_ENDONLY option is  ignored  if
         
     | 
| 
       5672 
5673 
     | 
    
         
             
                   PCRE_MULTILINE is set.
         
     | 
| 
       5673 
5674 
     | 
    
         | 
| 
       5674 
     | 
    
         
            -
                   Note 
     | 
| 
       5675 
     | 
    
         
            -
                   and 
     | 
| 
       5676 
     | 
    
         
            -
                   start 
     | 
| 
      
 5675 
     | 
    
         
            +
                   Note  that  the sequences \A, \Z, and \z can be used to match the start
         
     | 
| 
      
 5676 
     | 
    
         
            +
                   and end of the subject in both modes, and if all branches of a  pattern
         
     | 
| 
      
 5677 
     | 
    
         
            +
                   start  with  \A it is always anchored, whether or not PCRE_MULTILINE is
         
     | 
| 
       5677 
5678 
     | 
    
         
             
                   set.
         
     | 
| 
       5678 
5679 
     | 
    
         | 
| 
       5679 
5680 
     | 
    
         | 
| 
       5680 
5681 
     | 
    
         
             
            FULL STOP (PERIOD, DOT) AND \N
         
     | 
| 
       5681 
5682 
     | 
    
         | 
| 
       5682 
5683 
     | 
    
         
             
                   Outside a character class, a dot in the pattern matches any one charac-
         
     | 
| 
       5683 
     | 
    
         
            -
                   ter 
     | 
| 
      
 5684 
     | 
    
         
            +
                   ter  in  the subject string except (by default) a character that signi-
         
     | 
| 
       5684 
5685 
     | 
    
         
             
                   fies the end of a line.
         
     | 
| 
       5685 
5686 
     | 
    
         | 
| 
       5686 
     | 
    
         
            -
                   When 
     | 
| 
       5687 
     | 
    
         
            -
                   that 
     | 
| 
       5688 
     | 
    
         
            -
                   not 
     | 
| 
       5689 
     | 
    
         
            -
                   matches 
     | 
| 
       5690 
     | 
    
         
            -
                   code 
     | 
| 
      
 5687 
     | 
    
         
            +
                   When a line ending is defined as a single character, dot never  matches
         
     | 
| 
      
 5688 
     | 
    
         
            +
                   that  character; when the two-character sequence CRLF is used, dot does
         
     | 
| 
      
 5689 
     | 
    
         
            +
                   not match CR if it is immediately followed  by  LF,  but  otherwise  it
         
     | 
| 
      
 5690 
     | 
    
         
            +
                   matches  all characters (including isolated CRs and LFs). When any Uni-
         
     | 
| 
      
 5691 
     | 
    
         
            +
                   code line endings are being recognized, dot does not match CR or LF  or
         
     | 
| 
       5691 
5692 
     | 
    
         
             
                   any of the other line ending characters.
         
     | 
| 
       5692 
5693 
     | 
    
         | 
| 
       5693 
     | 
    
         
            -
                   The 
     | 
| 
       5694 
     | 
    
         
            -
                   PCRE_DOTALL 
     | 
| 
      
 5694 
     | 
    
         
            +
                   The  behaviour  of  dot  with regard to newlines can be changed. If the
         
     | 
| 
      
 5695 
     | 
    
         
            +
                   PCRE_DOTALL option is set, a dot matches  any  one  character,  without
         
     | 
| 
       5695 
5696 
     | 
    
         
             
                   exception. If the two-character sequence CRLF is present in the subject
         
     | 
| 
       5696 
5697 
     | 
    
         
             
                   string, it takes two dots to match it.
         
     | 
| 
       5697 
5698 
     | 
    
         | 
| 
       5698 
     | 
    
         
            -
                   The 
     | 
| 
       5699 
     | 
    
         
            -
                   flex 
     | 
| 
      
 5699 
     | 
    
         
            +
                   The handling of dot is entirely independent of the handling of  circum-
         
     | 
| 
      
 5700 
     | 
    
         
            +
                   flex  and  dollar,  the  only relationship being that they both involve
         
     | 
| 
       5700 
5701 
     | 
    
         
             
                   newlines. Dot has no special meaning in a character class.
         
     | 
| 
       5701 
5702 
     | 
    
         | 
| 
       5702 
     | 
    
         
            -
                   The 
     | 
| 
       5703 
     | 
    
         
            -
                   affected 
     | 
| 
       5704 
     | 
    
         
            -
                   character 
     | 
| 
      
 5703 
     | 
    
         
            +
                   The escape sequence \N behaves like  a  dot,  except  that  it  is  not
         
     | 
| 
      
 5704 
     | 
    
         
            +
                   affected  by  the  PCRE_DOTALL  option.  In other words, it matches any
         
     | 
| 
      
 5705 
     | 
    
         
            +
                   character except one that signifies the end of a line. Perl  also  uses
         
     | 
| 
       5705 
5706 
     | 
    
         
             
                   \N to match characters by name; PCRE does not support this.
         
     | 
| 
       5706 
5707 
     | 
    
         | 
| 
       5707 
5708 
     | 
    
         | 
| 
       5708 
5709 
     | 
    
         
             
            MATCHING A SINGLE DATA UNIT
         
     | 
| 
       5709 
5710 
     | 
    
         | 
| 
       5710 
     | 
    
         
            -
                   Outside 
     | 
| 
       5711 
     | 
    
         
            -
                   unit, 
     | 
| 
       5712 
     | 
    
         
            -
                   unit 
     | 
| 
       5713 
     | 
    
         
            -
                   32-bit 
     | 
| 
       5714 
     | 
    
         
            -
                   line-ending 
     | 
| 
      
 5711 
     | 
    
         
            +
                   Outside  a character class, the escape sequence \C matches any one data
         
     | 
| 
      
 5712 
     | 
    
         
            +
                   unit, whether or not a UTF mode is set. In the 8-bit library, one  data
         
     | 
| 
      
 5713 
     | 
    
         
            +
                   unit  is  one  byte;  in the 16-bit library it is a 16-bit unit; in the
         
     | 
| 
      
 5714 
     | 
    
         
            +
                   32-bit library it is a 32-bit unit. Unlike a  dot,  \C  always  matches
         
     | 
| 
      
 5715 
     | 
    
         
            +
                   line-ending  characters.  The  feature  is provided in Perl in order to
         
     | 
| 
       5715 
5716 
     | 
    
         
             
                   match individual bytes in UTF-8 mode, but it is unclear how it can use-
         
     | 
| 
       5716 
     | 
    
         
            -
                   fully 
     | 
| 
       5717 
     | 
    
         
            -
                   units, 
     | 
| 
      
 5717 
     | 
    
         
            +
                   fully  be  used.  Because  \C breaks up characters into individual data
         
     | 
| 
      
 5718 
     | 
    
         
            +
                   units, matching one unit with \C in a UTF mode means that the  rest  of
         
     | 
| 
       5718 
5719 
     | 
    
         
             
                   the string may start with a malformed UTF character. This has undefined
         
     | 
| 
       5719 
5720 
     | 
    
         
             
                   results, because PCRE assumes that it is dealing with valid UTF strings
         
     | 
| 
       5720 
     | 
    
         
            -
                   (and 
     | 
| 
       5721 
     | 
    
         
            -
                   PCRE_NO_UTF8_CHECK, 
     | 
| 
      
 5721 
     | 
    
         
            +
                   (and  by  default  it checks this at the start of processing unless the
         
     | 
| 
      
 5722 
     | 
    
         
            +
                   PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK or  PCRE_NO_UTF32_CHECK  option
         
     | 
| 
       5722 
5723 
     | 
    
         
             
                   is used).
         
     | 
| 
       5723 
5724 
     | 
    
         | 
| 
       5724 
     | 
    
         
            -
                   PCRE 
     | 
| 
       5725 
     | 
    
         
            -
                   below) 
     | 
| 
      
 5725 
     | 
    
         
            +
                   PCRE  does  not  allow \C to appear in lookbehind assertions (described
         
     | 
| 
      
 5726 
     | 
    
         
            +
                   below) in a UTF mode, because this would make it impossible  to  calcu-
         
     | 
| 
       5726 
5727 
     | 
    
         
             
                   late the length of the lookbehind.
         
     | 
| 
       5727 
5728 
     | 
    
         | 
| 
       5728 
5729 
     | 
    
         
             
                   In general, the \C escape sequence is best avoided. However, one way of
         
     | 
| 
       5729 
     | 
    
         
            -
                   using 
     | 
| 
       5730 
     | 
    
         
            -
                   a 
     | 
| 
       5731 
     | 
    
         
            -
                   tern, 
     | 
| 
      
 5730 
     | 
    
         
            +
                   using it that avoids the problem of malformed UTF characters is to  use
         
     | 
| 
      
 5731 
     | 
    
         
            +
                   a  lookahead to check the length of the next character, as in this pat-
         
     | 
| 
      
 5732 
     | 
    
         
            +
                   tern, which could be used with a UTF-8 string (ignore white  space  and
         
     | 
| 
       5732 
5733 
     | 
    
         
             
                   line breaks):
         
     | 
| 
       5733 
5734 
     | 
    
         | 
| 
       5734 
5735 
     | 
    
         
             
                     (?| (?=[\x00-\x7f])(\C) |
         
     | 
| 
         @@ -5736,11 +5737,11 @@ MATCHING A SINGLE DATA UNIT 
     | 
|
| 
       5736 
5737 
     | 
    
         
             
                         (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
         
     | 
| 
       5737 
5738 
     | 
    
         
             
                         (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
         
     | 
| 
       5738 
5739 
     | 
    
         | 
| 
       5739 
     | 
    
         
            -
                   A 
     | 
| 
       5740 
     | 
    
         
            -
                   in 
     | 
| 
       5741 
     | 
    
         
            -
                   assertions 
     | 
| 
       5742 
     | 
    
         
            -
                   for 
     | 
| 
       5743 
     | 
    
         
            -
                   character's 
     | 
| 
      
 5740 
     | 
    
         
            +
                   A  group  that starts with (?| resets the capturing parentheses numbers
         
     | 
| 
      
 5741 
     | 
    
         
            +
                   in each alternative (see "Duplicate  Subpattern  Numbers"  below).  The
         
     | 
| 
      
 5742 
     | 
    
         
            +
                   assertions  at  the start of each branch check the next UTF-8 character
         
     | 
| 
      
 5743 
     | 
    
         
            +
                   for values whose encoding uses 1, 2, 3, or 4 bytes,  respectively.  The
         
     | 
| 
      
 5744 
     | 
    
         
            +
                   character's  individual bytes are then captured by the appropriate num-
         
     | 
| 
       5744 
5745 
     | 
    
         
             
                   ber of groups.
         
     | 
| 
       5745 
5746 
     | 
    
         | 
| 
       5746 
5747 
     | 
    
         | 
| 
         @@ -5750,109 +5751,109 @@ SQUARE BRACKETS AND CHARACTER CLASSES 
     | 
|
| 
       5750 
5751 
     | 
    
         
             
                   closing square bracket. A closing square bracket on its own is not spe-
         
     | 
| 
       5751 
5752 
     | 
    
         
             
                   cial by default.  However, if the PCRE_JAVASCRIPT_COMPAT option is set,
         
     | 
| 
       5752 
5753 
     | 
    
         
             
                   a lone closing square bracket causes a compile-time error. If a closing
         
     | 
| 
       5753 
     | 
    
         
            -
                   square 
     | 
| 
       5754 
     | 
    
         
            -
                   first 
     | 
| 
      
 5754 
     | 
    
         
            +
                   square bracket is required as a member of the class, it should  be  the
         
     | 
| 
      
 5755 
     | 
    
         
            +
                   first  data  character  in  the  class (after an initial circumflex, if
         
     | 
| 
       5755 
5756 
     | 
    
         
             
                   present) or escaped with a backslash.
         
     | 
| 
       5756 
5757 
     | 
    
         | 
| 
       5757 
     | 
    
         
            -
                   A 
     | 
| 
       5758 
     | 
    
         
            -
                   mode, 
     | 
| 
      
 5758 
     | 
    
         
            +
                   A character class matches a single character in the subject. In  a  UTF
         
     | 
| 
      
 5759 
     | 
    
         
            +
                   mode,  the  character  may  be  more than one data unit long. A matched
         
     | 
| 
       5759 
5760 
     | 
    
         
             
                   character must be in the set of characters defined by the class, unless
         
     | 
| 
       5760 
     | 
    
         
            -
                   the 
     | 
| 
      
 5761 
     | 
    
         
            +
                   the  first  character in the class definition is a circumflex, in which
         
     | 
| 
       5761 
5762 
     | 
    
         
             
                   case the subject character must not be in the set defined by the class.
         
     | 
| 
       5762 
     | 
    
         
            -
                   If 
     | 
| 
      
 5763 
     | 
    
         
            +
                   If  a  circumflex is actually required as a member of the class, ensure
         
     | 
| 
       5763 
5764 
     | 
    
         
             
                   it is not the first character, or escape it with a backslash.
         
     | 
| 
       5764 
5765 
     | 
    
         | 
| 
       5765 
     | 
    
         
            -
                   For 
     | 
| 
       5766 
     | 
    
         
            -
                   while 
     | 
| 
      
 5766 
     | 
    
         
            +
                   For example, the character class [aeiou] matches any lower case  vowel,
         
     | 
| 
      
 5767 
     | 
    
         
            +
                   while  [^aeiou]  matches  any character that is not a lower case vowel.
         
     | 
| 
       5767 
5768 
     | 
    
         
             
                   Note that a circumflex is just a convenient notation for specifying the
         
     | 
| 
       5768 
     | 
    
         
            -
                   characters 
     | 
| 
       5769 
     | 
    
         
            -
                   class 
     | 
| 
       5770 
     | 
    
         
            -
                   sumes 
     | 
| 
      
 5769 
     | 
    
         
            +
                   characters  that  are in the class by enumerating those that are not. A
         
     | 
| 
      
 5770 
     | 
    
         
            +
                   class that starts with a circumflex is not an assertion; it still  con-
         
     | 
| 
      
 5771 
     | 
    
         
            +
                   sumes  a  character  from the subject string, and therefore it fails if
         
     | 
| 
       5771 
5772 
     | 
    
         
             
                   the current pointer is at the end of the string.
         
     | 
| 
       5772 
5773 
     | 
    
         | 
| 
       5773 
5774 
     | 
    
         
             
                   In UTF-8 (UTF-16, UTF-32) mode, characters with values greater than 255
         
     | 
| 
       5774 
     | 
    
         
            -
                   (0xffff) 
     | 
| 
      
 5775 
     | 
    
         
            +
                   (0xffff)  can be included in a class as a literal string of data units,
         
     | 
| 
       5775 
5776 
     | 
    
         
             
                   or by using the \x{ escaping mechanism.
         
     | 
| 
       5776 
5777 
     | 
    
         | 
| 
       5777 
     | 
    
         
            -
                   When 
     | 
| 
       5778 
     | 
    
         
            -
                   their 
     | 
| 
       5779 
     | 
    
         
            -
                   [aeiou] 
     | 
| 
       5780 
     | 
    
         
            -
                   match 
     | 
| 
       5781 
     | 
    
         
            -
                   understands 
     | 
| 
       5782 
     | 
    
         
            -
                   than 
     | 
| 
       5783 
     | 
    
         
            -
                   higher 
     | 
| 
       5784 
     | 
    
         
            -
                   with 
     | 
| 
       5785 
     | 
    
         
            -
                   caseless 
     | 
| 
       5786 
     | 
    
         
            -
                   ensure 
     | 
| 
      
 5778 
     | 
    
         
            +
                   When caseless matching is set, any letters in a  class  represent  both
         
     | 
| 
      
 5779 
     | 
    
         
            +
                   their  upper  case  and lower case versions, so for example, a caseless
         
     | 
| 
      
 5780 
     | 
    
         
            +
                   [aeiou] matches "A" as well as "a", and a caseless  [^aeiou]  does  not
         
     | 
| 
      
 5781 
     | 
    
         
            +
                   match  "A", whereas a caseful version would. In a UTF mode, PCRE always
         
     | 
| 
      
 5782 
     | 
    
         
            +
                   understands the concept of case for characters whose  values  are  less
         
     | 
| 
      
 5783 
     | 
    
         
            +
                   than  128, so caseless matching is always possible. For characters with
         
     | 
| 
      
 5784 
     | 
    
         
            +
                   higher values, the concept of case is supported  if  PCRE  is  compiled
         
     | 
| 
      
 5785 
     | 
    
         
            +
                   with  Unicode  property support, but not otherwise.  If you want to use
         
     | 
| 
      
 5786 
     | 
    
         
            +
                   caseless matching in a UTF mode for characters 128 and above, you  must
         
     | 
| 
      
 5787 
     | 
    
         
            +
                   ensure  that  PCRE is compiled with Unicode property support as well as
         
     | 
| 
       5787 
5788 
     | 
    
         
             
                   with UTF support.
         
     | 
| 
       5788 
5789 
     | 
    
         | 
| 
       5789 
     | 
    
         
            -
                   Characters 
     | 
| 
       5790 
     | 
    
         
            -
                   special 
     | 
| 
       5791 
     | 
    
         
            -
                   sequence 
     | 
| 
      
 5790 
     | 
    
         
            +
                   Characters that might indicate line breaks are  never  treated  in  any
         
     | 
| 
      
 5791 
     | 
    
         
            +
                   special  way  when  matching  character  classes,  whatever line-ending
         
     | 
| 
      
 5792 
     | 
    
         
            +
                   sequence is in  use,  and  whatever  setting  of  the  PCRE_DOTALL  and
         
     | 
| 
       5792 
5793 
     | 
    
         
             
                   PCRE_MULTILINE options is used. A class such as [^a] always matches one
         
     | 
| 
       5793 
5794 
     | 
    
         
             
                   of these characters.
         
     | 
| 
       5794 
5795 
     | 
    
         | 
| 
       5795 
     | 
    
         
            -
                   The 
     | 
| 
       5796 
     | 
    
         
            -
                   ters 
     | 
| 
       5797 
     | 
    
         
            -
                   between 
     | 
| 
       5798 
     | 
    
         
            -
                   class, 
     | 
| 
       5799 
     | 
    
         
            -
                   where 
     | 
| 
      
 5796 
     | 
    
         
            +
                   The minus (hyphen) character can be used to specify a range of  charac-
         
     | 
| 
      
 5797 
     | 
    
         
            +
                   ters  in  a  character  class.  For  example,  [d-m] matches any letter
         
     | 
| 
      
 5798 
     | 
    
         
            +
                   between d and m, inclusive. If a  minus  character  is  required  in  a
         
     | 
| 
      
 5799 
     | 
    
         
            +
                   class,  it  must  be  escaped  with a backslash or appear in a position
         
     | 
| 
      
 5800 
     | 
    
         
            +
                   where it cannot be interpreted as indicating a range, typically as  the
         
     | 
| 
       5800 
5801 
     | 
    
         
             
                   first or last character in the class, or immediately after a range. For
         
     | 
| 
       5801 
     | 
    
         
            -
                   example, 
     | 
| 
      
 5802 
     | 
    
         
            +
                   example, [b-d-z] matches letters in the range b to d, a hyphen  charac-
         
     | 
| 
       5802 
5803 
     | 
    
         
             
                   ter, or z.
         
     | 
| 
       5803 
5804 
     | 
    
         | 
| 
       5804 
5805 
     | 
    
         
             
                   It is not possible to have the literal character "]" as the end charac-
         
     | 
| 
       5805 
     | 
    
         
            -
                   ter 
     | 
| 
       5806 
     | 
    
         
            -
                   two 
     | 
| 
       5807 
     | 
    
         
            -
                   would 
     | 
| 
       5808 
     | 
    
         
            -
                   backslash 
     | 
| 
       5809 
     | 
    
         
            -
                   preted 
     | 
| 
       5810 
     | 
    
         
            -
                   The 
     | 
| 
      
 5806 
     | 
    
         
            +
                   ter of a range. A pattern such as [W-]46] is interpreted as a class  of
         
     | 
| 
      
 5807 
     | 
    
         
            +
                   two  characters ("W" and "-") followed by a literal string "46]", so it
         
     | 
| 
      
 5808 
     | 
    
         
            +
                   would match "W46]" or "-46]". However, if the "]"  is  escaped  with  a
         
     | 
| 
      
 5809 
     | 
    
         
            +
                   backslash  it is interpreted as the end of range, so [W-\]46] is inter-
         
     | 
| 
      
 5810 
     | 
    
         
            +
                   preted as a class containing a range followed by two other  characters.
         
     | 
| 
      
 5811 
     | 
    
         
            +
                   The  octal or hexadecimal representation of "]" can also be used to end
         
     | 
| 
       5811 
5812 
     | 
    
         
             
                   a range.
         
     | 
| 
       5812 
5813 
     | 
    
         | 
| 
       5813 
     | 
    
         
            -
                   An 
     | 
| 
       5814 
     | 
    
         
            -
                   escape 
     | 
| 
       5815 
     | 
    
         
            -
                   at 
     | 
| 
      
 5814 
     | 
    
         
            +
                   An error is generated if a POSIX character  class  (see  below)  or  an
         
     | 
| 
      
 5815 
     | 
    
         
            +
                   escape  sequence other than one that defines a single character appears
         
     | 
| 
      
 5816 
     | 
    
         
            +
                   at a point where a range ending character  is  expected.  For  example,
         
     | 
| 
       5816 
5817 
     | 
    
         
             
                   [z-\xff] is valid, but [A-\d] and [A-[:digit:]] are not.
         
     | 
| 
       5817 
5818 
     | 
    
         | 
| 
       5818 
     | 
    
         
            -
                   Ranges 
     | 
| 
       5819 
     | 
    
         
            -
                   also 
     | 
| 
       5820 
     | 
    
         
            -
                   [\000-\037]. 
     | 
| 
      
 5819 
     | 
    
         
            +
                   Ranges  operate in the collating sequence of character values. They can
         
     | 
| 
      
 5820 
     | 
    
         
            +
                   also  be  used  for  characters  specified  numerically,  for   example
         
     | 
| 
      
 5821 
     | 
    
         
            +
                   [\000-\037].  Ranges  can include any characters that are valid for the
         
     | 
| 
       5821 
5822 
     | 
    
         
             
                   current mode.
         
     | 
| 
       5822 
5823 
     | 
    
         | 
| 
       5823 
5824 
     | 
    
         
             
                   If a range that includes letters is used when caseless matching is set,
         
     | 
| 
       5824 
5825 
     | 
    
         
             
                   it matches the letters in either case. For example, [W-c] is equivalent
         
     | 
| 
       5825 
     | 
    
         
            -
                   to 
     | 
| 
       5826 
     | 
    
         
            -
                   character 
     | 
| 
       5827 
     | 
    
         
            -
                   accented 
     | 
| 
       5828 
     | 
    
         
            -
                   concept 
     | 
| 
      
 5826 
     | 
    
         
            +
                   to [][\\^_`wxyzabc], matched caselessly, and  in  a  non-UTF  mode,  if
         
     | 
| 
      
 5827 
     | 
    
         
            +
                   character  tables  for  a French locale are in use, [\xc8-\xcb] matches
         
     | 
| 
      
 5828 
     | 
    
         
            +
                   accented E characters in both cases. In UTF modes,  PCRE  supports  the
         
     | 
| 
      
 5829 
     | 
    
         
            +
                   concept  of  case for characters with values greater than 127 only when
         
     | 
| 
       5829 
5830 
     | 
    
         
             
                   it is compiled with Unicode property support.
         
     | 
| 
       5830 
5831 
     | 
    
         | 
| 
       5831 
     | 
    
         
            -
                   The 
     | 
| 
      
 5832 
     | 
    
         
            +
                   The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,  \V,
         
     | 
| 
       5832 
5833 
     | 
    
         
             
                   \w, and \W may appear in a character class, and add the characters that
         
     | 
| 
       5833 
     | 
    
         
            -
                   they 
     | 
| 
       5834 
     | 
    
         
            -
                   mal 
     | 
| 
       5835 
     | 
    
         
            -
                   \d, 
     | 
| 
       5836 
     | 
    
         
            -
                   appear 
     | 
| 
      
 5834 
     | 
    
         
            +
                   they match to the class. For example, [\dABCDEF] matches any  hexadeci-
         
     | 
| 
      
 5835 
     | 
    
         
            +
                   mal  digit.  In  UTF modes, the PCRE_UCP option affects the meanings of
         
     | 
| 
      
 5836 
     | 
    
         
            +
                   \d, \s, \w and their upper case partners, just as  it  does  when  they
         
     | 
| 
      
 5837 
     | 
    
         
            +
                   appear  outside a character class, as described in the section entitled
         
     | 
| 
       5837 
5838 
     | 
    
         
             
                   "Generic character types" above. The escape sequence \b has a different
         
     | 
| 
       5838 
     | 
    
         
            -
                   meaning 
     | 
| 
       5839 
     | 
    
         
            -
                   The 
     | 
| 
       5840 
     | 
    
         
            -
                   class. 
     | 
| 
       5841 
     | 
    
         
            -
                   as 
     | 
| 
      
 5839 
     | 
    
         
            +
                   meaning  inside  a character class; it matches the backspace character.
         
     | 
| 
      
 5840 
     | 
    
         
            +
                   The sequences \B, \N, \R, and \X are not  special  inside  a  character
         
     | 
| 
      
 5841 
     | 
    
         
            +
                   class.  Like  any other unrecognized escape sequences, they are treated
         
     | 
| 
      
 5842 
     | 
    
         
            +
                   as the literal characters "B", "N", "R", and "X" by default, but  cause
         
     | 
| 
       5842 
5843 
     | 
    
         
             
                   an error if the PCRE_EXTRA option is set.
         
     | 
| 
       5843 
5844 
     | 
    
         | 
| 
       5844 
     | 
    
         
            -
                   A 
     | 
| 
       5845 
     | 
    
         
            -
                   types 
     | 
| 
       5846 
     | 
    
         
            -
                   lower 
     | 
| 
      
 5845 
     | 
    
         
            +
                   A  circumflex  can  conveniently  be used with the upper case character
         
     | 
| 
      
 5846 
     | 
    
         
            +
                   types to specify a more restricted set of characters than the  matching
         
     | 
| 
      
 5847 
     | 
    
         
            +
                   lower  case  type.  For example, the class [^\W_] matches any letter or
         
     | 
| 
       5847 
5848 
     | 
    
         
             
                   digit, but not underscore, whereas [\w] includes underscore. A positive
         
     | 
| 
       5848 
5849 
     | 
    
         
             
                   character class should be read as "something OR something OR ..." and a
         
     | 
| 
       5849 
5850 
     | 
    
         
             
                   negative class as "NOT something AND NOT something AND NOT ...".
         
     | 
| 
       5850 
5851 
     | 
    
         | 
| 
       5851 
     | 
    
         
            -
                   The 
     | 
| 
       5852 
     | 
    
         
            -
                   backslash, 
     | 
| 
       5853 
     | 
    
         
            -
                   range), 
     | 
| 
       5854 
     | 
    
         
            -
                   when 
     | 
| 
       5855 
     | 
    
         
            -
                   special 
     | 
| 
      
 5852 
     | 
    
         
            +
                   The only metacharacters that are recognized in  character  classes  are
         
     | 
| 
      
 5853 
     | 
    
         
            +
                   backslash,  hyphen  (only  where  it can be interpreted as specifying a
         
     | 
| 
      
 5854 
     | 
    
         
            +
                   range), circumflex (only at the start), opening  square  bracket  (only
         
     | 
| 
      
 5855 
     | 
    
         
            +
                   when  it can be interpreted as introducing a POSIX class name, or for a
         
     | 
| 
      
 5856 
     | 
    
         
            +
                   special compatibility feature - see the next  two  sections),  and  the
         
     | 
| 
       5856 
5857 
     | 
    
         
             
                   terminating  closing  square  bracket.  However,  escaping  other  non-
         
     | 
| 
       5857 
5858 
     | 
    
         
             
                   alphanumeric characters does no harm.
         
     | 
| 
       5858 
5859 
     | 
    
         | 
| 
         @@ -5860,7 +5861,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES 
     | 
|
| 
       5860 
5861 
     | 
    
         
             
            POSIX CHARACTER CLASSES
         
     | 
| 
       5861 
5862 
     | 
    
         | 
| 
       5862 
5863 
     | 
    
         
             
                   Perl supports the POSIX notation for character classes. This uses names
         
     | 
| 
       5863 
     | 
    
         
            -
                   enclosed 
     | 
| 
      
 5864 
     | 
    
         
            +
                   enclosed  by  [: and :] within the enclosing square brackets. PCRE also
         
     | 
| 
       5864 
5865 
     | 
    
         
             
                   supports this notation. For example,
         
     | 
| 
       5865 
5866 
     | 
    
         | 
| 
       5866 
5867 
     | 
    
         
             
                     [01[:alpha:]%]
         
     | 
| 
         @@ -5883,28 +5884,28 @@ POSIX CHARACTER CLASSES 
     | 
|
| 
       5883 
5884 
     | 
    
         
             
                     word     "word" characters (same as \w)
         
     | 
| 
       5884 
5885 
     | 
    
         
             
                     xdigit   hexadecimal digits
         
     | 
| 
       5885 
5886 
     | 
    
         | 
| 
       5886 
     | 
    
         
            -
                   The 
     | 
| 
       5887 
     | 
    
         
            -
                   CR 
     | 
| 
       5888 
     | 
    
         
            -
                   the 
     | 
| 
      
 5887 
     | 
    
         
            +
                   The  default  "space" characters are HT (9), LF (10), VT (11), FF (12),
         
     | 
| 
      
 5888 
     | 
    
         
            +
                   CR (13), and space (32). If locale-specific matching is  taking  place,
         
     | 
| 
      
 5889 
     | 
    
         
            +
                   the  list  of  space characters may be different; there may be fewer or
         
     | 
| 
       5889 
5890 
     | 
    
         
             
                   more of them. "Space" used to be different to \s, which did not include
         
     | 
| 
       5890 
5891 
     | 
    
         
             
                   VT, for Perl compatibility.  However, Perl changed at release 5.18, and
         
     | 
| 
       5891 
     | 
    
         
            -
                   PCRE 
     | 
| 
      
 5892 
     | 
    
         
            +
                   PCRE followed at release 8.34.  "Space" and \s now match the  same  set
         
     | 
| 
       5892 
5893 
     | 
    
         
             
                   of characters.
         
     | 
| 
       5893 
5894 
     | 
    
         | 
| 
       5894 
     | 
    
         
            -
                   The 
     | 
| 
       5895 
     | 
    
         
            -
                   from 
     | 
| 
      
 5895 
     | 
    
         
            +
                   The  name  "word"  is  a Perl extension, and "blank" is a GNU extension
         
     | 
| 
      
 5896 
     | 
    
         
            +
                   from Perl 5.8. Another Perl extension is negation, which  is  indicated
         
     | 
| 
       5896 
5897 
     | 
    
         
             
                   by a ^ character after the colon. For example,
         
     | 
| 
       5897 
5898 
     | 
    
         | 
| 
       5898 
5899 
     | 
    
         
             
                     [12[:^digit:]]
         
     | 
| 
       5899 
5900 
     | 
    
         | 
| 
       5900 
     | 
    
         
            -
                   matches 
     | 
| 
      
 5901 
     | 
    
         
            +
                   matches  "1", "2", or any non-digit. PCRE (and Perl) also recognize the
         
     | 
| 
       5901 
5902 
     | 
    
         
             
                   POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but
         
     | 
| 
       5902 
5903 
     | 
    
         
             
                   these are not supported, and an error is given if they are encountered.
         
     | 
| 
       5903 
5904 
     | 
    
         | 
| 
       5904 
5905 
     | 
    
         
             
                   By default, characters with values greater than 127 do not match any of
         
     | 
| 
       5905 
     | 
    
         
            -
                   the 
     | 
| 
       5906 
     | 
    
         
            -
                   to 
     | 
| 
       5907 
     | 
    
         
            -
                   character 
     | 
| 
      
 5906 
     | 
    
         
            +
                   the POSIX character classes. However, if the PCRE_UCP option is  passed
         
     | 
| 
      
 5907 
     | 
    
         
            +
                   to  pcre_compile(),  some  of  the  classes are changed so that Unicode
         
     | 
| 
      
 5908 
     | 
    
         
            +
                   character properties are used. This is achieved  by  replacing  certain
         
     | 
| 
       5908 
5909 
     | 
    
         
             
                   POSIX classes by other sequences, as follows:
         
     | 
| 
       5909 
5910 
     | 
    
         | 
| 
       5910 
5911 
     | 
    
         
             
                     [:alnum:]  becomes  \p{Xan}
         
     | 
| 
         @@ -5916,10 +5917,10 @@ POSIX CHARACTER CLASSES 
     | 
|
| 
       5916 
5917 
     | 
    
         
             
                     [:upper:]  becomes  \p{Lu}
         
     | 
| 
       5917 
5918 
     | 
    
         
             
                     [:word:]   becomes  \p{Xwd}
         
     | 
| 
       5918 
5919 
     | 
    
         | 
| 
       5919 
     | 
    
         
            -
                   Negated 
     | 
| 
      
 5920 
     | 
    
         
            +
                   Negated  versions, such as [:^alpha:] use \P instead of \p. Three other
         
     | 
| 
       5920 
5921 
     | 
    
         
             
                   POSIX classes are handled specially in UCP mode:
         
     | 
| 
       5921 
5922 
     | 
    
         | 
| 
       5922 
     | 
    
         
            -
                   [:graph:] This 
     | 
| 
      
 5923 
     | 
    
         
            +
                   [:graph:] This matches characters that have glyphs that mark  the  page
         
     | 
| 
       5923 
5924 
     | 
    
         
             
                             when printed. In Unicode property terms, it matches all char-
         
     | 
| 
       5924 
5925 
     | 
    
         
             
                             acters with the L, M, N, P, S, or Cf properties, except for:
         
     | 
| 
       5925 
5926 
     | 
    
         | 
| 
         @@ -5928,58 +5929,58 @@ POSIX CHARACTER CLASSES 
     | 
|
| 
       5928 
5929 
     | 
    
         
             
                               U+2066 - U+2069  Various "isolate"s
         
     | 
| 
       5929 
5930 
     | 
    
         | 
| 
       5930 
5931 
     | 
    
         | 
| 
       5931 
     | 
    
         
            -
                   [:print:] This 
     | 
| 
       5932 
     | 
    
         
            -
                             characters 
     | 
| 
      
 5932 
     | 
    
         
            +
                   [:print:] This matches the same  characters  as  [:graph:]  plus  space
         
     | 
| 
      
 5933 
     | 
    
         
            +
                             characters  that  are  not controls, that is, characters with
         
     | 
| 
       5933 
5934 
     | 
    
         
             
                             the Zs property.
         
     | 
| 
       5934 
5935 
     | 
    
         | 
| 
       5935 
5936 
     | 
    
         
             
                   [:punct:] This matches all characters that have the Unicode P (punctua-
         
     | 
| 
       5936 
     | 
    
         
            -
                             tion) 
     | 
| 
      
 5937 
     | 
    
         
            +
                             tion)  property,  plus those characters whose code points are
         
     | 
| 
       5937 
5938 
     | 
    
         
             
                             less than 128 that have the S (Symbol) property.
         
     | 
| 
       5938 
5939 
     | 
    
         | 
| 
       5939 
     | 
    
         
            -
                   The 
     | 
| 
      
 5940 
     | 
    
         
            +
                   The other POSIX classes are unchanged, and match only  characters  with
         
     | 
| 
       5940 
5941 
     | 
    
         
             
                   code points less than 128.
         
     | 
| 
       5941 
5942 
     | 
    
         | 
| 
       5942 
5943 
     | 
    
         | 
| 
       5943 
5944 
     | 
    
         
             
            COMPATIBILITY FEATURE FOR WORD BOUNDARIES
         
     | 
| 
       5944 
5945 
     | 
    
         | 
| 
       5945 
     | 
    
         
            -
                   In 
     | 
| 
       5946 
     | 
    
         
            -
                   ugly 
     | 
| 
      
 5946 
     | 
    
         
            +
                   In  the POSIX.2 compliant library that was included in 4.4BSD Unix, the
         
     | 
| 
      
 5947 
     | 
    
         
            +
                   ugly syntax [[:<:]] and [[:>:]] is used for matching  "start  of  word"
         
     | 
| 
       5947 
5948 
     | 
    
         
             
                   and "end of word". PCRE treats these items as follows:
         
     | 
| 
       5948 
5949 
     | 
    
         | 
| 
       5949 
5950 
     | 
    
         
             
                     [[:<:]]  is converted to  \b(?=\w)
         
     | 
| 
       5950 
5951 
     | 
    
         
             
                     [[:>:]]  is converted to  \b(?<=\w)
         
     | 
| 
       5951 
5952 
     | 
    
         | 
| 
       5952 
5953 
     | 
    
         
             
                   Only these exact character sequences are recognized. A sequence such as
         
     | 
| 
       5953 
     | 
    
         
            -
                   [a[:<:]b] 
     | 
| 
       5954 
     | 
    
         
            -
                   support 
     | 
| 
      
 5954 
     | 
    
         
            +
                   [a[:<:]b] provokes an error for an unrecognized POSIX class name.  This
         
     | 
| 
      
 5955 
     | 
    
         
            +
                   support  is not compatible with Perl. It is provided to help migrations
         
     | 
| 
       5955 
5956 
     | 
    
         
             
                   from other environments, and is best not used in any new patterns. Note
         
     | 
| 
       5956 
     | 
    
         
            -
                   that 
     | 
| 
       5957 
     | 
    
         
            -
                   tions" 
     | 
| 
       5958 
     | 
    
         
            -
                   character 
     | 
| 
       5959 
     | 
    
         
            -
                   assertions 
     | 
| 
      
 5957 
     | 
    
         
            +
                   that  \b matches at the start and the end of a word (see "Simple asser-
         
     | 
| 
      
 5958 
     | 
    
         
            +
                   tions" above), and in a Perl-style pattern the preceding  or  following
         
     | 
| 
      
 5959 
     | 
    
         
            +
                   character  normally  shows  which  is  wanted, without the need for the
         
     | 
| 
      
 5960 
     | 
    
         
            +
                   assertions that are used above in order to give exactly the  POSIX  be-
         
     | 
| 
       5960 
5961 
     | 
    
         
             
                   haviour.
         
     | 
| 
       5961 
5962 
     | 
    
         | 
| 
       5962 
5963 
     | 
    
         | 
| 
       5963 
5964 
     | 
    
         
             
            VERTICAL BAR
         
     | 
| 
       5964 
5965 
     | 
    
         | 
| 
       5965 
     | 
    
         
            -
                   Vertical 
     | 
| 
      
 5966 
     | 
    
         
            +
                   Vertical  bar characters are used to separate alternative patterns. For
         
     | 
| 
       5966 
5967 
     | 
    
         
             
                   example, the pattern
         
     | 
| 
       5967 
5968 
     | 
    
         | 
| 
       5968 
5969 
     | 
    
         
             
                     gilbert|sullivan
         
     | 
| 
       5969 
5970 
     | 
    
         | 
| 
       5970 
     | 
    
         
            -
                   matches 
     | 
| 
       5971 
     | 
    
         
            -
                   appear, 
     | 
| 
      
 5971 
     | 
    
         
            +
                   matches either "gilbert" or "sullivan". Any number of alternatives  may
         
     | 
| 
      
 5972 
     | 
    
         
            +
                   appear,  and  an  empty  alternative  is  permitted (matching the empty
         
     | 
| 
       5972 
5973 
     | 
    
         
             
                   string). The matching process tries each alternative in turn, from left
         
     | 
| 
       5973 
     | 
    
         
            -
                   to 
     | 
| 
       5974 
     | 
    
         
            -
                   are 
     | 
| 
      
 5974 
     | 
    
         
            +
                   to  right, and the first one that succeeds is used. If the alternatives
         
     | 
| 
      
 5975 
     | 
    
         
            +
                   are within a subpattern (defined below), "succeeds" means matching  the
         
     | 
| 
       5975 
5976 
     | 
    
         
             
                   rest of the main pattern as well as the alternative in the subpattern.
         
     | 
| 
       5976 
5977 
     | 
    
         | 
| 
       5977 
5978 
     | 
    
         | 
| 
       5978 
5979 
     | 
    
         
             
            INTERNAL OPTION SETTING
         
     | 
| 
       5979 
5980 
     | 
    
         | 
| 
       5980 
     | 
    
         
            -
                   The 
     | 
| 
       5981 
     | 
    
         
            -
                   PCRE_EXTENDED 
     | 
| 
       5982 
     | 
    
         
            -
                   within 
     | 
| 
      
 5981 
     | 
    
         
            +
                   The  settings  of  the  PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
         
     | 
| 
      
 5982 
     | 
    
         
            +
                   PCRE_EXTENDED options (which are Perl-compatible) can be  changed  from
         
     | 
| 
      
 5983 
     | 
    
         
            +
                   within  the  pattern  by  a  sequence  of  Perl option letters enclosed
         
     | 
| 
       5983 
5984 
     | 
    
         
             
                   between "(?" and ")".  The option letters are
         
     | 
| 
       5984 
5985 
     | 
    
         | 
| 
       5985 
5986 
     | 
    
         
             
                     i  for PCRE_CASELESS
         
     | 
| 
         @@ -5989,51 +5990,47 @@ INTERNAL OPTION SETTING 
     | 
|
| 
       5989 
5990 
     | 
    
         | 
| 
       5990 
5991 
     | 
    
         
             
                   For example, (?im) sets caseless, multiline matching. It is also possi-
         
     | 
| 
       5991 
5992 
     | 
    
         
             
                   ble to unset these options by preceding the letter with a hyphen, and a
         
     | 
| 
       5992 
     | 
    
         
            -
                   combined 
     | 
| 
       5993 
     | 
    
         
            -
                   LESS 
     | 
| 
       5994 
     | 
    
         
            -
                   is 
     | 
| 
      
 5993 
     | 
    
         
            +
                   combined setting and unsetting such as (?im-sx), which sets  PCRE_CASE-
         
     | 
| 
      
 5994 
     | 
    
         
            +
                   LESS  and PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED,
         
     | 
| 
      
 5995 
     | 
    
         
            +
                   is also permitted. If a  letter  appears  both  before  and  after  the
         
     | 
| 
       5995 
5996 
     | 
    
         
             
                   hyphen, the option is unset.
         
     | 
| 
       5996 
5997 
     | 
    
         | 
| 
       5997 
     | 
    
         
            -
                   The 
     | 
| 
       5998 
     | 
    
         
            -
                   can 
     | 
| 
      
 5998 
     | 
    
         
            +
                   The  PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA
         
     | 
| 
      
 5999 
     | 
    
         
            +
                   can be changed in the same way as the Perl-compatible options by  using
         
     | 
| 
       5999 
6000 
     | 
    
         
             
                   the characters J, U and X respectively.
         
     | 
| 
       6000 
6001 
     | 
    
         | 
| 
       6001 
     | 
    
         
            -
                   When 
     | 
| 
       6002 
     | 
    
         
            -
                   inside 
     | 
| 
       6003 
     | 
    
         
            -
                   the 
     | 
| 
       6004 
     | 
    
         
            -
                    
     | 
| 
       6005 
     | 
    
         
            -
                    
     | 
| 
       6006 
     | 
    
         
            -
             
     | 
| 
       6007 
     | 
    
         
            -
                   An option change within a subpattern (see below for  a  description  of
         
     | 
| 
       6008 
     | 
    
         
            -
                   subpatterns)  affects only that part of the subpattern that follows it,
         
     | 
| 
       6009 
     | 
    
         
            -
                   so
         
     | 
| 
      
 6002 
     | 
    
         
            +
                   When  one  of  these  option  changes occurs at top level (that is, not
         
     | 
| 
      
 6003 
     | 
    
         
            +
                   inside subpattern parentheses), the change applies to the remainder  of
         
     | 
| 
      
 6004 
     | 
    
         
            +
                   the  pattern  that  follows.  An option change within a subpattern (see
         
     | 
| 
      
 6005 
     | 
    
         
            +
                   below for a description of subpatterns) affects only that part  of  the
         
     | 
| 
      
 6006 
     | 
    
         
            +
                   subpattern that follows it, so
         
     | 
| 
       6010 
6007 
     | 
    
         | 
| 
       6011 
6008 
     | 
    
         
             
                     (a(?i)b)c
         
     | 
| 
       6012 
6009 
     | 
    
         | 
| 
       6013 
6010 
     | 
    
         
             
                   matches abc and aBc and no other strings (assuming PCRE_CASELESS is not
         
     | 
| 
       6014 
     | 
    
         
            -
                   used). 
     | 
| 
       6015 
     | 
    
         
            -
                   in 
     | 
| 
       6016 
     | 
    
         
            -
                   do 
     | 
| 
      
 6011 
     | 
    
         
            +
                   used).  By this means, options can be made to have  different  settings
         
     | 
| 
      
 6012 
     | 
    
         
            +
                   in  different parts of the pattern. Any changes made in one alternative
         
     | 
| 
      
 6013 
     | 
    
         
            +
                   do carry on into subsequent branches within the  same  subpattern.  For
         
     | 
| 
       6017 
6014 
     | 
    
         
             
                   example,
         
     | 
| 
       6018 
6015 
     | 
    
         | 
| 
       6019 
6016 
     | 
    
         
             
                     (a(?i)b|c)
         
     | 
| 
       6020 
6017 
     | 
    
         | 
| 
       6021 
     | 
    
         
            -
                   matches 
     | 
| 
       6022 
     | 
    
         
            -
                   first 
     | 
| 
       6023 
     | 
    
         
            -
                   the 
     | 
| 
      
 6018 
     | 
    
         
            +
                   matches  "ab",  "aB",  "c",  and "C", even though when matching "C" the
         
     | 
| 
      
 6019 
     | 
    
         
            +
                   first branch is abandoned before the option setting.  This  is  because
         
     | 
| 
      
 6020 
     | 
    
         
            +
                   the  effects  of option settings happen at compile time. There would be
         
     | 
| 
       6024 
6021 
     | 
    
         
             
                   some very weird behaviour otherwise.
         
     | 
| 
       6025 
6022 
     | 
    
         | 
| 
       6026 
     | 
    
         
            -
                   Note: 
     | 
| 
       6027 
     | 
    
         
            -
                   application 
     | 
| 
       6028 
     | 
    
         
            -
                   some 
     | 
| 
       6029 
     | 
    
         
            -
                   (*CRLF) 
     | 
| 
       6030 
     | 
    
         
            -
                   defaulted. 
     | 
| 
       6031 
     | 
    
         
            -
                   sequences" 
     | 
| 
       6032 
     | 
    
         
            -
                   (*UCP) 
     | 
| 
       6033 
     | 
    
         
            -
                   erty 
     | 
| 
       6034 
     | 
    
         
            -
                   PCRE_UTF32 
     | 
| 
       6035 
     | 
    
         
            -
                   is 
     | 
| 
       6036 
     | 
    
         
            -
                   ever, 
     | 
| 
      
 6023 
     | 
    
         
            +
                   Note: There are other PCRE-specific options that  can  be  set  by  the
         
     | 
| 
      
 6024 
     | 
    
         
            +
                   application  when  the  compiling  or matching functions are called. In
         
     | 
| 
      
 6025 
     | 
    
         
            +
                   some cases the pattern can contain special leading  sequences  such  as
         
     | 
| 
      
 6026 
     | 
    
         
            +
                   (*CRLF)  to  override  what  the  application  has set or what has been
         
     | 
| 
      
 6027 
     | 
    
         
            +
                   defaulted.  Details  are  given  in  the  section   entitled   "Newline
         
     | 
| 
      
 6028 
     | 
    
         
            +
                   sequences"  above.  There are also the (*UTF8), (*UTF16), (*UTF32), and
         
     | 
| 
      
 6029 
     | 
    
         
            +
                   (*UCP) leading sequences that can be used to set UTF and Unicode  prop-
         
     | 
| 
      
 6030 
     | 
    
         
            +
                   erty  modes;  they are equivalent to setting the PCRE_UTF8, PCRE_UTF16,
         
     | 
| 
      
 6031 
     | 
    
         
            +
                   PCRE_UTF32 and the PCRE_UCP options, respectively. The (*UTF)  sequence
         
     | 
| 
      
 6032 
     | 
    
         
            +
                   is  a  generic version that can be used with any of the libraries. How-
         
     | 
| 
      
 6033 
     | 
    
         
            +
                   ever, the application can set the PCRE_NEVER_UTF  option,  which  locks
         
     | 
| 
       6037 
6034 
     | 
    
         
             
                   out the use of the (*UTF) sequences.
         
     | 
| 
       6038 
6035 
     | 
    
         | 
| 
       6039 
6036 
     | 
    
         | 
| 
         @@ -6046,18 +6043,18 @@ SUBPATTERNS 
     | 
|
| 
       6046 
6043 
     | 
    
         | 
| 
       6047 
6044 
     | 
    
         
             
                     cat(aract|erpillar|)
         
     | 
| 
       6048 
6045 
     | 
    
         | 
| 
       6049 
     | 
    
         
            -
                   matches 
     | 
| 
      
 6046 
     | 
    
         
            +
                   matches "cataract", "caterpillar", or "cat". Without  the  parentheses,
         
     | 
| 
       6050 
6047 
     | 
    
         
             
                   it would match "cataract", "erpillar" or an empty string.
         
     | 
| 
       6051 
6048 
     | 
    
         | 
| 
       6052 
     | 
    
         
            -
                   2. 
     | 
| 
       6053 
     | 
    
         
            -
                   that, 
     | 
| 
      
 6049 
     | 
    
         
            +
                   2.  It  sets  up  the  subpattern as a capturing subpattern. This means
         
     | 
| 
      
 6050 
     | 
    
         
            +
                   that, when the whole pattern  matches,  that  portion  of  the  subject
         
     | 
| 
       6054 
6051 
     | 
    
         
             
                   string that matched the subpattern is passed back to the caller via the
         
     | 
| 
       6055 
     | 
    
         
            -
                   ovector 
     | 
| 
       6056 
     | 
    
         
            -
                   traditional 
     | 
| 
      
 6052 
     | 
    
         
            +
                   ovector argument of the matching function. (This applies  only  to  the
         
     | 
| 
      
 6053 
     | 
    
         
            +
                   traditional  matching functions; the DFA matching functions do not sup-
         
     | 
| 
       6057 
6054 
     | 
    
         
             
                   port capturing.)
         
     | 
| 
       6058 
6055 
     | 
    
         | 
| 
       6059 
6056 
     | 
    
         
             
                   Opening parentheses are counted from left to right (starting from 1) to
         
     | 
| 
       6060 
     | 
    
         
            -
                   obtain 
     | 
| 
      
 6057 
     | 
    
         
            +
                   obtain  numbers  for  the  capturing  subpatterns.  For example, if the
         
     | 
| 
       6061 
6058 
     | 
    
         
             
                   string "the red king" is matched against the pattern
         
     | 
| 
       6062 
6059 
     | 
    
         | 
| 
       6063 
6060 
     | 
    
         
             
                     the ((red|white) (king|queen))
         
     | 
| 
         @@ -6065,12 +6062,12 @@ SUBPATTERNS 
     | 
|
| 
       6065 
6062 
     | 
    
         
             
                   the captured substrings are "red king", "red", and "king", and are num-
         
     | 
| 
       6066 
6063 
     | 
    
         
             
                   bered 1, 2, and 3, respectively.
         
     | 
| 
       6067 
6064 
     | 
    
         | 
| 
       6068 
     | 
    
         
            -
                   The 
     | 
| 
       6069 
     | 
    
         
            -
                   helpful. 
     | 
| 
       6070 
     | 
    
         
            -
                   without 
     | 
| 
       6071 
     | 
    
         
            -
                   by 
     | 
| 
       6072 
     | 
    
         
            -
                   ing, 
     | 
| 
       6073 
     | 
    
         
            -
                   capturing 
     | 
| 
      
 6065 
     | 
    
         
            +
                   The  fact  that  plain  parentheses  fulfil two functions is not always
         
     | 
| 
      
 6066 
     | 
    
         
            +
                   helpful.  There are often times when a grouping subpattern is  required
         
     | 
| 
      
 6067 
     | 
    
         
            +
                   without  a capturing requirement. If an opening parenthesis is followed
         
     | 
| 
      
 6068 
     | 
    
         
            +
                   by a question mark and a colon, the subpattern does not do any  captur-
         
     | 
| 
      
 6069 
     | 
    
         
            +
                   ing,  and  is  not  counted when computing the number of any subsequent
         
     | 
| 
      
 6070 
     | 
    
         
            +
                   capturing subpatterns. For example, if the string "the white queen"  is
         
     | 
| 
       6074 
6071 
     | 
    
         
             
                   matched against the pattern
         
     | 
| 
       6075 
6072 
     | 
    
         | 
| 
       6076 
6073 
     | 
    
         
             
                     the ((?:red|white) (king|queen))
         
     | 
| 
         @@ -6078,37 +6075,37 @@ SUBPATTERNS 
     | 
|
| 
       6078 
6075 
     | 
    
         
             
                   the captured substrings are "white queen" and "queen", and are numbered
         
     | 
| 
       6079 
6076 
     | 
    
         
             
                   1 and 2. The maximum number of capturing subpatterns is 65535.
         
     | 
| 
       6080 
6077 
     | 
    
         | 
| 
       6081 
     | 
    
         
            -
                   As 
     | 
| 
       6082 
     | 
    
         
            -
                   start 
     | 
| 
      
 6078 
     | 
    
         
            +
                   As a convenient shorthand, if any option settings are required  at  the
         
     | 
| 
      
 6079 
     | 
    
         
            +
                   start  of  a  non-capturing  subpattern,  the option letters may appear
         
     | 
| 
       6083 
6080 
     | 
    
         
             
                   between the "?" and the ":". Thus the two patterns
         
     | 
| 
       6084 
6081 
     | 
    
         | 
| 
       6085 
6082 
     | 
    
         
             
                     (?i:saturday|sunday)
         
     | 
| 
       6086 
6083 
     | 
    
         
             
                     (?:(?i)saturday|sunday)
         
     | 
| 
       6087 
6084 
     | 
    
         | 
| 
       6088 
6085 
     | 
    
         
             
                   match exactly the same set of strings. Because alternative branches are
         
     | 
| 
       6089 
     | 
    
         
            -
                   tried 
     | 
| 
       6090 
     | 
    
         
            -
                   the 
     | 
| 
       6091 
     | 
    
         
            -
                   subsequent 
     | 
| 
      
 6086 
     | 
    
         
            +
                   tried  from  left  to right, and options are not reset until the end of
         
     | 
| 
      
 6087 
     | 
    
         
            +
                   the subpattern is reached, an option setting in one branch does  affect
         
     | 
| 
      
 6088 
     | 
    
         
            +
                   subsequent  branches,  so  the above patterns match "SUNDAY" as well as
         
     | 
| 
       6092 
6089 
     | 
    
         
             
                   "Saturday".
         
     | 
| 
       6093 
6090 
     | 
    
         | 
| 
       6094 
6091 
     | 
    
         | 
| 
       6095 
6092 
     | 
    
         
             
            DUPLICATE SUBPATTERN NUMBERS
         
     | 
| 
       6096 
6093 
     | 
    
         | 
| 
       6097 
6094 
     | 
    
         
             
                   Perl 5.10 introduced a feature whereby each alternative in a subpattern
         
     | 
| 
       6098 
     | 
    
         
            -
                   uses 
     | 
| 
       6099 
     | 
    
         
            -
                   starts 
     | 
| 
      
 6095 
     | 
    
         
            +
                   uses  the same numbers for its capturing parentheses. Such a subpattern
         
     | 
| 
      
 6096 
     | 
    
         
            +
                   starts with (?| and is itself a non-capturing subpattern. For  example,
         
     | 
| 
       6100 
6097 
     | 
    
         
             
                   consider this pattern:
         
     | 
| 
       6101 
6098 
     | 
    
         | 
| 
       6102 
6099 
     | 
    
         
             
                     (?|(Sat)ur|(Sun))day
         
     | 
| 
       6103 
6100 
     | 
    
         | 
| 
       6104 
     | 
    
         
            -
                   Because 
     | 
| 
       6105 
     | 
    
         
            -
                   turing 
     | 
| 
       6106 
     | 
    
         
            -
                   you 
     | 
| 
       6107 
     | 
    
         
            -
                   matched. 
     | 
| 
      
 6101 
     | 
    
         
            +
                   Because  the two alternatives are inside a (?| group, both sets of cap-
         
     | 
| 
      
 6102 
     | 
    
         
            +
                   turing parentheses are numbered one. Thus, when  the  pattern  matches,
         
     | 
| 
      
 6103 
     | 
    
         
            +
                   you  can  look  at captured substring number one, whichever alternative
         
     | 
| 
      
 6104 
     | 
    
         
            +
                   matched. This construct is useful when you want to  capture  part,  but
         
     | 
| 
       6108 
6105 
     | 
    
         
             
                   not all, of one of a number of alternatives. Inside a (?| group, paren-
         
     | 
| 
       6109 
     | 
    
         
            -
                   theses 
     | 
| 
       6110 
     | 
    
         
            -
                   each 
     | 
| 
       6111 
     | 
    
         
            -
                   subpattern 
     | 
| 
      
 6106 
     | 
    
         
            +
                   theses are numbered as usual, but the number is reset at the  start  of
         
     | 
| 
      
 6107 
     | 
    
         
            +
                   each  branch.  The numbers of any capturing parentheses that follow the
         
     | 
| 
      
 6108 
     | 
    
         
            +
                   subpattern start after the highest number used in any branch. The  fol-
         
     | 
| 
       6112 
6109 
     | 
    
         
             
                   lowing example is taken from the Perl documentation. The numbers under-
         
     | 
| 
       6113 
6110 
     | 
    
         
             
                   neath show in which buffer the captured content will be stored.
         
     | 
| 
       6114 
6111 
     | 
    
         | 
| 
         @@ -6116,58 +6113,58 @@ DUPLICATE SUBPATTERN NUMBERS 
     | 
|
| 
       6116 
6113 
     | 
    
         
             
                     / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
         
     | 
| 
       6117 
6114 
     | 
    
         
             
                     # 1            2         2  3        2     3     4
         
     | 
| 
       6118 
6115 
     | 
    
         | 
| 
       6119 
     | 
    
         
            -
                   A 
     | 
| 
       6120 
     | 
    
         
            -
                   that 
     | 
| 
      
 6116 
     | 
    
         
            +
                   A back reference to a numbered subpattern uses the  most  recent  value
         
     | 
| 
      
 6117 
     | 
    
         
            +
                   that  is  set  for that number by any subpattern. The following pattern
         
     | 
| 
       6121 
6118 
     | 
    
         
             
                   matches "abcabc" or "defdef":
         
     | 
| 
       6122 
6119 
     | 
    
         | 
| 
       6123 
6120 
     | 
    
         
             
                     /(?|(abc)|(def))\1/
         
     | 
| 
       6124 
6121 
     | 
    
         | 
| 
       6125 
     | 
    
         
            -
                   In 
     | 
| 
       6126 
     | 
    
         
            -
                   to 
     | 
| 
      
 6122 
     | 
    
         
            +
                   In contrast, a subroutine call to a numbered subpattern  always  refers
         
     | 
| 
      
 6123 
     | 
    
         
            +
                   to  the  first  one in the pattern with the given number. The following
         
     | 
| 
       6127 
6124 
     | 
    
         
             
                   pattern matches "abcabc" or "defabc":
         
     | 
| 
       6128 
6125 
     | 
    
         | 
| 
       6129 
6126 
     | 
    
         
             
                     /(?|(abc)|(def))(?1)/
         
     | 
| 
       6130 
6127 
     | 
    
         | 
| 
       6131 
     | 
    
         
            -
                   If 
     | 
| 
       6132 
     | 
    
         
            -
                   unique 
     | 
| 
      
 6128 
     | 
    
         
            +
                   If a condition test for a subpattern's having matched refers to a  non-
         
     | 
| 
      
 6129 
     | 
    
         
            +
                   unique  number, the test is true if any of the subpatterns of that num-
         
     | 
| 
       6133 
6130 
     | 
    
         
             
                   ber have matched.
         
     | 
| 
       6134 
6131 
     | 
    
         | 
| 
       6135 
     | 
    
         
            -
                   An 
     | 
| 
      
 6132 
     | 
    
         
            +
                   An alternative approach to using this "branch reset" feature is to  use
         
     | 
| 
       6136 
6133 
     | 
    
         
             
                   duplicate named subpatterns, as described in the next section.
         
     | 
| 
       6137 
6134 
     | 
    
         | 
| 
       6138 
6135 
     | 
    
         | 
| 
       6139 
6136 
     | 
    
         
             
            NAMED SUBPATTERNS
         
     | 
| 
       6140 
6137 
     | 
    
         | 
| 
       6141 
     | 
    
         
            -
                   Identifying 
     | 
| 
       6142 
     | 
    
         
            -
                   very 
     | 
| 
       6143 
     | 
    
         
            -
                   sions. 
     | 
| 
       6144 
     | 
    
         
            -
                   change. 
     | 
| 
      
 6138 
     | 
    
         
            +
                   Identifying  capturing  parentheses  by number is simple, but it can be
         
     | 
| 
      
 6139 
     | 
    
         
            +
                   very hard to keep track of the numbers in complicated  regular  expres-
         
     | 
| 
      
 6140 
     | 
    
         
            +
                   sions.  Furthermore,  if  an  expression  is  modified, the numbers may
         
     | 
| 
      
 6141 
     | 
    
         
            +
                   change. To help with this difficulty, PCRE supports the naming of  sub-
         
     | 
| 
       6145 
6142 
     | 
    
         
             
                   patterns. This feature was not added to Perl until release 5.10. Python
         
     | 
| 
       6146 
     | 
    
         
            -
                   had 
     | 
| 
       6147 
     | 
    
         
            -
                   the 
     | 
| 
       6148 
     | 
    
         
            -
                   tax. 
     | 
| 
      
 6143 
     | 
    
         
            +
                   had the feature earlier, and PCRE introduced it at release  4.0,  using
         
     | 
| 
      
 6144 
     | 
    
         
            +
                   the  Python syntax. PCRE now supports both the Perl and the Python syn-
         
     | 
| 
      
 6145 
     | 
    
         
            +
                   tax. Perl allows identically numbered  subpatterns  to  have  different
         
     | 
| 
       6149 
6146 
     | 
    
         
             
                   names, but PCRE does not.
         
     | 
| 
       6150 
6147 
     | 
    
         | 
| 
       6151 
     | 
    
         
            -
                   In 
     | 
| 
       6152 
     | 
    
         
            -
                   or 
     | 
| 
       6153 
     | 
    
         
            -
                   to 
     | 
| 
       6154 
     | 
    
         
            -
                   references, 
     | 
| 
      
 6148 
     | 
    
         
            +
                   In  PCRE,  a subpattern can be named in one of three ways: (?<name>...)
         
     | 
| 
      
 6149 
     | 
    
         
            +
                   or (?'name'...) as in Perl, or (?P<name>...) as in  Python.  References
         
     | 
| 
      
 6150 
     | 
    
         
            +
                   to  capturing parentheses from other parts of the pattern, such as back
         
     | 
| 
      
 6151 
     | 
    
         
            +
                   references, recursion, and conditions, can be made by name as  well  as
         
     | 
| 
       6155 
6152 
     | 
    
         
             
                   by number.
         
     | 
| 
       6156 
6153 
     | 
    
         | 
| 
       6157 
     | 
    
         
            -
                   Names 
     | 
| 
       6158 
     | 
    
         
            -
                   must 
     | 
| 
       6159 
     | 
    
         
            -
                   allocated 
     | 
| 
       6160 
     | 
    
         
            -
                   present. 
     | 
| 
       6161 
     | 
    
         
            -
                   to-number 
     | 
| 
      
 6154 
     | 
    
         
            +
                   Names  consist of up to 32 alphanumeric characters and underscores, but
         
     | 
| 
      
 6155 
     | 
    
         
            +
                   must start with a non-digit.  Named  capturing  parentheses  are  still
         
     | 
| 
      
 6156 
     | 
    
         
            +
                   allocated  numbers  as  well as names, exactly as if the names were not
         
     | 
| 
      
 6157 
     | 
    
         
            +
                   present. The PCRE API provides function calls for extracting the  name-
         
     | 
| 
      
 6158 
     | 
    
         
            +
                   to-number  translation  table  from a compiled pattern. There is also a
         
     | 
| 
       6162 
6159 
     | 
    
         
             
                   convenience function for extracting a captured substring by name.
         
     | 
| 
       6163 
6160 
     | 
    
         | 
| 
       6164 
     | 
    
         
            -
                   By 
     | 
| 
      
 6161 
     | 
    
         
            +
                   By default, a name must be unique within a pattern, but it is  possible
         
     | 
| 
       6165 
6162 
     | 
    
         
             
                   to relax this constraint by setting the PCRE_DUPNAMES option at compile
         
     | 
| 
       6166 
     | 
    
         
            -
                   time. 
     | 
| 
       6167 
     | 
    
         
            -
                   the 
     | 
| 
       6168 
     | 
    
         
            -
                   cate 
     | 
| 
       6169 
     | 
    
         
            -
                   named 
     | 
| 
       6170 
     | 
    
         
            -
                   weekday, 
     | 
| 
      
 6163 
     | 
    
         
            +
                   time. (Duplicate names are also always permitted for  subpatterns  with
         
     | 
| 
      
 6164 
     | 
    
         
            +
                   the  same  number, set up as described in the previous section.) Dupli-
         
     | 
| 
      
 6165 
     | 
    
         
            +
                   cate names can be useful for patterns where only one  instance  of  the
         
     | 
| 
      
 6166 
     | 
    
         
            +
                   named  parentheses  can  match. Suppose you want to match the name of a
         
     | 
| 
      
 6167 
     | 
    
         
            +
                   weekday, either as a 3-letter abbreviation or as the full name, and  in
         
     | 
| 
       6171 
6168 
     | 
    
         
             
                   both cases you want to extract the abbreviation. This pattern (ignoring
         
     | 
| 
       6172 
6169 
     | 
    
         
             
                   the line breaks) does the job:
         
     | 
| 
       6173 
6170 
     | 
    
         | 
| 
         @@ -6177,18 +6174,18 @@ NAMED SUBPATTERNS 
     | 
|
| 
       6177 
6174 
     | 
    
         
             
                     (?<DN>Thu)(?:rsday)?|
         
     | 
| 
       6178 
6175 
     | 
    
         
             
                     (?<DN>Sat)(?:urday)?
         
     | 
| 
       6179 
6176 
     | 
    
         | 
| 
       6180 
     | 
    
         
            -
                   There 
     | 
| 
      
 6177 
     | 
    
         
            +
                   There are five capturing substrings, but only one is ever set  after  a
         
     | 
| 
       6181 
6178 
     | 
    
         
             
                   match.  (An alternative way of solving this problem is to use a "branch
         
     | 
| 
       6182 
6179 
     | 
    
         
             
                   reset" subpattern, as described in the previous section.)
         
     | 
| 
       6183 
6180 
     | 
    
         | 
| 
       6184 
     | 
    
         
            -
                   The 
     | 
| 
       6185 
     | 
    
         
            -
                   substring 
     | 
| 
       6186 
     | 
    
         
            -
                   that 
     | 
| 
      
 6181 
     | 
    
         
            +
                   The convenience function for extracting the data by  name  returns  the
         
     | 
| 
      
 6182 
     | 
    
         
            +
                   substring  for  the first (and in this example, the only) subpattern of
         
     | 
| 
      
 6183 
     | 
    
         
            +
                   that name that matched. This saves searching  to  find  which  numbered
         
     | 
| 
       6187 
6184 
     | 
    
         
             
                   subpattern it was.
         
     | 
| 
       6188 
6185 
     | 
    
         | 
| 
       6189 
     | 
    
         
            -
                   If 
     | 
| 
       6190 
     | 
    
         
            -
                   elsewhere 
     | 
| 
       6191 
     | 
    
         
            -
                   checked 
     | 
| 
      
 6186 
     | 
    
         
            +
                   If  you  make  a  back  reference to a non-unique named subpattern from
         
     | 
| 
      
 6187 
     | 
    
         
            +
                   elsewhere in the pattern, the subpatterns to which the name refers  are
         
     | 
| 
      
 6188 
     | 
    
         
            +
                   checked  in  the order in which they appear in the overall pattern. The
         
     | 
| 
       6192 
6189 
     | 
    
         
             
                   first one that is set is used for the reference. For example, this pat-
         
     | 
| 
       6193 
6190 
     | 
    
         
             
                   tern matches both "foofoo" and "barbar" but not "foobar" or "barfoo":
         
     | 
| 
       6194 
6191 
     | 
    
         | 
| 
         @@ -6196,29 +6193,29 @@ NAMED SUBPATTERNS 
     | 
|
| 
       6196 
6193 
     | 
    
         | 
| 
       6197 
6194 
     | 
    
         | 
| 
       6198 
6195 
     | 
    
         
             
                   If you make a subroutine call to a non-unique named subpattern, the one
         
     | 
| 
       6199 
     | 
    
         
            -
                   that 
     | 
| 
      
 6196 
     | 
    
         
            +
                   that corresponds to the first occurrence of the name is  used.  In  the
         
     | 
| 
       6200 
6197 
     | 
    
         
             
                   absence of duplicate numbers (see the previous section) this is the one
         
     | 
| 
       6201 
6198 
     | 
    
         
             
                   with the lowest number.
         
     | 
| 
       6202 
6199 
     | 
    
         | 
| 
       6203 
6200 
     | 
    
         
             
                   If you use a named reference in a condition test (see the section about
         
     | 
| 
       6204 
6201 
     | 
    
         
             
                   conditions below), either to check whether a subpattern has matched, or
         
     | 
| 
       6205 
     | 
    
         
            -
                   to 
     | 
| 
       6206 
     | 
    
         
            -
                   If 
     | 
| 
       6207 
     | 
    
         
            -
                   true. 
     | 
| 
       6208 
     | 
    
         
            -
                   details 
     | 
| 
      
 6202 
     | 
    
         
            +
                   to check for recursion, all subpatterns with the same name are  tested.
         
     | 
| 
      
 6203 
     | 
    
         
            +
                   If  the condition is true for any one of them, the overall condition is
         
     | 
| 
      
 6204 
     | 
    
         
            +
                   true. This is the same behaviour as  testing  by  number.  For  further
         
     | 
| 
      
 6205 
     | 
    
         
            +
                   details  of  the  interfaces  for  handling  named subpatterns, see the
         
     | 
| 
       6209 
6206 
     | 
    
         
             
                   pcreapi documentation.
         
     | 
| 
       6210 
6207 
     | 
    
         | 
| 
       6211 
6208 
     | 
    
         
             
                   Warning: You cannot use different names to distinguish between two sub-
         
     | 
| 
       6212 
     | 
    
         
            -
                   patterns 
     | 
| 
      
 6209 
     | 
    
         
            +
                   patterns  with  the same number because PCRE uses only the numbers when
         
     | 
| 
       6213 
6210 
     | 
    
         
             
                   matching. For this reason, an error is given at compile time if differ-
         
     | 
| 
       6214 
     | 
    
         
            -
                   ent 
     | 
| 
      
 6211 
     | 
    
         
            +
                   ent  names  are given to subpatterns with the same number. However, you
         
     | 
| 
       6215 
6212 
     | 
    
         
             
                   can always give the same name to subpatterns with the same number, even
         
     | 
| 
       6216 
6213 
     | 
    
         
             
                   when PCRE_DUPNAMES is not set.
         
     | 
| 
       6217 
6214 
     | 
    
         | 
| 
       6218 
6215 
     | 
    
         | 
| 
       6219 
6216 
     | 
    
         
             
            REPETITION
         
     | 
| 
       6220 
6217 
     | 
    
         | 
| 
       6221 
     | 
    
         
            -
                   Repetition 
     | 
| 
      
 6218 
     | 
    
         
            +
                   Repetition  is  specified  by  quantifiers, which can follow any of the
         
     | 
| 
       6222 
6219 
     | 
    
         
             
                   following items:
         
     | 
| 
       6223 
6220 
     | 
    
         | 
| 
       6224 
6221 
     | 
    
         
             
                     a literal data character
         
     | 
| 
         @@ -6232,17 +6229,17 @@ REPETITION 
     | 
|
| 
       6232 
6229 
     | 
    
         
             
                     a parenthesized subpattern (including assertions)
         
     | 
| 
       6233 
6230 
     | 
    
         
             
                     a subroutine call to a subpattern (recursive or otherwise)
         
     | 
| 
       6234 
6231 
     | 
    
         | 
| 
       6235 
     | 
    
         
            -
                   The 
     | 
| 
       6236 
     | 
    
         
            -
                   ber 
     | 
| 
       6237 
     | 
    
         
            -
                   (braces), 
     | 
| 
      
 6232 
     | 
    
         
            +
                   The general repetition quantifier specifies a minimum and maximum  num-
         
     | 
| 
      
 6233 
     | 
    
         
            +
                   ber  of  permitted matches, by giving the two numbers in curly brackets
         
     | 
| 
      
 6234 
     | 
    
         
            +
                   (braces), separated by a comma. The numbers must be  less  than  65536,
         
     | 
| 
       6238 
6235 
     | 
    
         
             
                   and the first must be less than or equal to the second. For example:
         
     | 
| 
       6239 
6236 
     | 
    
         | 
| 
       6240 
6237 
     | 
    
         
             
                     z{2,4}
         
     | 
| 
       6241 
6238 
     | 
    
         | 
| 
       6242 
     | 
    
         
            -
                   matches 
     | 
| 
       6243 
     | 
    
         
            -
                   special 
     | 
| 
       6244 
     | 
    
         
            -
                   present, 
     | 
| 
       6245 
     | 
    
         
            -
                   are 
     | 
| 
      
 6239 
     | 
    
         
            +
                   matches  "zz",  "zzz",  or  "zzzz". A closing brace on its own is not a
         
     | 
| 
      
 6240 
     | 
    
         
            +
                   special character. If the second number is omitted, but  the  comma  is
         
     | 
| 
      
 6241 
     | 
    
         
            +
                   present,  there  is  no upper limit; if the second number and the comma
         
     | 
| 
      
 6242 
     | 
    
         
            +
                   are both omitted, the quantifier specifies an exact number of  required
         
     | 
| 
       6246 
6243 
     | 
    
         
             
                   matches. Thus
         
     | 
| 
       6247 
6244 
     | 
    
         | 
| 
       6248 
6245 
     | 
    
         
             
                     [aeiou]{3,}
         
     | 
| 
         @@ -6251,50 +6248,50 @@ REPETITION 
     | 
|
| 
       6251 
6248 
     | 
    
         | 
| 
       6252 
6249 
     | 
    
         
             
                     \d{8}
         
     | 
| 
       6253 
6250 
     | 
    
         | 
| 
       6254 
     | 
    
         
            -
                   matches 
     | 
| 
       6255 
     | 
    
         
            -
                   position 
     | 
| 
       6256 
     | 
    
         
            -
                   the 
     | 
| 
      
 6251 
     | 
    
         
            +
                   matches  exactly  8  digits. An opening curly bracket that appears in a
         
     | 
| 
      
 6252 
     | 
    
         
            +
                   position where a quantifier is not allowed, or one that does not  match
         
     | 
| 
      
 6253 
     | 
    
         
            +
                   the  syntax of a quantifier, is taken as a literal character. For exam-
         
     | 
| 
       6257 
6254 
     | 
    
         
             
                   ple, {,6} is not a quantifier, but a literal string of four characters.
         
     | 
| 
       6258 
6255 
     | 
    
         | 
| 
       6259 
6256 
     | 
    
         
             
                   In UTF modes, quantifiers apply to characters rather than to individual
         
     | 
| 
       6260 
     | 
    
         
            -
                   data 
     | 
| 
      
 6257 
     | 
    
         
            +
                   data  units. Thus, for example, \x{100}{2} matches two characters, each
         
     | 
| 
       6261 
6258 
     | 
    
         
             
                   of which is represented by a two-byte sequence in a UTF-8 string. Simi-
         
     | 
| 
       6262 
     | 
    
         
            -
                   larly, 
     | 
| 
       6263 
     | 
    
         
            -
                   which 
     | 
| 
      
 6259 
     | 
    
         
            +
                   larly,  \X{3} matches three Unicode extended grapheme clusters, each of
         
     | 
| 
      
 6260 
     | 
    
         
            +
                   which may be several data units long (and  they  may  be  of  different
         
     | 
| 
       6264 
6261 
     | 
    
         
             
                   lengths).
         
     | 
| 
       6265 
6262 
     | 
    
         | 
| 
       6266 
6263 
     | 
    
         
             
                   The quantifier {0} is permitted, causing the expression to behave as if
         
     | 
| 
       6267 
6264 
     | 
    
         
             
                   the previous item and the quantifier were not present. This may be use-
         
     | 
| 
       6268 
     | 
    
         
            -
                   ful 
     | 
| 
      
 6265 
     | 
    
         
            +
                   ful  for  subpatterns that are referenced as subroutines from elsewhere
         
     | 
| 
       6269 
6266 
     | 
    
         
             
                   in the pattern (but see also the section entitled "Defining subpatterns
         
     | 
| 
       6270 
     | 
    
         
            -
                   for 
     | 
| 
      
 6267 
     | 
    
         
            +
                   for  use  by  reference only" below). Items other than subpatterns that
         
     | 
| 
       6271 
6268 
     | 
    
         
             
                   have a {0} quantifier are omitted from the compiled pattern.
         
     | 
| 
       6272 
6269 
     | 
    
         | 
| 
       6273 
     | 
    
         
            -
                   For 
     | 
| 
      
 6270 
     | 
    
         
            +
                   For convenience, the three most common quantifiers have  single-charac-
         
     | 
| 
       6274 
6271 
     | 
    
         
             
                   ter abbreviations:
         
     | 
| 
       6275 
6272 
     | 
    
         | 
| 
       6276 
6273 
     | 
    
         
             
                     *    is equivalent to {0,}
         
     | 
| 
       6277 
6274 
     | 
    
         
             
                     +    is equivalent to {1,}
         
     | 
| 
       6278 
6275 
     | 
    
         
             
                     ?    is equivalent to {0,1}
         
     | 
| 
       6279 
6276 
     | 
    
         | 
| 
       6280 
     | 
    
         
            -
                   It 
     | 
| 
      
 6277 
     | 
    
         
            +
                   It  is  possible  to construct infinite loops by following a subpattern
         
     | 
| 
       6281 
6278 
     | 
    
         
             
                   that can match no characters with a quantifier that has no upper limit,
         
     | 
| 
       6282 
6279 
     | 
    
         
             
                   for example:
         
     | 
| 
       6283 
6280 
     | 
    
         | 
| 
       6284 
6281 
     | 
    
         
             
                     (a?)*
         
     | 
| 
       6285 
6282 
     | 
    
         | 
| 
       6286 
6283 
     | 
    
         
             
                   Earlier versions of Perl and PCRE used to give an error at compile time
         
     | 
| 
       6287 
     | 
    
         
            -
                   for 
     | 
| 
       6288 
     | 
    
         
            -
                   useful, 
     | 
| 
       6289 
     | 
    
         
            -
                   subpattern 
     | 
| 
      
 6284 
     | 
    
         
            +
                   for such patterns. However, because there are cases where this  can  be
         
     | 
| 
      
 6285 
     | 
    
         
            +
                   useful,  such  patterns  are now accepted, but if any repetition of the
         
     | 
| 
      
 6286 
     | 
    
         
            +
                   subpattern does in fact match no characters, the loop is forcibly  bro-
         
     | 
| 
       6290 
6287 
     | 
    
         
             
                   ken.
         
     | 
| 
       6291 
6288 
     | 
    
         | 
| 
       6292 
     | 
    
         
            -
                   By 
     | 
| 
       6293 
     | 
    
         
            -
                   as 
     | 
| 
       6294 
     | 
    
         
            -
                   causing 
     | 
| 
      
 6289 
     | 
    
         
            +
                   By  default,  the quantifiers are "greedy", that is, they match as much
         
     | 
| 
      
 6290 
     | 
    
         
            +
                   as possible (up to the maximum  number  of  permitted  times),  without
         
     | 
| 
      
 6291 
     | 
    
         
            +
                   causing  the  rest of the pattern to fail. The classic example of where
         
     | 
| 
       6295 
6292 
     | 
    
         
             
                   this gives problems is in trying to match comments in C programs. These
         
     | 
| 
       6296 
     | 
    
         
            -
                   appear 
     | 
| 
       6297 
     | 
    
         
            -
                   characters 
     | 
| 
      
 6293 
     | 
    
         
            +
                   appear  between  /*  and  */ and within the comment, individual * and /
         
     | 
| 
      
 6294 
     | 
    
         
            +
                   characters may appear. An attempt to match C comments by  applying  the
         
     | 
| 
       6298 
6295 
     | 
    
         
             
                   pattern
         
     | 
| 
       6299 
6296 
     | 
    
         | 
| 
       6300 
6297 
     | 
    
         
             
                     /\*.*\*/
         
     | 
| 
         @@ -6303,19 +6300,19 @@ REPETITION 
     | 
|
| 
       6303 
6300 
     | 
    
         | 
| 
       6304 
6301 
     | 
    
         
             
                     /* first comment */  not comment  /* second comment */
         
     | 
| 
       6305 
6302 
     | 
    
         | 
| 
       6306 
     | 
    
         
            -
                   fails, 
     | 
| 
      
 6303 
     | 
    
         
            +
                   fails,  because it matches the entire string owing to the greediness of
         
     | 
| 
       6307 
6304 
     | 
    
         
             
                   the .*  item.
         
     | 
| 
       6308 
6305 
     | 
    
         | 
| 
       6309 
     | 
    
         
            -
                   However, 
     | 
| 
      
 6306 
     | 
    
         
            +
                   However, if a quantifier is followed by a question mark, it  ceases  to
         
     | 
| 
       6310 
6307 
     | 
    
         
             
                   be greedy, and instead matches the minimum number of times possible, so
         
     | 
| 
       6311 
6308 
     | 
    
         
             
                   the pattern
         
     | 
| 
       6312 
6309 
     | 
    
         | 
| 
       6313 
6310 
     | 
    
         
             
                     /\*.*?\*/
         
     | 
| 
       6314 
6311 
     | 
    
         | 
| 
       6315 
     | 
    
         
            -
                   does 
     | 
| 
       6316 
     | 
    
         
            -
                   quantifiers 
     | 
| 
       6317 
     | 
    
         
            -
                   matches. 
     | 
| 
       6318 
     | 
    
         
            -
                   quantifier 
     | 
| 
      
 6312 
     | 
    
         
            +
                   does the right thing with the C comments. The meaning  of  the  various
         
     | 
| 
      
 6313 
     | 
    
         
            +
                   quantifiers  is  not  otherwise  changed,  just the preferred number of
         
     | 
| 
      
 6314 
     | 
    
         
            +
                   matches.  Do not confuse this use of question mark with its  use  as  a
         
     | 
| 
      
 6315 
     | 
    
         
            +
                   quantifier  in its own right. Because it has two uses, it can sometimes
         
     | 
| 
       6319 
6316 
     | 
    
         
             
                   appear doubled, as in
         
     | 
| 
       6320 
6317 
     | 
    
         | 
| 
       6321 
6318 
     | 
    
         
             
                     \d??\d
         
     | 
| 
         @@ -6323,45 +6320,45 @@ REPETITION 
     | 
|
| 
       6323 
6320 
     | 
    
         
             
                   which matches one digit by preference, but can match two if that is the
         
     | 
| 
       6324 
6321 
     | 
    
         
             
                   only way the rest of the pattern matches.
         
     | 
| 
       6325 
6322 
     | 
    
         | 
| 
       6326 
     | 
    
         
            -
                   If 
     | 
| 
       6327 
     | 
    
         
            -
                   Perl), 
     | 
| 
       6328 
     | 
    
         
            -
                   can 
     | 
| 
      
 6323 
     | 
    
         
            +
                   If  the PCRE_UNGREEDY option is set (an option that is not available in
         
     | 
| 
      
 6324 
     | 
    
         
            +
                   Perl), the quantifiers are not greedy by default, but  individual  ones
         
     | 
| 
      
 6325 
     | 
    
         
            +
                   can  be  made  greedy  by following them with a question mark. In other
         
     | 
| 
       6329 
6326 
     | 
    
         
             
                   words, it inverts the default behaviour.
         
     | 
| 
       6330 
6327 
     | 
    
         | 
| 
       6331 
     | 
    
         
            -
                   When 
     | 
| 
       6332 
     | 
    
         
            -
                   count 
     | 
| 
       6333 
     | 
    
         
            -
                   required 
     | 
| 
      
 6328 
     | 
    
         
            +
                   When a parenthesized subpattern is quantified  with  a  minimum  repeat
         
     | 
| 
      
 6329 
     | 
    
         
            +
                   count  that is greater than 1 or with a limited maximum, more memory is
         
     | 
| 
      
 6330 
     | 
    
         
            +
                   required for the compiled pattern, in proportion to  the  size  of  the
         
     | 
| 
       6334 
6331 
     | 
    
         
             
                   minimum or maximum.
         
     | 
| 
       6335 
6332 
     | 
    
         | 
| 
       6336 
6333 
     | 
    
         
             
                   If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equiv-
         
     | 
| 
       6337 
     | 
    
         
            -
                   alent 
     | 
| 
       6338 
     | 
    
         
            -
                   the 
     | 
| 
       6339 
     | 
    
         
            -
                   tried 
     | 
| 
       6340 
     | 
    
         
            -
                   is 
     | 
| 
       6341 
     | 
    
         
            -
                   first. 
     | 
| 
      
 6334 
     | 
    
         
            +
                   alent to Perl's /s) is set, thus allowing the dot  to  match  newlines,
         
     | 
| 
      
 6335 
     | 
    
         
            +
                   the  pattern  is  implicitly anchored, because whatever follows will be
         
     | 
| 
      
 6336 
     | 
    
         
            +
                   tried against every character position in the subject string, so  there
         
     | 
| 
      
 6337 
     | 
    
         
            +
                   is  no  point  in  retrying the overall match at any position after the
         
     | 
| 
      
 6338 
     | 
    
         
            +
                   first. PCRE normally treats such a pattern as though it  were  preceded
         
     | 
| 
       6342 
6339 
     | 
    
         
             
                   by \A.
         
     | 
| 
       6343 
6340 
     | 
    
         | 
| 
       6344 
     | 
    
         
            -
                   In 
     | 
| 
       6345 
     | 
    
         
            -
                   lines, 
     | 
| 
      
 6341 
     | 
    
         
            +
                   In  cases  where  it  is known that the subject string contains no new-
         
     | 
| 
      
 6342 
     | 
    
         
            +
                   lines, it is worth setting PCRE_DOTALL in order to  obtain  this  opti-
         
     | 
| 
       6346 
6343 
     | 
    
         
             
                   mization, or alternatively using ^ to indicate anchoring explicitly.
         
     | 
| 
       6347 
6344 
     | 
    
         | 
| 
       6348 
     | 
    
         
            -
                   However, 
     | 
| 
      
 6345 
     | 
    
         
            +
                   However,  there  are  some cases where the optimization cannot be used.
         
     | 
| 
       6349 
6346 
     | 
    
         
             
                   When .*  is inside capturing parentheses that are the subject of a back
         
     | 
| 
       6350 
6347 
     | 
    
         
             
                   reference elsewhere in the pattern, a match at the start may fail where
         
     | 
| 
       6351 
6348 
     | 
    
         
             
                   a later one succeeds. Consider, for example:
         
     | 
| 
       6352 
6349 
     | 
    
         | 
| 
       6353 
6350 
     | 
    
         
             
                     (.*)abc\1
         
     | 
| 
       6354 
6351 
     | 
    
         | 
| 
       6355 
     | 
    
         
            -
                   If 
     | 
| 
      
 6352 
     | 
    
         
            +
                   If the subject is "xyz123abc123" the match point is the fourth  charac-
         
     | 
| 
       6356 
6353 
     | 
    
         
             
                   ter. For this reason, such a pattern is not implicitly anchored.
         
     | 
| 
       6357 
6354 
     | 
    
         | 
| 
       6358 
     | 
    
         
            -
                   Another 
     | 
| 
       6359 
     | 
    
         
            -
                   ing 
     | 
| 
      
 6355 
     | 
    
         
            +
                   Another  case where implicit anchoring is not applied is when the lead-
         
     | 
| 
      
 6356 
     | 
    
         
            +
                   ing .* is inside an atomic group. Once again, a match at the start  may
         
     | 
| 
       6360 
6357 
     | 
    
         
             
                   fail where a later one succeeds. Consider this pattern:
         
     | 
| 
       6361 
6358 
     | 
    
         | 
| 
       6362 
6359 
     | 
    
         
             
                     (?>.*?a)b
         
     | 
| 
       6363 
6360 
     | 
    
         | 
| 
       6364 
     | 
    
         
            -
                   It 
     | 
| 
      
 6361 
     | 
    
         
            +
                   It  matches "ab" in the subject "aab". The use of the backtracking con-
         
     | 
| 
       6365 
6362 
     | 
    
         
             
                   trol verbs (*PRUNE) and (*SKIP) also disables this optimization.
         
     | 
| 
       6366 
6363 
     | 
    
         | 
| 
       6367 
6364 
     | 
    
         
             
                   When a capturing subpattern is repeated, the value captured is the sub-
         
     | 
| 
         @@ -6370,8 +6367,8 @@ REPETITION 
     | 
|
| 
       6370 
6367 
     | 
    
         
             
                     (tweedle[dume]{3}\s*)+
         
     | 
| 
       6371 
6368 
     | 
    
         | 
| 
       6372 
6369 
     | 
    
         
             
                   has matched "tweedledum tweedledee" the value of the captured substring
         
     | 
| 
       6373 
     | 
    
         
            -
                   is 
     | 
| 
       6374 
     | 
    
         
            -
                   the 
     | 
| 
      
 6370 
     | 
    
         
            +
                   is "tweedledee". However, if there are  nested  capturing  subpatterns,
         
     | 
| 
      
 6371 
     | 
    
         
            +
                   the  corresponding captured values may have been set in previous itera-
         
     | 
| 
       6375 
6372 
     | 
    
         
             
                   tions. For example, after
         
     | 
| 
       6376 
6373 
     | 
    
         | 
| 
       6377 
6374 
     | 
    
         
             
                     /(a|(b))+/
         
     | 
| 
         @@ -6381,53 +6378,53 @@ REPETITION 
     | 
|
| 
       6381 
6378 
     | 
    
         | 
| 
       6382 
6379 
     | 
    
         
             
            ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
         
     | 
| 
       6383 
6380 
     | 
    
         | 
| 
       6384 
     | 
    
         
            -
                   With 
     | 
| 
       6385 
     | 
    
         
            -
                   repetition, 
     | 
| 
       6386 
     | 
    
         
            -
                   to 
     | 
| 
       6387 
     | 
    
         
            -
                   rest 
     | 
| 
       6388 
     | 
    
         
            -
                   either 
     | 
| 
       6389 
     | 
    
         
            -
                   than 
     | 
| 
      
 6381 
     | 
    
         
            +
                   With both maximizing ("greedy") and minimizing ("ungreedy"  or  "lazy")
         
     | 
| 
      
 6382 
     | 
    
         
            +
                   repetition,  failure  of what follows normally causes the repeated item
         
     | 
| 
      
 6383 
     | 
    
         
            +
                   to be re-evaluated to see if a different number of repeats  allows  the
         
     | 
| 
      
 6384 
     | 
    
         
            +
                   rest  of  the pattern to match. Sometimes it is useful to prevent this,
         
     | 
| 
      
 6385 
     | 
    
         
            +
                   either to change the nature of the match, or to cause it to fail earlier
         
     | 
| 
      
 6386 
     | 
    
         
            +
                   than  it otherwise might, when the author of the pattern knows there is
         
     | 
| 
       6390 
6387 
     | 
    
         
             
                   no point in carrying on.
         
     | 
| 
       6391 
6388 
     | 
    
         | 
| 
       6392 
     | 
    
         
            -
                   Consider, 
     | 
| 
      
 6389 
     | 
    
         
            +
                   Consider, for example, the pattern \d+foo when applied to  the  subject
         
     | 
| 
       6393 
6390 
     | 
    
         
             
                   line
         
     | 
| 
       6394 
6391 
     | 
    
         | 
| 
       6395 
6392 
     | 
    
         
             
                     123456bar
         
     | 
| 
       6396 
6393 
     | 
    
         | 
| 
       6397 
6394 
     | 
    
         
             
                   After matching all 6 digits and then failing to match "foo", the normal
         
     | 
| 
       6398 
     | 
    
         
            -
                   action 
     | 
| 
       6399 
     | 
    
         
            -
                   \d+ 
     | 
| 
       6400 
     | 
    
         
            -
                   "Atomic 
     | 
| 
       6401 
     | 
    
         
            -
                   the 
     | 
| 
      
 6395 
     | 
    
         
            +
                   action of the matcher is to try again with only 5 digits  matching  the
         
     | 
| 
      
 6396 
     | 
    
         
            +
                   \d+  item,  and  then  with  4,  and  so on, before ultimately failing.
         
     | 
| 
      
 6397 
     | 
    
         
            +
                   "Atomic grouping" (a term taken from Jeffrey  Friedl's  book)  provides
         
     | 
| 
      
 6398 
     | 
    
         
            +
                   the  means for specifying that once a subpattern has matched, it is not
         
     | 
| 
       6402 
6399 
     | 
    
         
             
                   to be re-evaluated in this way.
         
     | 
| 
       6403 
6400 
     | 
    
         | 
| 
       6404 
     | 
    
         
            -
                   If 
     | 
| 
       6405 
     | 
    
         
            -
                   up 
     | 
| 
      
 6401 
     | 
    
         
            +
                   If we use atomic grouping for the previous example, the  matcher  gives
         
     | 
| 
      
 6402 
     | 
    
         
            +
                   up  immediately  on failing to match "foo" the first time. The notation
         
     | 
| 
       6406 
6403 
     | 
    
         
             
                   is a kind of special parenthesis, starting with (?> as in this example:
         
     | 
| 
       6407 
6404 
     | 
    
         | 
| 
       6408 
6405 
     | 
    
         
             
                     (?>\d+)foo
         
     | 
| 
       6409 
6406 
     | 
    
         | 
| 
       6410 
     | 
    
         
            -
                   This 
     | 
| 
       6411 
     | 
    
         
            -
                   tains 
     | 
| 
       6412 
     | 
    
         
            -
                   prevented 
     | 
| 
      
 6407 
     | 
    
         
            +
                   This kind of parenthesis "locks up" the  part of the  pattern  it  con-
         
     | 
| 
      
 6408 
     | 
    
         
            +
                   tains  once  it  has matched, and a failure further into the pattern is
         
     | 
| 
      
 6409 
     | 
    
         
            +
                   prevented from backtracking into it. Backtracking past it  to  previous
         
     | 
| 
       6413 
6410 
     | 
    
         
             
                   items, however, works as normal.
         
     | 
| 
       6414 
6411 
     | 
    
         | 
| 
       6415 
     | 
    
         
            -
                   An 
     | 
| 
       6416 
     | 
    
         
            -
                   the 
     | 
| 
      
 6412 
     | 
    
         
            +
                   An  alternative  description  is that a subpattern of this type matches
         
     | 
| 
      
 6413 
     | 
    
         
            +
                   the string of characters that an  identical  standalone  pattern  would
         
     | 
| 
       6417 
6414 
     | 
    
         
             
                   match, if anchored at the current point in the subject string.
         
     | 
| 
       6418 
6415 
     | 
    
         | 
| 
       6419 
6416 
     | 
    
         
             
                   Atomic grouping subpatterns are not capturing subpatterns. Simple cases
         
     | 
| 
       6420 
6417 
     | 
    
         
             
                   such as the above example can be thought of as a maximizing repeat that
         
     | 
| 
       6421 
     | 
    
         
            -
                   must 
     | 
| 
       6422 
     | 
    
         
            -
                   pared 
     | 
| 
      
 6418 
     | 
    
         
            +
                   must  swallow  everything  it can. So, while both \d+ and \d+? are pre-
         
     | 
| 
      
 6419 
     | 
    
         
            +
                   pared to adjust the number of digits they match in order  to  make  the
         
     | 
| 
       6423 
6420 
     | 
    
         
             
                   rest of the pattern match, (?>\d+) can only match an entire sequence of
         
     | 
| 
       6424 
6421 
     | 
    
         
             
                   digits.
         
     | 
| 
       6425 
6422 
     | 
    
         | 
| 
       6426 
     | 
    
         
            -
                   Atomic 
     | 
| 
       6427 
     | 
    
         
            -
                   subpatterns, 
     | 
| 
      
 6423 
     | 
    
         
            +
                   Atomic groups in general can of course contain arbitrarily  complicated
         
     | 
| 
      
 6424 
     | 
    
         
            +
                   subpatterns,  and  can  be  nested. However, when the subpattern for an
         
     | 
| 
       6428 
6425 
     | 
    
         
             
                   atomic group is just a single repeated item, as in the example above, a
         
     | 
| 
       6429 
     | 
    
         
            -
                   simpler 
     | 
| 
       6430 
     | 
    
         
            -
                   consists 
     | 
| 
      
 6426 
     | 
    
         
            +
                   simpler  notation, called  a "possessive quantifier", can be used. This
         
     | 
| 
      
 6427 
     | 
    
         
            +
                   consists of an additional + character  following  a  quantifier.  Using
         
     | 
| 
       6431 
6428 
     | 
    
         
             
                   this notation, the previous example can be rewritten as
         
     | 
| 
       6432 
6429 
     | 
    
         | 
| 
       6433 
6430 
     | 
    
         
             
                     \d++foo
         
     | 
| 
         @@ -6437,45 +6434,45 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS 
     | 
|
| 
       6437 
6434 
     | 
    
         | 
| 
       6438 
6435 
     | 
    
         
             
                     (abc|xyz){2,3}+
         
     | 
| 
       6439 
6436 
     | 
    
         | 
| 
       6440 
     | 
    
         
            -
                   Possessive 
     | 
| 
      
 6437 
     | 
    
         
            +
                   Possessive  quantifiers  are  always  greedy;  the   setting   of   the
         
     | 
| 
       6441 
6438 
     | 
    
         
             
                   PCRE_UNGREEDY option is ignored. They are a convenient notation for the
         
     | 
| 
       6442 
     | 
    
         
            -
                   simpler 
     | 
| 
       6443 
     | 
    
         
            -
                   meaning 
     | 
| 
       6444 
     | 
    
         
            -
                   though 
     | 
| 
      
 6439 
     | 
    
         
            +
                   simpler forms of atomic group. However, there is no difference  in  the
         
     | 
| 
      
 6440 
     | 
    
         
            +
                   meaning  of  a  possessive  quantifier and the equivalent atomic group,
         
     | 
| 
      
 6441 
     | 
    
         
            +
                   though there may be a performance  difference;  possessive  quantifiers
         
     | 
| 
       6445 
6442 
     | 
    
         
             
                   should be slightly faster.
         
     | 
| 
       6446 
6443 
     | 
    
         | 
| 
       6447 
     | 
    
         
            -
                   The 
     | 
| 
       6448 
     | 
    
         
            -
                   tax. 
     | 
| 
      
 6444 
     | 
    
         
            +
                   The  possessive  quantifier syntax is an extension to the Perl 5.8 syn-
         
     | 
| 
      
 6445 
     | 
    
         
            +
                   tax.  Jeffrey Friedl originated the idea (and the name)  in  the  first
         
     | 
| 
       6449 
6446 
     | 
    
         
             
                   edition of his book. Mike McCloskey liked it, so implemented it when he
         
     | 
| 
       6450 
     | 
    
         
            -
                   built 
     | 
| 
      
 6447 
     | 
    
         
            +
                   built Sun's Java package, and PCRE copied it from there. It  ultimately
         
     | 
| 
       6451 
6448 
     | 
    
         
             
                   found its way into Perl at release 5.10.
         
     | 
| 
       6452 
6449 
     | 
    
         | 
| 
       6453 
6450 
     | 
    
         
             
                   PCRE has an optimization that automatically "possessifies" certain sim-
         
     | 
| 
       6454 
     | 
    
         
            -
                   ple 
     | 
| 
       6455 
     | 
    
         
            -
                   A++B 
     | 
| 
      
 6451 
     | 
    
         
            +
                   ple pattern constructs. For example, the sequence  A+B  is  treated  as
         
     | 
| 
      
 6452 
     | 
    
         
            +
                   A++B  because  there is no point in backtracking into a sequence of A's
         
     | 
| 
       6456 
6453 
     | 
    
         
             
                   when B must follow.
         
     | 
| 
       6457 
6454 
     | 
    
         | 
| 
       6458 
     | 
    
         
            -
                   When 
     | 
| 
       6459 
     | 
    
         
            -
                   can 
     | 
| 
       6460 
     | 
    
         
            -
                   atomic 
     | 
| 
      
 6455 
     | 
    
         
            +
                   When a pattern contains an unlimited repeat inside  a  subpattern  that
         
     | 
| 
      
 6456 
     | 
    
         
            +
                   can  itself  be  repeated  an  unlimited number of times, the use of an
         
     | 
| 
      
 6457 
     | 
    
         
            +
                   atomic group is the only way to avoid some  failing  matches  taking  a
         
     | 
| 
       6461 
6458 
     | 
    
         
             
                   very long time indeed. The pattern
         
     | 
| 
       6462 
6459 
     | 
    
         | 
| 
       6463 
6460 
     | 
    
         
             
                     (\D+|<\d+>)*[!?]
         
     | 
| 
       6464 
6461 
     | 
    
         | 
| 
       6465 
     | 
    
         
            -
                   matches 
     | 
| 
       6466 
     | 
    
         
            -
                   digits, 
     | 
| 
      
 6462 
     | 
    
         
            +
                   matches  an  unlimited number of substrings that either consist of non-
         
     | 
| 
      
 6463 
     | 
    
         
            +
                   digits, or digits enclosed in <>, followed by either ! or  ?.  When  it
         
     | 
| 
       6467 
6464 
     | 
    
         
             
                   matches, it runs quickly. However, if it is applied to
         
     | 
| 
       6468 
6465 
     | 
    
         | 
| 
       6469 
6466 
     | 
    
         
             
                     aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
         
     | 
| 
       6470 
6467 
     | 
    
         | 
| 
       6471 
     | 
    
         
            -
                   it 
     | 
| 
       6472 
     | 
    
         
            -
                   string 
     | 
| 
       6473 
     | 
    
         
            -
                   * 
     | 
| 
       6474 
     | 
    
         
            -
                   example 
     | 
| 
       6475 
     | 
    
         
            -
                   both 
     | 
| 
       6476 
     | 
    
         
            -
                   when 
     | 
| 
       6477 
     | 
    
         
            -
                   ter 
     | 
| 
       6478 
     | 
    
         
            -
                   in 
     | 
| 
      
 6468 
     | 
    
         
            +
                   it  takes  a  long  time  before reporting failure. This is because the
         
     | 
| 
      
 6469 
     | 
    
         
            +
                   string can be divided between the internal \D+ repeat and the  external
         
     | 
| 
      
 6470 
     | 
    
         
            +
                   *  repeat  in  a  large  number of ways, and all have to be tried. (The
         
     | 
| 
      
 6471 
     | 
    
         
            +
                   example uses [!?] rather than a single character at  the  end,  because
         
     | 
| 
      
 6472 
     | 
    
         
            +
                   both  PCRE  and  Perl have an optimization that allows for fast failure
         
     | 
| 
      
 6473 
     | 
    
         
            +
                   when a single character is used. They remember the last single  charac-
         
     | 
| 
      
 6474 
     | 
    
         
            +
                   ter  that  is required for a match, and fail early if it is not present
         
     | 
| 
      
 6475 
     | 
    
         
            +
                   in the string.) If the pattern is changed so that  it  uses  an  atomic
         
     | 
| 
       6479 
6476 
     | 
    
         
             
                   group, like this:
         
     | 
| 
       6480 
6477 
     | 
    
         | 
| 
       6481 
6478 
     | 
    
         
             
                     ((?>\D+)|<\d+>)*[!?]
         
     | 
| 
         @@ -6487,28 +6484,28 @@ BACK REFERENCES 
     | 
|
| 
       6487 
6484 
     | 
    
         | 
| 
       6488 
6485 
     | 
    
         
             
                   Outside a character class, a backslash followed by a digit greater than
         
     | 
| 
       6489 
6486 
     | 
    
         
             
                   0 (and possibly further digits) is a back reference to a capturing sub-
         
     | 
| 
       6490 
     | 
    
         
            -
                   pattern 
     | 
| 
      
 6487 
     | 
    
         
            +
                   pattern  earlier  (that is, to its left) in the pattern, provided there
         
     | 
| 
       6491 
6488 
     | 
    
         
             
                   have been that many previous capturing left parentheses.
         
     | 
| 
       6492 
6489 
     | 
    
         | 
| 
       6493 
6490 
     | 
    
         
             
                   However, if the decimal number following the backslash is less than 10,
         
     | 
| 
       6494 
     | 
    
         
            -
                   it 
     | 
| 
       6495 
     | 
    
         
            -
                   there 
     | 
| 
       6496 
     | 
    
         
            -
                   tern. 
     | 
| 
       6497 
     | 
    
         
            -
                   to 
     | 
| 
       6498 
     | 
    
         
            -
                   reference" 
     | 
| 
       6499 
     | 
    
         
            -
                   and 
     | 
| 
      
 6491 
     | 
    
         
            +
                   it  is  always  taken  as a back reference, and causes an error only if
         
     | 
| 
      
 6492 
     | 
    
         
            +
                   there are not that many capturing left parentheses in the  entire  pat-
         
     | 
| 
      
 6493 
     | 
    
         
            +
                   tern.  In  other words, the parentheses that are referenced need not be
         
     | 
| 
      
 6494 
     | 
    
         
            +
                   to the left of the reference for numbers less than 10. A "forward  back
         
     | 
| 
      
 6495 
     | 
    
         
            +
                   reference"  of  this  type can make sense when a repetition is involved
         
     | 
| 
      
 6496 
     | 
    
         
            +
                   and the subpattern to the right has participated in an  earlier  itera-
         
     | 
| 
       6500 
6497 
     | 
    
         
             
                   tion.
         
     | 
| 
       6501 
6498 
     | 
    
         | 
| 
       6502 
     | 
    
         
            -
                   It 
     | 
| 
       6503 
     | 
    
         
            -
                   subpattern 
     | 
| 
       6504 
     | 
    
         
            -
                   sequence 
     | 
| 
      
 6499 
     | 
    
         
            +
                   It  is  not  possible to have a numerical "forward back reference" to a
         
     | 
| 
      
 6500 
     | 
    
         
            +
                   subpattern whose number is 10 or  more  using  this  syntax  because  a
         
     | 
| 
      
 6501 
     | 
    
         
            +
                   sequence  such  as  \50 is interpreted as a character defined in octal.
         
     | 
| 
       6505 
6502 
     | 
    
         
             
                   See the subsection entitled "Non-printing characters" above for further
         
     | 
| 
       6506 
     | 
    
         
            -
                   details 
     | 
| 
       6507 
     | 
    
         
            -
                   such 
     | 
| 
      
 6503 
     | 
    
         
            +
                   details  of  the  handling of digits following a backslash. There is no
         
     | 
| 
      
 6504 
     | 
    
         
            +
                   such problem when named parentheses are used. A back reference  to  any
         
     | 
| 
       6508 
6505 
     | 
    
         
             
                   subpattern is possible using named parentheses (see below).
         
     | 
| 
       6509 
6506 
     | 
    
         | 
| 
       6510 
     | 
    
         
            -
                   Another 
     | 
| 
       6511 
     | 
    
         
            -
                   following 
     | 
| 
      
 6507 
     | 
    
         
            +
                   Another  way  of  avoiding  the ambiguity inherent in the use of digits
         
     | 
| 
      
 6508 
     | 
    
         
            +
                   following a backslash is to use the \g  escape  sequence.  This  escape
         
     | 
| 
       6512 
6509 
     | 
    
         
             
                   must be followed by an unsigned number or a negative number, optionally
         
     | 
| 
       6513 
6510 
     | 
    
         
             
                   enclosed in braces. These examples are all identical:
         
     | 
| 
       6514 
6511 
     | 
    
         | 
| 
         @@ -6516,7 +6513,7 @@ BACK REFERENCES 
     | 
|
| 
       6516 
6513 
     | 
    
         
             
                     (ring), \g1
         
     | 
| 
       6517 
6514 
     | 
    
         
             
                     (ring), \g{1}
         
     | 
| 
       6518 
6515 
     | 
    
         | 
| 
       6519 
     | 
    
         
            -
                   An 
     | 
| 
      
 6516 
     | 
    
         
            +
                   An unsigned number specifies an absolute reference without the  ambigu-
         
     | 
| 
       6520 
6517 
     | 
    
         
             
                   ity that is present in the older syntax. It is also useful when literal
         
     | 
| 
       6521 
6518 
     | 
    
         
             
                   digits follow the reference. A negative number is a relative reference.
         
     | 
| 
       6522 
6519 
     | 
    
         
             
                   Consider this example:
         
     | 
| 
         @@ -6525,33 +6522,33 @@ BACK REFERENCES 
     | 
|
| 
       6525 
6522 
     | 
    
         | 
| 
       6526 
6523 
     | 
    
         
             
                   The sequence \g{-1} is a reference to the most recently started captur-
         
     | 
| 
       6527 
6524 
     | 
    
         
             
                   ing subpattern before \g, that is, it is equivalent to \2 in this exam-
         
     | 
| 
       6528 
     | 
    
         
            -
                   ple. 
     | 
| 
       6529 
     | 
    
         
            -
                   references 
     | 
| 
       6530 
     | 
    
         
            -
                   are 
     | 
| 
      
 6525 
     | 
    
         
            +
                   ple.   Similarly, \g{-2} would be equivalent to \1. The use of relative
         
     | 
| 
      
 6526 
     | 
    
         
            +
                   references can be helpful in long patterns, and also in  patterns  that
         
     | 
| 
      
 6527 
     | 
    
         
            +
                   are  created  by  joining  together  fragments  that contain references
         
     | 
| 
       6531 
6528 
     | 
    
         
             
                   within themselves.
         
     | 
| 
       6532 
6529 
     | 
    
         | 
| 
       6533 
     | 
    
         
            -
                   A 
     | 
| 
       6534 
     | 
    
         
            -
                   pattern 
     | 
| 
      
 6530 
     | 
    
         
            +
                   A back reference matches whatever actually matched the  capturing  sub-
         
     | 
| 
      
 6531 
     | 
    
         
            +
                   pattern  in  the  current subject string, rather than anything matching
         
     | 
| 
       6535 
6532 
     | 
    
         
             
                   the subpattern itself (see "Subpatterns as subroutines" below for a way
         
     | 
| 
       6536 
6533 
     | 
    
         
             
                   of doing that). So the pattern
         
     | 
| 
       6537 
6534 
     | 
    
         | 
| 
       6538 
6535 
     | 
    
         
             
                     (sens|respons)e and \1ibility
         
     | 
| 
       6539 
6536 
     | 
    
         | 
| 
       6540 
     | 
    
         
            -
                   matches 
     | 
| 
       6541 
     | 
    
         
            -
                   not 
     | 
| 
       6542 
     | 
    
         
            -
                   time 
     | 
| 
      
 6537 
     | 
    
         
            +
                   matches  "sense and sensibility" and "response and responsibility", but
         
     | 
| 
      
 6538 
     | 
    
         
            +
                   not "sense and responsibility". If caseful matching is in force at  the
         
     | 
| 
      
 6539 
     | 
    
         
            +
                   time  of the back reference, the case of letters is relevant. For exam-
         
     | 
| 
       6543 
6540 
     | 
    
         
             
                   ple,
         
     | 
| 
       6544 
6541 
     | 
    
         | 
| 
       6545 
6542 
     | 
    
         
             
                     ((?i)rah)\s+\1
         
     | 
| 
       6546 
6543 
     | 
    
         | 
| 
       6547 
     | 
    
         
            -
                   matches 
     | 
| 
      
 6544 
     | 
    
         
            +
                   matches "rah rah" and "RAH RAH", but not "RAH  rah",  even  though  the
         
     | 
| 
       6548 
6545 
     | 
    
         
             
                   original capturing subpattern is matched caselessly.
         
     | 
| 
       6549 
6546 
     | 
    
         | 
| 
       6550 
     | 
    
         
            -
                   There 
     | 
| 
       6551 
     | 
    
         
            -
                   subpatterns. 
     | 
| 
       6552 
     | 
    
         
            -
                   \k'name' 
     | 
| 
      
 6547 
     | 
    
         
            +
                   There  are  several  different ways of writing back references to named
         
     | 
| 
      
 6548 
     | 
    
         
            +
                   subpatterns. The .NET syntax \k{name} and the Perl syntax  \k<name>  or
         
     | 
| 
      
 6549 
     | 
    
         
            +
                   \k'name'  are supported, as is the Python syntax (?P=name). Perl 5.10's
         
     | 
| 
       6553 
6550 
     | 
    
         
             
                   unified back reference syntax, in which \g can be used for both numeric
         
     | 
| 
       6554 
     | 
    
         
            -
                   and 
     | 
| 
      
 6551 
     | 
    
         
            +
                   and  named  references,  is  also supported. We could rewrite the above
         
     | 
| 
       6555 
6552 
     | 
    
         
             
                   example in any of the following ways:
         
     | 
| 
       6556 
6553 
     | 
    
         | 
| 
       6557 
6554 
     | 
    
         
             
                     (?<p1>(?i)rah)\s+\k<p1>
         
     | 
| 
         @@ -6559,84 +6556,92 @@ BACK REFERENCES 
     | 
|
| 
       6559 
6556 
     | 
    
         
             
                     (?P<p1>(?i)rah)\s+(?P=p1)
         
     | 
| 
       6560 
6557 
     | 
    
         
             
                     (?<p1>(?i)rah)\s+\g{p1}
         
     | 
| 
       6561 
6558 
     | 
    
         | 
| 
       6562 
     | 
    
         
            -
                   A 
     | 
| 
      
 6559 
     | 
    
         
            +
                   A subpattern that is referenced by  name  may  appear  in  the  pattern
         
     | 
| 
       6563 
6560 
     | 
    
         
             
                   before or after the reference.
         
     | 
| 
       6564 
6561 
     | 
    
         | 
| 
       6565 
     | 
    
         
            -
                   There 
     | 
| 
       6566 
     | 
    
         
            -
                   subpattern 
     | 
| 
      
 6562 
     | 
    
         
            +
                   There  may be more than one back reference to the same subpattern. If a
         
     | 
| 
      
 6563 
     | 
    
         
            +
                   subpattern has not actually been used in a particular match,  any  back
         
     | 
| 
       6567 
6564 
     | 
    
         
             
                   references to it always fail by default. For example, the pattern
         
     | 
| 
       6568 
6565 
     | 
    
         | 
| 
       6569 
6566 
     | 
    
         
             
                     (a|(bc))\2
         
     | 
| 
       6570 
6567 
     | 
    
         | 
| 
       6571 
     | 
    
         
            -
                   always 
     | 
| 
      
 6568 
     | 
    
         
            +
                   always  fails  if  it starts to match "a" rather than "bc". However, if
         
     | 
| 
       6572 
6569 
     | 
    
         
             
                   the PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back refer-
         
     | 
| 
       6573 
6570 
     | 
    
         
             
                   ence to an unset value matches an empty string.
         
     | 
| 
       6574 
6571 
     | 
    
         | 
| 
       6575 
     | 
    
         
            -
                   Because 
     | 
| 
       6576 
     | 
    
         
            -
                   its 
     | 
| 
       6577 
     | 
    
         
            -
                   ence 
     | 
| 
       6578 
     | 
    
         
            -
                   delimiter 
     | 
| 
       6579 
     | 
    
         
            -
                   PCRE_EXTENDED 
     | 
| 
      
 6572 
     | 
    
         
            +
                   Because  there may be many capturing parentheses in a pattern, all dig-
         
     | 
| 
      
 6573 
     | 
    
         
            +
                   its following a backslash are taken as part of a potential back  refer-
         
     | 
| 
      
 6574 
     | 
    
         
            +
                   ence  number.   If  the  pattern continues with a digit character, some
         
     | 
| 
      
 6575 
     | 
    
         
            +
                   delimiter must  be  used  to  terminate  the  back  reference.  If  the
         
     | 
| 
      
 6576 
     | 
    
         
            +
                   PCRE_EXTENDED  option  is  set, this can be white space. Otherwise, the
         
     | 
| 
       6580 
6577 
     | 
    
         
             
                   \g{ syntax or an empty comment (see "Comments" below) can be used.
         
     | 
| 
       6581 
6578 
     | 
    
         | 
| 
       6582 
6579 
     | 
    
         
             
               Recursive back references
         
     | 
| 
       6583 
6580 
     | 
    
         | 
| 
       6584 
     | 
    
         
            -
                   A 
     | 
| 
       6585 
     | 
    
         
            -
                   fails 
     | 
| 
       6586 
     | 
    
         
            -
                   matches. 
     | 
| 
      
 6581 
     | 
    
         
            +
                   A back reference that occurs inside the parentheses to which it  refers
         
     | 
| 
      
 6582 
     | 
    
         
            +
                   fails  when  the subpattern is first used, so, for example, (a\1) never
         
     | 
| 
      
 6583 
     | 
    
         
            +
                   matches.  However, such references can be useful inside  repeated  sub-
         
     | 
| 
       6587 
6584 
     | 
    
         
             
                   patterns. For example, the pattern
         
     | 
| 
       6588 
6585 
     | 
    
         | 
| 
       6589 
6586 
     | 
    
         
             
                     (a|b\1)+
         
     | 
| 
       6590 
6587 
     | 
    
         | 
| 
       6591 
6588 
     | 
    
         
             
                   matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
         
     | 
| 
       6592 
     | 
    
         
            -
                   ation 
     | 
| 
       6593 
     | 
    
         
            -
                   string 
     | 
| 
       6594 
     | 
    
         
            -
                   work, 
     | 
| 
       6595 
     | 
    
         
            -
                   to 
     | 
| 
      
 6589 
     | 
    
         
            +
                   ation of the subpattern,  the  back  reference  matches  the  character
         
     | 
| 
      
 6590 
     | 
    
         
            +
                   string  corresponding  to  the previous iteration. In order for this to
         
     | 
| 
      
 6591 
     | 
    
         
            +
                   work, the pattern must be such that the first iteration does  not  need
         
     | 
| 
      
 6592 
     | 
    
         
            +
                   to  match the back reference. This can be done using alternation, as in
         
     | 
| 
       6596 
6593 
     | 
    
         
             
                   the example above, or by a quantifier with a minimum of zero.
         
     | 
| 
       6597 
6594 
     | 
    
         | 
| 
       6598 
     | 
    
         
            -
                   Back 
     | 
| 
       6599 
     | 
    
         
            -
                   treated 
     | 
| 
       6600 
     | 
    
         
            -
                   subsequent 
     | 
| 
      
 6595 
     | 
    
         
            +
                   Back references of this type cause the group that they reference to  be
         
     | 
| 
      
 6596 
     | 
    
         
            +
                   treated  as  an atomic group.  Once the whole group has been matched, a
         
     | 
| 
      
 6597 
     | 
    
         
            +
                   subsequent matching failure cannot cause backtracking into  the  middle
         
     | 
| 
       6601 
6598 
     | 
    
         
             
                   of the group.
         
     | 
| 
       6602 
6599 
     | 
    
         | 
| 
       6603 
6600 
     | 
    
         | 
| 
       6604 
6601 
     | 
    
         
             
            ASSERTIONS
         
     | 
| 
       6605 
6602 
     | 
    
         | 
| 
       6606 
     | 
    
         
            -
                   An 
     | 
| 
       6607 
     | 
    
         
            -
                   current 
     | 
| 
       6608 
     | 
    
         
            -
                   The 
     | 
| 
      
 6603 
     | 
    
         
            +
                   An  assertion  is  a  test on the characters following or preceding the
         
     | 
| 
      
 6604 
     | 
    
         
            +
                   current matching point that does not actually consume  any  characters.
         
     | 
| 
      
 6605 
     | 
    
         
            +
                   The  simple  assertions  coded  as  \b, \B, \A, \G, \Z, \z, ^ and $ are
         
     | 
| 
       6609 
6606 
     | 
    
         
             
                   described above.
         
     | 
| 
       6610 
6607 
     | 
    
         | 
| 
       6611 
     | 
    
         
            -
                   More 
     | 
| 
       6612 
     | 
    
         
            -
                   kinds: 
     | 
| 
       6613 
     | 
    
         
            -
                   string, 
     | 
| 
       6614 
     | 
    
         
            -
                   matched 
     | 
| 
      
 6608 
     | 
    
         
            +
                   More complicated assertions are coded as  subpatterns.  There  are  two
         
     | 
| 
      
 6609 
     | 
    
         
            +
                   kinds:  those  that  look  ahead of the current position in the subject
         
     | 
| 
      
 6610 
     | 
    
         
            +
                   string, and those that look  behind  it.  An  assertion  subpattern  is
         
     | 
| 
      
 6611 
     | 
    
         
            +
                   matched  in  the  normal way, except that it does not cause the current
         
     | 
| 
       6615 
6612 
     | 
    
         
             
                   matching position to be changed.
         
     | 
| 
       6616 
6613 
     | 
    
         | 
| 
       6617 
     | 
    
         
            -
                   Assertion 
     | 
| 
       6618 
     | 
    
         
            -
                   tion 
     | 
| 
       6619 
     | 
    
         
            -
                   the 
     | 
| 
       6620 
     | 
    
         
            -
                   tern. 
     | 
| 
      
 6614 
     | 
    
         
            +
                   Assertion subpatterns are not capturing subpatterns. If such an  asser-
         
     | 
| 
      
 6615 
     | 
    
         
            +
                   tion  contains  capturing  subpatterns within it, these are counted for
         
     | 
| 
      
 6616 
     | 
    
         
            +
                   the purposes of numbering the capturing subpatterns in the  whole  pat-
         
     | 
| 
      
 6617 
     | 
    
         
            +
                   tern.  However,  substring  capturing  is carried out only for positive
         
     | 
| 
       6621 
6618 
     | 
    
         
             
                   assertions. (Perl sometimes, but not always, does do capturing in nega-
         
     | 
| 
       6622 
6619 
     | 
    
         
             
                   tive assertions.)
         
     | 
| 
       6623 
6620 
     | 
    
         | 
| 
       6624 
     | 
    
         
            -
                    
     | 
| 
       6625 
     | 
    
         
            -
                    
     | 
| 
       6626 
     | 
    
         
            -
                    
     | 
| 
      
 6621 
     | 
    
         
            +
                   WARNING:  If a positive assertion containing one or more capturing sub-
         
     | 
| 
      
 6622 
     | 
    
         
            +
                   patterns succeeds, but failure to match later  in  the  pattern  causes
         
     | 
| 
      
 6623 
     | 
    
         
            +
                   backtracking over this assertion, the captures within the assertion are
         
     | 
| 
      
 6624 
     | 
    
         
            +
                   reset only if no higher numbered captures are  already  set.  This  is,
         
     | 
| 
      
 6625 
     | 
    
         
            +
                   unfortunately,  a fundamental limitation of the current implementation,
         
     | 
| 
      
 6626 
     | 
    
         
            +
                   and as PCRE1 is now in maintenance-only status, it is unlikely ever  to
         
     | 
| 
      
 6627 
     | 
    
         
            +
                   change.
         
     | 
| 
      
 6628 
     | 
    
         
            +
             
     | 
| 
      
 6629 
     | 
    
         
            +
                   For  compatibility  with  Perl,  assertion subpatterns may be repeated;
         
     | 
| 
      
 6630 
     | 
    
         
            +
                   though it makes no sense to assert the same thing  several  times,  the
         
     | 
| 
      
 6631 
     | 
    
         
            +
                   side  effect  of  capturing  parentheses may occasionally be useful. In
         
     | 
| 
       6627 
6632 
     | 
    
         
             
                   practice, there are only three cases:
         
     | 
| 
       6628 
6633 
     | 
    
         | 
| 
       6629 
     | 
    
         
            -
                   (1) 
     | 
| 
       6630 
     | 
    
         
            -
                   matching. 
     | 
| 
      
 6634 
     | 
    
         
            +
                   (1) If the quantifier is {0}, the  assertion  is  never  obeyed  during
         
     | 
| 
      
 6635 
     | 
    
         
            +
                   matching.   However,  it  may  contain internal capturing parenthesized
         
     | 
| 
       6631 
6636 
     | 
    
         
             
                   groups that are called from elsewhere via the subroutine mechanism.
         
     | 
| 
       6632 
6637 
     | 
    
         | 
| 
       6633 
     | 
    
         
            -
                   (2) 
     | 
| 
       6634 
     | 
    
         
            -
                   as 
     | 
| 
      
 6638 
     | 
    
         
            +
                   (2) If the quantifier is {0,n} where n is greater than zero, it is treated
         
     | 
| 
      
 6639 
     | 
    
         
            +
                   as  if  it  were  {0,1}.  At run time, the rest of the pattern match is
         
     | 
| 
       6635 
6640 
     | 
    
         
             
                   tried with and without the assertion, the order depending on the greed-
         
     | 
| 
       6636 
6641 
     | 
    
         
             
                   iness of the quantifier.
         
     | 
| 
       6637 
6642 
     | 
    
         | 
| 
       6638 
     | 
    
         
            -
                   (3) 
     | 
| 
       6639 
     | 
    
         
            -
                   ignored. 
     | 
| 
      
 6643 
     | 
    
         
            +
                   (3)  If  the minimum repetition is greater than zero, the quantifier is
         
     | 
| 
      
 6644 
     | 
    
         
            +
                   ignored.  The assertion is obeyed just  once  when  encountered  during
         
     | 
| 
       6640 
6645 
     | 
    
         
             
                   matching.
         
     | 
| 
       6641 
6646 
     | 
    
         | 
| 
       6642 
6647 
     | 
    
         
             
               Lookahead assertions
         
     | 
| 
         @@ -6646,38 +6651,38 @@ ASSERTIONS 
     | 
|
| 
       6646 
6651 
     | 
    
         | 
| 
       6647 
6652 
     | 
    
         
             
                     \w+(?=;)
         
     | 
| 
       6648 
6653 
     | 
    
         | 
| 
       6649 
     | 
    
         
            -
                   matches 
     | 
| 
      
 6654 
     | 
    
         
            +
                   matches a word followed by a semicolon, but does not include the  semi-
         
     | 
| 
       6650 
6655 
     | 
    
         
             
                   colon in the match, and
         
     | 
| 
       6651 
6656 
     | 
    
         | 
| 
       6652 
6657 
     | 
    
         
             
                     foo(?!bar)
         
     | 
| 
       6653 
6658 
     | 
    
         | 
| 
       6654 
     | 
    
         
            -
                   matches 
     | 
| 
      
 6659 
     | 
    
         
            +
                   matches  any  occurrence  of  "foo" that is not followed by "bar". Note
         
     | 
| 
       6655 
6660 
     | 
    
         
             
                   that the apparently similar pattern
         
     | 
| 
       6656 
6661 
     | 
    
         | 
| 
       6657 
6662 
     | 
    
         
             
                     (?!foo)bar
         
     | 
| 
       6658 
6663 
     | 
    
         | 
| 
       6659 
     | 
    
         
            -
                   does 
     | 
| 
       6660 
     | 
    
         
            -
                   other 
     | 
| 
      
 6664 
     | 
    
         
            +
                   does not find an occurrence of "bar"  that  is  preceded  by  something
         
     | 
| 
      
 6665 
     | 
    
         
            +
                   other  than "foo"; it finds any occurrence of "bar" whatsoever, because
         
     | 
| 
       6661 
6666 
     | 
    
         
             
                   the assertion (?!foo) is always true when the next three characters are
         
     | 
| 
       6662 
6667 
     | 
    
         
             
                   "bar". A lookbehind assertion is needed to achieve the other effect.
         
     | 
| 
       6663 
6668 
     | 
    
         | 
| 
       6664 
6669 
     | 
    
         
             
                   If you want to force a matching failure at some point in a pattern, the
         
     | 
| 
       6665 
     | 
    
         
            -
                   most 
     | 
| 
       6666 
     | 
    
         
            -
                   always 
     | 
| 
      
 6670 
     | 
    
         
            +
                   most convenient way to do it is  with  (?!)  because  an  empty  string
         
     | 
| 
      
 6671 
     | 
    
         
            +
                   always  matches, so an assertion that requires there not to be an empty
         
     | 
| 
       6667 
6672 
     | 
    
         
             
                   string must always fail.  The backtracking control verb (*FAIL) or (*F)
         
     | 
| 
       6668 
6673 
     | 
    
         
             
                   is a synonym for (?!).
         
     | 
| 
       6669 
6674 
     | 
    
         | 
| 
       6670 
6675 
     | 
    
         
             
               Lookbehind assertions
         
     | 
| 
       6671 
6676 
     | 
    
         | 
| 
       6672 
     | 
    
         
            -
                   Lookbehind 
     | 
| 
      
 6677 
     | 
    
         
            +
                   Lookbehind  assertions start with (?<= for positive assertions and (?<!
         
     | 
| 
       6673 
6678 
     | 
    
         
             
                   for negative assertions. For example,
         
     | 
| 
       6674 
6679 
     | 
    
         | 
| 
       6675 
6680 
     | 
    
         
             
                     (?<!foo)bar
         
     | 
| 
       6676 
6681 
     | 
    
         | 
| 
       6677 
     | 
    
         
            -
                   does 
     | 
| 
       6678 
     | 
    
         
            -
                   contents 
     | 
| 
      
 6682 
     | 
    
         
            +
                   does find an occurrence of "bar" that is not  preceded  by  "foo".  The
         
     | 
| 
      
 6683 
     | 
    
         
            +
                   contents  of  a  lookbehind  assertion are restricted such that all the
         
     | 
| 
       6679 
6684 
     | 
    
         
             
                   strings it matches must have a fixed length. However, if there are sev-
         
     | 
| 
       6680 
     | 
    
         
            -
                   eral 
     | 
| 
      
 6685 
     | 
    
         
            +
                   eral  top-level  alternatives,  they  do  not all have to have the same
         
     | 
| 
       6681 
6686 
     | 
    
         
             
                   fixed length. Thus
         
     | 
| 
       6682 
6687 
     | 
    
         | 
| 
       6683 
6688 
     | 
    
         
             
                     (?<=bullock|donkey)
         
     | 
| 
         @@ -6686,62 +6691,62 @@ ASSERTIONS 
     | 
|
| 
       6686 
6691 
     | 
    
         | 
| 
       6687 
6692 
     | 
    
         
             
                     (?<!dogs?|cats?)
         
     | 
| 
       6688 
6693 
     | 
    
         | 
| 
       6689 
     | 
    
         
            -
                   causes 
     | 
| 
       6690 
     | 
    
         
            -
                   strings 
     | 
| 
      
 6694 
     | 
    
         
            +
                   causes an error at compile time. Branches that match  different  length
         
     | 
| 
      
 6695 
     | 
    
         
            +
                   strings  are permitted only at the top level of a lookbehind assertion.
         
     | 
| 
       6691 
6696 
     | 
    
         
             
                   This is an extension compared with Perl, which requires all branches to
         
     | 
| 
       6692 
6697 
     | 
    
         
             
                   match the same length of string. An assertion such as
         
     | 
| 
       6693 
6698 
     | 
    
         | 
| 
       6694 
6699 
     | 
    
         
             
                     (?<=ab(c|de))
         
     | 
| 
       6695 
6700 
     | 
    
         | 
| 
       6696 
     | 
    
         
            -
                   is 
     | 
| 
      
 6701 
     | 
    
         
            +
                   is  not  permitted,  because  its single top-level branch can match two
         
     | 
| 
       6697 
6702 
     | 
    
         
             
                   different lengths, but it is acceptable to PCRE if rewritten to use two
         
     | 
| 
       6698 
6703 
     | 
    
         
             
                   top-level branches:
         
     | 
| 
       6699 
6704 
     | 
    
         | 
| 
       6700 
6705 
     | 
    
         
             
                     (?<=abc|abde)
         
     | 
| 
       6701 
6706 
     | 
    
         | 
| 
       6702 
     | 
    
         
            -
                   In 
     | 
| 
      
 6707 
     | 
    
         
            +
                   In  some  cases, the escape sequence \K (see above) can be used instead
         
     | 
| 
       6703 
6708 
     | 
    
         
             
                   of a lookbehind assertion to get round the fixed-length restriction.
         
     | 
| 
       6704 
6709 
     | 
    
         | 
| 
       6705 
     | 
    
         
            -
                   The 
     | 
| 
       6706 
     | 
    
         
            -
                   to 
     | 
| 
      
 6710 
     | 
    
         
            +
                   The implementation of lookbehind assertions is, for  each  alternative,
         
     | 
| 
      
 6711 
     | 
    
         
            +
                   to  temporarily  move the current position back by the fixed length and
         
     | 
| 
       6707 
6712 
     | 
    
         
             
                   then try to match. If there are insufficient characters before the cur-
         
     | 
| 
       6708 
6713 
     | 
    
         
             
                   rent position, the assertion fails.
         
     | 
| 
       6709 
6714 
     | 
    
         | 
| 
       6710 
     | 
    
         
            -
                   In 
     | 
| 
       6711 
     | 
    
         
            -
                   gle 
     | 
| 
       6712 
     | 
    
         
            -
                   because 
     | 
| 
       6713 
     | 
    
         
            -
                   hind. 
     | 
| 
      
 6715 
     | 
    
         
            +
                   In  a UTF mode, PCRE does not allow the \C escape (which matches a sin-
         
     | 
| 
      
 6716 
     | 
    
         
            +
                   gle data unit even in a UTF mode) to appear in  lookbehind  assertions,
         
     | 
| 
      
 6717 
     | 
    
         
            +
                   because  it  makes it impossible to calculate the length of the lookbe-
         
     | 
| 
      
 6718 
     | 
    
         
            +
                   hind. The \X and \R escapes, which can match different numbers of  data
         
     | 
| 
       6714 
6719 
     | 
    
         
             
                   units, are also not permitted.
         
     | 
| 
       6715 
6720 
     | 
    
         | 
| 
       6716 
     | 
    
         
            -
                   "Subroutine" 
     | 
| 
       6717 
     | 
    
         
            -
                   lookbehinds, 
     | 
| 
      
 6721 
     | 
    
         
            +
                   "Subroutine"  calls  (see below) such as (?2) or (?&X) are permitted in
         
     | 
| 
      
 6722 
     | 
    
         
            +
                   lookbehinds, as long as the subpattern matches a  fixed-length  string.
         
     | 
| 
       6718 
6723 
     | 
    
         
             
                   Recursion, however, is not supported.
         
     | 
| 
       6719 
6724 
     | 
    
         | 
| 
       6720 
     | 
    
         
            -
                   Possessive 
     | 
| 
      
 6725 
     | 
    
         
            +
                   Possessive  quantifiers  can  be  used  in  conjunction with lookbehind
         
     | 
| 
       6721 
6726 
     | 
    
         
             
                   assertions to specify efficient matching of fixed-length strings at the
         
     | 
| 
       6722 
6727 
     | 
    
         
             
                   end of subject strings. Consider a simple pattern such as
         
     | 
| 
       6723 
6728 
     | 
    
         | 
| 
       6724 
6729 
     | 
    
         
             
                     abcd$
         
     | 
| 
       6725 
6730 
     | 
    
         | 
| 
       6726 
     | 
    
         
            -
                   when 
     | 
| 
      
 6731 
     | 
    
         
            +
                   when  applied  to  a  long string that does not match. Because matching
         
     | 
| 
       6727 
6732 
     | 
    
         
             
                   proceeds from left to right, PCRE will look for each "a" in the subject
         
     | 
| 
       6728 
     | 
    
         
            -
                   and 
     | 
| 
      
 6733 
     | 
    
         
            +
                   and  then  see  if what follows matches the rest of the pattern. If the
         
     | 
| 
       6729 
6734 
     | 
    
         
             
                   pattern is specified as
         
     | 
| 
       6730 
6735 
     | 
    
         | 
| 
       6731 
6736 
     | 
    
         
             
                     ^.*abcd$
         
     | 
| 
       6732 
6737 
     | 
    
         | 
| 
       6733 
     | 
    
         
            -
                   the 
     | 
| 
      
 6738 
     | 
    
         
            +
                   the initial .* matches the entire string at first, but when this  fails
         
     | 
| 
       6734 
6739 
     | 
    
         
             
                   (because there is no following "a"), it backtracks to match all but the
         
     | 
| 
       6735 
     | 
    
         
            -
                   last 
     | 
| 
       6736 
     | 
    
         
            -
                   again 
     | 
| 
      
 6740 
     | 
    
         
            +
                   last character, then all but the last two characters, and so  on.  Once
         
     | 
| 
      
 6741 
     | 
    
         
            +
                   again  the search for "a" covers the entire string, from right to left,
         
     | 
| 
       6737 
6742 
     | 
    
         
             
                   so we are no better off. However, if the pattern is written as
         
     | 
| 
       6738 
6743 
     | 
    
         | 
| 
       6739 
6744 
     | 
    
         
             
                     ^.*+(?<=abcd)
         
     | 
| 
       6740 
6745 
     | 
    
         | 
| 
       6741 
     | 
    
         
            -
                   there 
     | 
| 
       6742 
     | 
    
         
            -
                   entire 
     | 
| 
       6743 
     | 
    
         
            -
                   on 
     | 
| 
       6744 
     | 
    
         
            -
                   For 
     | 
| 
      
 6746 
     | 
    
         
            +
                   there can be no backtracking for the .*+ item; it can  match  only  the
         
     | 
| 
      
 6747 
     | 
    
         
            +
                   entire  string.  The subsequent lookbehind assertion does a single test
         
     | 
| 
      
 6748 
     | 
    
         
            +
                   on the last four characters. If it fails, the match fails  immediately.
         
     | 
| 
      
 6749 
     | 
    
         
            +
                   For  long  strings, this approach makes a significant difference to the
         
     | 
| 
       6745 
6750 
     | 
    
         
             
                   processing time.
         
     | 
| 
       6746 
6751 
     | 
    
         | 
| 
       6747 
6752 
     | 
    
         
             
               Using multiple assertions
         
     | 
| 
         @@ -6750,18 +6755,18 @@ ASSERTIONS 
     | 
|
| 
       6750 
6755 
     | 
    
         | 
| 
       6751 
6756 
     | 
    
         
             
                     (?<=\d{3})(?<!999)foo
         
     | 
| 
       6752 
6757 
     | 
    
         | 
| 
       6753 
     | 
    
         
            -
                   matches 
     | 
| 
       6754 
     | 
    
         
            -
                   each 
     | 
| 
       6755 
     | 
    
         
            -
                   the 
     | 
| 
       6756 
     | 
    
         
            -
                   characters 
     | 
| 
      
 6758 
     | 
    
         
            +
                   matches "foo" preceded by three digits that are not "999". Notice  that
         
     | 
| 
      
 6759 
     | 
    
         
            +
                   each  of  the  assertions is applied independently at the same point in
         
     | 
| 
      
 6760 
     | 
    
         
            +
                   the subject string. First there is a  check  that  the  previous  three
         
     | 
| 
      
 6761 
     | 
    
         
            +
                   characters  are  all  digits,  and  then there is a check that the same
         
     | 
| 
       6757 
6762 
     | 
    
         
             
                   three characters are not "999".  This pattern does not match "foo" pre-
         
     | 
| 
       6758 
     | 
    
         
            -
                   ceded 
     | 
| 
       6759 
     | 
    
         
            -
                   three 
     | 
| 
      
 6763 
     | 
    
         
            +
                   ceded by six characters, the first three of which are digits and the last
         
     | 
| 
      
 6764 
     | 
    
         
            +
                   three of which are not "999". For example, it  doesn't  match  "123abc-
         
     | 
| 
       6760 
6765 
     | 
    
         
             
                   foo". A pattern to do that is
         
     | 
| 
       6761 
6766 
     | 
    
         | 
| 
       6762 
6767 
     | 
    
         
             
                     (?<=\d{3}...)(?<!999)foo
         
     | 
| 
       6763 
6768 
     | 
    
         | 
| 
       6764 
     | 
    
         
            -
                   This 
     | 
| 
      
 6769 
     | 
    
         
            +
                   This  time  the  first assertion looks at the preceding six characters,
         
     | 
| 
       6765 
6770 
     | 
    
         
             
                   checking that the first three are digits, and then the second assertion
         
     | 
| 
       6766 
6771 
     | 
    
         
             
                   checks that the preceding three characters are not "999".
         
     | 
| 
       6767 
6772 
     | 
    
         | 
| 
         @@ -6769,29 +6774,29 @@ ASSERTIONS 
     | 
|
| 
       6769 
6774 
     | 
    
         | 
| 
       6770 
6775 
     | 
    
         
             
                     (?<=(?<!foo)bar)baz
         
     | 
| 
       6771 
6776 
     | 
    
         | 
| 
       6772 
     | 
    
         
            -
                   matches 
     | 
| 
      
 6777 
     | 
    
         
            +
                   matches  an occurrence of "baz" that is preceded by "bar" which in turn
         
     | 
| 
       6773 
6778 
     | 
    
         
             
                   is not preceded by "foo", while
         
     | 
| 
       6774 
6779 
     | 
    
         | 
| 
       6775 
6780 
     | 
    
         
             
                     (?<=\d{3}(?!999)...)foo
         
     | 
| 
       6776 
6781 
     | 
    
         | 
| 
       6777 
     | 
    
         
            -
                   is 
     | 
| 
      
 6782 
     | 
    
         
            +
                   is another pattern that matches "foo" preceded by three digits and  any
         
     | 
| 
       6778 
6783 
     | 
    
         
             
                   three characters that are not "999".
         
     | 
| 
       6779 
6784 
     | 
    
         | 
| 
       6780 
6785 
     | 
    
         | 
| 
       6781 
6786 
     | 
    
         
             
            CONDITIONAL SUBPATTERNS
         
     | 
| 
       6782 
6787 
     | 
    
         | 
| 
       6783 
     | 
    
         
            -
                   It 
     | 
| 
       6784 
     | 
    
         
            -
                   ditionally 
     | 
| 
       6785 
     | 
    
         
            -
                   on 
     | 
| 
       6786 
     | 
    
         
            -
                   tern 
     | 
| 
      
 6788 
     | 
    
         
            +
                   It  is possible to cause the matching process to obey a subpattern con-
         
     | 
| 
      
 6789 
     | 
    
         
            +
                   ditionally or to choose between two alternative subpatterns,  depending
         
     | 
| 
      
 6790 
     | 
    
         
            +
                   on  the result of an assertion, or whether a specific capturing subpat-
         
     | 
| 
      
 6791 
     | 
    
         
            +
                   tern has already been matched. The two possible  forms  of  conditional
         
     | 
| 
       6787 
6792 
     | 
    
         
             
                   subpattern are:
         
     | 
| 
       6788 
6793 
     | 
    
         | 
| 
       6789 
6794 
     | 
    
         
             
                     (?(condition)yes-pattern)
         
     | 
| 
       6790 
6795 
     | 
    
         
             
                     (?(condition)yes-pattern|no-pattern)
         
     | 
| 
       6791 
6796 
     | 
    
         | 
| 
       6792 
     | 
    
         
            -
                   If 
     | 
| 
       6793 
     | 
    
         
            -
                   no-pattern 
     | 
| 
       6794 
     | 
    
         
            -
                   tives 
     | 
| 
      
 6797 
     | 
    
         
            +
                   If  the  condition is satisfied, the yes-pattern is used; otherwise the
         
     | 
| 
      
 6798 
     | 
    
         
            +
                   no-pattern (if present) is used. If there are more  than  two  alterna-
         
     | 
| 
      
 6799 
     | 
    
         
            +
                   tives  in  the subpattern, a compile-time error occurs. Each of the two
         
     | 
| 
       6795 
6800 
     | 
    
         
             
                   alternatives may itself contain nested subpatterns of any form, includ-
         
     | 
| 
       6796 
6801 
     | 
    
         
             
                   ing  conditional  subpatterns;  the  restriction  to  two  alternatives
         
     | 
| 
       6797 
6802 
     | 
    
         
             
                   applies only at the level of the condition. This pattern fragment is an
         
     | 
| 
         @@ -6800,68 +6805,68 @@ CONDITIONAL SUBPATTERNS 
     | 
|
| 
       6800 
6805 
     | 
    
         
             
                     (?(1) (A|B|C) | (D | (?(2)E|F) | E) )
         
     | 
| 
       6801 
6806 
     | 
    
         | 
| 
       6802 
6807 
     | 
    
         | 
| 
       6803 
     | 
    
         
            -
                   There 
     | 
| 
      
 6808 
     | 
    
         
            +
                   There  are  four  kinds of condition: references to subpatterns, refer-
         
     | 
| 
       6804 
6809 
     | 
    
         
             
                   ences to recursion, a pseudo-condition called DEFINE, and assertions.
         
     | 
| 
       6805 
6810 
     | 
    
         | 
| 
       6806 
6811 
     | 
    
         
             
               Checking for a used subpattern by number
         
     | 
| 
       6807 
6812 
     | 
    
         | 
| 
       6808 
     | 
    
         
            -
                   If 
     | 
| 
      
 6813 
     | 
    
         
            +
                   If the text between the parentheses consists of a sequence  of  digits,
         
     | 
| 
       6809 
6814 
     | 
    
         
             
                   the condition is true if a capturing subpattern of that number has pre-
         
     | 
| 
       6810 
     | 
    
         
            -
                   viously 
     | 
| 
       6811 
     | 
    
         
            -
                   the 
     | 
| 
       6812 
     | 
    
         
            -
                   numbers), 
     | 
| 
       6813 
     | 
    
         
            -
                   native 
     | 
| 
       6814 
     | 
    
         
            -
                   this 
     | 
| 
       6815 
     | 
    
         
            -
                   most 
     | 
| 
       6816 
     | 
    
         
            -
                   most 
     | 
| 
      
 6815 
     | 
    
         
            +
                   viously matched. If there is more than one  capturing  subpattern  with
         
     | 
| 
      
 6816 
     | 
    
         
            +
                   the  same  number  (see  the earlier section about duplicate subpattern
         
     | 
| 
      
 6817 
     | 
    
         
            +
                   numbers), the condition is true if any of them have matched. An  alter-
         
     | 
| 
      
 6818 
     | 
    
         
            +
                   native  notation is to precede the digits with a plus or minus sign. In
         
     | 
| 
      
 6819 
     | 
    
         
            +
                   this case, the subpattern number is relative rather than absolute.  The
         
     | 
| 
      
 6820 
     | 
    
         
            +
                   most  recently opened parentheses can be referenced by (?(-1), the next
         
     | 
| 
      
 6821 
     | 
    
         
            +
                   most recent by (?(-2), and so on. Inside loops it can also  make  sense
         
     | 
| 
       6817 
6822 
     | 
    
         
             
                   to refer to subsequent groups. The next parentheses to be opened can be
         
     | 
| 
       6818 
     | 
    
         
            -
                   referenced 
     | 
| 
      
 6823 
     | 
    
         
            +
                   referenced as (?(+1), and so on. (The value zero in any of these  forms
         
     | 
| 
       6819 
6824 
     | 
    
         
             
                   is not used; it provokes a compile-time error.)
         
     | 
| 
       6820 
6825 
     | 
    
         | 
| 
       6821 
     | 
    
         
            -
                   Consider 
     | 
| 
      
 6826 
     | 
    
         
            +
                   Consider  the  following  pattern, which contains non-significant white
         
     | 
| 
       6822 
6827 
     | 
    
         
             
                   space to make it more readable (assume the PCRE_EXTENDED option) and to
         
     | 
| 
       6823 
6828 
     | 
    
         
             
                   divide it into three parts for ease of discussion:
         
     | 
| 
       6824 
6829 
     | 
    
         | 
| 
       6825 
6830 
     | 
    
         
             
                     ( \( )?    [^()]+    (?(1) \) )
         
     | 
| 
       6826 
6831 
     | 
    
         | 
| 
       6827 
     | 
    
         
            -
                   The 
     | 
| 
      
 6832 
     | 
    
         
            +
                   The  first  part  matches  an optional opening parenthesis, and if that
         
     | 
| 
       6828 
6833 
     | 
    
         
             
                   character is present, sets it as the first captured substring. The sec-
         
     | 
| 
       6829 
     | 
    
         
            -
                   ond 
     | 
| 
       6830 
     | 
    
         
            -
                   third 
     | 
| 
       6831 
     | 
    
         
            -
                   first 
     | 
| 
       6832 
     | 
    
         
            -
                   started 
     | 
| 
       6833 
     | 
    
         
            -
                   yes-pattern 
     | 
| 
       6834 
     | 
    
         
            -
                   wise, 
     | 
| 
       6835 
     | 
    
         
            -
                   In 
     | 
| 
      
 6834 
     | 
    
         
            +
                   ond  part  matches one or more characters that are not parentheses. The
         
     | 
| 
      
 6835 
     | 
    
         
            +
                   third part is a conditional subpattern that tests whether  or  not  the
         
     | 
| 
      
 6836 
     | 
    
         
            +
                   first  set  of  parentheses  matched.  If they did, that is, if the subject
         
     | 
| 
      
 6837 
     | 
    
         
            +
                   started with an opening parenthesis, the condition is true, and so  the
         
     | 
| 
      
 6838 
     | 
    
         
            +
                   yes-pattern  is  executed and a closing parenthesis is required. Other-
         
     | 
| 
      
 6839 
     | 
    
         
            +
                   wise, since no-pattern is not present, the subpattern matches  nothing.
         
     | 
| 
      
 6840 
     | 
    
         
            +
                   In  other  words,  this  pattern matches a sequence of non-parentheses,
         
     | 
| 
       6836 
6841 
     | 
    
         
             
                   optionally enclosed in parentheses.
         
     | 
| 
       6837 
6842 
     | 
    
         | 
| 
       6838 
     | 
    
         
            -
                   If 
     | 
| 
      
 6843 
     | 
    
         
            +
                   If you were embedding this pattern in a larger one,  you  could  use  a
         
     | 
| 
       6839 
6844 
     | 
    
         
             
                   relative reference:
         
     | 
| 
       6840 
6845 
     | 
    
         | 
| 
       6841 
6846 
     | 
    
         
             
                     ...other stuff... ( \( )?    [^()]+    (?(-1) \) ) ...
         
     | 
| 
       6842 
6847 
     | 
    
         | 
| 
       6843 
     | 
    
         
            -
                   This 
     | 
| 
      
 6848 
     | 
    
         
            +
                   This  makes  the  fragment independent of the parentheses in the larger
         
     | 
| 
       6844 
6849 
     | 
    
         
             
                   pattern.
         
     | 
| 
       6845 
6850 
     | 
    
         | 
| 
       6846 
6851 
     | 
    
         
             
               Checking for a used subpattern by name
         
     | 
| 
       6847 
6852 
     | 
    
         | 
| 
       6848 
     | 
    
         
            -
                   Perl 
     | 
| 
       6849 
     | 
    
         
            -
                   used 
     | 
| 
       6850 
     | 
    
         
            -
                   PCRE, 
     | 
| 
      
 6853 
     | 
    
         
            +
                   Perl uses the syntax (?(<name>)...) or (?('name')...)  to  test  for  a
         
     | 
| 
      
 6854 
     | 
    
         
            +
                   used  subpattern  by  name.  For compatibility with earlier versions of
         
     | 
| 
      
 6855 
     | 
    
         
            +
                   PCRE, which had this facility before Perl, the syntax  (?(name)...)  is
         
     | 
| 
       6851 
6856 
     | 
    
         
             
                   also recognized.
         
     | 
| 
       6852 
6857 
     | 
    
         | 
| 
       6853 
6858 
     | 
    
         
             
                   Rewriting the above example to use a named subpattern gives this:
         
     | 
| 
       6854 
6859 
     | 
    
         | 
| 
       6855 
6860 
     | 
    
         
             
                     (?<OPEN> \( )?    [^()]+    (?(<OPEN>) \) )
         
     | 
| 
       6856 
6861 
     | 
    
         | 
| 
       6857 
     | 
    
         
            -
                   If 
     | 
| 
       6858 
     | 
    
         
            -
                   is 
     | 
| 
      
 6862 
     | 
    
         
            +
                   If  the  name used in a condition of this kind is a duplicate, the test
         
     | 
| 
      
 6863 
     | 
    
         
            +
                   is applied to all subpatterns of the same name, and is true if any  one
         
     | 
| 
       6859 
6864 
     | 
    
         
             
                   of them has matched.
         
     | 
| 
       6860 
6865 
     | 
    
         | 
| 
       6861 
6866 
     | 
    
         
             
               Checking for pattern recursion
         
     | 
| 
       6862 
6867 
     | 
    
         | 
| 
       6863 
6868 
     | 
    
         
             
                   If the condition is the string (R), and there is no subpattern with the
         
     | 
| 
       6864 
     | 
    
         
            -
                   name 
     | 
| 
      
 6869 
     | 
    
         
            +
                   name R, the condition is true if a recursive call to the whole  pattern
         
     | 
| 
       6865 
6870 
     | 
    
         
             
                   or any subpattern has been made. If digits or a name preceded by amper-
         
     | 
| 
       6866 
6871 
     | 
    
         
             
                   sand follow the letter R, for example:
         
     | 
| 
       6867 
6872 
     | 
    
         | 
| 
         @@ -6869,51 +6874,51 @@ CONDITIONAL SUBPATTERNS 
     | 
|
| 
       6869 
6874 
     | 
    
         | 
| 
       6870 
6875 
     | 
    
         
             
                   the condition is true if the most recent recursion is into a subpattern
         
     | 
| 
       6871 
6876 
     | 
    
         
             
                   whose number or name is given. This condition does not check the entire
         
     | 
| 
       6872 
     | 
    
         
            -
                   recursion 
     | 
| 
      
 6877 
     | 
    
         
            +
                   recursion stack. If the name used in a condition  of  this  kind  is  a
         
     | 
| 
       6873 
6878 
     | 
    
         
             
                   duplicate, the test is applied to all subpatterns of the same name, and
         
     | 
| 
       6874 
6879 
     | 
    
         
             
                   is true if any one of them is the most recent recursion.
         
     | 
| 
       6875 
6880 
     | 
    
         | 
| 
       6876 
     | 
    
         
            -
                   At 
     | 
| 
      
 6881 
     | 
    
         
            +
                   At "top level", all these recursion test  conditions  are  false.   The
         
     | 
| 
       6877 
6882 
     | 
    
         
             
                   syntax for recursive patterns is described below.
         
     | 
| 
       6878 
6883 
     | 
    
         | 
| 
       6879 
6884 
     | 
    
         
             
               Defining subpatterns for use by reference only
         
     | 
| 
       6880 
6885 
     | 
    
         | 
| 
       6881 
     | 
    
         
            -
                   If 
     | 
| 
       6882 
     | 
    
         
            -
                   with 
     | 
| 
       6883 
     | 
    
         
            -
                   there 
     | 
| 
       6884 
     | 
    
         
            -
                   skipped 
     | 
| 
       6885 
     | 
    
         
            -
                   DEFINE 
     | 
| 
       6886 
     | 
    
         
            -
                   enced 
     | 
| 
       6887 
     | 
    
         
            -
                   example, 
     | 
| 
      
 6886 
     | 
    
         
            +
                   If  the  condition  is  the string (DEFINE), and there is no subpattern
         
     | 
| 
      
 6887 
     | 
    
         
            +
                   with the name DEFINE, the condition is  always  false.  In  this  case,
         
     | 
| 
      
 6888 
     | 
    
         
            +
                   there  may  be  only  one  alternative  in the subpattern. It is always
         
     | 
| 
      
 6889 
     | 
    
         
            +
                   skipped if control reaches this point  in  the  pattern;  the  idea  of
         
     | 
| 
      
 6890 
     | 
    
         
            +
                   DEFINE  is that it can be used to define subroutines that can be refer-
         
     | 
| 
      
 6891 
     | 
    
         
            +
                   enced from elsewhere. (The use of subroutines is described below.)  For
         
     | 
| 
      
 6892 
     | 
    
         
            +
                   example,  a  pattern  to match an IPv4 address such as "192.168.23.245"
         
     | 
| 
       6888 
6893 
     | 
    
         
             
                   could be written like this (ignore white space and line breaks):
         
     | 
| 
       6889 
6894 
     | 
    
         | 
| 
       6890 
6895 
     | 
    
         
             
                     (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
         
     | 
| 
       6891 
6896 
     | 
    
         
             
                     \b (?&byte) (\.(?&byte)){3} \b
         
     | 
| 
       6892 
6897 
     | 
    
         | 
| 
       6893 
     | 
    
         
            -
                   The 
     | 
| 
       6894 
     | 
    
         
            -
                   group 
     | 
| 
       6895 
     | 
    
         
            -
                   an 
     | 
| 
       6896 
     | 
    
         
            -
                   this 
     | 
| 
       6897 
     | 
    
         
            -
                   condition. 
     | 
| 
       6898 
     | 
    
         
            -
                   to 
     | 
| 
      
 6898 
     | 
    
         
            +
                   The first part of the pattern is a DEFINE group inside  which  another
         
     | 
| 
      
 6899 
     | 
    
         
            +
                   group  named "byte" is defined. This matches an individual component of
         
     | 
| 
      
 6900 
     | 
    
         
            +
                   an IPv4 address (a number less than 256). When  matching  takes  place,
         
     | 
| 
      
 6901 
     | 
    
         
            +
                   this  part  of  the pattern is skipped because DEFINE acts like a false
         
     | 
| 
      
 6902 
     | 
    
         
            +
                   condition. The rest of the pattern uses references to the  named  group
         
     | 
| 
      
 6903 
     | 
    
         
            +
                   to  match the four dot-separated components of an IPv4 address, insist-
         
     | 
| 
       6899 
6904 
     | 
    
         
             
                   ing on a word boundary at each end.
         
     | 
| 
       6900 
6905 
     | 
    
         | 
| 
       6901 
6906 
     | 
    
         
             
               Assertion conditions
         
     | 
| 
       6902 
6907 
     | 
    
         | 
| 
       6903 
     | 
    
         
            -
                   If 
     | 
| 
       6904 
     | 
    
         
            -
                   assertion. 
     | 
| 
       6905 
     | 
    
         
            -
                   assertion. 
     | 
| 
      
 6908 
     | 
    
         
            +
                   If the condition is not in any of the above  formats,  it  must  be  an
         
     | 
| 
      
 6909 
     | 
    
         
            +
                   assertion.   This may be a positive or negative lookahead or lookbehind
         
     | 
| 
      
 6910 
     | 
    
         
            +
                   assertion. Consider  this  pattern,  again  containing  non-significant
         
     | 
| 
       6906 
6911 
     | 
    
         
             
                   white space, and with the two alternatives on the second line:
         
     | 
| 
       6907 
6912 
     | 
    
         | 
| 
       6908 
6913 
     | 
    
         
             
                     (?(?=[^a-z]*[a-z])
         
     | 
| 
       6909 
6914 
     | 
    
         
             
                     \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )
         
     | 
| 
       6910 
6915 
     | 
    
         | 
| 
       6911 
     | 
    
         
            -
                   The 
     | 
| 
       6912 
     | 
    
         
            -
                   optional 
     | 
| 
       6913 
     | 
    
         
            -
                   it 
     | 
| 
       6914 
     | 
    
         
            -
                   letter 
     | 
| 
       6915 
     | 
    
         
            -
                   otherwise 
     | 
| 
       6916 
     | 
    
         
            -
                   strings 
     | 
| 
      
 6916 
     | 
    
         
            +
                   The  condition  is  a  positive  lookahead  assertion  that  matches an
         
     | 
| 
      
 6917 
     | 
    
         
            +
                   optional sequence of non-letters followed by a letter. In other  words,
         
     | 
| 
      
 6918 
     | 
    
         
            +
                   it  tests  for the presence of at least one letter in the subject. If a
         
     | 
| 
      
 6919 
     | 
    
         
            +
                   letter is found, the subject is matched against the first  alternative;
         
     | 
| 
      
 6920 
     | 
    
         
            +
                   otherwise  it  is  matched  against  the  second.  This pattern matches
         
     | 
| 
      
 6921 
     | 
    
         
            +
                   strings in one of the two forms dd-aaa-dd or dd-dd-dd,  where  aaa  are
         
     | 
| 
       6917 
6922 
     | 
    
         
             
                   letters and dd are digits.
         
     | 
| 
       6918 
6923 
     | 
    
         | 
| 
       6919 
6924 
     | 
    
         | 
| 
         @@ -6922,41 +6927,41 @@ COMMENTS 
     | 
|
| 
       6922 
6927 
     | 
    
         
             
                   There are two ways of including comments in patterns that are processed
         
     | 
| 
       6923 
6928 
     | 
    
         
             
                   by PCRE. In both cases, the start of the comment must not be in a char-
         
     | 
| 
       6924 
6929 
     | 
    
         
             
                   acter class, nor in the middle of any other sequence of related charac-
         
     | 
| 
       6925 
     | 
    
         
            -
                   ters 
     | 
| 
      
 6930 
     | 
    
         
            +
                   ters such as (?: or a subpattern name or number.  The  characters  that
         
     | 
| 
       6926 
6931 
     | 
    
         
             
                   make up a comment play no part in the pattern matching.
         
     | 
| 
       6927 
6932 
     | 
    
         | 
| 
       6928 
     | 
    
         
            -
                   The 
     | 
| 
       6929 
     | 
    
         
            -
                   next 
     | 
| 
      
 6933 
     | 
    
         
            +
                   The  sequence (?# marks the start of a comment that continues up to the
         
     | 
| 
      
 6934 
     | 
    
         
            +
                   next closing parenthesis. Nested parentheses are not permitted. If  the
         
     | 
| 
       6930 
6935 
     | 
    
         
             
                   PCRE_EXTENDED option is set, an unescaped # character also introduces a
         
     | 
| 
       6931 
     | 
    
         
            -
                   comment, 
     | 
| 
       6932 
     | 
    
         
            -
                   newline 
     | 
| 
      
 6936 
     | 
    
         
            +
                   comment, which in this case continues to  immediately  after  the  next
         
     | 
| 
      
 6937 
     | 
    
         
            +
                   newline  character  or character sequence in the pattern. Which charac-
         
     | 
| 
       6933 
6938 
     | 
    
         
             
                   ters are interpreted as newlines is controlled by the options passed to
         
     | 
| 
       6934 
     | 
    
         
            -
                   a 
     | 
| 
      
 6939 
     | 
    
         
            +
                   a  compiling function or by a special sequence at the start of the pat-
         
     | 
| 
       6935 
6940 
     | 
    
         
             
                   tern, as described in the section entitled "Newline conventions" above.
         
     | 
| 
       6936 
6941 
     | 
    
         
             
                   Note that the end of this type of comment is a literal newline sequence
         
     | 
| 
       6937 
     | 
    
         
            -
                   in 
     | 
| 
       6938 
     | 
    
         
            -
                   not 
     | 
| 
      
 6942 
     | 
    
         
            +
                   in the pattern; escape sequences that happen to represent a newline  do
         
     | 
| 
      
 6943 
     | 
    
         
            +
                   not  count.  For  example,  consider this pattern when PCRE_EXTENDED is
         
     | 
| 
       6939 
6944 
     | 
    
         
             
                   set, and the default newline convention is in force:
         
     | 
| 
       6940 
6945 
     | 
    
         | 
| 
       6941 
6946 
     | 
    
         
             
                     abc #comment \n still comment
         
     | 
| 
       6942 
6947 
     | 
    
         | 
| 
       6943 
     | 
    
         
            -
                   On 
     | 
| 
       6944 
     | 
    
         
            -
                   for 
     | 
| 
       6945 
     | 
    
         
            -
                   stage, 
     | 
| 
      
 6948 
     | 
    
         
            +
                   On encountering the # character, pcre_compile()  skips  along,  looking
         
     | 
| 
      
 6949 
     | 
    
         
            +
                   for  a newline in the pattern. The sequence \n is still literal at this
         
     | 
| 
      
 6950 
     | 
    
         
            +
                   stage, so it does not terminate the comment. Only an  actual  character
         
     | 
| 
       6946 
6951 
     | 
    
         
             
                   with the code value 0x0a (the default newline) does so.
         
     | 
| 
       6947 
6952 
     | 
    
         | 
| 
       6948 
6953 
     | 
    
         | 
| 
       6949 
6954 
     | 
    
         
             
            RECURSIVE PATTERNS
         
     | 
| 
       6950 
6955 
     | 
    
         | 
| 
       6951 
     | 
    
         
            -
                   Consider 
     | 
| 
       6952 
     | 
    
         
            -
                   unlimited 
     | 
| 
       6953 
     | 
    
         
            -
                   that 
     | 
| 
       6954 
     | 
    
         
            -
                   depth 
     | 
| 
      
 6956 
     | 
    
         
            +
                   Consider  the problem of matching a string in parentheses, allowing for
         
     | 
| 
      
 6957 
     | 
    
         
            +
                   unlimited nested parentheses. Without the use of  recursion,  the  best
         
     | 
| 
      
 6958 
     | 
    
         
            +
                   that  can  be  done  is  to use a pattern that matches up to some fixed
         
     | 
| 
      
 6959 
     | 
    
         
            +
                   depth of nesting. It is not possible to  handle  an  arbitrary  nesting
         
     | 
| 
       6955 
6960 
     | 
    
         
             
                   depth.
         
     | 
| 
       6956 
6961 
     | 
    
         | 
| 
       6957 
6962 
     | 
    
         
             
                   For some time, Perl has provided a facility that allows regular expres-
         
     | 
| 
       6958 
     | 
    
         
            -
                   sions 
     | 
| 
       6959 
     | 
    
         
            -
                   Perl 
     | 
| 
      
 6963 
     | 
    
         
            +
                   sions to recurse (amongst other things). It does this by  interpolating
         
     | 
| 
      
 6964 
     | 
    
         
            +
                   Perl  code in the expression at run time, and the code can refer to the
         
     | 
| 
       6960 
6965 
     | 
    
         
             
                   expression itself. A Perl pattern using code interpolation to solve the
         
     | 
| 
       6961 
6966 
     | 
    
         
             
                   parentheses problem can be created like this:
         
     | 
| 
       6962 
6967 
     | 
    
         | 
| 
         @@ -6966,201 +6971,201 @@ RECURSIVE PATTERNS 
     | 
|
| 
       6966 
6971 
     | 
    
         
             
                   refers recursively to the pattern in which it appears.
         
     | 
| 
       6967 
6972 
     | 
    
         | 
| 
       6968 
6973 
     | 
    
         
             
                   Obviously, PCRE cannot support the interpolation of Perl code. Instead,
         
     | 
| 
       6969 
     | 
    
         
            -
                   it 
     | 
| 
       6970 
     | 
    
         
            -
                   also 
     | 
| 
       6971 
     | 
    
         
            -
                   PCRE 
     | 
| 
      
 6974 
     | 
    
         
            +
                   it  supports  special  syntax  for recursion of the entire pattern, and
         
     | 
| 
      
 6975 
     | 
    
         
            +
                   also for individual subpattern recursion.  After  its  introduction  in
         
     | 
| 
      
 6976 
     | 
    
         
            +
                   PCRE  and  Python,  this  kind of recursion was subsequently introduced
         
     | 
| 
       6972 
6977 
     | 
    
         
             
                   into Perl at release 5.10.
         
     | 
| 
       6973 
6978 
     | 
    
         | 
| 
       6974 
     | 
    
         
            -
                   A 
     | 
| 
       6975 
     | 
    
         
            -
                   zero 
     | 
| 
       6976 
     | 
    
         
            -
                   subpattern 
     | 
| 
       6977 
     | 
    
         
            -
                   subpattern. 
     | 
| 
       6978 
     | 
    
         
            -
                   described 
     | 
| 
      
 6979 
     | 
    
         
            +
                   A special item that consists of (? followed by a  number  greater  than
         
     | 
| 
      
 6980 
     | 
    
         
            +
                   zero  and  a  closing parenthesis is a recursive subroutine call of the
         
     | 
| 
      
 6981 
     | 
    
         
            +
                   subpattern of the given number, provided that  it  occurs  inside  that
         
     | 
| 
      
 6982 
     | 
    
         
            +
                   subpattern.  (If  not,  it is a non-recursive subroutine call, which is
         
     | 
| 
      
 6983 
     | 
    
         
            +
                   described in the next section.) The special item  (?R)  or  (?0)  is  a
         
     | 
| 
       6979 
6984 
     | 
    
         
             
                   recursive call of the entire regular expression.
         
     | 
| 
       6980 
6985 
     | 
    
         | 
| 
       6981 
     | 
    
         
            -
                   This 
     | 
| 
      
 6986 
     | 
    
         
            +
                   This  PCRE  pattern  solves  the nested parentheses problem (assume the
         
     | 
| 
       6982 
6987 
     | 
    
         
             
                   PCRE_EXTENDED option is set so that white space is ignored):
         
     | 
| 
       6983 
6988 
     | 
    
         | 
| 
       6984 
6989 
     | 
    
         
             
                     \( ( [^()]++ | (?R) )* \)
         
     | 
| 
       6985 
6990 
     | 
    
         | 
| 
       6986 
     | 
    
         
            -
                   First 
     | 
| 
       6987 
     | 
    
         
            -
                   substrings 
     | 
| 
       6988 
     | 
    
         
            -
                   recursive 
     | 
| 
      
 6991 
     | 
    
         
            +
                   First it matches an opening parenthesis. Then it matches any number  of
         
     | 
| 
      
 6992 
     | 
    
         
            +
                   substrings  which  can  either  be  a sequence of non-parentheses, or a
         
     | 
| 
      
 6993 
     | 
    
         
            +
                   recursive match of the pattern itself (that is, a  correctly  parenthe-
         
     | 
| 
       6989 
6994 
     | 
    
         
             
                   sized substring).  Finally there is a closing parenthesis. Note the use
         
     | 
| 
       6990 
6995 
     | 
    
         
             
                   of a possessive quantifier to avoid backtracking into sequences of non-
         
     | 
| 
       6991 
6996 
     | 
    
         
             
                   parentheses.
         
     | 
| 
       6992 
6997 
     | 
    
         | 
| 
       6993 
     | 
    
         
            -
                   If 
     | 
| 
      
 6998 
     | 
    
         
            +
                   If  this  were  part of a larger pattern, you would not want to recurse
         
     | 
| 
       6994 
6999 
     | 
    
         
             
                   the entire pattern, so instead you could use this:
         
     | 
| 
       6995 
7000 
     | 
    
         | 
| 
       6996 
7001 
     | 
    
         
             
                     ( \( ( [^()]++ | (?1) )* \) )
         
     | 
| 
       6997 
7002 
     | 
    
         | 
| 
       6998 
     | 
    
         
            -
                   We 
     | 
| 
      
 7003 
     | 
    
         
            +
                   We have put the pattern into parentheses, and caused the  recursion  to
         
     | 
| 
       6999 
7004 
     | 
    
         
             
                   refer to them instead of the whole pattern.
         
     | 
| 
       7000 
7005 
     | 
    
         | 
| 
       7001 
     | 
    
         
            -
                   In 
     | 
| 
       7002 
     | 
    
         
            -
                   tricky. 
     | 
| 
      
 7006 
     | 
    
         
            +
                   In  a  larger  pattern,  keeping  track  of  parenthesis numbers can be
         
     | 
| 
      
 7007 
     | 
    
         
            +
                   tricky. This is made easier by the use of relative references.  Instead
         
     | 
| 
       7003 
7008 
     | 
    
         
             
                   of (?1) in the pattern above you can write (?-2) to refer to the second
         
     | 
| 
       7004 
     | 
    
         
            -
                   most 
     | 
| 
       7005 
     | 
    
         
            -
                   words, 
     | 
| 
      
 7009 
     | 
    
         
            +
                   most recently opened parentheses  preceding  the  recursion.  In  other
         
     | 
| 
      
 7010 
     | 
    
         
            +
                   words,  a  negative  number counts capturing parentheses leftwards from
         
     | 
| 
       7006 
7011 
     | 
    
         
             
                   the point at which it is encountered.
         
     | 
| 
       7007 
7012 
     | 
    
         | 
| 
       7008 
     | 
    
         
            -
                   It 
     | 
| 
       7009 
     | 
    
         
            -
                   writing 
     | 
| 
       7010 
     | 
    
         
            -
                   because 
     | 
| 
       7011 
     | 
    
         
            -
                   enced. 
     | 
| 
      
 7013 
     | 
    
         
            +
                   It is also possible to refer to  subsequently  opened  parentheses,  by
         
     | 
| 
      
 7014 
     | 
    
         
            +
                   writing  references  such  as (?+2). However, these cannot be recursive
         
     | 
| 
      
 7015 
     | 
    
         
            +
                   because the reference is not inside the  parentheses  that  are  refer-
         
     | 
| 
      
 7016 
     | 
    
         
            +
                   enced.  They are always non-recursive subroutine calls, as described in
         
     | 
| 
       7012 
7017 
     | 
    
         
             
                   the next section.
         
     | 
| 
       7013 
7018 
     | 
    
         | 
| 
       7014 
     | 
    
         
            -
                   An 
     | 
| 
       7015 
     | 
    
         
            -
                   syntax 
     | 
| 
      
 7019 
     | 
    
         
            +
                   An alternative approach is to use named parentheses instead.  The  Perl
         
     | 
| 
      
 7020 
     | 
    
         
            +
                   syntax  for  this  is (?&name); PCRE's earlier syntax (?P>name) is also
         
     | 
| 
       7016 
7021 
     | 
    
         
             
                   supported. We could rewrite the above example as follows:
         
     | 
| 
       7017 
7022 
     | 
    
         | 
| 
       7018 
7023 
     | 
    
         
             
                     (?<pn> \( ( [^()]++ | (?&pn) )* \) )
         
     | 
| 
       7019 
7024 
     | 
    
         | 
| 
       7020 
     | 
    
         
            -
                   If 
     | 
| 
      
 7025 
     | 
    
         
            +
                   If there is more than one subpattern with the same name,  the  earliest
         
     | 
| 
       7021 
7026 
     | 
    
         
             
                   one is used.
         
     | 
| 
       7022 
7027 
     | 
    
         | 
| 
       7023 
     | 
    
         
            -
                   This 
     | 
| 
      
 7028 
     | 
    
         
            +
                   This  particular  example pattern that we have been looking at contains
         
     | 
| 
       7024 
7029 
     | 
    
         
             
                   nested unlimited repeats, and so the use of a possessive quantifier for
         
     | 
| 
       7025 
7030 
     | 
    
         
             
                   matching strings of non-parentheses is important when applying the pat-
         
     | 
| 
       7026 
     | 
    
         
            -
                   tern 
     | 
| 
      
 7031 
     | 
    
         
            +
                   tern to strings that do not match. For example, when  this  pattern  is
         
     | 
| 
       7027 
7032 
     | 
    
         
             
                   applied to
         
     | 
| 
       7028 
7033 
     | 
    
         | 
| 
       7029 
7034 
     | 
    
         
             
                     (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
         
     | 
| 
       7030 
7035 
     | 
    
         | 
| 
       7031 
     | 
    
         
            -
                   it 
     | 
| 
       7032 
     | 
    
         
            -
                   not 
     | 
| 
       7033 
     | 
    
         
            -
                   so 
     | 
| 
      
 7036 
     | 
    
         
            +
                   it  yields  "no  match" quickly. However, if a possessive quantifier is
         
     | 
| 
      
 7037 
     | 
    
         
            +
                   not used, the match runs for a very long time indeed because there  are
         
     | 
| 
      
 7038 
     | 
    
         
            +
                   so  many  different  ways the + and * repeats can carve up the subject,
         
     | 
| 
       7034 
7039 
     | 
    
         
             
                   and all have to be tested before failure can be reported.
         
     | 
| 
       7035 
7040 
     | 
    
         | 
| 
       7036 
     | 
    
         
            -
                   At 
     | 
| 
       7037 
     | 
    
         
            -
                   from 
     | 
| 
       7038 
     | 
    
         
            -
                   callout 
     | 
| 
      
 7041 
     | 
    
         
            +
                   At the end of a match, the values of capturing  parentheses  are  those
         
     | 
| 
      
 7042 
     | 
    
         
            +
                   from  the outermost level. If you want to obtain intermediate values, a
         
     | 
| 
      
 7043 
     | 
    
         
            +
                   callout function can be used (see below and the pcrecallout  documenta-
         
     | 
| 
       7039 
7044 
     | 
    
         
             
                   tion). If the pattern above is matched against
         
     | 
| 
       7040 
7045 
     | 
    
         | 
| 
       7041 
7046 
     | 
    
         
             
                     (ab(cd)ef)
         
     | 
| 
       7042 
7047 
     | 
    
         | 
| 
       7043 
     | 
    
         
            -
                   the 
     | 
| 
       7044 
     | 
    
         
            -
                   which 
     | 
| 
       7045 
     | 
    
         
            -
                   pattern 
     | 
| 
       7046 
     | 
    
         
            -
                   unset, 
     | 
| 
      
 7048 
     | 
    
         
            +
                   the  value  for  the  inner capturing parentheses (numbered 2) is "ef",
         
     | 
| 
      
 7049 
     | 
    
         
            +
                   which is the last value taken on at the top level. If a capturing  sub-
         
     | 
| 
      
 7050 
     | 
    
         
            +
                   pattern  is  not  matched at the top level, its final captured value is
         
     | 
| 
      
 7051 
     | 
    
         
            +
                   unset, even if it was (temporarily) set at a deeper  level  during  the
         
     | 
| 
       7047 
7052 
     | 
    
         
             
                   matching process.
         
     | 
| 
       7048 
7053 
     | 
    
         | 
| 
       7049 
     | 
    
         
            -
                   If 
     | 
| 
       7050 
     | 
    
         
            -
                   to 
     | 
| 
      
 7054 
     | 
    
         
            +
                   If  there are more than 15 capturing parentheses in a pattern, PCRE has
         
     | 
| 
      
 7055 
     | 
    
         
            +
                   to obtain extra memory to store data during a recursion, which it  does
         
     | 
| 
       7051 
7056 
     | 
    
         
             
                   by using pcre_malloc, freeing it via pcre_free afterwards. If no memory
         
     | 
| 
       7052 
7057 
     | 
    
         
             
                   can be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.
         
     | 
| 
       7053 
7058 
     | 
    
         | 
| 
       7054 
     | 
    
         
            -
                   Do 
     | 
| 
       7055 
     | 
    
         
            -
                   recursion.   
     | 
| 
       7056 
     | 
    
         
            -
                   ets, 
     | 
| 
       7057 
     | 
    
         
            -
                   brackets 
     | 
| 
      
 7059 
     | 
    
         
            +
                   Do not confuse the (?R) item with the condition (R),  which  tests  for
         
     | 
| 
      
 7060 
     | 
    
         
            +
                   recursion.   Consider  this pattern, which matches text in angle brack-
         
     | 
| 
      
 7061 
     | 
    
         
            +
                   ets, allowing for arbitrary nesting. Only digits are allowed in  nested
         
     | 
| 
      
 7062 
     | 
    
         
            +
                   brackets  (that is, when recursing), whereas any characters are permit-
         
     | 
| 
       7058 
7063 
     | 
    
         
             
                   ted at the outer level.
         
     | 
| 
       7059 
7064 
     | 
    
         | 
| 
       7060 
7065 
     | 
    
         
             
                     < (?: (?(R) \d++  | [^<>]*+) | (?R)) * >
         
     | 
| 
       7061 
7066 
     | 
    
         | 
| 
       7062 
     | 
    
         
            -
                   In 
     | 
| 
       7063 
     | 
    
         
            -
                   two 
     | 
| 
      
 7067 
     | 
    
         
            +
                   In this pattern, (?(R) is the start of a conditional  subpattern,  with
         
     | 
| 
      
 7068 
     | 
    
         
            +
                   two  different  alternatives for the recursive and non-recursive cases.
         
     | 
| 
       7064 
7069 
     | 
    
         
             
                   The (?R) item is the actual recursive call.
         
     | 
| 
       7065 
7070 
     | 
    
         | 
| 
       7066 
7071 
     | 
    
         
             
               Differences in recursion processing between PCRE and Perl
         
     | 
| 
       7067 
7072 
     | 
    
         | 
| 
       7068 
     | 
    
         
            -
                   Recursion 
     | 
| 
       7069 
     | 
    
         
            -
                   In 
     | 
| 
      
 7073 
     | 
    
         
            +
                   Recursion processing in PCRE differs from Perl in two  important  ways.
         
     | 
| 
      
 7074 
     | 
    
         
            +
                   In  PCRE (like Python, but unlike Perl), a recursive subpattern call is
         
     | 
| 
       7070 
7075 
     | 
    
         
             
                   always treated as an atomic group. That is, once it has matched some of
         
     | 
| 
       7071 
7076 
     | 
    
         
             
                   the subject string, it is never re-entered, even if it contains untried
         
     | 
| 
       7072 
     | 
    
         
            -
                   alternatives 
     | 
| 
       7073 
     | 
    
         
            -
                   illustrated 
     | 
| 
       7074 
     | 
    
         
            -
                   dromic 
     | 
| 
      
 7077 
     | 
    
         
            +
                   alternatives and there is a subsequent matching failure.  This  can  be
         
     | 
| 
      
 7078 
     | 
    
         
            +
                   illustrated  by the following pattern, which purports to match a palin-
         
     | 
| 
      
 7079 
     | 
    
         
            +
                   dromic string that contains an odd number of characters  (for  example,
         
     | 
| 
       7075 
7080 
     | 
    
         
             
                   "a", "aba", "abcba", "abcdcba"):
         
     | 
| 
       7076 
7081 
     | 
    
         | 
| 
       7077 
7082 
     | 
    
         
             
                     ^(.|(.)(?1)\2)$
         
     | 
| 
       7078 
7083 
     | 
    
         | 
| 
       7079 
7084 
     | 
    
         
             
                   The idea is that it either matches a single character, or two identical
         
     | 
| 
       7080 
     | 
    
         
            -
                   characters 
     | 
| 
       7081 
     | 
    
         
            -
                   in 
     | 
| 
      
 7085 
     | 
    
         
            +
                   characters surrounding a sub-palindrome. In Perl, this  pattern  works;
         
     | 
| 
      
 7086 
     | 
    
         
            +
                   in  PCRE  it  does  not if the subject string is longer than three characters.
         
     | 
| 
       7082 
7087 
     | 
    
         
             
                   Consider the subject string "abcba":
         
     | 
| 
       7083 
7088 
     | 
    
         | 
| 
       7084 
     | 
    
         
            -
                   At 
     | 
| 
      
 7089 
     | 
    
         
            +
                   At the top level, the first character is matched, but as it is  not  at
         
     | 
| 
       7085 
7090 
     | 
    
         
             
                   the end of the string, the first alternative fails; the second alterna-
         
     | 
| 
       7086 
7091 
     | 
    
         
             
                   tive is taken and the recursion kicks in. The recursive call to subpat-
         
     | 
| 
       7087 
     | 
    
         
            -
                   tern 
     | 
| 
      
 7092 
     | 
    
         
            +
                   tern  1  successfully  matches the next character ("b"). (Note that the
         
     | 
| 
       7088 
7093 
     | 
    
         
             
                   beginning and end of line tests are not part of the recursion).
         
     | 
| 
       7089 
7094 
     | 
    
         | 
| 
       7090 
     | 
    
         
            -
                   Back 
     | 
| 
       7091 
     | 
    
         
            -
                   subpattern 
     | 
| 
       7092 
     | 
    
         
            -
                   is 
     | 
| 
       7093 
     | 
    
         
            -
                   and 
     | 
| 
       7094 
     | 
    
         
            -
                   enter 
     | 
| 
      
 7095 
     | 
    
         
            +
                   Back at the top level, the next character ("c") is compared  with  what
         
     | 
| 
      
 7096 
     | 
    
         
            +
                   subpattern  2 matched, which was "a". This fails. Because the recursion
         
     | 
| 
      
 7097 
     | 
    
         
            +
                   is treated as an atomic group, there are now  no  backtracking  points,
         
     | 
| 
      
 7098 
     | 
    
         
            +
                   and  so  the  entire  match fails. (Perl is able, at this point, to re-
         
     | 
| 
      
 7099 
     | 
    
         
            +
                   enter the recursion and try the second alternative.)  However,  if  the
         
     | 
| 
       7095 
7100 
     | 
    
         
             
                   pattern is written with the alternatives in the other order, things are
         
     | 
| 
       7096 
7101 
     | 
    
         
             
                   different:
         
     | 
| 
       7097 
7102 
     | 
    
         | 
| 
       7098 
7103 
     | 
    
         
             
                     ^((.)(?1)\2|.)$
         
     | 
| 
       7099 
7104 
     | 
    
         | 
| 
       7100 
     | 
    
         
            -
                   This 
     | 
| 
       7101 
     | 
    
         
            -
                   recurse 
     | 
| 
       7102 
     | 
    
         
            -
                   fails. 
     | 
| 
       7103 
     | 
    
         
            -
                   higher 
     | 
| 
      
 7105 
     | 
    
         
            +
                   This time, the recursing alternative is tried first, and  continues  to
         
     | 
| 
      
 7106 
     | 
    
         
            +
                   recurse  until  it runs out of characters, at which point the recursion
         
     | 
| 
      
 7107 
     | 
    
         
            +
                   fails. But this time we do have  another  alternative  to  try  at  the
         
     | 
| 
      
 7108 
     | 
    
         
            +
                   higher  level.  That  is  the  big difference: in the previous case the
         
     | 
| 
       7104 
7109 
     | 
    
         
             
                   remaining alternative is at a deeper recursion level, which PCRE cannot
         
     | 
| 
       7105 
7110 
     | 
    
         
             
                   use.
         
     | 
| 
       7106 
7111 
     | 
    
         | 
| 
       7107 
     | 
    
         
            -
                   To 
     | 
| 
       7108 
     | 
    
         
            -
                   just 
     | 
| 
      
 7112 
     | 
    
         
            +
                   To  change  the pattern so that it matches all palindromic strings, not
         
     | 
| 
      
 7113 
     | 
    
         
            +
                   just those with an odd number of characters, it is tempting  to  change
         
     | 
| 
       7109 
7114 
     | 
    
         
             
                   the pattern to this:
         
     | 
| 
       7110 
7115 
     | 
    
         | 
| 
       7111 
7116 
     | 
    
         
             
                     ^((.)(?1)\2|.?)$
         
     | 
| 
       7112 
7117 
     | 
    
         | 
| 
       7113 
     | 
    
         
            -
                   Again, 
     | 
| 
       7114 
     | 
    
         
            -
                   When 
     | 
| 
       7115 
     | 
    
         
            -
                   entered 
     | 
| 
       7116 
     | 
    
         
            -
                   separate 
     | 
| 
      
 7118 
     | 
    
         
            +
                   Again,  this  works  in Perl, but not in PCRE, and for the same reason.
         
     | 
| 
      
 7119 
     | 
    
         
            +
                   When a deeper recursion has matched a single character,  it  cannot  be
         
     | 
| 
      
 7120 
     | 
    
         
            +
                   entered  again  in  order  to match an empty string. The solution is to
         
     | 
| 
      
 7121 
     | 
    
         
            +
                   separate the two cases, and write out the odd and even cases as  alter-
         
     | 
| 
       7117 
7122 
     | 
    
         
             
                   natives at the higher level:
         
     | 
| 
       7118 
7123 
     | 
    
         | 
| 
       7119 
7124 
     | 
    
         
             
                     ^(?:((.)(?1)\2|)|((.)(?3)\4|.))
         
     | 
| 
       7120 
7125 
     | 
    
         | 
| 
       7121 
     | 
    
         
            -
                   If 
     | 
| 
      
 7126 
     | 
    
         
            +
                   If  you  want  to match typical palindromic phrases, the pattern has to
         
     | 
| 
       7122 
7127 
     | 
    
         
             
                   ignore all non-word characters, which can be done like this:
         
     | 
| 
       7123 
7128 
     | 
    
         | 
| 
       7124 
7129 
     | 
    
         
             
                     ^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$
         
     | 
| 
       7125 
7130 
     | 
    
         | 
| 
       7126 
7131 
     | 
    
         
             
                   If run with the PCRE_CASELESS option, this pattern matches phrases such
         
     | 
| 
       7127 
7132 
     | 
    
         
             
                   as "A man, a plan, a canal: Panama!" and it works well in both PCRE and
         
     | 
| 
       7128 
     | 
    
         
            -
                   Perl. 
     | 
| 
       7129 
     | 
    
         
            -
                   ing 
     | 
| 
       7130 
     | 
    
         
            -
                   great 
     | 
| 
      
 7133 
     | 
    
         
            +
                   Perl. Note the use of the possessive quantifier *+ to avoid  backtrack-
         
     | 
| 
      
 7134 
     | 
    
         
            +
                   ing  into  sequences of non-word characters. Without this, PCRE takes a
         
     | 
| 
      
 7135 
     | 
    
         
            +
                   great deal longer (ten times or more) to  match  typical  phrases,  and
         
     | 
| 
       7131 
7136 
     | 
    
         
             
                   Perl takes so long that you think it has gone into a loop.
         
     | 
| 
       7132 
7137 
     | 
    
         | 
| 
       7133 
     | 
    
         
            -
                   WARNING: 
     | 
| 
       7134 
     | 
    
         
            -
                   ject 
     | 
| 
       7135 
     | 
    
         
            -
                   entire 
     | 
| 
       7136 
     | 
    
         
            -
                   the 
     | 
| 
       7137 
     | 
    
         
            -
                   then 
     | 
| 
       7138 
     | 
    
         
            -
                   Once 
     | 
| 
      
 7138 
     | 
    
         
            +
                   WARNING:  The  palindrome-matching patterns above work only if the sub-
         
     | 
| 
      
 7139 
     | 
    
         
            +
                   ject string does not start with a palindrome that is shorter  than  the
         
     | 
| 
      
 7140 
     | 
    
         
            +
                   entire  string.  For example, although "abcba" is correctly matched, if
         
     | 
| 
      
 7141 
     | 
    
         
            +
                   the subject is "ababa", PCRE finds the palindrome "aba" at  the  start,
         
     | 
| 
      
 7142 
     | 
    
         
            +
                   then  fails at top level because the end of the string does not follow.
         
     | 
| 
      
 7143 
     | 
    
         
            +
                   Once again, it cannot jump back into the recursion to try other  alter-
         
     | 
| 
       7139 
7144 
     | 
    
         
             
                   natives, so the entire match fails.
         
     | 
| 
       7140 
7145 
     | 
    
         | 
| 
       7141 
     | 
    
         
            -
                   The 
     | 
| 
       7142 
     | 
    
         
            -
                   cessing 
     | 
| 
       7143 
     | 
    
         
            -
                   tern 
     | 
| 
       7144 
     | 
    
         
            -
                   it 
     | 
| 
       7145 
     | 
    
         
            -
                   sion, 
     | 
| 
      
 7146 
     | 
    
         
            +
                   The  second  way  in which PCRE and Perl differ in their recursion pro-
         
     | 
| 
      
 7147 
     | 
    
         
            +
                   cessing is in the handling of captured values. In Perl, when a  subpat-
         
     | 
| 
      
 7148 
     | 
    
         
            +
                   tern  is  called recursively or as a subroutine (see the next section),
         
     | 
| 
      
 7149 
     | 
    
         
            +
                   it has no access to any values that were captured  outside  the  recur-
         
     | 
| 
      
 7150 
     | 
    
         
            +
                   sion,  whereas  in  PCRE  these values can be referenced. Consider this
         
     | 
| 
       7146 
7151 
     | 
    
         
             
                   pattern:
         
     | 
| 
       7147 
7152 
     | 
    
         | 
| 
       7148 
7153 
     | 
    
         
             
                     ^(.)(\1|a(?2))
         
     | 
| 
       7149 
7154 
     | 
    
         | 
| 
       7150 
     | 
    
         
            -
                   In 
     | 
| 
       7151 
     | 
    
         
            -
                   match 
     | 
| 
       7152 
     | 
    
         
            -
                   to 
     | 
| 
       7153 
     | 
    
         
            -
                   the 
     | 
| 
       7154 
     | 
    
         
            -
                   In 
     | 
| 
      
 7155 
     | 
    
         
            +
                   In PCRE, this pattern matches "bab". The  first  capturing  parentheses
         
     | 
| 
      
 7156 
     | 
    
         
            +
                   match  "b",  then in the second group, when the back reference \1 fails
         
     | 
| 
      
 7157 
     | 
    
         
            +
                   to match "b", the second alternative matches "a" and then recurses.  In
         
     | 
| 
      
 7158 
     | 
    
         
            +
                   the  recursion,  \1 does now match "b" and so the whole match succeeds.
         
     | 
| 
      
 7159 
     | 
    
         
            +
                   In Perl, the pattern fails to match because inside the  recursive  call
         
     | 
| 
       7155 
7160 
     | 
    
         
             
                   \1 cannot access the externally set value.
         
     | 
| 
       7156 
7161 
     | 
    
         | 
| 
       7157 
7162 
     | 
    
         | 
| 
       7158 
7163 
     | 
    
         
             
            SUBPATTERNS AS SUBROUTINES
         
     | 
| 
       7159 
7164 
     | 
    
         | 
| 
       7160 
     | 
    
         
            -
                   If 
     | 
| 
       7161 
     | 
    
         
            -
                   name) 
     | 
| 
       7162 
     | 
    
         
            -
                   like 
     | 
| 
       7163 
     | 
    
         
            -
                   be 
     | 
| 
      
 7165 
     | 
    
         
            +
                   If  the  syntax for a recursive subpattern call (either by number or by
         
     | 
| 
      
 7166 
     | 
    
         
            +
                   name) is used outside the parentheses to which it refers,  it  operates
         
     | 
| 
      
 7167 
     | 
    
         
            +
                   like  a subroutine in a programming language. The called subpattern may
         
     | 
| 
      
 7168 
     | 
    
         
            +
                   be defined before or after the reference. A numbered reference  can  be
         
     | 
| 
       7164 
7169 
     | 
    
         
             
                   absolute or relative, as in these examples:
         
     | 
| 
       7165 
7170 
     | 
    
         | 
| 
       7166 
7171 
     | 
    
         
             
                     (...(absolute)...)...(?2)...
         
     | 
| 
         @@ -7171,79 +7176,79 @@ SUBPATTERNS AS SUBROUTINES 
     | 
|
| 
       7171 
7176 
     | 
    
         | 
| 
       7172 
7177 
     | 
    
         
             
                     (sens|respons)e and \1ibility
         
     | 
| 
       7173 
7178 
     | 
    
         | 
| 
       7174 
     | 
    
         
            -
                   matches 
     | 
| 
      
 7179 
     | 
    
         
            +
                   matches  "sense and sensibility" and "response and responsibility", but
         
     | 
| 
       7175 
7180 
     | 
    
         
             
                   not "sense and responsibility". If instead the pattern
         
     | 
| 
       7176 
7181 
     | 
    
         | 
| 
       7177 
7182 
     | 
    
         
             
                     (sens|respons)e and (?1)ibility
         
     | 
| 
       7178 
7183 
     | 
    
         | 
| 
       7179 
     | 
    
         
            -
                   is 
     | 
| 
       7180 
     | 
    
         
            -
                   two 
     | 
| 
      
 7184 
     | 
    
         
            +
                   is used, it does match "sense and responsibility" as well as the  other
         
     | 
| 
      
 7185 
     | 
    
         
            +
                   two  strings.  Another  example  is  given  in the discussion of DEFINE
         
     | 
| 
       7181 
7186 
     | 
    
         
             
                   above.
         
     | 
| 
       7182 
7187 
     | 
    
         | 
| 
       7183 
     | 
    
         
            -
                   All 
     | 
| 
       7184 
     | 
    
         
            -
                   atomic 
     | 
| 
      
 7188 
     | 
    
         
            +
                   All subroutine calls, whether recursive or not, are always  treated  as
         
     | 
| 
      
 7189 
     | 
    
         
            +
                   atomic  groups. That is, once a subroutine has matched some of the sub-
         
     | 
| 
       7185 
7190 
     | 
    
         
             
                   ject string, it is never re-entered, even if it contains untried alter-
         
     | 
| 
       7186 
     | 
    
         
            -
                   natives 
     | 
| 
       7187 
     | 
    
         
            -
                   parentheses 
     | 
| 
      
 7191 
     | 
    
         
            +
                   natives  and  there  is  a  subsequent  matching failure. Any capturing
         
     | 
| 
      
 7192 
     | 
    
         
            +
                   parentheses that are set during the subroutine  call  revert  to  their
         
     | 
| 
       7188 
7193 
     | 
    
         
             
                   previous values afterwards.
         
     | 
| 
       7189 
7194 
     | 
    
         | 
| 
       7190 
     | 
    
         
            -
                   Processing 
     | 
| 
       7191 
     | 
    
         
            -
                   tern 
     | 
| 
      
 7195 
     | 
    
         
            +
                   Processing  options  such as case-independence are fixed when a subpat-
         
     | 
| 
      
 7196 
     | 
    
         
            +
                   tern is defined, so if it is used as a subroutine, such options  cannot
         
     | 
| 
       7192 
7197 
     | 
    
         
             
                   be changed for different calls. For example, consider this pattern:
         
     | 
| 
       7193 
7198 
     | 
    
         | 
| 
       7194 
7199 
     | 
    
         
             
                     (abc)(?i:(?-1))
         
     | 
| 
       7195 
7200 
     | 
    
         | 
| 
       7196 
     | 
    
         
            -
                   It 
     | 
| 
      
 7201 
     | 
    
         
            +
                   It  matches  "abcabc". It does not match "abcABC" because the change of
         
     | 
| 
       7197 
7202 
     | 
    
         
             
                   processing option does not affect the called subpattern.
         
     | 
| 
       7198 
7203 
     | 
    
         | 
| 
       7199 
7204 
     | 
    
         | 
| 
       7200 
7205 
     | 
    
         
             
            ONIGURUMA SUBROUTINE SYNTAX
         
     | 
| 
       7201 
7206 
     | 
    
         | 
| 
       7202 
     | 
    
         
            -
                   For 
     | 
| 
      
 7207 
     | 
    
         
            +
                   For compatibility with Oniguruma, the non-Perl syntax \g followed by  a
         
     | 
| 
       7203 
7208 
     | 
    
         
             
                   name or a number enclosed either in angle brackets or single quotes, is
         
     | 
| 
       7204 
     | 
    
         
            -
                   an 
     | 
| 
       7205 
     | 
    
         
            -
                   possibly 
     | 
| 
      
 7209 
     | 
    
         
            +
                   an alternative syntax for referencing a  subpattern  as  a  subroutine,
         
     | 
| 
      
 7210 
     | 
    
         
            +
                   possibly  recursively. Here are two of the examples used above, rewrit-
         
     | 
| 
       7206 
7211 
     | 
    
         
             
                   ten using this syntax:
         
     | 
| 
       7207 
7212 
     | 
    
         | 
| 
       7208 
7213 
     | 
    
         
             
                     (?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
         
     | 
| 
       7209 
7214 
     | 
    
         
             
                     (sens|respons)e and \g'1'ibility
         
     | 
| 
       7210 
7215 
     | 
    
         | 
| 
       7211 
     | 
    
         
            -
                   PCRE 
     | 
| 
      
 7216 
     | 
    
         
            +
                   PCRE supports an extension to Oniguruma: if a number is preceded  by  a
         
     | 
| 
       7212 
7217 
     | 
    
         
             
                   plus or a minus sign it is taken as a relative reference. For example:
         
     | 
| 
       7213 
7218 
     | 
    
         | 
| 
       7214 
7219 
     | 
    
         
             
                     (abc)(?i:\g<-1>)
         
     | 
| 
       7215 
7220 
     | 
    
         | 
| 
       7216 
     | 
    
         
            -
                   Note 
     | 
| 
       7217 
     | 
    
         
            -
                   synonymous. 
     | 
| 
      
 7221 
     | 
    
         
            +
                   Note  that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
         
     | 
| 
      
 7222 
     | 
    
         
            +
                   synonymous. The former is a back reference; the latter is a  subroutine
         
     | 
| 
       7218 
7223 
     | 
    
         
             
                   call.
         
     | 
| 
       7219 
7224 
     | 
    
         | 
| 
       7220 
7225 
     | 
    
         | 
| 
       7221 
7226 
     | 
    
         
             
            CALLOUTS
         
     | 
| 
       7222 
7227 
     | 
    
         | 
| 
       7223 
7228 
     | 
    
         
             
                   Perl has a feature whereby using the sequence (?{...}) causes arbitrary
         
     | 
| 
       7224 
     | 
    
         
            -
                   Perl 
     | 
| 
      
 7229 
     | 
    
         
            +
                   Perl code to be obeyed in the middle of matching a regular  expression.
         
     | 
| 
       7225 
7230 
     | 
    
         
             
                   This makes it possible, amongst other things, to extract different sub-
         
     | 
| 
       7226 
7231 
     | 
    
         
             
                   strings that match the same pair of parentheses when there is a repeti-
         
     | 
| 
       7227 
7232 
     | 
    
         
             
                   tion.
         
     | 
| 
       7228 
7233 
     | 
    
         | 
| 
       7229 
7234 
     | 
    
         
             
                   PCRE provides a similar feature, but of course it cannot obey arbitrary
         
     | 
| 
       7230 
7235 
     | 
    
         
             
                   Perl code. The feature is called "callout". The caller of PCRE provides
         
     | 
| 
       7231 
     | 
    
         
            -
                   an 
     | 
| 
       7232 
     | 
    
         
            -
                   pcre_callout 
     | 
| 
       7233 
     | 
    
         
            -
                   library). 
     | 
| 
      
 7236 
     | 
    
         
            +
                   an  external function by putting its entry point in the global variable
         
     | 
| 
      
 7237 
     | 
    
         
            +
                   pcre_callout (8-bit library) or pcre[16|32]_callout (16-bit  or  32-bit
         
     | 
| 
      
 7238 
     | 
    
         
            +
                   library).   By default, this variable contains NULL, which disables all
         
     | 
| 
       7234 
7239 
     | 
    
         
             
                   calling out.
         
     | 
| 
       7235 
7240 
     | 
    
         | 
| 
       7236 
     | 
    
         
            -
                   Within 
     | 
| 
       7237 
     | 
    
         
            -
                   external 
     | 
| 
       7238 
     | 
    
         
            -
                   callout 
     | 
| 
       7239 
     | 
    
         
            -
                   The 
     | 
| 
      
 7241 
     | 
    
         
            +
                   Within a regular expression, (?C) indicates the  points  at  which  the
         
     | 
| 
      
 7242 
     | 
    
         
            +
                   external  function  is  to be called. If you want to identify different
         
     | 
| 
      
 7243 
     | 
    
         
            +
                   callout points, you can put a number less than 256 after the letter  C.
         
     | 
| 
      
 7244 
     | 
    
         
            +
                   The  default  value is zero.  For example, this pattern has two callout
         
     | 
| 
       7240 
7245 
     | 
    
         
             
                   points:
         
     | 
| 
       7241 
7246 
     | 
    
         | 
| 
       7242 
7247 
     | 
    
         
             
                     (?C1)abc(?C2)def
         
     | 
| 
       7243 
7248 
     | 
    
         | 
| 
       7244 
     | 
    
         
            -
                   If 
     | 
| 
       7245 
     | 
    
         
            -
                   outs 
     | 
| 
       7246 
     | 
    
         
            -
                   are 
     | 
| 
      
 7249 
     | 
    
         
            +
                   If the PCRE_AUTO_CALLOUT flag is passed to a compiling function,  call-
         
     | 
| 
      
 7250 
     | 
    
         
            +
                   outs  are automatically installed before each item in the pattern. They
         
     | 
| 
      
 7251 
     | 
    
         
            +
                   are all numbered 255. If there is a conditional group  in  the  pattern
         
     | 
| 
       7247 
7252 
     | 
    
         
             
                   whose condition is an assertion, an additional callout is inserted just
         
     | 
| 
       7248 
7253 
     | 
    
         
             
                   before the condition. An explicit callout may also be set at this posi-
         
     | 
| 
       7249 
7254 
     | 
    
         
             
                   tion, as in this example:
         
     | 
| 
         @@ -7253,120 +7258,120 @@ CALLOUTS 
     | 
|
| 
       7253 
7258 
     | 
    
         
             
                   Note that this applies only to assertion conditions, not to other types
         
     | 
| 
       7254 
7259 
     | 
    
         
             
                   of condition.
         
     | 
| 
       7255 
7260 
     | 
    
         | 
| 
       7256 
     | 
    
         
            -
                   During 
     | 
| 
       7257 
     | 
    
         
            -
                   tion 
     | 
| 
       7258 
     | 
    
         
            -
                   position 
     | 
| 
       7259 
     | 
    
         
            -
                   supplied 
     | 
| 
      
 7261 
     | 
    
         
            +
                   During matching, when PCRE reaches a callout point, the external  func-
         
     | 
| 
      
 7262 
     | 
    
         
            +
                   tion  is  called.  It  is  provided with the number of the callout, the
         
     | 
| 
      
 7263 
     | 
    
         
            +
                   position in the pattern, and, optionally, one item of  data  originally
         
     | 
| 
      
 7264 
     | 
    
         
            +
                   supplied  by  the caller of the matching function. The callout function
         
     | 
| 
       7260 
7265 
     | 
    
         
             
                   may cause matching to proceed, to backtrack, or to fail altogether.
         
     | 
| 
       7261 
7266 
     | 
    
         | 
| 
       7262 
     | 
    
         
            -
                   By 
     | 
| 
       7263 
     | 
    
         
            -
                   and 
     | 
| 
       7264 
     | 
    
         
            -
                   skipped. 
     | 
| 
       7265 
     | 
    
         
            -
                   options 
     | 
| 
       7266 
     | 
    
         
            -
                   complete 
     | 
| 
      
 7267 
     | 
    
         
            +
                   By default, PCRE implements a number of optimizations at  compile  time
         
     | 
| 
      
 7268 
     | 
    
         
            +
                   and  matching  time, and one side-effect is that sometimes callouts are
         
     | 
| 
      
 7269 
     | 
    
         
            +
                   skipped. If you need all possible callouts to happen, you need  to  set
         
     | 
| 
      
 7270 
     | 
    
         
            +
                   options  that  disable  the relevant optimizations. More details, and a
         
     | 
| 
      
 7271 
     | 
    
         
            +
                   complete description of the interface  to  the  callout  function,  are
         
     | 
| 
       7267 
7272 
     | 
    
         
             
                   given in the pcrecallout documentation.
         
     | 
| 
       7268 
7273 
     | 
    
         | 
| 
       7269 
7274 
     | 
    
         | 
| 
       7270 
7275 
     | 
    
         
             
            BACKTRACKING CONTROL
         
     | 
| 
       7271 
7276 
     | 
    
         | 
| 
       7272 
     | 
    
         
            -
                   Perl 
     | 
| 
       7273 
     | 
    
         
            -
                   which 
     | 
| 
       7274 
     | 
    
         
            -
                   and 
     | 
| 
       7275 
     | 
    
         
            -
                   on 
     | 
| 
       7276 
     | 
    
         
            -
                   problems 
     | 
| 
      
 7277 
     | 
    
         
            +
                   Perl  5.10 introduced a number of "Special Backtracking Control Verbs",
         
     | 
| 
      
 7278 
     | 
    
         
            +
                   which are still described in the Perl  documentation  as  "experimental
         
     | 
| 
      
 7279 
     | 
    
         
            +
                   and  subject to change or removal in a future version of Perl". It goes
         
     | 
| 
      
 7280 
     | 
    
         
            +
                   on to say: "Their usage in production code should  be  noted  to  avoid
         
     | 
| 
      
 7281 
     | 
    
         
            +
                   problems  during upgrades." The same remarks apply to the PCRE features
         
     | 
| 
       7277 
7282 
     | 
    
         
             
                   described in this section.
         
     | 
| 
       7278 
7283 
     | 
    
         | 
| 
       7279 
     | 
    
         
            -
                   The 
     | 
| 
      
 7284 
     | 
    
         
            +
                   The new verbs make use of what was previously invalid syntax: an  open-
         
     | 
| 
       7280 
7285 
     | 
    
         
             
                   ing parenthesis followed by an asterisk. They are generally of the form
         
     | 
| 
       7281 
     | 
    
         
            -
                   (*VERB) 
     | 
| 
       7282 
     | 
    
         
            -
                   differently 
     | 
| 
      
 7286 
     | 
    
         
            +
                   (*VERB) or (*VERB:NAME). Some may take either form,  possibly  behaving
         
     | 
| 
      
 7287 
     | 
    
         
            +
                   differently  depending  on  whether or not a name is present. A name is
         
     | 
| 
       7283 
7288 
     | 
    
         
             
                   any sequence of characters that does not include a closing parenthesis.
         
     | 
| 
       7284 
7289 
     | 
    
         
             
                   The maximum length of name is 255 in the 8-bit library and 65535 in the
         
     | 
| 
       7285 
     | 
    
         
            -
                   16-bit 
     | 
| 
       7286 
     | 
    
         
            -
                   closing 
     | 
| 
       7287 
     | 
    
         
            -
                   the 
     | 
| 
      
 7290 
     | 
    
         
            +
                   16-bit and 32-bit libraries. If the name is  empty,  that  is,  if  the
         
     | 
| 
      
 7291 
     | 
    
         
            +
                   closing  parenthesis immediately follows the colon, the effect is as if
         
     | 
| 
      
 7292 
     | 
    
         
            +
                   the colon were not there.  Any number of these verbs  may  occur  in  a
         
     | 
| 
       7288 
7293 
     | 
    
         
             
                   pattern.
         
     | 
| 
       7289 
7294 
     | 
    
         | 
| 
       7290 
     | 
    
         
            -
                   Since 
     | 
| 
       7291 
     | 
    
         
            -
                   them 
     | 
| 
       7292 
     | 
    
         
            -
                   the 
     | 
| 
       7293 
     | 
    
         
            -
                   algorithm. 
     | 
| 
       7294 
     | 
    
         
            -
                   negative 
     | 
| 
      
 7295 
     | 
    
         
            +
                   Since  these  verbs  are  specifically related to backtracking, most of
         
     | 
| 
      
 7296 
     | 
    
         
            +
                   them can be used only when the pattern is to be matched  using  one  of
         
     | 
| 
      
 7297 
     | 
    
         
            +
                   the  traditional  matching  functions, because these use a backtracking
         
     | 
| 
      
 7298 
     | 
    
         
            +
                   algorithm. With the exception of (*FAIL), which behaves like a  failing
         
     | 
| 
      
 7299 
     | 
    
         
            +
                   negative  assertion,  the  backtracking control verbs cause an error if
         
     | 
| 
       7295 
7300 
     | 
    
         
             
                   encountered by a DFA matching function.
         
     | 
| 
       7296 
7301 
     | 
    
         | 
| 
       7297 
     | 
    
         
            -
                   The 
     | 
| 
      
 7302 
     | 
    
         
            +
                   The behaviour of these verbs in repeated  groups,  assertions,  and  in
         
     | 
| 
       7298 
7303 
     | 
    
         
             
                   subpatterns called as subroutines (whether or not recursively) is docu-
         
     | 
| 
       7299 
7304 
     | 
    
         
             
                   mented below.
         
     | 
| 
       7300 
7305 
     | 
    
         | 
| 
       7301 
7306 
     | 
    
         
             
               Optimizations that affect backtracking verbs
         
     | 
| 
       7302 
7307 
     | 
    
         | 
| 
       7303 
     | 
    
         
            -
                   PCRE 
     | 
| 
      
 7308 
     | 
    
         
            +
                   PCRE contains some optimizations that are used to speed up matching  by
         
     | 
| 
       7304 
7309 
     | 
    
         
             
                   running some checks at the start of each match attempt. For example, it
         
     | 
| 
       7305 
     | 
    
         
            -
                   may 
     | 
| 
      
 7310 
     | 
    
         
            +
                   may know the minimum length of matching subject, or that  a  particular
         
     | 
| 
       7306 
7311 
     | 
    
         
             
                   character must be present. When one of these optimizations bypasses the
         
     | 
| 
       7307 
     | 
    
         
            -
                   running 
     | 
| 
      
 7312 
     | 
    
         
            +
                   running of a match,  any  included  backtracking  verbs  will  not,  of
         
     | 
| 
       7308 
7313 
     | 
    
         
             
                   course, be processed. You can suppress the start-of-match optimizations
         
     | 
| 
       7309 
     | 
    
         
            -
                   by 
     | 
| 
      
 7314 
     | 
    
         
            +
                   by setting the PCRE_NO_START_OPTIMIZE  option  when  calling  pcre_com-
         
     | 
| 
       7310 
7315 
     | 
    
         
             
                   pile() or pcre_exec(), or by starting the pattern with (*NO_START_OPT).
         
     | 
| 
       7311 
7316 
     | 
    
         
             
                   There is more discussion of this option in the section entitled "Option
         
     | 
| 
       7312 
7317 
     | 
    
         
             
                   bits for pcre_exec()" in the pcreapi documentation.
         
     | 
| 
       7313 
7318 
     | 
    
         | 
| 
       7314 
     | 
    
         
            -
                   Experiments 
     | 
| 
      
 7319 
     | 
    
         
            +
                   Experiments  with  Perl  suggest that it too has similar optimizations,
         
     | 
| 
       7315 
7320 
     | 
    
         
             
                   sometimes leading to anomalous results.
         
     | 
| 
       7316 
7321 
     | 
    
         | 
| 
       7317 
7322 
     | 
    
         
             
               Verbs that act immediately
         
     | 
| 
       7318 
7323 
     | 
    
         | 
| 
       7319 
     | 
    
         
            -
                   The 
     | 
| 
      
 7324 
     | 
    
         
            +
                   The following verbs act as soon as they are encountered. They  may  not
         
     | 
| 
       7320 
7325 
     | 
    
         
             
                   be followed by a name.
         
     | 
| 
       7321 
7326 
     | 
    
         | 
| 
       7322 
7327 
     | 
    
         
             
                      (*ACCEPT)
         
     | 
| 
       7323 
7328 
     | 
    
         | 
| 
       7324 
     | 
    
         
            -
                   This 
     | 
| 
       7325 
     | 
    
         
            -
                   of 
     | 
| 
       7326 
     | 
    
         
            -
                   as 
     | 
| 
      
 7329 
     | 
    
         
            +
                   This  verb causes the match to end successfully, skipping the remainder
         
     | 
| 
      
 7330 
     | 
    
         
            +
                   of the pattern. However, when it is inside a subpattern that is  called
         
     | 
| 
      
 7331 
     | 
    
         
            +
                   as  a  subroutine, only that subpattern is ended successfully. Matching
         
     | 
| 
       7327 
7332 
     | 
    
         
             
                   then continues at the outer level. If (*ACCEPT) is triggered in a posi-
         
     | 
| 
       7328 
     | 
    
         
            -
                   tive 
     | 
| 
      
 7333 
     | 
    
         
            +
                   tive  assertion,  the  assertion succeeds; in a negative assertion, the
         
     | 
| 
       7329 
7334 
     | 
    
         
             
                   assertion fails.
         
     | 
| 
       7330 
7335 
     | 
    
         | 
| 
       7331 
     | 
    
         
            -
                   If 
     | 
| 
      
 7336 
     | 
    
         
            +
                   If (*ACCEPT) is inside capturing parentheses, the data so far  is  cap-
         
     | 
| 
       7332 
7337 
     | 
    
         
             
                   tured. For example:
         
     | 
| 
       7333 
7338 
     | 
    
         | 
| 
       7334 
7339 
     | 
    
         
             
                     A((?:A|B(*ACCEPT)|C)D)
         
     | 
| 
       7335 
7340 
     | 
    
         | 
| 
       7336 
     | 
    
         
            -
                   This 
     | 
| 
      
 7341 
     | 
    
         
            +
                   This  matches  "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
         
     | 
| 
       7337 
7342 
     | 
    
         
             
                   tured by the outer parentheses.
         
     | 
| 
       7338 
7343 
     | 
    
         | 
| 
       7339 
7344 
     | 
    
         
             
                     (*FAIL) or (*F)
         
     | 
| 
       7340 
7345 
     | 
    
         | 
| 
       7341 
     | 
    
         
            -
                   This 
     | 
| 
       7342 
     | 
    
         
            -
                   is 
     | 
| 
       7343 
     | 
    
         
            -
                   that 
     | 
| 
       7344 
     | 
    
         
            -
                   Those 
     | 
| 
       7345 
     | 
    
         
            -
                   nearest 
     | 
| 
      
 7346 
     | 
    
         
            +
                   This verb causes a matching failure, forcing backtracking to occur.  It
         
     | 
| 
      
 7347 
     | 
    
         
            +
                   is  equivalent to (?!) but easier to read. The Perl documentation notes
         
     | 
| 
      
 7348 
     | 
    
         
            +
                   that it is probably useful only when combined  with  (?{})  or  (??{}).
         
     | 
| 
      
 7349 
     | 
    
         
            +
                   Those  are,  of course, Perl features that are not present in PCRE. The
         
     | 
| 
      
 7350 
     | 
    
         
            +
                   nearest equivalent is the callout feature, as for example in this  pat-
         
     | 
| 
       7346 
7351 
     | 
    
         
             
                   tern:
         
     | 
| 
       7347 
7352 
     | 
    
         | 
| 
       7348 
7353 
     | 
    
         
             
                     a+(?C)(*FAIL)
         
     | 
| 
       7349 
7354 
     | 
    
         | 
| 
       7350 
     | 
    
         
            -
                   A 
     | 
| 
      
 7355 
     | 
    
         
            +
                   A  match  with the string "aaaa" always fails, but the callout is taken
         
     | 
| 
       7351 
7356 
     | 
    
         
             
                   before each backtrack happens (in this example, 10 times).
         
     | 
| 
       7352 
7357 
     | 
    
         | 
| 
       7353 
7358 
     | 
    
         
             
               Recording which path was taken
         
     | 
| 
       7354 
7359 
     | 
    
         | 
| 
       7355 
     | 
    
         
            -
                   There 
     | 
| 
       7356 
     | 
    
         
            -
                   arrived 
     | 
| 
      
 7360 
     | 
    
         
            +
                   There is one verb whose main purpose  is  to  track  how  a  match  was
         
     | 
| 
      
 7361 
     | 
    
         
            +
                   arrived  at,  though  it  also  has a secondary use in conjunction with
         
     | 
| 
       7357 
7362 
     | 
    
         
             
                   advancing the match starting point (see (*SKIP) below).
         
     | 
| 
       7358 
7363 
     | 
    
         | 
| 
       7359 
7364 
     | 
    
         
             
                     (*MARK:NAME) or (*:NAME)
         
     | 
| 
       7360 
7365 
     | 
    
         | 
| 
       7361 
     | 
    
         
            -
                   A 
     | 
| 
       7362 
     | 
    
         
            -
                   instances 
     | 
| 
      
 7366 
     | 
    
         
            +
                   A name is always  required  with  this  verb.  There  may  be  as  many
         
     | 
| 
      
 7367 
     | 
    
         
            +
                   instances  of  (*MARK) as you like in a pattern, and their names do not
         
     | 
| 
       7363 
7368 
     | 
    
         
             
                   have to be unique.
         
     | 
| 
       7364 
7369 
     | 
    
         | 
| 
       7365 
     | 
    
         
            -
                   When 
     | 
| 
       7366 
     | 
    
         
            -
                   (*PRUNE:NAME), 
     | 
| 
       7367 
     | 
    
         
            -
                   the 
     | 
| 
       7368 
     | 
    
         
            -
                   pcre_exec()" 
     | 
| 
       7369 
     | 
    
         
            -
                   pcretest 
     | 
| 
      
 7370 
     | 
    
         
            +
                   When a match succeeds, the name of the  last-encountered  (*MARK:NAME),
         
     | 
| 
      
 7371 
     | 
    
         
            +
                   (*PRUNE:NAME),  or  (*THEN:NAME) on the matching path is passed back to
         
     | 
| 
      
 7372 
     | 
    
         
            +
                   the caller as  described  in  the  section  entitled  "Extra  data  for
         
     | 
| 
      
 7373 
     | 
    
         
            +
                   pcre_exec()"  in  the  pcreapi  documentation.  Here  is  an example of
         
     | 
| 
      
 7374 
     | 
    
         
            +
                   pcretest output, where the /K modifier requests the retrieval and  out-
         
     | 
| 
       7370 
7375 
     | 
    
         
             
                   putting of (*MARK) data:
         
     | 
| 
       7371 
7376 
     | 
    
         | 
| 
       7372 
7377 
     | 
    
         
             
                       re> /X(*MARK:A)Y|X(*MARK:B)Z/K
         
     | 
| 
         @@ -7378,73 +7383,73 @@ BACKTRACKING CONTROL 
     | 
|
| 
       7378 
7383 
     | 
    
         
             
                     MK: B
         
     | 
| 
       7379 
7384 
     | 
    
         | 
| 
       7380 
7385 
     | 
    
         
             
                   The (*MARK) name is tagged with "MK:" in this output, and in this exam-
         
     | 
| 
       7381 
     | 
    
         
            -
                   ple 
     | 
| 
       7382 
     | 
    
         
            -
                   efficient 
     | 
| 
      
 7386 
     | 
    
         
            +
                   ple it indicates which of the two alternatives matched. This is a  more
         
     | 
| 
      
 7387 
     | 
    
         
            +
                   efficient  way of obtaining this information than putting each alterna-
         
     | 
| 
       7383 
7388 
     | 
    
         
             
                   tive in its own capturing parentheses.
         
     | 
| 
       7384 
7389 
     | 
    
         | 
| 
       7385 
     | 
    
         
            -
                   If 
     | 
| 
       7386 
     | 
    
         
            -
                   true, 
     | 
| 
      
 7390 
     | 
    
         
            +
                   If a verb with a name is encountered in a positive  assertion  that  is
         
     | 
| 
      
 7391 
     | 
    
         
            +
                   true,  the  name  is recorded and passed back if it is the last-encoun-
         
     | 
| 
       7387 
7392 
     | 
    
         
             
                   tered. This does not happen for negative assertions or failing positive
         
     | 
| 
       7388 
7393 
     | 
    
         
             
                   assertions.
         
     | 
| 
       7389 
7394 
     | 
    
         | 
| 
       7390 
     | 
    
         
            -
                   After 
     | 
| 
      
 7395 
     | 
    
         
            +
                   After  a  partial match or a failed match, the last encountered name in
         
     | 
| 
       7391 
7396 
     | 
    
         
             
                   the entire match process is returned. For example:
         
     | 
| 
       7392 
7397 
     | 
    
         | 
| 
       7393 
7398 
     | 
    
         
             
                       re> /X(*MARK:A)Y|X(*MARK:B)Z/K
         
     | 
| 
       7394 
7399 
     | 
    
         
             
                     data> XP
         
     | 
| 
       7395 
7400 
     | 
    
         
             
                     No match, mark = B
         
     | 
| 
       7396 
7401 
     | 
    
         | 
| 
       7397 
     | 
    
         
            -
                   Note 
     | 
| 
      
 7402 
     | 
    
         
            +
                   Note that in this unanchored example the  mark  is  retained  from  the
         
     | 
| 
       7398 
7403 
     | 
    
         
             
                   match attempt that started at the letter "X" in the subject. Subsequent
         
     | 
| 
       7399 
7404 
     | 
    
         
             
                   match attempts starting at "P" and then with an empty string do not get
         
     | 
| 
       7400 
7405 
     | 
    
         
             
                   as far as the (*MARK) item, but nevertheless do not reset it.
         
     | 
| 
       7401 
7406 
     | 
    
         | 
| 
       7402 
     | 
    
         
            -
                   If 
     | 
| 
       7403 
     | 
    
         
            -
                   should 
     | 
| 
      
 7407 
     | 
    
         
            +
                   If  you  are  interested  in  (*MARK)  values after failed matches, you
         
     | 
| 
      
 7408 
     | 
    
         
            +
                   should probably set the PCRE_NO_START_OPTIMIZE option  (see  above)  to
         
     | 
| 
       7404 
7409 
     | 
    
         
             
                   ensure that the match is always attempted.
         
     | 
| 
       7405 
7410 
     | 
    
         | 
| 
       7406 
7411 
     | 
    
         
             
               Verbs that act after backtracking
         
     | 
| 
       7407 
7412 
     | 
    
         | 
| 
       7408 
7413 
     | 
    
         
             
                   The following verbs do nothing when they are encountered. Matching con-
         
     | 
| 
       7409 
     | 
    
         
            -
                   tinues 
     | 
| 
       7410 
     | 
    
         
            -
                   a 
     | 
| 
       7411 
     | 
    
         
            -
                   cannot 
     | 
| 
      
 7414 
     | 
    
         
            +
                   tinues with what follows, but if there is no subsequent match,  causing
         
     | 
| 
      
 7415 
     | 
    
         
            +
                   a  backtrack  to  the  verb, a failure is forced. That is, backtracking
         
     | 
| 
      
 7416 
     | 
    
         
            +
                   cannot pass to the left of the verb. However, when one of  these  verbs
         
     | 
| 
       7412 
7417 
     | 
    
         
             
                   appears inside an atomic group or an assertion that is true, its effect
         
     | 
| 
       7413 
     | 
    
         
            -
                   is 
     | 
| 
       7414 
     | 
    
         
            -
                   there 
     | 
| 
       7415 
     | 
    
         
            -
                   ing 
     | 
| 
       7416 
     | 
    
         
            -
                   tion. 
     | 
| 
      
 7418 
     | 
    
         
            +
                   is confined to that group, because once the  group  has  been  matched,
         
     | 
| 
      
 7419 
     | 
    
         
            +
                   there  is never any backtracking into it. In this situation, backtrack-
         
     | 
| 
      
 7420 
     | 
    
         
            +
                   ing can "jump back" to the left of the entire atomic  group  or  asser-
         
     | 
| 
      
 7421 
     | 
    
         
            +
                   tion.  (Remember  also,  as  stated  above, that this localization also
         
     | 
| 
       7417 
7422 
     | 
    
         
             
                   applies in subroutine calls.)
         
     | 
| 
       7418 
7423 
     | 
    
         | 
| 
       7419 
     | 
    
         
            -
                   These 
     | 
| 
       7420 
     | 
    
         
            -
                   tracking 
     | 
| 
       7421 
     | 
    
         
            -
                   when 
     | 
| 
      
 7424 
     | 
    
         
            +
                   These verbs differ in exactly what kind of failure  occurs  when  back-
         
     | 
| 
      
 7425 
     | 
    
         
            +
                   tracking  reaches  them.  The behaviour described below is what happens
         
     | 
| 
      
 7426 
     | 
    
         
            +
                   when the verb is not in a subroutine or an assertion.  Subsequent  sec-
         
     | 
| 
       7422 
7427 
     | 
    
         
             
                   tions cover these special cases.
         
     | 
| 
       7423 
7428 
     | 
    
         | 
| 
       7424 
7429 
     | 
    
         
             
                     (*COMMIT)
         
     | 
| 
       7425 
7430 
     | 
    
         | 
| 
       7426 
     | 
    
         
            -
                   This 
     | 
| 
      
 7431 
     | 
    
         
            +
                   This  verb, which may not be followed by a name, causes the whole match
         
     | 
| 
       7427 
7432 
     | 
    
         
             
                   to fail outright if there is a later matching failure that causes back-
         
     | 
| 
       7428 
     | 
    
         
            -
                   tracking 
     | 
| 
      
 7433 
     | 
    
         
            +
                   tracking  to  reach  it.  Even if the pattern is unanchored, no further
         
     | 
| 
       7429 
7434 
     | 
    
         
             
                   attempts to find a match by advancing the starting point take place. If
         
     | 
| 
       7430 
     | 
    
         
            -
                   (*COMMIT) 
     | 
| 
      
 7435 
     | 
    
         
            +
                   (*COMMIT)  is  the  only backtracking verb that is encountered, once it
         
     | 
| 
       7431 
7436 
     | 
    
         
             
                   has been passed pcre_exec() is committed to finding a match at the cur-
         
     | 
| 
       7432 
7437 
     | 
    
         
             
                   rent starting point, or not at all. For example:
         
     | 
| 
       7433 
7438 
     | 
    
         | 
| 
       7434 
7439 
     | 
    
         
             
                     a+(*COMMIT)b
         
     | 
| 
       7435 
7440 
     | 
    
         | 
| 
       7436 
     | 
    
         
            -
                   This 
     | 
| 
      
 7441 
     | 
    
         
            +
                   This  matches  "xxaab" but not "aacaab". It can be thought of as a kind
         
     | 
| 
       7437 
7442 
     | 
    
         
             
                   of dynamic anchor, or "I've started, so I must finish." The name of the
         
     | 
| 
       7438 
     | 
    
         
            -
                   most 
     | 
| 
      
 7443 
     | 
    
         
            +
                   most  recently passed (*MARK) in the path is passed back when (*COMMIT)
         
     | 
| 
       7439 
7444 
     | 
    
         
             
                   forces a match failure.
         
     | 
| 
       7440 
7445 
     | 
    
         | 
| 
       7441 
     | 
    
         
            -
                   If 
     | 
| 
       7442 
     | 
    
         
            -
                   one 
     | 
| 
      
 7446 
     | 
    
         
            +
                   If there is more than one backtracking verb in a pattern,  a  different
         
     | 
| 
      
 7447 
     | 
    
         
            +
                   one  that  follows  (*COMMIT) may be triggered first, so merely passing
         
     | 
| 
       7443 
7448 
     | 
    
         
             
                   (*COMMIT) during a match does not always guarantee that a match must be
         
     | 
| 
       7444 
7449 
     | 
    
         
             
                   at this starting point.
         
     | 
| 
       7445 
7450 
     | 
    
         | 
| 
       7446 
     | 
    
         
            -
                   Note 
     | 
| 
       7447 
     | 
    
         
            -
                   anchor, 
     | 
| 
      
 7451 
     | 
    
         
            +
                   Note  that  (*COMMIT)  at  the start of a pattern is not the same as an
         
     | 
| 
      
 7452 
     | 
    
         
            +
                   anchor, unless PCRE's start-of-match optimizations are turned  off,  as
         
     | 
| 
       7448 
7453 
     | 
    
         
             
                   shown in this output from pcretest:
         
     | 
| 
       7449 
7454 
     | 
    
         | 
| 
       7450 
7455 
     | 
    
         
             
                       re> /(*COMMIT)abc/
         
     | 
| 
         @@ -7455,207 +7460,207 @@ BACKTRACKING CONTROL 
     | 
|
| 
       7455 
7460 
     | 
    
         | 
| 
       7456 
7461 
     | 
    
         
             
                   For this pattern, PCRE knows that any match must start with "a", so the
         
     | 
| 
       7457 
7462 
     | 
    
         
             
                   optimization skips along the subject to "a" before applying the pattern
         
     | 
| 
       7458 
     | 
    
         
            -
                   to 
     | 
| 
       7459 
     | 
    
         
            -
                   ond 
     | 
| 
       7460 
     | 
    
         
            -
                   program. 
     | 
| 
      
 7463 
     | 
    
         
            +
                   to  the first set of data. The match attempt then succeeds. In the sec-
         
     | 
| 
      
 7464 
     | 
    
         
            +
                   ond set of data, the escape sequence \Y is interpreted by the  pcretest
         
     | 
| 
      
 7465 
     | 
    
         
            +
                   program.  It  causes  the  PCRE_NO_START_OPTIMIZE option to be set when
         
     | 
| 
       7461 
7466 
     | 
    
         
             
                   pcre_exec() is called.  This disables the optimization that skips along
         
     | 
| 
       7462 
7467 
     | 
    
         
             
                   to the first character. The pattern is now applied starting at "x", and
         
     | 
| 
       7463 
     | 
    
         
            -
                   so 
     | 
| 
      
 7468 
     | 
    
         
            +
                   so the (*COMMIT) causes the match to  fail  without  trying  any  other
         
     | 
| 
       7464 
7469 
     | 
    
         
             
                   starting points.
         
     | 
| 
       7465 
7470 
     | 
    
         | 
| 
       7466 
7471 
     | 
    
         
             
                     (*PRUNE) or (*PRUNE:NAME)
         
     | 
| 
       7467 
7472 
     | 
    
         | 
| 
       7468 
     | 
    
         
            -
                   This 
     | 
| 
      
 7473 
     | 
    
         
            +
                   This  verb causes the match to fail at the current starting position in
         
     | 
| 
       7469 
7474 
     | 
    
         
             
                   the subject if there is a later matching failure that causes backtrack-
         
     | 
| 
       7470 
     | 
    
         
            -
                   ing 
     | 
| 
       7471 
     | 
    
         
            -
                   advance 
     | 
| 
       7472 
     | 
    
         
            -
                   occur 
     | 
| 
       7473 
     | 
    
         
            -
                   matching 
     | 
| 
       7474 
     | 
    
         
            -
                   right, 
     | 
| 
       7475 
     | 
    
         
            -
                   (*PRUNE) 
     | 
| 
      
 7475 
     | 
    
         
            +
                   ing  to  reach it. If the pattern is unanchored, the normal "bumpalong"
         
     | 
| 
      
 7476 
     | 
    
         
            +
                   advance to the next starting character then happens.  Backtracking  can
         
     | 
| 
      
 7477 
     | 
    
         
            +
                   occur  as  usual to the left of (*PRUNE), before it is reached, or when
         
     | 
| 
      
 7478 
     | 
    
         
            +
                   matching to the right of (*PRUNE), but if there  is  no  match  to  the
         
     | 
| 
      
 7479 
     | 
    
         
            +
                   right,  backtracking cannot cross (*PRUNE). In simple cases, the use of
         
     | 
| 
      
 7480 
     | 
    
         
            +
                   (*PRUNE) is just an alternative to an atomic group or possessive  quan-
         
     | 
| 
       7476 
7481 
     | 
    
         
             
                   tifier, but there are some uses of (*PRUNE) that cannot be expressed in
         
     | 
| 
       7477 
     | 
    
         
            -
                   any 
     | 
| 
      
 7482 
     | 
    
         
            +
                   any other way. In an anchored pattern (*PRUNE) has the same  effect  as
         
     | 
| 
       7478 
7483 
     | 
    
         
             
                   (*COMMIT).
         
     | 
| 
       7479 
7484 
     | 
    
         | 
| 
       7480 
7485 
     | 
    
         
             
                   The   behaviour   of   (*PRUNE:NAME)   is   not   the   same   as
         
     | 
| 
       7481 
     | 
    
         
            -
                   (*MARK:NAME)(*PRUNE). 
     | 
| 
       7482 
     | 
    
         
            -
                   remembered 
     | 
| 
      
 7486 
     | 
    
         
            +
                   (*MARK:NAME)(*PRUNE).  It is like (*MARK:NAME)  in  that  the  name  is
         
     | 
| 
      
 7487 
     | 
    
         
            +
                   remembered  for  passing  back  to  the  caller.  However, (*SKIP:NAME)
         
     | 
| 
       7483 
7488 
     | 
    
         
             
                   searches only for names set with (*MARK).
         
     | 
| 
       7484 
7489 
     | 
    
         | 
| 
       7485 
7490 
     | 
    
         
             
                     (*SKIP)
         
     | 
| 
       7486 
7491 
     | 
    
         | 
| 
       7487 
     | 
    
         
            -
                   This 
     | 
| 
       7488 
     | 
    
         
            -
                   the 
     | 
| 
      
 7492 
     | 
    
         
            +
                   This verb, when given without a name, is like (*PRUNE), except that  if
         
     | 
| 
      
 7493 
     | 
    
         
            +
                   the  pattern  is unanchored, the "bumpalong" advance is not to the next
         
     | 
| 
       7489 
7494 
     | 
    
         
             
                   character, but to the position in the subject where (*SKIP) was encoun-
         
     | 
| 
       7490 
     | 
    
         
            -
                   tered. 
     | 
| 
      
 7495 
     | 
    
         
            +
                   tered.  (*SKIP)  signifies that whatever text was matched leading up to
         
     | 
| 
       7491 
7496 
     | 
    
         
             
                   it cannot be part of a successful match. Consider:
         
     | 
| 
       7492 
7497 
     | 
    
         | 
| 
       7493 
7498 
     | 
    
         
             
                     a+(*SKIP)b
         
     | 
| 
       7494 
7499 
     | 
    
         | 
| 
       7495 
     | 
    
         
            -
                   If 
     | 
| 
       7496 
     | 
    
         
            -
                   (starting 
     | 
| 
      
 7500 
     | 
    
         
            +
                   If the subject is "aaaac...",  after  the  first  match  attempt  fails
         
     | 
| 
      
 7501 
     | 
    
         
            +
                   (starting  at  the  first  character in the string), the starting point
         
     | 
| 
       7497 
7502 
     | 
    
         
             
                   skips on to start the next attempt at "c". Note that a possessive quan-
         
     | 
| 
       7498 
     | 
    
         
            -
                   tifer 
     | 
| 
       7499 
     | 
    
         
            -
                   suppress 
     | 
| 
       7500 
     | 
    
         
            -
                   attempt 
     | 
| 
      
 7503 
     | 
    
         
            +
                   tifier does not have the same effect as this example; although it would
         
     | 
| 
      
 7504 
     | 
    
         
            +
                   suppress backtracking  during  the  first  match  attempt,  the  second
         
     | 
| 
      
 7505 
     | 
    
         
            +
                   attempt  would  start at the second character instead of skipping on to
         
     | 
| 
       7501 
7506 
     | 
    
         
             
                   "c".
         
     | 
| 
       7502 
7507 
     | 
    
         | 
| 
       7503 
7508 
     | 
    
         
             
                     (*SKIP:NAME)
         
     | 
| 
       7504 
7509 
     | 
    
         | 
| 
       7505 
7510 
     | 
    
         
             
                   When (*SKIP) has an associated name, its behaviour is modified. When it
         
     | 
| 
       7506 
7511 
     | 
    
         
             
                   is triggered, the previous path through the pattern is searched for the
         
     | 
| 
       7507 
     | 
    
         
            -
                   most 
     | 
| 
      
 7512 
     | 
    
         
            +
                   most recent (*MARK) that has the  same  name.  If  one  is  found,  the
         
     | 
| 
       7508 
7513 
     | 
    
         
             
                   "bumpalong" advance is to the subject position that corresponds to that
         
     | 
| 
       7509 
7514 
     | 
    
         
             
                   (*MARK) instead of to where (*SKIP) was encountered. If no (*MARK) with
         
     | 
| 
       7510 
7515 
     | 
    
         
             
                   a matching name is found, the (*SKIP) is ignored.
         
     | 
| 
       7511 
7516 
     | 
    
         | 
| 
       7512 
     | 
    
         
            -
                   Note 
     | 
| 
      
 7517 
     | 
    
         
            +
                   Note  that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
         
     | 
| 
       7513 
7518 
     | 
    
         
             
                   ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME).
         
     | 
| 
       7514 
7519 
     | 
    
         | 
| 
       7515 
7520 
     | 
    
         
             
                     (*THEN) or (*THEN:NAME)
         
     | 
| 
       7516 
7521 
     | 
    
         | 
| 
       7517 
     | 
    
         
            -
                   This 
     | 
| 
       7518 
     | 
    
         
            -
                   tracking 
     | 
| 
       7519 
     | 
    
         
            -
                   within 
     | 
| 
      
 7522 
     | 
    
         
            +
                   This verb causes a skip to the next innermost  alternative  when  back-
         
     | 
| 
      
 7523 
     | 
    
         
            +
                   tracking  reaches  it.  That  is,  it  cancels any further backtracking
         
     | 
| 
      
 7524 
     | 
    
         
            +
                   within the current alternative. Its name  comes  from  the  observation
         
     | 
| 
       7520 
7525 
     | 
    
         
             
                   that it can be used for a pattern-based if-then-else block:
         
     | 
| 
       7521 
7526 
     | 
    
         | 
| 
       7522 
7527 
     | 
    
         
             
                     ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
         
     | 
| 
       7523 
7528 
     | 
    
         | 
| 
       7524 
     | 
    
         
            -
                   If 
     | 
| 
       7525 
     | 
    
         
            -
                   after 
     | 
| 
       7526 
     | 
    
         
            -
                   skips 
     | 
| 
       7527 
     | 
    
         
            -
                   into 
     | 
| 
       7528 
     | 
    
         
            -
                   quently 
     | 
| 
       7529 
     | 
    
         
            -
                   track 
     | 
| 
      
 7529 
     | 
    
         
            +
                   If  the COND1 pattern matches, FOO is tried (and possibly further items
         
     | 
| 
      
 7530 
     | 
    
         
            +
                   after the end of the group if FOO succeeds); on  failure,  the  matcher
         
     | 
| 
      
 7531 
     | 
    
         
            +
                   skips  to  the second alternative and tries COND2, without backtracking
         
     | 
| 
      
 7532 
     | 
    
         
            +
                   into COND1. If that succeeds and BAR fails, COND3 is tried.  If  subse-
         
     | 
| 
      
 7533 
     | 
    
         
            +
                   quently  BAZ fails, there are no more alternatives, so there is a back-
         
     | 
| 
      
 7534 
     | 
    
         
            +
                   track to whatever came before the  entire  group.  If  (*THEN)  is  not
         
     | 
| 
       7530 
7535 
     | 
    
         
             
                   inside an alternation, it acts like (*PRUNE).
         
     | 
| 
       7531 
7536 
     | 
    
         | 
| 
       7532 
     | 
    
         
            -
                   The 
     | 
| 
       7533 
     | 
    
         
            -
                   (*MARK:NAME)(*THEN). 
     | 
| 
       7534 
     | 
    
         
            -
                   remembered 
     | 
| 
      
 7537 
     | 
    
         
            +
                   The    behaviour   of   (*THEN:NAME)   is   not   the   same   as
         
     | 
| 
      
 7538 
     | 
    
         
            +
                   (*MARK:NAME)(*THEN).  It is like  (*MARK:NAME)  in  that  the  name  is
         
     | 
| 
      
 7539 
     | 
    
         
            +
                   remembered  for  passing  back  to  the  caller.  However, (*SKIP:NAME)
         
     | 
| 
       7535 
7540 
     | 
    
         
             
                   searches only for names set with (*MARK).
         
     | 
| 
       7536 
7541 
     | 
    
         | 
| 
       7537 
     | 
    
         
            -
                   A 
     | 
| 
       7538 
     | 
    
         
            -
                   enclosing 
     | 
| 
       7539 
     | 
    
         
            -
                   alternative. 
     | 
| 
       7540 
     | 
    
         
            -
                   the 
     | 
| 
       7541 
     | 
    
         
            -
                   complex 
     | 
| 
      
 7542 
     | 
    
         
            +
                   A subpattern that does not contain a | character is just a part of  the
         
     | 
| 
      
 7543 
     | 
    
         
            +
                   enclosing  alternative;  it  is  not a nested alternation with only one
         
     | 
| 
      
 7544 
     | 
    
         
            +
                   alternative. The effect of (*THEN) extends beyond such a subpattern  to
         
     | 
| 
      
 7545 
     | 
    
         
            +
                   the  enclosing alternative. Consider this pattern, where A, B, etc. are
         
     | 
| 
      
 7546 
     | 
    
         
            +
                   complex pattern fragments that do not contain any | characters at  this
         
     | 
| 
       7542 
7547 
     | 
    
         
             
                   level:
         
     | 
| 
       7543 
7548 
     | 
    
         | 
| 
       7544 
7549 
     | 
    
         
             
                     A (B(*THEN)C) | D
         
     | 
| 
       7545 
7550 
     | 
    
         | 
| 
       7546 
     | 
    
         
            -
                   If 
     | 
| 
      
 7551 
     | 
    
         
            +
                   If  A and B are matched, but there is a failure in C, matching does not
         
     | 
| 
       7547 
7552 
     | 
    
         
             
                   backtrack into A; instead it moves to the next alternative, that is, D.
         
     | 
| 
       7548 
     | 
    
         
            -
                   However, 
     | 
| 
      
 7553 
     | 
    
         
            +
                   However,  if the subpattern containing (*THEN) is given an alternative,
         
     | 
| 
       7549 
7554 
     | 
    
         
             
                   it behaves differently:
         
     | 
| 
       7550 
7555 
     | 
    
         | 
| 
       7551 
7556 
     | 
    
         
             
                     A (B(*THEN)C | (*FAIL)) | D
         
     | 
| 
       7552 
7557 
     | 
    
         | 
| 
       7553 
     | 
    
         
            -
                   The 
     | 
| 
      
 7558 
     | 
    
         
            +
                   The effect of (*THEN) is now confined to the inner subpattern. After  a
         
     | 
| 
       7554 
7559 
     | 
    
         
             
                   failure in C, matching moves to (*FAIL), which causes the whole subpat-
         
     | 
| 
       7555 
     | 
    
         
            -
                   tern 
     | 
| 
      
 7560 
     | 
    
         
            +
                   tern to fail because there are no more alternatives  to  try.  In  this
         
     | 
| 
       7556 
7561 
     | 
    
         
             
                   case, matching does now backtrack into A.
         
     | 
| 
       7557 
7562 
     | 
    
         | 
| 
       7558 
     | 
    
         
            -
                   Note 
     | 
| 
       7559 
     | 
    
         
            -
                   alternatives, 
     | 
| 
      
 7563 
     | 
    
         
            +
                   Note  that  a  conditional  subpattern  is not considered as having two
         
     | 
| 
      
 7564 
     | 
    
         
            +
                   alternatives, because only one is ever used.  In  other  words,  the  |
         
     | 
| 
       7560 
7565 
     | 
    
         
             
                   character in a conditional subpattern has a different meaning. Ignoring
         
     | 
| 
       7561 
7566 
     | 
    
         
             
                   white space, consider:
         
     | 
| 
       7562 
7567 
     | 
    
         | 
| 
       7563 
7568 
     | 
    
         
             
                     ^.*? (?(?=a) a | b(*THEN)c )
         
     | 
| 
       7564 
7569 
     | 
    
         | 
| 
       7565 
     | 
    
         
            -
                   If 
     | 
| 
       7566 
     | 
    
         
            -
                   ungreedy, 
     | 
| 
       7567 
     | 
    
         
            -
                   then 
     | 
| 
       7568 
     | 
    
         
            -
                   point, 
     | 
| 
       7569 
     | 
    
         
            -
                   from 
     | 
| 
      
 7570 
     | 
    
         
            +
                   If the subject is "ba", this pattern does not  match.  Because  .*?  is
         
     | 
| 
      
 7571 
     | 
    
         
            +
                   ungreedy,  it  initially  matches  zero characters. The condition (?=a)
         
     | 
| 
      
 7572 
     | 
    
         
            +
                   then fails, the character "b" is matched,  but  "c"  is  not.  At  this
         
     | 
| 
      
 7573 
     | 
    
         
            +
                   point,  matching does not backtrack to .*? as might perhaps be expected
         
     | 
| 
      
 7574 
     | 
    
         
            +
                   from the presence of the | character.  The  conditional  subpattern  is
         
     | 
| 
       7570 
7575 
     | 
    
         
             
                   part of the single alternative that comprises the whole pattern, and so
         
     | 
| 
       7571 
     | 
    
         
            -
                   the 
     | 
| 
      
 7576 
     | 
    
         
            +
                   the match fails. (If there was a backtrack into  .*?,  allowing  it  to
         
     | 
| 
       7572 
7577 
     | 
    
         
             
                   match "b", the match would succeed.)
         
     | 
| 
       7573 
7578 
     | 
    
         | 
| 
       7574 
     | 
    
         
            -
                   The 
     | 
| 
      
 7579 
     | 
    
         
            +
                   The  verbs just described provide four different "strengths" of control
         
     | 
| 
       7575 
7580 
     | 
    
         
             
                   when subsequent matching fails. (*THEN) is the weakest, carrying on the
         
     | 
| 
       7576 
     | 
    
         
            -
                   match 
     | 
| 
       7577 
     | 
    
         
            -
                   at 
     | 
| 
       7578 
     | 
    
         
            -
                   character 
     | 
| 
      
 7581 
     | 
    
         
            +
                   match  at  the next alternative. (*PRUNE) comes next, failing the match
         
     | 
| 
      
 7582 
     | 
    
         
            +
                   at the current starting position, but allowing an advance to  the  next
         
     | 
| 
      
 7583 
     | 
    
         
            +
                   character  (for an unanchored pattern). (*SKIP) is similar, except that
         
     | 
| 
       7579 
7584 
     | 
    
         
             
                   the advance may be more than one character. (*COMMIT) is the strongest,
         
     | 
| 
       7580 
7585 
     | 
    
         
             
                   causing the entire match to fail.
         
     | 
| 
       7581 
7586 
     | 
    
         | 
| 
       7582 
7587 
     | 
    
         
             
               More than one backtracking verb
         
     | 
| 
       7583 
7588 
     | 
    
         | 
| 
       7584 
     | 
    
         
            -
                   If 
     | 
| 
       7585 
     | 
    
         
            -
                   that 
     | 
| 
      
 7589 
     | 
    
         
            +
                   If  more  than  one  backtracking verb is present in a pattern, the one
         
     | 
| 
      
 7590 
     | 
    
         
            +
                   that is backtracked onto first acts. For example,  consider  this  pat-
         
     | 
| 
       7586 
7591 
     | 
    
         
             
                   tern, where A, B, etc. are complex pattern fragments:
         
     | 
| 
       7587 
7592 
     | 
    
         | 
| 
       7588 
7593 
     | 
    
         
             
                     (A(*COMMIT)B(*THEN)C|ABD)
         
     | 
| 
       7589 
7594 
     | 
    
         | 
| 
       7590 
     | 
    
         
            -
                   If 
     | 
| 
      
 7595 
     | 
    
         
            +
                   If  A matches but B fails, the backtrack to (*COMMIT) causes the entire
         
     | 
| 
       7591 
7596 
     | 
    
         
             
                   match to fail. However, if A and B match, but C fails, the backtrack to
         
     | 
| 
       7592 
     | 
    
         
            -
                   (*THEN) 
     | 
| 
       7593 
     | 
    
         
            -
                   is 
     | 
| 
       7594 
     | 
    
         
            -
                   two 
     | 
| 
      
 7597 
     | 
    
         
            +
                   (*THEN)  causes  the next alternative (ABD) to be tried. This behaviour
         
     | 
| 
      
 7598 
     | 
    
         
            +
                   is consistent, but is not always the same as Perl's. It means  that  if
         
     | 
| 
      
 7599 
     | 
    
         
            +
                   two  or  more backtracking verbs appear in succession, all but the last
         
     | 
| 
       7595 
7600 
     | 
    
         
             
                   of them has no effect. Consider this example:
         
     | 
| 
       7596 
7601 
     | 
    
         | 
| 
       7597 
7602 
     | 
    
         
             
                     ...(*COMMIT)(*PRUNE)...
         
     | 
| 
       7598 
7603 
     | 
    
         | 
| 
       7599 
7604 
     | 
    
         
             
                   If there is a matching failure to the right, backtracking onto (*PRUNE)
         
     | 
| 
       7600 
     | 
    
         
            -
                   causes 
     | 
| 
      
 7605 
     | 
    
         
            +
                   causes  it to be triggered, and its action is taken. There can never be
         
     | 
| 
       7601 
7606 
     | 
    
         
             
                   a backtrack onto (*COMMIT).
         
     | 
| 
       7602 
7607 
     | 
    
         | 
| 
       7603 
7608 
     | 
    
         
             
               Backtracking verbs in repeated groups
         
     | 
| 
       7604 
7609 
     | 
    
         | 
| 
       7605 
     | 
    
         
            -
                   PCRE 
     | 
| 
      
 7610 
     | 
    
         
            +
                   PCRE differs from  Perl  in  its  handling  of  backtracking  verbs  in
         
     | 
| 
       7606 
7611 
     | 
    
         
             
                   repeated groups. For example, consider:
         
     | 
| 
       7607 
7612 
     | 
    
         | 
| 
       7608 
7613 
     | 
    
         
             
                     /(a(*COMMIT)b)+ac/
         
     | 
| 
       7609 
7614 
     | 
    
         | 
| 
       7610 
     | 
    
         
            -
                   If 
     | 
| 
      
 7615 
     | 
    
         
            +
                   If  the  subject  is  "abac",  Perl matches, but PCRE fails because the
         
     | 
| 
       7611 
7616 
     | 
    
         
             
                   (*COMMIT) in the second repeat of the group acts.
         
     | 
| 
       7612 
7617 
     | 
    
         | 
| 
       7613 
7618 
     | 
    
         
             
               Backtracking verbs in assertions
         
     | 
| 
       7614 
7619 
     | 
    
         | 
| 
       7615 
     | 
    
         
            -
                   (*FAIL) 
     | 
| 
      
 7620 
     | 
    
         
            +
                   (*FAIL) in an assertion has its normal effect: it forces  an  immediate
         
     | 
| 
       7616 
7621 
     | 
    
         
             
                   backtrack.
         
     | 
| 
       7617 
7622 
     | 
    
         | 
| 
       7618 
7623 
     | 
    
         
             
                   (*ACCEPT) in a positive assertion causes the assertion to succeed with-
         
     | 
| 
       7619 
     | 
    
         
            -
                   out 
     | 
| 
      
 7624 
     | 
    
         
            +
                   out any further processing. In a negative assertion,  (*ACCEPT)  causes
         
     | 
| 
       7620 
7625 
     | 
    
         
             
                   the assertion to fail without any further processing.
         
     | 
| 
       7621 
7626 
     | 
    
         | 
| 
       7622 
     | 
    
         
            -
                   The 
     | 
| 
       7623 
     | 
    
         
            -
                   in 
     | 
| 
       7624 
     | 
    
         
            -
                   alternative 
     | 
| 
      
 7627 
     | 
    
         
            +
                   The  other  backtracking verbs are not treated specially if they appear
         
     | 
| 
      
 7628 
     | 
    
         
            +
                   in a positive assertion. In  particular,  (*THEN)  skips  to  the  next
         
     | 
| 
      
 7629 
     | 
    
         
            +
                   alternative  in  the  innermost  enclosing group that has alternations,
         
     | 
| 
       7625 
7630 
     | 
    
         
             
                   whether or not this is within the assertion.
         
     | 
| 
       7626 
7631 
     | 
    
         | 
| 
       7627 
     | 
    
         
            -
                   Negative 
     | 
| 
       7628 
     | 
    
         
            -
                   changing 
     | 
| 
      
 7632 
     | 
    
         
            +
                   Negative assertions are, however, different, in order  to  ensure  that
         
     | 
| 
      
 7633 
     | 
    
         
            +
                   changing  a  positive  assertion  into a negative assertion changes its
         
     | 
| 
       7629 
7634 
     | 
    
         
             
                   result. Backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes a neg-
         
     | 
| 
       7630 
7635 
     | 
    
         
             
                   ative assertion to be true, without considering any further alternative
         
     | 
| 
       7631 
7636 
     | 
    
         
             
                   branches in the assertion.  Backtracking into (*THEN) causes it to skip
         
     | 
| 
       7632 
     | 
    
         
            -
                   to 
     | 
| 
       7633 
     | 
    
         
            -
                   haviour), 
     | 
| 
      
 7637 
     | 
    
         
            +
                   to  the next enclosing alternative within the assertion (the normal be-
         
     | 
| 
      
 7638 
     | 
    
         
            +
                   haviour), but if the assertion  does  not  have  such  an  alternative,
         
     | 
| 
       7634 
7639 
     | 
    
         
             
                   (*THEN) behaves like (*PRUNE).
         
     | 
| 
       7635 
7640 
     | 
    
         | 
| 
       7636 
7641 
     | 
    
         
             
               Backtracking verbs in subroutines
         
     | 
| 
       7637 
7642 
     | 
    
         | 
| 
       7638 
     | 
    
         
            -
                   These 
     | 
| 
      
 7643 
     | 
    
         
            +
                   These  behaviours  occur whether or not the subpattern is called recur-
         
     | 
| 
       7639 
7644 
     | 
    
         
             
                   sively.  Perl's treatment of subroutines is different in some cases.
         
     | 
| 
       7640 
7645 
     | 
    
         | 
| 
       7641 
     | 
    
         
            -
                   (*FAIL) 
     | 
| 
      
 7646 
     | 
    
         
            +
                   (*FAIL) in a subpattern called as a subroutine has its  normal  effect:
         
     | 
| 
       7642 
7647 
     | 
    
         
             
                   it forces an immediate backtrack.
         
     | 
| 
       7643 
7648 
     | 
    
         | 
| 
       7644 
     | 
    
         
            -
                   (*ACCEPT) 
     | 
| 
       7645 
     | 
    
         
            -
                   match 
     | 
| 
      
 7649 
     | 
    
         
            +
                   (*ACCEPT)  in a subpattern called as a subroutine causes the subroutine
         
     | 
| 
      
 7650 
     | 
    
         
            +
                   match to succeed without any further processing. Matching then  contin-
         
     | 
| 
       7646 
7651 
     | 
    
         
             
                   ues after the subroutine call.
         
     | 
| 
       7647 
7652 
     | 
    
         | 
| 
       7648 
7653 
     | 
    
         
             
                   (*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine
         
     | 
| 
       7649 
7654 
     | 
    
         
             
                   cause the subroutine match to fail.
         
     | 
| 
       7650 
7655 
     | 
    
         | 
| 
       7651 
     | 
    
         
            -
                   (*THEN) 
     | 
| 
       7652 
     | 
    
         
            -
                   within 
     | 
| 
      
 7656 
     | 
    
         
            +
                   (*THEN) skips to the next alternative in the innermost enclosing  group
         
     | 
| 
      
 7657 
     | 
    
         
            +
                   within  the subpattern that has alternatives. If there is no such group
         
     | 
| 
       7653 
7658 
     | 
    
         
             
                   within the subpattern, (*THEN) causes the subroutine match to fail.
         
     | 
| 
       7654 
7659 
     | 
    
         | 
| 
       7655 
7660 
     | 
    
         | 
| 
       7656 
7661 
     | 
    
         
             
            SEE ALSO
         
     | 
| 
       7657 
7662 
     | 
    
         | 
| 
       7658 
     | 
    
         
            -
                   pcreapi(3), 
     | 
| 
      
 7663 
     | 
    
         
            +
                   pcreapi(3), pcrecallout(3),  pcrematching(3),  pcresyntax(3),  pcre(3),
         
     | 
| 
       7659 
7664 
     | 
    
         
             
                   pcre16(3), pcre32(3).
         
     | 
| 
       7660 
7665 
     | 
    
         | 
| 
       7661 
7666 
     | 
    
         | 
| 
         @@ -7668,8 +7673,8 @@ AUTHOR 
     | 
|
| 
       7668 
7673 
     | 
    
         | 
| 
       7669 
7674 
     | 
    
         
             
            REVISION
         
     | 
| 
       7670 
7675 
     | 
    
         | 
| 
       7671 
     | 
    
         
            -
                   Last updated:  
     | 
| 
       7672 
     | 
    
         
            -
                   Copyright (c) 1997- 
     | 
| 
      
 7676 
     | 
    
         
            +
                   Last updated: 23 October 2016
         
     | 
| 
      
 7677 
     | 
    
         
            +
                   Copyright (c) 1997-2016 University of Cambridge.
         
     | 
| 
       7673 
7678 
     | 
    
         
             
            ------------------------------------------------------------------------------
         
     | 
| 
       7674 
7679 
     | 
    
         | 
| 
       7675 
7680 
     | 
    
         | 
| 
         @@ -8360,7 +8365,11 @@ AVAILABILITY OF JIT SUPPORT 
     | 
|
| 
       8360 
8365 
     | 
    
         
             
                   If  your program may sometimes be linked with versions of PCRE that are
         
     | 
| 
       8361 
8366 
     | 
    
         
             
                   older than 8.20, but you want to use JIT when it is available, you  can
         
     | 
| 
       8362 
8367 
     | 
    
         
             
                   test the values of PCRE_MAJOR and PCRE_MINOR, or the existence of a JIT
         
     | 
| 
       8363 
     | 
    
         
            -
                   macro such as PCRE_CONFIG_JIT, for compile-time control of 
     | 
| 
      
 8368 
     | 
    
         
            +
                   macro such as PCRE_CONFIG_JIT, for compile-time control of  your  code.
         
     | 
| 
      
 8369 
     | 
    
         
            +
                   Also  beware that the pcre_jit_exec() function was not available at all
         
     | 
| 
      
 8370 
     | 
    
         
            +
                   before 8.32, and may not be available at all  if  PCRE  isn't  compiled
         
     | 
| 
      
 8371 
     | 
    
         
            +
                   with  --enable-jit.  See  the  "JIT  FAST  PATH  API" section below for
         
     | 
| 
      
 8372 
     | 
    
         
            +
                   details.
         
     | 
| 
       8364 
8373 
     | 
    
         | 
| 
       8365 
8374 
     | 
    
         | 
| 
       8366 
8375 
     | 
    
         
             
            SIMPLE USE OF JIT
         
     | 
| 
         @@ -8402,6 +8411,18 @@ SIMPLE USE OF JIT 
     | 
|
| 
       8402 
8411 
     | 
    
         
             
                     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
         
     | 
| 
       8403 
8412 
     | 
    
         
             
                     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
         
     | 
| 
       8404 
8413 
     | 
    
         | 
| 
      
 8414 
     | 
    
         
            +
                   If using pcre_jit_exec() and supporting a pre-8.32 version of PCRE, you
         
     | 
| 
      
 8415 
     | 
    
         
            +
                   can insert:
         
     | 
| 
      
 8416 
     | 
    
         
            +
             
     | 
| 
      
 8417 
     | 
    
         
            +
                      #if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
         
     | 
| 
      
 8418 
     | 
    
         
            +
                      pcre_jit_exec(...);
         
     | 
| 
      
 8419 
     | 
    
         
            +
                      #else
         
     | 
| 
      
 8420 
     | 
    
         
            +
                      pcre_exec(...);
         
     | 
| 
      
 8421 
     | 
    
         
            +
                      #endif
         
     | 
| 
      
 8422 
     | 
    
         
            +
             
     | 
| 
      
 8423 
     | 
    
         
            +
                   but  as described in the "JIT FAST PATH API" section below this assumes
         
     | 
| 
      
 8424 
     | 
    
         
            +
                   version 8.32 and later are compiled with --enable-jit, which may break.
         
     | 
| 
      
 8425 
     | 
    
         
            +
             
     | 
| 
       8405 
8426 
     | 
    
         
             
                   The JIT compiler generates different optimized code  for  each  of  the
         
     | 
| 
       8406 
8427 
     | 
    
         
             
                   three  modes  (normal, soft partial, hard partial). When pcre_exec() is
         
     | 
| 
       8407 
8428 
     | 
    
         
             
                   called, the appropriate code is run if it is available. Otherwise,  the
         
     | 
| 
         @@ -8691,6 +8712,33 @@ JIT FAST PATH API 
     | 
|
| 
       8691 
8712 
     | 
    
         
             
                   Bypassing  the  sanity  checks  and  the  pcre_exec() wrapping can give
         
     | 
| 
       8692 
8713 
     | 
    
         
             
                   speedups of more than 10%.
         
     | 
| 
       8693 
8714 
     | 
    
         | 
| 
      
 8715 
     | 
    
         
            +
                   Note that the pcre_jit_exec() function is not available in versions  of
         
     | 
| 
      
 8716 
     | 
    
         
            +
                   PCRE  before  8.32  (released in November 2012). If you need to support
         
     | 
| 
      
 8717 
     | 
    
         
            +
                   versions that old you must either use the slower pcre_exec(), or switch
         
     | 
| 
      
 8718 
     | 
    
         
            +
                   between  the  two  codepaths  by  checking the values of PCRE_MAJOR and
         
     | 
| 
      
 8719 
     | 
    
         
            +
                   PCRE_MINOR.
         
     | 
| 
      
 8720 
     | 
    
         
            +
             
     | 
| 
      
 8721 
     | 
    
         
            +
                   Due to an unfortunate implementation oversight, even in  versions  8.32
         
     | 
| 
      
 8722 
     | 
    
         
            +
                   and  later  there will be no pcre_jit_exec() stub function defined when
         
     | 
| 
      
 8723 
     | 
    
         
            +
                   PCRE is compiled with --disable-jit, which is the default, and  there's
         
     | 
| 
      
 8724 
     | 
    
         
            +
                   no  way  to  detect  whether  PCRE was compiled with --enable-jit via a
         
     | 
| 
      
 8725 
     | 
    
         
            +
                   macro.
         
     | 
| 
      
 8726 
     | 
    
         
            +
             
     | 
| 
      
 8727 
     | 
    
         
            +
                   If you need to support versions older than 8.32, or versions  that  may
         
     | 
| 
      
 8728 
     | 
    
         
            +
                   not   build   with   --enable-jit,  you  must  either  use  the  slower
         
     | 
| 
      
 8729 
     | 
    
         
            +
                   pcre_exec(), or switch between the two codepaths by checking the values
         
     | 
| 
      
 8730 
     | 
    
         
            +
                   of PCRE_MAJOR and PCRE_MINOR.
         
     | 
| 
      
 8731 
     | 
    
         
            +
             
     | 
| 
      
 8732 
     | 
    
         
            +
                   Switching  between the two by checking the version assumes that all the
         
     | 
| 
      
 8733 
     | 
    
         
            +
                   versions being targeted are built with --enable-jit.  To  also  support
         
     | 
| 
      
 8734 
     | 
    
         
            +
                   builds that may use --disable-jit either pcre_exec() must be used, or a
         
     | 
| 
      
 8735 
     | 
    
         
            +
                   compile-time check for JIT via pcre_config() (which assumes the runtime
         
     | 
| 
      
 8736 
     | 
    
         
            +
                   environment  will  be  the  same), or as the Git project decided to do,
         
     | 
| 
      
 8737 
     | 
    
         
            +
                   simply assume that pcre_jit_exec() is present in 8.32 or later unless a
         
     | 
| 
      
 8738 
     | 
    
         
            +
                   compile-time  flag  is  provided, see the "grep: un-break building with
         
     | 
| 
      
 8739 
     | 
    
         
            +
                   PCRE >= 8.32 without --enable-jit" commit in git.git for an example  of
         
     | 
| 
      
 8740 
     | 
    
         
            +
                   that.
         
     | 
| 
      
 8741 
     | 
    
         
            +
             
     | 
| 
       8694 
8742 
     | 
    
         | 
| 
       8695 
8743 
     | 
    
         
             
            SEE ALSO
         
     | 
| 
       8696 
8744 
     | 
    
         | 
| 
         @@ -8706,8 +8754,8 @@ AUTHOR 
     | 
|
| 
       8706 
8754 
     | 
    
         | 
| 
       8707 
8755 
     | 
    
         
             
            REVISION
         
     | 
| 
       8708 
8756 
     | 
    
         | 
| 
       8709 
     | 
    
         
            -
                   Last updated:  
     | 
| 
       8710 
     | 
    
         
            -
                   Copyright (c) 1997- 
     | 
| 
      
 8757 
     | 
    
         
            +
                   Last updated: 05 July 2017
         
     | 
| 
      
 8758 
     | 
    
         
            +
                   Copyright (c) 1997-2017 University of Cambridge.
         
     | 
| 
       8711 
8759 
     | 
    
         
             
            ------------------------------------------------------------------------------
         
     | 
| 
       8712 
8760 
     | 
    
         | 
| 
       8713 
8761 
     | 
    
         |