rroonga 7.0.2-x86-mingw32 → 7.1.1-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +2 -2
- data/doc/text/news.md +46 -7
- data/ext/groonga/rb-grn-array.c +1 -272
- data/ext/groonga/rb-grn-column-cache.c +240 -0
- data/ext/groonga/rb-grn-column.c +1 -1
- data/ext/groonga/rb-grn-context.c +28 -4
- data/ext/groonga/rb-grn-expression.c +23 -1
- data/ext/groonga/rb-grn-object.c +44 -1
- data/ext/groonga/rb-grn-procedure.c +16 -1
- data/ext/groonga/rb-grn-query-logger.c +55 -6
- data/ext/groonga/rb-grn-table.c +170 -1
- data/ext/groonga/rb-grn-utils.c +21 -2
- data/ext/groonga/rb-grn.h +18 -3
- data/ext/groonga/rb-groonga.c +2 -1
- data/lib/2.1/groonga.so +0 -0
- data/lib/2.2/groonga.so +0 -0
- data/lib/2.3/groonga.so +0 -0
- data/lib/2.4/groonga.so +0 -0
- data/lib/2.5/groonga.so +0 -0
- data/lib/groonga.rb +8 -5
- data/lib/groonga/column.rb +0 -5
- data/lib/groonga/database.rb +0 -10
- data/lib/groonga/index-column.rb +0 -10
- data/lib/groonga/query-logger.rb +1 -1
- data/rroonga-build.rb +6 -6
- data/rroonga.gemspec +1 -1
- data/test/groonga-test-utils.rb +5 -8
- data/test/test-array.rb +1 -131
- data/test/test-column-cache.rb +46 -0
- data/test/test-command-select.rb +36 -1
- data/test/test-context.rb +1 -2
- data/test/test-database.rb +16 -2
- data/test/test-logger.rb +13 -1
- data/test/test-procedure.rb +7 -1
- data/test/test-query-logger.rb +12 -1
- data/test/test-table-arrow.rb +193 -0
- data/test/test-table-offset-and-limit.rb +3 -1
- data/vendor/local/bin/cv2pdb.exe +0 -0
- data/vendor/local/bin/generate-pdb.bat +36 -0
- data/vendor/local/bin/grndb.exe +0 -0
- data/vendor/local/bin/groonga-benchmark.exe +0 -0
- data/vendor/local/bin/groonga-suggest-create-dataset.exe +0 -0
- data/vendor/local/bin/groonga.exe +0 -0
- data/vendor/local/bin/libgcc_s_sjlj-1.dll +0 -0
- data/vendor/local/bin/libgroonga-0.dll +0 -0
- data/vendor/local/bin/libmecab-2.dll +0 -0
- data/vendor/local/bin/libmsgpackc.dll +0 -0
- data/vendor/local/bin/libonigmo-6.dll +0 -0
- data/vendor/local/bin/libpcre-1.dll +0 -0
- data/vendor/local/bin/libpcrecpp-0.dll +0 -0
- data/vendor/local/bin/libpcreposix-0.dll +0 -0
- data/vendor/local/bin/libstdc++-6.dll +0 -0
- data/vendor/local/bin/lz4.exe +0 -0
- data/vendor/local/bin/lz4c.exe +0 -0
- data/vendor/local/bin/lz4cat +0 -0
- data/vendor/local/bin/mecab.exe +0 -0
- data/vendor/local/bin/pcre-config +1 -1
- data/vendor/local/bin/pcregrep.exe +0 -0
- data/vendor/local/bin/pcretest.exe +0 -0
- data/vendor/local/bin/zlib1.dll +0 -0
- data/vendor/local/etc/groonga/httpd/groonga-httpd.conf +1 -1
- data/vendor/local/include/groonga/groonga.h +1 -0
- data/vendor/local/include/groonga/groonga.hpp +21 -0
- data/vendor/local/include/groonga/groonga/arrow.h +38 -0
- data/vendor/local/include/groonga/groonga/arrow.hpp +21 -0
- data/vendor/local/include/groonga/groonga/column.h +9 -0
- data/vendor/local/include/groonga/groonga/expr.h +9 -1
- data/vendor/local/include/groonga/groonga/groonga.h +19 -3
- data/vendor/local/include/groonga/groonga/obj.h +3 -0
- data/vendor/local/include/groonga/groonga/operator.h +2 -1
- data/vendor/local/include/groonga/groonga/plugin.h +8 -0
- data/vendor/local/include/groonga/groonga/portability.h +19 -1
- data/vendor/local/include/groonga/groonga/table.h +14 -0
- data/vendor/local/include/groonga/groonga/util.h +3 -0
- data/vendor/local/include/groonga/groonga/window_function.h +2 -0
- data/vendor/local/include/pcre.h +2 -2
- data/vendor/local/include/pcre_stringpiece.h +2 -2
- data/vendor/local/lib/groonga/plugins/functions/index_column.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/index_column.la +41 -0
- data/vendor/local/lib/groonga/plugins/functions/math.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/math.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/math.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/math.la +41 -0
- data/vendor/local/lib/groonga/plugins/functions/number.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/number.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/number.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/number.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/string.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/string.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/string.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/string.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/time.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/time.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/time.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/time.la +1 -1
- data/vendor/local/lib/groonga/plugins/functions/vector.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.dll +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/functions/vector.la +1 -1
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.a +0 -0
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll +0 -0
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/normalizers/mysql.la +1 -1
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/query_expanders/tsv.la +1 -1
- data/vendor/local/lib/groonga/plugins/ruby/eval.rb +1 -1
- data/vendor/local/lib/groonga/plugins/sharding.rb +3 -0
- data/vendor/local/lib/groonga/plugins/sharding/dynamic_columns.rb +152 -0
- data/vendor/local/lib/groonga/plugins/sharding/keys_parsable.rb +12 -0
- data/vendor/local/lib/groonga/plugins/sharding/logical_count.rb +149 -106
- data/vendor/local/lib/groonga/plugins/sharding/logical_enumerator.rb +11 -3
- data/vendor/local/lib/groonga/plugins/sharding/logical_range_filter.rb +80 -6
- data/vendor/local/lib/groonga/plugins/sharding/logical_select.rb +43 -206
- data/vendor/local/lib/groonga/plugins/sharding/range_expression_builder.rb +15 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/suggest/suggest.la +1 -1
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/token_filters/stop_word.la +1 -1
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.dll.a +0 -0
- data/vendor/local/lib/groonga/plugins/tokenizers/mecab.la +1 -1
- data/vendor/local/lib/groonga/scripts/ruby/command_line/grndb.rb +163 -1
- data/vendor/local/lib/groonga/scripts/ruby/command_line_parser.rb +12 -0
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree/function_call.rb +8 -3
- data/vendor/local/lib/groonga/scripts/ruby/expression_tree_builder.rb +1 -0
- data/{lib/groonga/table.rb → vendor/local/lib/groonga/scripts/ruby/groonga-log.rb} +6 -12
- data/vendor/local/lib/groonga/scripts/ruby/groonga-log/parser.rb +81 -0
- data/vendor/local/lib/groonga/scripts/ruby/groonga-log/statistic.rb +23 -0
- data/vendor/local/lib/groonga/scripts/ruby/groonga-log/version.rb +3 -0
- data/vendor/local/lib/groonga/scripts/ruby/initialize/post.rb +10 -0
- data/vendor/local/lib/groonga/scripts/ruby/labeled_arguments.rb +21 -0
- data/vendor/local/lib/groonga/scripts/ruby/logger/level.rb +8 -2
- data/vendor/local/lib/groonga/scripts/ruby/object.rb +7 -0
- data/vendor/local/lib/groonga/scripts/ruby/scan_info.rb +3 -0
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_builder.rb +2 -0
- data/vendor/local/lib/groonga/scripts/ruby/scan_info_data.rb +40 -9
- data/vendor/local/lib/groonga/scripts/ruby/table.rb +12 -2
- data/vendor/local/lib/libgroonga.a +0 -0
- data/vendor/local/lib/libgroonga.dll.a +0 -0
- data/vendor/local/lib/libgroonga.la +1 -1
- data/vendor/local/lib/liblz4.a +0 -0
- data/vendor/local/lib/liblz4.dll +0 -0
- data/vendor/local/lib/liblz4.dll.1 +0 -0
- data/vendor/local/lib/liblz4.dll.1.5.0 +0 -0
- data/vendor/local/lib/libmecab.a +0 -0
- data/vendor/local/lib/libmecab.dll.a +0 -0
- data/vendor/local/lib/libmecab.la +2 -2
- data/vendor/local/lib/libmsgpackc.a +0 -0
- data/vendor/local/lib/libmsgpackc.dll.a +0 -0
- data/vendor/local/lib/libonigmo.a +0 -0
- data/vendor/local/lib/libonigmo.dll.a +0 -0
- data/vendor/local/lib/libpcre.a +0 -0
- data/vendor/local/lib/libpcre.dll.a +0 -0
- data/vendor/local/lib/libpcre.la +1 -1
- data/vendor/local/lib/libpcrecpp.a +0 -0
- data/vendor/local/lib/libpcrecpp.dll.a +0 -0
- data/vendor/local/lib/libpcreposix.a +0 -0
- data/vendor/local/lib/libpcreposix.dll.a +0 -0
- data/vendor/local/lib/libpcreposix.la +1 -1
- data/vendor/local/lib/libz.a +0 -0
- data/vendor/local/lib/libz.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/groonga.pc +2 -2
- data/vendor/local/lib/pkgconfig/libpcre.pc +1 -1
- data/vendor/local/lib/pkgconfig/libpcrecpp.pc +1 -1
- data/vendor/local/lib/pkgconfig/libpcreposix.pc +1 -1
- data/vendor/local/libexec/mecab/mecab-cost-train.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-gen.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-dict-index.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-system-eval.exe +0 -0
- data/vendor/local/libexec/mecab/mecab-test-gen.exe +0 -0
- data/vendor/local/share/doc/groonga/en/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/en/html/_static/basic.css +47 -19
- data/vendor/local/share/doc/groonga/en/html/_static/comment-bright.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/comment-close.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/comment.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/doctools.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/{jquery-1.11.1.js → jquery-3.1.0.js} +4245 -4479
- data/vendor/local/share/doc/groonga/en/html/_static/jquery.js +4 -4
- data/vendor/local/share/doc/groonga/en/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/searchtools.js +112 -5
- data/vendor/local/share/doc/groonga/en/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/en/html/_static/websupport.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/characteristic.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/client.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/community.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/contribution.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/contribution/development.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_autotools.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/unix_cmake.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/build/windows_cmake.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/com.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/cooperation.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/query.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/release.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/contribution/development/repository.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/development/test.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/c-api.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/i18n.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/documentation/introduction.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/contribution/report.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/development.html +13 -23
- data/vendor/local/share/doc/groonga/en/html/development/travis-ci.html +15 -26
- data/vendor/local/share/doc/groonga/en/html/genindex.html +819 -1560
- data/vendor/local/share/doc/groonga/en/html/index.html +42 -45
- data/vendor/local/share/doc/groonga/en/html/install.html +11 -20
- data/vendor/local/share/doc/groonga/en/html/install/centos.html +14 -25
- data/vendor/local/share/doc/groonga/en/html/install/debian.html +92 -28
- data/vendor/local/share/doc/groonga/en/html/install/fedora.html +13 -24
- data/vendor/local/share/doc/groonga/en/html/install/mac_os_x.html +13 -24
- data/vendor/local/share/doc/groonga/en/html/install/others.html +13 -24
- data/vendor/local/share/doc/groonga/en/html/install/solaris.html +12 -23
- data/vendor/local/share/doc/groonga/en/html/install/ubuntu.html +15 -25
- data/vendor/local/share/doc/groonga/en/html/install/windows.html +18 -29
- data/vendor/local/share/doc/groonga/en/html/limitations.html +13 -23
- data/vendor/local/share/doc/groonga/en/html/news.html +650 -43
- data/vendor/local/share/doc/groonga/en/html/news/0.x.html +15 -25
- data/vendor/local/share/doc/groonga/en/html/news/1.0.x.html +13 -23
- data/vendor/local/share/doc/groonga/en/html/news/1.1.x.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/news/1.2.x.html +12 -22
- data/vendor/local/share/doc/groonga/en/html/news/1.3.x.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/news/2.x.html +11 -21
- data/vendor/local/share/doc/groonga/en/html/news/3.x.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/news/4.x.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/news/5.x.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/news/6.x.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/news/senna.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/en/html/reference.html +32 -40
- data/vendor/local/share/doc/groonga/en/html/reference/alias.html +12 -24
- data/vendor/local/share/doc/groonga/en/html/reference/api.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/global_configurations.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_cache.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_column.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_command_version.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_content_type.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ctx.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_db.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_encoding.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_expr.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_geo.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_hook.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_ii.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_index_cursor.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_info.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_match_escalation.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_obj.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_proc.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_search.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_table_cursor.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_thread.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_type.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/grn_user_data.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/overview.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/api/plugin.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/cast.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/reference/column.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/columns/index.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/columns/pseudo.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/columns/scalar.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/columns/vector.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/command.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/command/command_version.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/command/output_format.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/command/pretty_print.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_id.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/command/request_timeout.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/reference/command/return_code.html +10 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/cache_limit.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/check.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/clearlock.html +11 -22
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_copy.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_create.html +35 -37
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_list.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_remove.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/reference/commands/column_rename.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/config_delete.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/config_get.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/config_set.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/database_unmap.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/reference/commands/define_selector.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/defrag.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/delete.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/dump.html +56 -28
- data/vendor/local/share/doc/groonga/en/html/reference/commands/io_flush.html +42 -21
- data/vendor/local/share/doc/groonga/en/html/reference/commands/load.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_acquire.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_clear.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/lock_release.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_level.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_put.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/log_reopen.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_count.html +583 -107
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_parameters.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_range_filter.html +1143 -51
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_select.html +166 -29
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_shard_list.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/logical_table_remove.html +13 -24
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalize.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/normalizer_list.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_exist.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_inspect.html +12 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_list.html +138 -143
- data/vendor/local/share/doc/groonga/en/html/reference/commands/object_remove.html +14 -24
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_register.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/plugin_unregister.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/query_expand.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/quit.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/range_filter.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/register.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/reindex.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/request_cancel.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_eval.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/ruby_load.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/schema.html +95 -65
- data/vendor/local/share/doc/groonga/en/html/reference/commands/select.html +37 -25
- data/vendor/local/share/doc/groonga/en/html/reference/commands/shutdown.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/status.html +15 -26
- data/vendor/local/share/doc/groonga/en/html/reference/commands/suggest.html +14 -33
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_copy.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_create.html +11 -21
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_list.html +9 -60
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_remove.html +12 -23
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_rename.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/table_tokenize.html +15 -53
- data/vendor/local/share/doc/groonga/en/html/reference/commands/thread_limit.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenize.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/commands/tokenizer_list.html +9 -23
- data/vendor/local/share/doc/groonga/en/html/reference/commands/truncate.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/configuration.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/executables.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grndb.html +118 -21
- data/vendor/local/share/doc/groonga/en/html/reference/executables/grnslap.html +11 -22
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-benchmark.html +21 -32
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-httpd.html +17 -27
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-server-http.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-create-dataset.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-httpd.html +26 -39
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga-suggest-learner.html +17 -28
- data/vendor/local/share/doc/groonga/en/html/reference/executables/groonga.html +44 -55
- data/vendor/local/share/doc/groonga/en/html/reference/function.html +29 -39
- data/vendor/local/share/doc/groonga/en/html/reference/functions/between.html +56 -111
- data/vendor/local/share/doc/groonga/en/html/reference/functions/edit_distance.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/functions/fuzzy_search.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_distance.html +11 -22
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_circle.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/functions/geo_in_rectangle.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_full.html +11 -22
- data/vendor/local/share/doc/groonga/en/html/reference/functions/highlight_html.html +11 -22
- data/vendor/local/share/doc/groonga/en/html/reference/functions/html_untag.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_records.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/functions/in_values.html +15 -26
- data/vendor/local/share/doc/groonga/en/html/reference/functions/math_abs.html +237 -0
- data/vendor/local/share/doc/groonga/en/html/reference/functions/now.html +30 -41
- data/vendor/local/share/doc/groonga/en/html/reference/functions/number_classify.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/prefix_rk_search.html +37 -50
- data/vendor/local/share/doc/groonga/en/html/reference/functions/query.html +50 -61
- data/vendor/local/share/doc/groonga/en/html/reference/functions/rand.html +31 -42
- data/vendor/local/share/doc/groonga/en/html/reference/functions/snippet_html.html +36 -47
- data/vendor/local/share/doc/groonga/en/html/reference/functions/string_length.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/string_substring.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/sub_filter.html +35 -46
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_day.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_hour.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_minute.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_month.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_second.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_week.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/time_classify_year.html +27 -38
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_new.html +29 -40
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_size.html +32 -43
- data/vendor/local/share/doc/groonga/en/html/reference/functions/vector_slice.html +23 -34
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/query_syntax.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/grn_expr/script_syntax.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/indexing.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/log.html +11 -22
- data/vendor/local/share/doc/groonga/en/html/reference/normalizers.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/operations.html +13 -24
- data/vendor/local/share/doc/groonga/en/html/reference/operations/geolocation_search.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/operations/prefix_rk_search.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/output.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/query_expanders/tsv.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/regular_expression.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/scorer.html +14 -25
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_at_most.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/reference/scorers/scorer_tf_idf.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/reference/sharding.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/suggest.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/completion.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/correction.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/introduction.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/suggest/suggestion.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/tables.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/token_filters.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/tokenizers.html +22 -37
- data/vendor/local/share/doc/groonga/en/html/reference/tuning.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/types.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/reference/window_function.html +16 -26
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/record_number.html +14 -25
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_count.html +152 -0
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_record_number.html +28 -39
- data/vendor/local/share/doc/groonga/en/html/reference/window_functions/window_sum.html +23 -34
- data/vendor/local/share/doc/groonga/en/html/search.html +9 -12
- data/vendor/local/share/doc/groonga/en/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/en/html/server.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/server/gqtp.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/server/http.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/server/http/comparison.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga-httpd.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/server/http/groonga.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/server/memcached.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/server/package.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/spec.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/spec/gqtp.html +17 -27
- data/vendor/local/share/doc/groonga/en/html/spec/search.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/troubleshooting.html +18 -24
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/different_results_with_the_same_keyword.html +20 -31
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/how_to_analyze_error_message.html +188 -0
- data/vendor/local/share/doc/groonga/en/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -33
- data/vendor/local/share/doc/groonga/en/html/tutorial.html +9 -19
- data/vendor/local/share/doc/groonga/en/html/tutorial/data.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/tutorial/drilldown.html +12 -23
- data/vendor/local/share/doc/groonga/en/html/tutorial/index.html +10 -21
- data/vendor/local/share/doc/groonga/en/html/tutorial/introduction.html +21 -31
- data/vendor/local/share/doc/groonga/en/html/tutorial/lexicon.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial/match_columns.html +174 -22
- data/vendor/local/share/doc/groonga/en/html/tutorial/micro_blog.html +10 -81
- data/vendor/local/share/doc/groonga/en/html/tutorial/network.html +16 -26
- data/vendor/local/share/doc/groonga/en/html/tutorial/patricia_trie.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial/query_expansion.html +9 -20
- data/vendor/local/share/doc/groonga/en/html/tutorial/search.html +14 -25
- data/vendor/local/share/doc/groonga/ja/html/.buildinfo +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_static/basic.css +47 -19
- data/vendor/local/share/doc/groonga/ja/html/_static/comment-bright.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/comment-close.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/comment.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/doctools.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/_static/down-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/down.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/file.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/{jquery-1.11.1.js → jquery-3.1.0.js} +4245 -4479
- data/vendor/local/share/doc/groonga/ja/html/_static/jquery.js +4 -4
- data/vendor/local/share/doc/groonga/ja/html/_static/minus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/plus.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/searchtools.js +112 -5
- data/vendor/local/share/doc/groonga/ja/html/_static/up-pressed.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/up.png +0 -0
- data/vendor/local/share/doc/groonga/ja/html/_static/websupport.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/characteristic.html +9 -19
- data/vendor/local/share/doc/groonga/ja/html/client.html +9 -19
- data/vendor/local/share/doc/groonga/ja/html/community.html +11 -23
- data/vendor/local/share/doc/groonga/ja/html/contribution.html +12 -25
- data/vendor/local/share/doc/groonga/ja/html/contribution/development.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_autotools.html +13 -28
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/unix_cmake.html +12 -26
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/build/windows_cmake.html +16 -34
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/com.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/cooperation.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/query.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/release.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/repository.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/development/test.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/c-api.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/i18n.html +20 -42
- data/vendor/local/share/doc/groonga/ja/html/contribution/documentation/introduction.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/contribution/report.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/development.html +13 -23
- data/vendor/local/share/doc/groonga/ja/html/development/travis-ci.html +19 -38
- data/vendor/local/share/doc/groonga/ja/html/genindex.html +819 -1560
- data/vendor/local/share/doc/groonga/ja/html/index.html +41 -44
- data/vendor/local/share/doc/groonga/ja/html/install.html +11 -20
- data/vendor/local/share/doc/groonga/ja/html/install/centos.html +18 -33
- data/vendor/local/share/doc/groonga/ja/html/install/debian.html +84 -32
- data/vendor/local/share/doc/groonga/ja/html/install/fedora.html +15 -28
- data/vendor/local/share/doc/groonga/ja/html/install/mac_os_x.html +13 -24
- data/vendor/local/share/doc/groonga/ja/html/install/others.html +28 -55
- data/vendor/local/share/doc/groonga/ja/html/install/solaris.html +12 -23
- data/vendor/local/share/doc/groonga/ja/html/install/ubuntu.html +17 -29
- data/vendor/local/share/doc/groonga/ja/html/install/windows.html +18 -29
- data/vendor/local/share/doc/groonga/ja/html/limitations.html +17 -35
- data/vendor/local/share/doc/groonga/ja/html/news.html +516 -158
- data/vendor/local/share/doc/groonga/ja/html/news/0.x.html +15 -25
- data/vendor/local/share/doc/groonga/ja/html/news/1.0.x.html +12 -22
- data/vendor/local/share/doc/groonga/ja/html/news/1.1.x.html +10 -20
- data/vendor/local/share/doc/groonga/ja/html/news/1.2.x.html +144 -288
- data/vendor/local/share/doc/groonga/ja/html/news/1.3.x.html +36 -72
- data/vendor/local/share/doc/groonga/ja/html/news/2.x.html +266 -532
- data/vendor/local/share/doc/groonga/ja/html/news/3.x.html +224 -441
- data/vendor/local/share/doc/groonga/ja/html/news/4.x.html +258 -516
- data/vendor/local/share/doc/groonga/ja/html/news/5.x.html +282 -562
- data/vendor/local/share/doc/groonga/ja/html/news/6.x.html +213 -426
- data/vendor/local/share/doc/groonga/ja/html/news/senna.html +10 -20
- data/vendor/local/share/doc/groonga/ja/html/objects.inv +0 -0
- data/vendor/local/share/doc/groonga/ja/html/reference.html +32 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/alias.html +14 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/api.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/global_configurations.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_cache.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_column.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_command_version.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_content_type.html +10 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ctx.html +15 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_db.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_encoding.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_expr.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_geo.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_hook.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_ii.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_index_cursor.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_info.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_match_escalation.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_obj.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_proc.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_search.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_table_cursor.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_thread.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_type.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/grn_user_data.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/overview.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/api/plugin.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/cast.html +10 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/column.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/index.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/pseudo.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/scalar.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/columns/vector.html +15 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/command.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/command/command_version.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/command/output_format.html +10 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/command/pretty_print.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_id.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/command/request_timeout.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/command/return_code.html +12 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/cache_limit.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/check.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/clearlock.html +11 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_copy.html +31 -64
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_create.html +56 -85
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_list.html +29 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_remove.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/column_rename.html +14 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_delete.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_get.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/config_set.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/database_unmap.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/define_selector.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/defrag.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/delete.html +15 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/dump.html +55 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/io_flush.html +45 -45
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/load.html +15 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_acquire.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_clear.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/lock_release.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_level.html +15 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_put.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/log_reopen.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_count.html +498 -106
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_parameters.html +13 -27
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_range_filter.html +1051 -56
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_select.html +166 -56
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_shard_list.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/logical_table_remove.html +33 -63
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalize.html +17 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/normalizer_list.html +13 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_exist.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_inspect.html +54 -104
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_list.html +183 -233
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/object_remove.html +14 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_register.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/plugin_unregister.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/query_expand.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/quit.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/range_filter.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/register.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/reindex.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/request_cancel.html +14 -30
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_eval.html +10 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/ruby_load.html +10 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/schema.html +136 -147
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/select.html +145 -271
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/shutdown.html +10 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/status.html +23 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/suggest.html +28 -61
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_copy.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_create.html +32 -65
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_list.html +9 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_remove.html +22 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_rename.html +12 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/table_tokenize.html +15 -53
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/thread_limit.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenize.html +18 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/tokenizer_list.html +13 -31
- data/vendor/local/share/doc/groonga/ja/html/reference/commands/truncate.html +11 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/configuration.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/executables.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grndb.html +113 -29
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/grnslap.html +11 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-benchmark.html +21 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-httpd.html +20 -33
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-server-http.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-create-dataset.html +10 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-httpd.html +70 -127
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga-suggest-learner.html +17 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/executables/groonga.html +71 -109
- data/vendor/local/share/doc/groonga/ja/html/reference/function.html +29 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/between.html +46 -102
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/edit_distance.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/fuzzy_search.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_distance.html +14 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_circle.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/geo_in_rectangle.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_full.html +11 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/highlight_html.html +11 -22
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/html_untag.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_records.html +23 -48
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/in_values.html +16 -27
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/math_abs.html +237 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/now.html +30 -41
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/number_classify.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/prefix_rk_search.html +37 -50
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/query.html +64 -89
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/rand.html +31 -42
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/snippet_html.html +42 -59
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_length.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/string_substring.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/sub_filter.html +38 -52
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_day.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_hour.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_minute.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_month.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_second.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_week.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/time_classify_year.html +27 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_new.html +29 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_size.html +32 -43
- data/vendor/local/share/doc/groonga/ja/html/reference/functions/vector_slice.html +23 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr.html +13 -28
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/query_syntax.html +18 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/grn_expr/script_syntax.html +17 -36
- data/vendor/local/share/doc/groonga/ja/html/reference/indexing.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/log.html +37 -74
- data/vendor/local/share/doc/groonga/ja/html/reference/normalizers.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/operations.html +13 -24
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/geolocation_search.html +18 -38
- data/vendor/local/share/doc/groonga/ja/html/reference/operations/prefix_rk_search.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/output.html +12 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/query_expanders/tsv.html +15 -32
- data/vendor/local/share/doc/groonga/ja/html/reference/regular_expression.html +29 -60
- data/vendor/local/share/doc/groonga/ja/html/reference/scorer.html +16 -29
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_at_most.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/scorers/scorer_tf_idf.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/reference/sharding.html +16 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/completion.html +34 -70
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/correction.html +22 -46
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/introduction.html +16 -34
- data/vendor/local/share/doc/groonga/ja/html/reference/suggest/suggestion.html +19 -40
- data/vendor/local/share/doc/groonga/ja/html/reference/tables.html +31 -64
- data/vendor/local/share/doc/groonga/ja/html/reference/token_filters.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/tokenizers.html +42 -77
- data/vendor/local/share/doc/groonga/ja/html/reference/tuning.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/types.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/reference/window_function.html +16 -26
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/record_number.html +14 -25
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_count.html +153 -0
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_record_number.html +28 -39
- data/vendor/local/share/doc/groonga/ja/html/reference/window_functions/window_sum.html +23 -34
- data/vendor/local/share/doc/groonga/ja/html/search.html +9 -12
- data/vendor/local/share/doc/groonga/ja/html/searchindex.js +1 -1
- data/vendor/local/share/doc/groonga/ja/html/server.html +9 -19
- data/vendor/local/share/doc/groonga/ja/html/server/gqtp.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/server/http.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/server/http/comparison.html +47 -95
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga-httpd.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/server/http/groonga.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/server/memcached.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/server/package.html +26 -54
- data/vendor/local/share/doc/groonga/ja/html/spec.html +9 -19
- data/vendor/local/share/doc/groonga/ja/html/spec/gqtp.html +50 -93
- data/vendor/local/share/doc/groonga/ja/html/spec/search.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting.html +18 -24
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/different_results_with_the_same_keyword.html +14 -25
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/how_to_analyze_error_message.html +186 -0
- data/vendor/local/share/doc/groonga/ja/html/troubleshooting/mmap_cannot_allocate_memory.html +22 -35
- data/vendor/local/share/doc/groonga/ja/html/tutorial.html +9 -19
- data/vendor/local/share/doc/groonga/ja/html/tutorial/data.html +12 -25
- data/vendor/local/share/doc/groonga/ja/html/tutorial/drilldown.html +15 -29
- data/vendor/local/share/doc/groonga/ja/html/tutorial/index.html +10 -21
- data/vendor/local/share/doc/groonga/ja/html/tutorial/introduction.html +29 -47
- data/vendor/local/share/doc/groonga/ja/html/tutorial/lexicon.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/match_columns.html +177 -28
- data/vendor/local/share/doc/groonga/ja/html/tutorial/micro_blog.html +34 -129
- data/vendor/local/share/doc/groonga/ja/html/tutorial/network.html +16 -26
- data/vendor/local/share/doc/groonga/ja/html/tutorial/patricia_trie.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/query_expansion.html +9 -20
- data/vendor/local/share/doc/groonga/ja/html/tutorial/search.html +18 -33
- data/vendor/local/share/doc/pcre/AUTHORS +3 -3
- data/vendor/local/share/doc/pcre/ChangeLog +94 -0
- data/vendor/local/share/doc/pcre/LICENCE +3 -3
- data/vendor/local/share/doc/pcre/NEWS +12 -0
- data/vendor/local/share/doc/pcre/html/pcrecompat.html +1 -1
- data/vendor/local/share/doc/pcre/html/pcrejit.html +52 -5
- data/vendor/local/share/doc/pcre/html/pcrepattern.html +20 -17
- data/vendor/local/share/doc/pcre/html/pcretest.html +7 -2
- data/vendor/local/share/doc/pcre/pcre.txt +1103 -1055
- data/vendor/local/share/doc/pcre/pcretest.txt +6 -2
- data/vendor/local/share/groonga/groonga-log/README.md +44 -0
- data/vendor/local/share/groonga/groonga-log/lgpl-2.1.txt +502 -0
- data/vendor/local/share/groonga/mruby/LEGAL +4 -0
- data/vendor/local/share/license/cv2pdb/LICENSE +201 -0
- data/vendor/local/share/license/cv2pdb/README +138 -0
- data/vendor/local/share/license/groonga-log/README.md +44 -0
- data/vendor/local/share/license/groonga-log/lgpl-2.1.txt +502 -0
- data/vendor/local/share/license/pcre/LICENCE +3 -3
- data/vendor/local/share/man/man1/pcretest.1 +7 -3
- data/vendor/local/share/man/man3/pcrecompat.3 +1 -1
- data/vendor/local/share/man/man3/pcrejit.3 +48 -6
- data/vendor/local/share/man/man3/pcrepattern.3 +20 -17
- metadata +102 -559
- data/lib/groonga/statistic-measurer.rb +0 -37
- data/test/test-statistic-measurer.rb +0 -55
- data/vendor/local/share/doc/groonga/en/html/_sources/characteristic.txt +0 -70
- data/vendor/local/share/doc/groonga/en/html/_sources/client.txt +0 -19
- data/vendor/local/share/doc/groonga/en/html/_sources/community.txt +0 -49
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution.txt +0 -26
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development.txt +0 -14
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build.txt +0 -19
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_autotools.txt +0 -101
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/unix_cmake.txt +0 -94
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/build/windows_cmake.txt +0 -93
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/com.txt +0 -20
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/cooperation.txt +0 -75
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/query.txt +0 -214
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/release.txt +0 -790
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/repository.txt +0 -16
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/development/test.txt +0 -120
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation/c-api.txt +0 -14
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation/i18n.txt +0 -200
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/documentation/introduction.txt +0 -81
- data/vendor/local/share/doc/groonga/en/html/_sources/contribution/report.txt +0 -27
- data/vendor/local/share/doc/groonga/en/html/_sources/development.txt +0 -16
- data/vendor/local/share/doc/groonga/en/html/_sources/development/travis-ci.txt +0 -66
- data/vendor/local/share/doc/groonga/en/html/_sources/index.txt +0 -33
- data/vendor/local/share/doc/groonga/en/html/_sources/install.txt +0 -28
- data/vendor/local/share/doc/groonga/en/html/_sources/install/centos.txt +0 -106
- data/vendor/local/share/doc/groonga/en/html/_sources/install/debian.txt +0 -107
- data/vendor/local/share/doc/groonga/en/html/_sources/install/fedora.txt +0 -97
- data/vendor/local/share/doc/groonga/en/html/_sources/install/mac_os_x.txt +0 -66
- data/vendor/local/share/doc/groonga/en/html/_sources/install/others.txt +0 -273
- data/vendor/local/share/doc/groonga/en/html/_sources/install/solaris.txt +0 -43
- data/vendor/local/share/doc/groonga/en/html/_sources/install/ubuntu.txt +0 -99
- data/vendor/local/share/doc/groonga/en/html/_sources/install/windows.txt +0 -92
- data/vendor/local/share/doc/groonga/en/html/_sources/limitations.txt +0 -58
- data/vendor/local/share/doc/groonga/en/html/_sources/news.txt +0 -315
- data/vendor/local/share/doc/groonga/en/html/_sources/news/0.x.txt +0 -126
- data/vendor/local/share/doc/groonga/en/html/_sources/news/1.0.x.txt +0 -289
- data/vendor/local/share/doc/groonga/en/html/_sources/news/1.1.x.txt +0 -31
- data/vendor/local/share/doc/groonga/en/html/_sources/news/1.2.x.txt +0 -390
- data/vendor/local/share/doc/groonga/en/html/_sources/news/1.3.x.txt +0 -52
- data/vendor/local/share/doc/groonga/en/html/_sources/news/2.x.txt +0 -623
- data/vendor/local/share/doc/groonga/en/html/_sources/news/3.x.txt +0 -539
- data/vendor/local/share/doc/groonga/en/html/_sources/news/4.x.txt +0 -689
- data/vendor/local/share/doc/groonga/en/html/_sources/news/5.x.txt +0 -1250
- data/vendor/local/share/doc/groonga/en/html/_sources/news/6.x.txt +0 -1086
- data/vendor/local/share/doc/groonga/en/html/_sources/news/senna.txt +0 -109
- data/vendor/local/share/doc/groonga/en/html/_sources/reference.txt +0 -35
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/alias.txt +0 -164
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/global_configurations.txt +0 -49
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_cache.txt +0 -114
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_column.txt +0 -198
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_command_version.txt +0 -37
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_content_type.txt +0 -39
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ctx.txt +0 -195
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_db.txt +0 -134
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_encoding.txt +0 -49
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_expr.txt +0 -136
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_geo.txt +0 -55
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_hook.txt +0 -67
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_ii.txt +0 -35
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_index_cursor.txt +0 -44
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_info.txt +0 -56
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_match_escalation.txt +0 -39
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_obj.txt +0 -269
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_proc.txt +0 -56
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_search.txt +0 -31
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_table.txt +0 -219
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_table_cursor.txt +0 -109
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_thread.txt +0 -122
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_type.txt +0 -31
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/grn_user_data.txt +0 -29
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/overview.txt +0 -54
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/api/plugin.txt +0 -156
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/cast.txt +0 -8
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/column.txt +0 -34
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/index.txt +0 -19
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/pseudo.txt +0 -40
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/scalar.txt +0 -19
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/columns/vector.txt +0 -332
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command.txt +0 -23
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/command_version.txt +0 -75
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/output_format.txt +0 -228
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/pretty_print.txt +0 -45
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/request_id.txt +0 -41
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/request_timeout.txt +0 -78
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/command/return_code.txt +0 -117
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/cache_limit.txt +0 -87
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/check.txt +0 -161
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/clearlock.txt +0 -60
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_copy.txt +0 -381
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_create.txt +0 -800
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_list.txt +0 -209
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_remove.txt +0 -57
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/column_rename.txt +0 -101
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/config_delete.txt +0 -95
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/config_get.txt +0 -96
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/config_set.txt +0 -96
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/database_unmap.txt +0 -85
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/define_selector.txt +0 -110
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/defrag.txt +0 -55
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/delete.txt +0 -122
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/dump.txt +0 -202
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/io_flush.txt +0 -266
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/load.txt +0 -100
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_acquire.txt +0 -102
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_clear.txt +0 -90
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/lock_release.txt +0 -98
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_level.txt +0 -87
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_put.txt +0 -65
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/log_reopen.txt +0 -62
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_count.txt +0 -171
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_parameters.txt +0 -134
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_range_filter.txt +0 -195
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_select.txt +0 -1359
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_shard_list.txt +0 -103
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/logical_table_remove.txt +0 -541
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalize.txt +0 -155
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/normalizer_list.txt +0 -64
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_exist.txt +0 -95
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_inspect.txt +0 -899
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_list.txt +0 -405
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/object_remove.txt +0 -140
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_register.txt +0 -64
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/plugin_unregister.txt +0 -63
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/query_expand.txt +0 -38
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/quit.txt +0 -38
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/range_filter.txt +0 -28
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/register.txt +0 -69
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/reindex.txt +0 -142
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/request_cancel.txt +0 -134
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_eval.txt +0 -71
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/ruby_load.txt +0 -71
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/schema.txt +0 -627
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/select.txt +0 -2776
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/shutdown.txt +0 -113
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/status.txt +0 -151
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/suggest.txt +0 -271
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_copy.txt +0 -64
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_create.txt +0 -380
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_list.txt +0 -81
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_remove.txt +0 -309
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_rename.txt +0 -90
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/table_tokenize.txt +0 -120
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/thread_limit.txt +0 -110
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenize.txt +0 -248
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/tokenizer_list.txt +0 -63
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/commands/truncate.txt +0 -95
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/configuration.txt +0 -50
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables.txt +0 -14
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/grndb.txt +0 -117
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/grnslap.txt +0 -68
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-benchmark.txt +0 -287
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-httpd.txt +0 -552
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-server-http.txt +0 -57
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-suggest-create-dataset.txt +0 -63
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-suggest-httpd.txt +0 -470
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga-suggest-learner.txt +0 -94
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/executables/groonga.txt +0 -473
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/function.txt +0 -20
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/between.txt +0 -105
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/edit_distance.txt +0 -48
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/fuzzy_search.txt +0 -23
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_distance.txt +0 -300
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_in_circle.txt +0 -81
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/geo_in_rectangle.txt +0 -55
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_full.txt +0 -127
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/highlight_html.txt +0 -105
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/html_untag.txt +0 -80
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_records.txt +0 -195
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/in_values.txt +0 -82
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/now.txt +0 -36
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/number_classify.txt +0 -20
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/prefix_rk_search.txt +0 -158
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/query.txt +0 -254
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/rand.txt +0 -43
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/snippet_html.txt +0 -114
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/string_length.txt +0 -33
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/string_substring.txt +0 -27
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/sub_filter.txt +0 -137
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_day.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_hour.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_minute.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_month.txt +0 -20
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_second.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_week.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/time_classify_year.txt +0 -18
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/vector_new.txt +0 -38
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/vector_size.txt +0 -76
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/functions/vector_slice.txt +0 -27
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr.txt +0 -59
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/query_syntax.txt +0 -652
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/grn_expr/script_syntax.txt +0 -1126
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/indexing.txt +0 -112
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/log.txt +0 -236
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/normalizers.txt +0 -133
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations.txt +0 -16
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations/geolocation_search.txt +0 -52
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/operations/prefix_rk_search.txt +0 -76
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/output.txt +0 -164
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders.txt +0 -12
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/query_expanders/tsv.txt +0 -153
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/regular_expression.txt +0 -436
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorer.txt +0 -218
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_at_most.txt +0 -136
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/scorers/scorer_tf_idf.txt +0 -157
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/sharding.txt +0 -104
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest.txt +0 -17
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/completion.txt +0 -271
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/correction.txt +0 -148
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/introduction.txt +0 -96
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/suggest/suggestion.txt +0 -132
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tables.txt +0 -216
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/token_filters.txt +0 -120
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tokenizers.txt +0 -517
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/tuning.txt +0 -177
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/types.txt +0 -170
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_function.txt +0 -22
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_functions/record_number.txt +0 -28
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_functions/window_record_number.txt +0 -25
- data/vendor/local/share/doc/groonga/en/html/_sources/reference/window_functions/window_sum.txt +0 -25
- data/vendor/local/share/doc/groonga/en/html/_sources/server.txt +0 -15
- data/vendor/local/share/doc/groonga/en/html/_sources/server/gqtp.txt +0 -48
- data/vendor/local/share/doc/groonga/en/html/_sources/server/http.txt +0 -25
- data/vendor/local/share/doc/groonga/en/html/_sources/server/http/comparison.txt +0 -298
- data/vendor/local/share/doc/groonga/en/html/_sources/server/http/groonga-httpd.txt +0 -8
- data/vendor/local/share/doc/groonga/en/html/_sources/server/http/groonga.txt +0 -8
- data/vendor/local/share/doc/groonga/en/html/_sources/server/memcached.txt +0 -16
- data/vendor/local/share/doc/groonga/en/html/_sources/server/package.txt +0 -209
- data/vendor/local/share/doc/groonga/en/html/_sources/spec.txt +0 -13
- data/vendor/local/share/doc/groonga/en/html/_sources/spec/gqtp.txt +0 -280
- data/vendor/local/share/doc/groonga/en/html/_sources/spec/search.txt +0 -115
- data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting.txt +0 -13
- data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +0 -135
- data/vendor/local/share/doc/groonga/en/html/_sources/troubleshooting/mmap_cannot_allocate_memory.txt +0 -45
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial.txt +0 -22
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/data.txt +0 -173
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/drilldown.txt +0 -130
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/index.txt +0 -123
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/introduction.txt +0 -294
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/lexicon.txt +0 -12
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/match_columns.txt +0 -234
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/micro_blog.txt +0 -539
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/network.txt +0 -64
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/patricia_trie.txt +0 -58
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/query_expansion.txt +0 -69
- data/vendor/local/share/doc/groonga/en/html/_sources/tutorial/search.txt +0 -123
- data/vendor/local/share/doc/groonga/ja/html/_sources/characteristic.txt +0 -70
- data/vendor/local/share/doc/groonga/ja/html/_sources/client.txt +0 -19
- data/vendor/local/share/doc/groonga/ja/html/_sources/community.txt +0 -49
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution.txt +0 -26
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development.txt +0 -14
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build.txt +0 -19
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_autotools.txt +0 -101
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/unix_cmake.txt +0 -94
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/build/windows_cmake.txt +0 -93
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/com.txt +0 -20
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/cooperation.txt +0 -75
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/query.txt +0 -214
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/release.txt +0 -790
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/repository.txt +0 -16
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/development/test.txt +0 -120
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation/c-api.txt +0 -14
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation/i18n.txt +0 -200
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/documentation/introduction.txt +0 -81
- data/vendor/local/share/doc/groonga/ja/html/_sources/contribution/report.txt +0 -27
- data/vendor/local/share/doc/groonga/ja/html/_sources/development.txt +0 -16
- data/vendor/local/share/doc/groonga/ja/html/_sources/development/travis-ci.txt +0 -66
- data/vendor/local/share/doc/groonga/ja/html/_sources/index.txt +0 -33
- data/vendor/local/share/doc/groonga/ja/html/_sources/install.txt +0 -28
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/centos.txt +0 -106
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/debian.txt +0 -107
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/fedora.txt +0 -97
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/mac_os_x.txt +0 -66
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/others.txt +0 -273
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/solaris.txt +0 -43
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/ubuntu.txt +0 -99
- data/vendor/local/share/doc/groonga/ja/html/_sources/install/windows.txt +0 -92
- data/vendor/local/share/doc/groonga/ja/html/_sources/limitations.txt +0 -58
- data/vendor/local/share/doc/groonga/ja/html/_sources/news.txt +0 -315
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/0.x.txt +0 -126
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.0.x.txt +0 -289
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.1.x.txt +0 -31
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.2.x.txt +0 -390
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/1.3.x.txt +0 -52
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/2.x.txt +0 -623
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/3.x.txt +0 -539
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/4.x.txt +0 -689
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/5.x.txt +0 -1250
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/6.x.txt +0 -1086
- data/vendor/local/share/doc/groonga/ja/html/_sources/news/senna.txt +0 -109
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference.txt +0 -35
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/alias.txt +0 -164
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/global_configurations.txt +0 -49
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_cache.txt +0 -114
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_column.txt +0 -198
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_command_version.txt +0 -37
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_content_type.txt +0 -39
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ctx.txt +0 -195
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_db.txt +0 -134
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_encoding.txt +0 -49
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_expr.txt +0 -136
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_geo.txt +0 -55
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_hook.txt +0 -67
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_ii.txt +0 -35
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_index_cursor.txt +0 -44
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_info.txt +0 -56
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_match_escalation.txt +0 -39
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_obj.txt +0 -269
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_proc.txt +0 -56
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_search.txt +0 -31
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_table.txt +0 -219
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_table_cursor.txt +0 -109
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_thread.txt +0 -122
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_type.txt +0 -31
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/grn_user_data.txt +0 -29
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/overview.txt +0 -54
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/api/plugin.txt +0 -156
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/cast.txt +0 -8
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/column.txt +0 -34
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/index.txt +0 -19
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/pseudo.txt +0 -40
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/scalar.txt +0 -19
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/columns/vector.txt +0 -332
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command.txt +0 -23
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/command_version.txt +0 -75
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/output_format.txt +0 -228
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/pretty_print.txt +0 -45
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/request_id.txt +0 -41
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/request_timeout.txt +0 -78
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/command/return_code.txt +0 -117
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/cache_limit.txt +0 -87
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/check.txt +0 -161
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/clearlock.txt +0 -60
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_copy.txt +0 -381
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_create.txt +0 -800
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_list.txt +0 -209
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_remove.txt +0 -57
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/column_rename.txt +0 -101
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/config_delete.txt +0 -95
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/config_get.txt +0 -96
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/config_set.txt +0 -96
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/database_unmap.txt +0 -85
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/define_selector.txt +0 -110
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/defrag.txt +0 -55
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/delete.txt +0 -122
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/dump.txt +0 -202
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/io_flush.txt +0 -266
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/load.txt +0 -100
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_acquire.txt +0 -102
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_clear.txt +0 -90
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/lock_release.txt +0 -98
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_level.txt +0 -87
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_put.txt +0 -65
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/log_reopen.txt +0 -62
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_count.txt +0 -171
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_parameters.txt +0 -134
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_range_filter.txt +0 -195
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_select.txt +0 -1359
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_shard_list.txt +0 -103
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/logical_table_remove.txt +0 -541
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalize.txt +0 -155
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/normalizer_list.txt +0 -64
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_exist.txt +0 -95
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_inspect.txt +0 -899
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_list.txt +0 -405
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/object_remove.txt +0 -140
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_register.txt +0 -64
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/plugin_unregister.txt +0 -63
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/query_expand.txt +0 -38
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/quit.txt +0 -38
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/range_filter.txt +0 -28
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/register.txt +0 -69
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/reindex.txt +0 -142
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/request_cancel.txt +0 -134
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_eval.txt +0 -71
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/ruby_load.txt +0 -71
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/schema.txt +0 -627
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/select.txt +0 -2776
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/shutdown.txt +0 -113
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/status.txt +0 -151
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/suggest.txt +0 -271
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_copy.txt +0 -64
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_create.txt +0 -380
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_list.txt +0 -81
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_remove.txt +0 -309
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_rename.txt +0 -90
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/table_tokenize.txt +0 -120
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/thread_limit.txt +0 -110
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenize.txt +0 -248
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/tokenizer_list.txt +0 -63
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/commands/truncate.txt +0 -95
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/configuration.txt +0 -50
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables.txt +0 -14
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/grndb.txt +0 -117
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/grnslap.txt +0 -68
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-benchmark.txt +0 -287
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-httpd.txt +0 -552
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-server-http.txt +0 -57
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-suggest-create-dataset.txt +0 -63
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-suggest-httpd.txt +0 -470
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga-suggest-learner.txt +0 -94
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/executables/groonga.txt +0 -473
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/function.txt +0 -20
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/between.txt +0 -105
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/edit_distance.txt +0 -48
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/fuzzy_search.txt +0 -23
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_distance.txt +0 -300
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_in_circle.txt +0 -81
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/geo_in_rectangle.txt +0 -55
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_full.txt +0 -127
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/highlight_html.txt +0 -105
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/html_untag.txt +0 -80
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_records.txt +0 -195
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/in_values.txt +0 -82
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/now.txt +0 -36
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/number_classify.txt +0 -20
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/prefix_rk_search.txt +0 -158
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/query.txt +0 -254
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/rand.txt +0 -43
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/snippet_html.txt +0 -114
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/string_length.txt +0 -33
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/string_substring.txt +0 -27
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/sub_filter.txt +0 -137
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_day.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_hour.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_minute.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_month.txt +0 -20
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_second.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_week.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/time_classify_year.txt +0 -18
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/vector_new.txt +0 -38
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/vector_size.txt +0 -76
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/functions/vector_slice.txt +0 -27
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr.txt +0 -59
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/query_syntax.txt +0 -652
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/grn_expr/script_syntax.txt +0 -1126
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/indexing.txt +0 -112
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/log.txt +0 -236
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/normalizers.txt +0 -133
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations.txt +0 -16
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations/geolocation_search.txt +0 -52
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/operations/prefix_rk_search.txt +0 -76
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/output.txt +0 -164
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders.txt +0 -12
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/query_expanders/tsv.txt +0 -153
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/regular_expression.txt +0 -436
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorer.txt +0 -218
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_at_most.txt +0 -136
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/scorers/scorer_tf_idf.txt +0 -157
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/sharding.txt +0 -104
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest.txt +0 -17
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/completion.txt +0 -271
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/correction.txt +0 -148
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/introduction.txt +0 -96
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/suggest/suggestion.txt +0 -132
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tables.txt +0 -216
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/token_filters.txt +0 -120
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tokenizers.txt +0 -517
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/tuning.txt +0 -177
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/types.txt +0 -170
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_function.txt +0 -22
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_functions/record_number.txt +0 -28
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_functions/window_record_number.txt +0 -25
- data/vendor/local/share/doc/groonga/ja/html/_sources/reference/window_functions/window_sum.txt +0 -25
- data/vendor/local/share/doc/groonga/ja/html/_sources/server.txt +0 -15
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/gqtp.txt +0 -48
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/http.txt +0 -25
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/http/comparison.txt +0 -298
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/http/groonga-httpd.txt +0 -8
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/http/groonga.txt +0 -8
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/memcached.txt +0 -16
- data/vendor/local/share/doc/groonga/ja/html/_sources/server/package.txt +0 -209
- data/vendor/local/share/doc/groonga/ja/html/_sources/spec.txt +0 -13
- data/vendor/local/share/doc/groonga/ja/html/_sources/spec/gqtp.txt +0 -280
- data/vendor/local/share/doc/groonga/ja/html/_sources/spec/search.txt +0 -115
- data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting.txt +0 -13
- data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/different_results_with_the_same_keyword.txt +0 -135
- data/vendor/local/share/doc/groonga/ja/html/_sources/troubleshooting/mmap_cannot_allocate_memory.txt +0 -45
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial.txt +0 -22
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/data.txt +0 -173
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/drilldown.txt +0 -130
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/index.txt +0 -123
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/introduction.txt +0 -294
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/lexicon.txt +0 -12
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/match_columns.txt +0 -234
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/micro_blog.txt +0 -539
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/network.txt +0 -64
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/patricia_trie.txt +0 -58
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/query_expansion.txt +0 -69
- data/vendor/local/share/doc/groonga/ja/html/_sources/tutorial/search.txt +0 -123
@@ -358,24 +358,24 @@ When PCRE is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t
|
|
358
358
|
generate the appropriate EBCDIC code values. The \c escape is processed
|
359
359
|
as specified for Perl in the <b>perlebcdic</b> document. The only characters
|
360
360
|
that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any
|
361
|
-
other character provokes a compile-time error. The sequence
|
362
|
-
character code 0; the letters (in either case) encode characters 1-26
|
363
|
-
to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex
|
364
|
-
|
361
|
+
other character provokes a compile-time error. The sequence \c@ encodes
|
362
|
+
character code 0; after \c the letters (in either case) encode characters 1-26
|
363
|
+
(hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex
|
364
|
+
1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
|
365
365
|
</P>
|
366
366
|
<P>
|
367
|
-
Thus, apart from
|
367
|
+
Thus, apart from \c?, these escapes generate the same character code values as
|
368
368
|
they do in an ASCII environment, though the meanings of the values mostly
|
369
|
-
differ. For example, \
|
369
|
+
differ. For example, \cG always generates code value 7, which is BEL in ASCII
|
370
370
|
but DEL in EBCDIC.
|
371
371
|
</P>
|
372
372
|
<P>
|
373
|
-
The sequence
|
373
|
+
The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but
|
374
374
|
because 127 is not a control character in EBCDIC, Perl makes it generate the
|
375
375
|
APC character. Unfortunately, there are several variants of EBCDIC. In most of
|
376
376
|
them the APC character has the value 255 (hex FF), but in the one Perl calls
|
377
377
|
POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC
|
378
|
-
values, PCRE makes
|
378
|
+
values, PCRE makes \c? generate 95; otherwise it generates 255.
|
379
379
|
</P>
|
380
380
|
<P>
|
381
381
|
After \0 up to two further octal digits are read. If there are fewer than two
|
@@ -1512,13 +1512,8 @@ J, U and X respectively.
|
|
1512
1512
|
<P>
|
1513
1513
|
When one of these option changes occurs at top level (that is, not inside
|
1514
1514
|
subpattern parentheses), the change applies to the remainder of the pattern
|
1515
|
-
that follows.
|
1516
|
-
|
1517
|
-
extracted by the <b>pcre_fullinfo()</b> function).
|
1518
|
-
</P>
|
1519
|
-
<P>
|
1520
|
-
An option change within a subpattern (see below for a description of
|
1521
|
-
subpatterns) affects only that part of the subpattern that follows it, so
|
1515
|
+
that follows. An option change within a subpattern (see below for a description
|
1516
|
+
of subpatterns) affects only that part of the subpattern that follows it, so
|
1522
1517
|
<pre>
|
1523
1518
|
(a(?i)b)c
|
1524
1519
|
</pre>
|
@@ -2160,6 +2155,14 @@ capturing is carried out only for positive assertions. (Perl sometimes, but not
|
|
2160
2155
|
always, does do capturing in negative assertions.)
|
2161
2156
|
</P>
|
2162
2157
|
<P>
|
2158
|
+
WARNING: If a positive assertion containing one or more capturing subpatterns
|
2159
|
+
succeeds, but failure to match later in the pattern causes backtracking over
|
2160
|
+
this assertion, the captures within the assertion are reset only if no higher
|
2161
|
+
numbered captures are already set. This is, unfortunately, a fundamental
|
2162
|
+
limitation of the current implementation, and as PCRE1 is now in
|
2163
|
+
maintenance-only status, it is unlikely ever to change.
|
2164
|
+
</P>
|
2165
|
+
<P>
|
2163
2166
|
For compatibility with Perl, assertion subpatterns may be repeated; though
|
2164
2167
|
it makes no sense to assert the same thing several times, the side effect of
|
2165
2168
|
capturing parentheses may occasionally be useful. In practice, there are only three
|
@@ -3264,9 +3267,9 @@ Cambridge CB2 3QH, England.
|
|
3264
3267
|
</P>
|
3265
3268
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
3266
3269
|
<P>
|
3267
|
-
Last updated:
|
3270
|
+
Last updated: 23 October 2016
|
3268
3271
|
<br>
|
3269
|
-
Copyright © 1997-
|
3272
|
+
Copyright © 1997-2016 University of Cambridge.
|
3270
3273
|
<br>
|
3271
3274
|
<p>
|
3272
3275
|
Return to the <a href="index.html">PCRE index page</a>.
|
@@ -74,6 +74,11 @@ newline as data characters. However, in some Windows environments character 26
|
|
74
74
|
maximum portability, therefore, it is safest to use only ASCII characters in
|
75
75
|
<b>pcretest</b> input files.
|
76
76
|
</P>
|
77
|
+
<P>
|
78
|
+
The input is processed using C's string functions, so must not
|
79
|
+
contain binary zeroes, even though in Unix-like environments, <b>fgets()</b>
|
80
|
+
treats any bytes other than newline as data characters.
|
81
|
+
</P>
|
77
82
|
<br><a name="SEC3" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
78
83
|
<P>
|
79
84
|
From release 8.30, two separate PCRE libraries can be built. The original one
|
@@ -1149,9 +1154,9 @@ Cambridge CB2 3QH, England.
|
|
1149
1154
|
</P>
|
1150
1155
|
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
|
1151
1156
|
<P>
|
1152
|
-
Last updated:
|
1157
|
+
Last updated: 23 February 2017
|
1153
1158
|
<br>
|
1154
|
-
Copyright © 1997-
|
1159
|
+
Copyright © 1997-2017 University of Cambridge.
|
1155
1160
|
<br>
|
1156
1161
|
<p>
|
1157
1162
|
Return to the <a href="index.html">PCRE index page</a>.
|
@@ -4640,7 +4640,7 @@ DIFFERENCES BETWEEN PCRE AND PERL
|
|
4640
4640
|
pattern names is not as general as Perl's. This is a consequence of the
|
4641
4641
|
fact that PCRE works internally just with numbers, using an external ta-
|
4642
4642
|
ble to translate between numbers and names. In particular, a pattern
|
4643
|
-
such as (?|(?<a>A)|(?<b
|
4643
|
+
such as (?|(?<a>A)|(?<b>B)), where the two capturing parentheses have
|
4644
4644
|
the same number but different names, is not supported, and causes an
|
4645
4645
|
error at compile time. If it were allowed, it would not be possible to
|
4646
4646
|
distinguish which parentheses matched, because both names map to cap-
|
@@ -5028,55 +5028,56 @@ BACKSLASH
|
|
5028
5028
|
ate the appropriate EBCDIC code values. The \c escape is processed as
|
5029
5029
|
specified for Perl in the perlebcdic document. The only characters that
|
5030
5030
|
are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?.
|
5031
|
-
Any other character provokes a
|
5032
|
-
encodes character code 0; the letters (in either case) encode
|
5033
|
-
|
5034
|
-
(hex 1B to hex 1F), and
|
5035
|
-
|
5036
|
-
|
5037
|
-
|
5038
|
-
values
|
5031
|
+
Any other character provokes a compile-time error. The sequence \c@
|
5032
|
+
encodes character code 0; after \c the letters (in either case) encode
|
5033
|
+
characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters
|
5034
|
+
27-31 (hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95
|
5035
|
+
(hex 5F).
|
5036
|
+
|
5037
|
+
Thus, apart from \c?, these escapes generate the same character code
|
5038
|
+
values as they do in an ASCII environment, though the meanings of the
|
5039
|
+
values mostly differ. For example, \cG always generates code value 7,
|
5039
5040
|
which is BEL in ASCII but DEL in EBCDIC.
|
5040
5041
|
|
5041
|
-
The
|
5042
|
-
but
|
5043
|
-
generate
|
5044
|
-
of
|
5045
|
-
FF),
|
5046
|
-
certain
|
5042
|
+
The sequence \c? generates DEL (127, hex 7F) in an ASCII environment,
|
5043
|
+
but because 127 is not a control character in EBCDIC, Perl makes it
|
5044
|
+
generate the APC character. Unfortunately, there are several variants
|
5045
|
+
of EBCDIC. In most of them the APC character has the value 255 (hex
|
5046
|
+
FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If
|
5047
|
+
certain other characters have POSIX-BC values, PCRE makes \c? generate
|
5047
5048
|
95; otherwise it generates 255.
|
5048
5049
|
|
5049
|
-
After
|
5050
|
-
than
|
5050
|
+
After \0 up to two further octal digits are read. If there are fewer
|
5051
|
+
than two digits, just those that are present are used. Thus the
|
5051
5052
|
sequence \0\x\015 specifies two binary zeros followed by a CR character
|
5052
5053
|
(code value 13). Make sure you supply two digits after the initial zero
|
5053
5054
|
if the pattern character that follows is itself an octal digit.
|
5054
5055
|
|
5055
|
-
The
|
5056
|
-
in
|
5057
|
-
recent
|
5058
|
-
points
|
5056
|
+
The escape \o must be followed by a sequence of octal digits, enclosed
|
5057
|
+
in braces. An error occurs if this is not the case. This escape is a
|
5058
|
+
recent addition to Perl; it provides a way of specifying character code
|
5059
|
+
points as octal numbers greater than 0777, and it also allows octal
|
5059
5060
|
numbers and back references to be unambiguously specified.
|
5060
5061
|
|
5061
5062
|
For greater clarity and unambiguity, it is best to avoid following \ by
|
5062
5063
|
a digit greater than zero. Instead, use \o{} or \x{} to specify charac-
|
5063
|
-
ter
|
5064
|
+
ter numbers, and \g{} to specify back references. The following para-
|
5064
5065
|
graphs describe the old, ambiguous syntax.
|
5065
5066
|
|
5066
5067
|
The handling of a backslash followed by a digit other than 0 is compli-
|
5067
|
-
cated,
|
5068
|
+
cated, and Perl has changed in recent releases, causing PCRE also to
|
5068
5069
|
change. Outside a character class, PCRE reads the digit and any follow-
|
5069
|
-
ing
|
5070
|
-
there
|
5071
|
-
in
|
5072
|
-
description
|
5070
|
+
ing digits as a decimal number. If the number is less than 8, or if
|
5071
|
+
there have been at least that many previous capturing left parentheses
|
5072
|
+
in the expression, the entire sequence is taken as a back reference. A
|
5073
|
+
description of how this works is given later, following the discussion
|
5073
5074
|
of parenthesized subpatterns.
|
5074
5075
|
|
5075
|
-
Inside
|
5076
|
+
Inside a character class, or if the decimal number following \ is
|
5076
5077
|
greater than 7 and there have not been that many capturing subpatterns,
|
5077
|
-
PCRE
|
5078
|
+
PCRE handles \8 and \9 as the literal characters "8" and "9", and oth-
|
5078
5079
|
erwise re-reads up to three octal digits following the backslash, using
|
5079
|
-
them
|
5080
|
+
them to generate a data character. Any subsequent digits stand for
|
5080
5081
|
themselves. For example:
|
5081
5082
|
|
5082
5083
|
\040 is another way of writing an ASCII space
|
@@ -5094,31 +5095,31 @@ BACKSLASH
|
|
5094
5095
|
\81 is either a back reference, or the two
|
5095
5096
|
characters "8" and "1"
|
5096
5097
|
|
5097
|
-
Note
|
5098
|
-
syntax
|
5098
|
+
Note that octal values of 100 or greater that are specified using this
|
5099
|
+
syntax must not be introduced by a leading zero, because no more than
|
5099
5100
|
three octal digits are ever read.
|
5100
5101
|
|
5101
|
-
By
|
5102
|
-
decimal
|
5102
|
+
By default, after \x that is not followed by {, from zero to two hexa-
|
5103
|
+
decimal digits are read (letters can be in upper or lower case). Any
|
5103
5104
|
number of hexadecimal digits may appear between \x{ and }. If a charac-
|
5104
|
-
ter
|
5105
|
+
ter other than a hexadecimal digit appears between \x{ and }, or if
|
5105
5106
|
there is no terminating }, an error occurs.
|
5106
5107
|
|
5107
|
-
If
|
5108
|
-
is
|
5109
|
-
its.
|
5108
|
+
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x
|
5109
|
+
is as just described only when it is followed by two hexadecimal dig-
|
5110
|
+
its. Otherwise, it matches a literal "x" character. In JavaScript
|
5110
5111
|
mode, support for code points greater than 256 is provided by \u, which
|
5111
|
-
must
|
5112
|
+
must be followed by four hexadecimal digits; otherwise it matches a
|
5112
5113
|
literal "u" character.
|
5113
5114
|
|
5114
5115
|
Characters whose value is less than 256 can be defined by either of the
|
5115
|
-
two
|
5116
|
+
two syntaxes for \x (or by \u in JavaScript mode). There is no differ-
|
5116
5117
|
ence in the way they are handled. For example, \xdc is exactly the same
|
5117
5118
|
as \x{dc} (or \u00dc in JavaScript mode).
|
5118
5119
|
|
5119
5120
|
Constraints on character values
|
5120
5121
|
|
5121
|
-
Characters
|
5122
|
+
Characters that are specified using octal or hexadecimal numbers are
|
5122
5123
|
limited to certain values, as follows:
|
5123
5124
|
|
5124
5125
|
8-bit non-UTF mode less than 0x100
|
@@ -5128,44 +5129,44 @@ BACKSLASH
|
|
5128
5129
|
32-bit non-UTF mode less than 0x100000000
|
5129
5130
|
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
5130
5131
|
|
5131
|
-
Invalid
|
5132
|
+
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-
|
5132
5133
|
called "surrogate" codepoints), and 0xffef.
|
5133
5134
|
|
5134
5135
|
Escape sequences in character classes
|
5135
5136
|
|
5136
5137
|
All the sequences that define a single character value can be used both
|
5137
|
-
inside
|
5138
|
+
inside and outside character classes. In addition, inside a character
|
5138
5139
|
class, \b is interpreted as the backspace character (hex 08).
|
5139
5140
|
|
5140
|
-
\N
|
5141
|
-
inside
|
5142
|
-
they
|
5143
|
-
default,
|
5141
|
+
\N is not allowed in a character class. \B, \R, and \X are not special
|
5142
|
+
inside a character class. Like other unrecognized escape sequences,
|
5143
|
+
they are treated as the literal characters "B", "R", and "X" by
|
5144
|
+
default, but cause an error if the PCRE_EXTRA option is set. Outside a
|
5144
5145
|
character class, these sequences have different meanings.
|
5145
5146
|
|
5146
5147
|
Unsupported escape sequences
|
5147
5148
|
|
5148
|
-
In
|
5149
|
-
handler
|
5150
|
-
default,
|
5151
|
-
PCRE_JAVASCRIPT_COMPAT
|
5149
|
+
In Perl, the sequences \l, \L, \u, and \U are recognized by its string
|
5150
|
+
handler and used to modify the case of following characters. By
|
5151
|
+
default, PCRE does not support these escape sequences. However, if the
|
5152
|
+
PCRE_JAVASCRIPT_COMPAT option is set, \U matches a "U" character, and
|
5152
5153
|
\u can be used to define a character by code point, as described in the
|
5153
5154
|
previous section.
|
5154
5155
|
|
5155
5156
|
Absolute and relative back references
|
5156
5157
|
|
5157
|
-
The
|
5158
|
-
ally
|
5158
|
+
The sequence \g followed by an unsigned or a negative number, option-
|
5159
|
+
ally enclosed in braces, is an absolute or relative back reference. A
|
5159
5160
|
named back reference can be coded as \g{name}. Back references are dis-
|
5160
5161
|
cussed later, following the discussion of parenthesized subpatterns.
|
5161
5162
|
|
5162
5163
|
Absolute and relative subroutine calls
|
5163
5164
|
|
5164
|
-
For
|
5165
|
+
For compatibility with Oniguruma, the non-Perl syntax \g followed by a
|
5165
5166
|
name or a number enclosed either in angle brackets or single quotes, is
|
5166
|
-
an
|
5167
|
-
Details
|
5168
|
-
\g<...>
|
5167
|
+
an alternative syntax for referencing a subpattern as a "subroutine".
|
5168
|
+
Details are discussed later. Note that \g{...} (Perl syntax) and
|
5169
|
+
\g<...> (Oniguruma syntax) are not synonymous. The former is a back
|
5169
5170
|
reference; the latter is a subroutine call.
|
5170
5171
|
|
5171
5172
|
Generic character types
|
@@ -5184,59 +5185,59 @@ BACKSLASH
|
|
5184
5185
|
\W any "non-word" character
|
5185
5186
|
|
5186
5187
|
There is also the single sequence \N, which matches a non-newline char-
|
5187
|
-
acter.
|
5188
|
-
not
|
5188
|
+
acter. This is the same as the "." metacharacter when PCRE_DOTALL is
|
5189
|
+
not set. Perl also uses \N to match characters by name; PCRE does not
|
5189
5190
|
support this.
|
5190
5191
|
|
5191
|
-
Each
|
5192
|
-
plete
|
5193
|
-
matches
|
5194
|
-
inside
|
5195
|
-
the
|
5196
|
-
the
|
5192
|
+
Each pair of lower and upper case escape sequences partitions the com-
|
5193
|
+
plete set of characters into two disjoint sets. Any given character
|
5194
|
+
matches one, and only one, of each pair. The sequences can appear both
|
5195
|
+
inside and outside character classes. They each match one character of
|
5196
|
+
the appropriate type. If the current matching point is at the end of
|
5197
|
+
the subject string, all of them fail, because there is no character to
|
5197
5198
|
match.
|
5198
5199
|
|
5199
|
-
For
|
5200
|
-
(code
|
5201
|
-
However,
|
5202
|
-
release
|
5203
|
-
(11),
|
5200
|
+
For compatibility with Perl, \s formerly did not match the VT character
|
5201
|
+
(code 11), which made it different from the POSIX "space" class.
|
5202
|
+
However, Perl added VT at release 5.18, and PCRE followed suit at
|
5203
|
+
release 8.34. The default \s characters are now HT (9), LF (10), VT
|
5204
|
+
(11), FF (12), CR (13), and space (32), which are defined as white
|
5204
5205
|
space in the "C" locale. This list may vary if locale-specific matching
|
5205
|
-
is
|
5206
|
-
character
|
5206
|
+
is taking place. For example, in some locales the "non-breaking space"
|
5207
|
+
character (\xA0) is recognized as white space, and in others the VT
|
5207
5208
|
character is not.
|
5208
5209
|
|
5209
|
-
A
|
5210
|
-
or
|
5211
|
-
trolled
|
5212
|
-
specific
|
5213
|
-
page).
|
5214
|
-
systems,
|
5215
|
-
are
|
5210
|
+
A "word" character is an underscore or any character that is a letter
|
5211
|
+
or digit. By default, the definition of letters and digits is con-
|
5212
|
+
trolled by PCRE's low-valued character tables, and may vary if locale-
|
5213
|
+
specific matching is taking place (see "Locale support" in the pcreapi
|
5214
|
+
page). For example, in a French locale such as "fr_FR" in Unix-like
|
5215
|
+
systems, or "french" in Windows, some character codes greater than 127
|
5216
|
+
are used for accented letters, and these are then matched by \w. The
|
5216
5217
|
use of locales with Unicode is discouraged.
|
5217
5218
|
|
5218
|
-
By
|
5219
|
+
By default, characters whose code points are greater than 127 never
|
5219
5220
|
match \d, \s, or \w, and always match \D, \S, and \W, although this may
|
5220
|
-
vary
|
5221
|
-
is
|
5222
|
-
from
|
5223
|
-
sons.
|
5224
|
-
PCRE_UCP
|
5221
|
+
vary for characters in the range 128-255 when locale-specific matching
|
5222
|
+
is happening. These escape sequences retain their original meanings
|
5223
|
+
from before Unicode support was available, mainly for efficiency rea-
|
5224
|
+
sons. If PCRE is compiled with Unicode property support, and the
|
5225
|
+
PCRE_UCP option is set, the behaviour is changed so that Unicode prop-
|
5225
5226
|
erties are used to determine character types, as follows:
|
5226
5227
|
|
5227
5228
|
\d any character that matches \p{Nd} (decimal digit)
|
5228
5229
|
\s any character that matches \p{Z} or \h or \v
|
5229
5230
|
\w any character that matches \p{L} or \p{N}, plus underscore
|
5230
5231
|
|
5231
|
-
The
|
5232
|
-
\d
|
5233
|
-
as
|
5234
|
-
affects
|
5232
|
+
The upper case escapes match the inverse sets of characters. Note that
|
5233
|
+
\d matches only decimal digits, whereas \w matches any Unicode digit,
|
5234
|
+
as well as any Unicode letter, and underscore. Note also that PCRE_UCP
|
5235
|
+
affects \b, and \B because they are defined in terms of \w and \W.
|
5235
5236
|
Matching these sequences is noticeably slower when PCRE_UCP is set.
|
5236
5237
|
|
5237
|
-
The
|
5238
|
-
at
|
5239
|
-
ASCII
|
5238
|
+
The sequences \h, \H, \v, and \V are features that were added to Perl
|
5239
|
+
at release 5.10. In contrast to the other sequences, which match only
|
5240
|
+
ASCII characters by default, these always match certain high-valued
|
5240
5241
|
code points, whether or not PCRE_UCP is set. The horizontal space char-
|
5241
5242
|
acters are:
|
5242
5243
|
|
@@ -5275,110 +5276,110 @@ BACKSLASH
|
|
5275
5276
|
|
5276
5277
|
Newline sequences
|
5277
5278
|
|
5278
|
-
Outside
|
5279
|
-
any
|
5279
|
+
Outside a character class, by default, the escape sequence \R matches
|
5280
|
+
any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent
|
5280
5281
|
to the following:
|
5281
5282
|
|
5282
5283
|
(?>\r\n|\n|\x0b|\f|\r|\x85)
|
5283
5284
|
|
5284
|
-
This
|
5285
|
+
This is an example of an "atomic group", details of which are given
|
5285
5286
|
below. This particular group matches either the two-character sequence
|
5286
|
-
CR
|
5287
|
-
U+000A),
|
5288
|
-
riage
|
5287
|
+
CR followed by LF, or one of the single characters LF (linefeed,
|
5288
|
+
U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (car-
|
5289
|
+
riage return, U+000D), or NEL (next line, U+0085). The two-character
|
5289
5290
|
sequence is treated as a single unit that cannot be split.
|
5290
5291
|
|
5291
|
-
In
|
5292
|
+
In other modes, two additional characters whose codepoints are greater
|
5292
5293
|
than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa-
|
5293
|
-
rator,
|
5294
|
+
rator, U+2029). Unicode character property support is not needed for
|
5294
5295
|
these characters to be recognized.
|
5295
5296
|
|
5296
5297
|
It is possible to restrict \R to match only CR, LF, or CRLF (instead of
|
5297
|
-
the
|
5298
|
+
the complete set of Unicode line endings) by setting the option
|
5298
5299
|
PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
|
5299
5300
|
(BSR is an abbreviation for "backslash R".) This can be made the default
|
5300
|
-
when
|
5301
|
-
requested
|
5302
|
-
specify
|
5301
|
+
when PCRE is built; if this is the case, the other behaviour can be
|
5302
|
+
requested via the PCRE_BSR_UNICODE option. It is also possible to
|
5303
|
+
specify these settings by starting a pattern string with one of the
|
5303
5304
|
following sequences:
|
5304
5305
|
|
5305
5306
|
(*BSR_ANYCRLF) CR, LF, or CRLF only
|
5306
5307
|
(*BSR_UNICODE) any Unicode newline sequence
|
5307
5308
|
|
5308
5309
|
These override the default and the options given to the compiling func-
|
5309
|
-
tion,
|
5310
|
-
matching
|
5311
|
-
Perl-compatible,
|
5312
|
-
and
|
5313
|
-
present,
|
5310
|
+
tion, but they can themselves be overridden by options given to a
|
5311
|
+
matching function. Note that these special settings, which are not
|
5312
|
+
Perl-compatible, are recognized only at the very start of a pattern,
|
5313
|
+
and that they must be in upper case. If more than one of them is
|
5314
|
+
present, the last one is used. They can be combined with a change of
|
5314
5315
|
newline convention; for example, a pattern can start with:
|
5315
5316
|
|
5316
5317
|
(*ANY)(*BSR_ANYCRLF)
|
5317
5318
|
|
5318
|
-
They
|
5319
|
+
They can also be combined with the (*UTF8), (*UTF16), (*UTF32), (*UTF)
|
5319
5320
|
or (*UCP) special sequences. Inside a character class, \R is treated as
|
5320
|
-
an
|
5321
|
+
an unrecognized escape sequence, and so matches the letter "R" by
|
5321
5322
|
default, but causes an error if PCRE_EXTRA is set.
|
5322
5323
|
|
5323
5324
|
Unicode character properties
|
5324
5325
|
|
5325
5326
|
When PCRE is built with Unicode character property support, three addi-
|
5326
|
-
tional
|
5327
|
-
are
|
5328
|
-
course
|
5327
|
+
tional escape sequences that match characters with specific properties
|
5328
|
+
are available. When in 8-bit non-UTF-8 mode, these sequences are of
|
5329
|
+
course limited to testing characters whose codepoints are less than
|
5329
5330
|
256, but they do work in this mode. The extra escape sequences are:
|
5330
5331
|
|
5331
5332
|
\p{xx} a character with the xx property
|
5332
5333
|
\P{xx} a character without the xx property
|
5333
5334
|
\X a Unicode extended grapheme cluster
|
5334
5335
|
|
5335
|
-
The
|
5336
|
+
The property names represented by xx above are limited to the Unicode
|
5336
5337
|
script names, the general category properties, "Any", which matches any
|
5337
|
-
character
|
5338
|
-
(described
|
5339
|
-
sicalSymbols"
|
5338
|
+
character (including newline), and some special PCRE properties
|
5339
|
+
(described in the next section). Other Perl properties such as "InMu-
|
5340
|
+
sicalSymbols" are not currently supported by PCRE. Note that \P{Any}
|
5340
5341
|
does not match any characters, so always causes a match failure.
|
5341
5342
|
|
5342
5343
|
Sets of Unicode characters are defined as belonging to certain scripts.
|
5343
|
-
A
|
5344
|
+
A character from one of these sets can be matched using a script name.
|
5344
5345
|
For example:
|
5345
5346
|
|
5346
5347
|
\p{Greek}
|
5347
5348
|
\P{Han}
|
5348
5349
|
|
5349
|
-
Those
|
5350
|
+
Those that are not part of an identified script are lumped together as
|
5350
5351
|
"Common". The current list of scripts is:
|
5351
5352
|
|
5352
|
-
Arabic,
|
5353
|
-
Bopomofo,
|
5353
|
+
Arabic, Armenian, Avestan, Balinese, Bamum, Bassa_Vah, Batak, Bengali,
|
5354
|
+
Bopomofo, Brahmi, Braille, Buginese, Buhid, Canadian_Aboriginal, Car-
|
5354
5355
|
ian, Caucasian_Albanian, Chakma, Cham, Cherokee, Common, Coptic, Cunei-
|
5355
5356
|
form, Cypriot, Cyrillic, Deseret, Devanagari, Duployan, Egyptian_Hiero-
|
5356
5357
|
glyphs, Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha,
|
5357
|
-
Greek,
|
5358
|
-
Imperial_Aramaic,
|
5359
|
-
tional_Parthian,
|
5360
|
-
Kharoshthi,
|
5361
|
-
ear_A,
|
5362
|
-
Manichaean,
|
5363
|
-
Meroitic_Hieroglyphs,
|
5364
|
-
New_Tai_Lue,
|
5358
|
+
Greek, Gujarati, Gurmukhi, Han, Hangul, Hanunoo, Hebrew, Hiragana,
|
5359
|
+
Imperial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscrip-
|
5360
|
+
tional_Parthian, Javanese, Kaithi, Kannada, Katakana, Kayah_Li,
|
5361
|
+
Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Lin-
|
5362
|
+
ear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani, Malayalam, Mandaic,
|
5363
|
+
Manichaean, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive,
|
5364
|
+
Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Myanmar, Nabataean,
|
5365
|
+
New_Tai_Lue, Nko, Ogham, Ol_Chiki, Old_Italic, Old_North_Arabian,
|
5365
5366
|
Old_Permic, Old_Persian, Old_South_Arabian, Old_Turkic, Oriya, Osmanya,
|
5366
5367
|
Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
|
5367
|
-
Psalter_Pahlavi,
|
5368
|
-
vian,
|
5369
|
-
Tagalog,
|
5370
|
-
Thaana,
|
5368
|
+
Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
|
5369
|
+
vian, Siddham, Sinhala, Sora_Sompeng, Sundanese, Syloti_Nagri, Syriac,
|
5370
|
+
Tagalog, Tagbanwa, Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Telugu,
|
5371
|
+
Thaana, Thai, Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai, Warang_Citi,
|
5371
5372
|
Yi.
|
5372
5373
|
|
5373
5374
|
Each character has exactly one Unicode general category property, spec-
|
5374
|
-
ified
|
5375
|
-
tion
|
5376
|
-
brace
|
5375
|
+
ified by a two-letter abbreviation. For compatibility with Perl, nega-
|
5376
|
+
tion can be specified by including a circumflex between the opening
|
5377
|
+
brace and the property name. For example, \p{^Lu} is the same as
|
5377
5378
|
\P{Lu}.
|
5378
5379
|
|
5379
5380
|
If only one letter is specified with \p or \P, it includes all the gen-
|
5380
|
-
eral
|
5381
|
-
the
|
5381
|
+
eral category properties that start with that letter. In this case, in
|
5382
|
+
the absence of negation, the curly brackets in the escape sequence are
|
5382
5383
|
optional; these two examples have the same effect:
|
5383
5384
|
|
5384
5385
|
\p{L}
|
@@ -5430,73 +5431,73 @@ BACKSLASH
|
|
5430
5431
|
Zp Paragraph separator
|
5431
5432
|
Zs Space separator
|
5432
5433
|
|
5433
|
-
The
|
5434
|
-
has
|
5434
|
+
The special property L& is also supported: it matches a character that
|
5435
|
+
has the Lu, Ll, or Lt property, in other words, a letter that is not
|
5435
5436
|
classified as a modifier or "other".
|
5436
5437
|
|
5437
|
-
The
|
5438
|
-
U+D800
|
5439
|
-
so
|
5438
|
+
The Cs (Surrogate) property applies only to characters in the range
|
5439
|
+
U+D800 to U+DFFF. Such characters are not valid in Unicode strings and
|
5440
|
+
so cannot be tested by PCRE, unless UTF validity checking has been
|
5440
5441
|
turned off (see the discussion of PCRE_NO_UTF8_CHECK,
|
5441
|
-
PCRE_NO_UTF16_CHECK
|
5442
|
+
PCRE_NO_UTF16_CHECK and PCRE_NO_UTF32_CHECK in the pcreapi page). Perl
|
5442
5443
|
does not support the Cs property.
|
5443
5444
|
|
5444
|
-
The
|
5445
|
-
\p{Letter})
|
5445
|
+
The long synonyms for property names that Perl supports (such as
|
5446
|
+
\p{Letter}) are not supported by PCRE, nor is it permitted to prefix
|
5446
5447
|
any of these properties with "Is".
|
5447
5448
|
|
5448
5449
|
No character that is in the Unicode table has the Cn (unassigned) prop-
|
5449
5450
|
erty. Instead, this property is assumed for any code point that is not
|
5450
5451
|
in the Unicode table.
|
5451
5452
|
|
5452
|
-
Specifying
|
5453
|
-
For
|
5453
|
+
Specifying caseless matching does not affect these escape sequences.
|
5454
|
+
For example, \p{Lu} always matches only upper case letters. This is
|
5454
5455
|
different from the behaviour of current versions of Perl.
|
5455
5456
|
|
5456
|
-
Matching
|
5457
|
-
to
|
5457
|
+
Matching characters by Unicode property is not fast, because PCRE has
|
5458
|
+
to do a multistage table lookup in order to find a character's prop-
|
5458
5459
|
erty. That is why the traditional escape sequences such as \d and \w do
|
5459
5460
|
not use Unicode properties in PCRE by default, though you can make them
|
5460
|
-
do
|
5461
|
+
do so by setting the PCRE_UCP option or by starting the pattern with
|
5461
5462
|
(*UCP).
|
5462
5463
|
|
5463
5464
|
Extended grapheme clusters
|
5464
5465
|
|
5465
|
-
The
|
5466
|
+
The \X escape matches any number of Unicode characters that form an
|
5466
5467
|
"extended grapheme cluster", and treats the sequence as an atomic group
|
5467
|
-
(see
|
5468
|
+
(see below). Up to and including release 8.31, PCRE matched an ear-
|
5468
5469
|
lier, simpler definition that was equivalent to
|
5469
5470
|
|
5470
5471
|
(?>\PM\pM*)
|
5471
5472
|
|
5472
|
-
That
|
5473
|
-
by
|
5474
|
-
the
|
5473
|
+
That is, it matched a character without the "mark" property, followed
|
5474
|
+
by zero or more characters with the "mark" property. Characters with
|
5475
|
+
the "mark" property are typically non-spacing accents that affect the
|
5475
5476
|
preceding character.
|
5476
5477
|
|
5477
|
-
This
|
5478
|
-
cated
|
5479
|
-
breaking
|
5480
|
-
define
|
5478
|
+
This simple definition was extended in Unicode to include more compli-
|
5479
|
+
cated kinds of composite character by giving each character a grapheme
|
5480
|
+
breaking property, and creating rules that use these properties to
|
5481
|
+
define the boundaries of extended grapheme clusters. In releases of
|
5481
5482
|
PCRE later than 8.31, \X matches one of these clusters.
|
5482
5483
|
|
5483
|
-
\X
|
5484
|
+
\X always matches at least one character. Then it decides whether to
|
5484
5485
|
add additional characters according to the following rules for ending a
|
5485
5486
|
cluster:
|
5486
5487
|
|
5487
5488
|
1. End at the end of the subject string.
|
5488
5489
|
|
5489
|
-
2.
|
5490
|
+
2. Do not end between CR and LF; otherwise end after any control char-
|
5490
5491
|
acter.
|
5491
5492
|
|
5492
|
-
3.
|
5493
|
-
characters
|
5494
|
-
be
|
5493
|
+
3. Do not break Hangul (a Korean script) syllable sequences. Hangul
|
5494
|
+
characters are of five types: L, V, T, LV, and LVT. An L character may
|
5495
|
+
be followed by an L, V, LV, or LVT character; an LV or V character may
|
5495
5496
|
be followed by a V or T character; an LVT or T character may be followed
|
5496
5497
|
only by a T character.
|
5497
5498
|
|
5498
|
-
4.
|
5499
|
-
with
|
5499
|
+
4. Do not end before extending characters or spacing marks. Characters
|
5500
|
+
with the "mark" property always have the "extend" grapheme breaking
|
5500
5501
|
property.
|
5501
5502
|
|
5502
5503
|
5. Do not end after prepend characters.
|
@@ -5505,9 +5506,9 @@ BACKSLASH
|
|
5505
5506
|
|
5506
5507
|
PCRE's additional properties
|
5507
5508
|
|
5508
|
-
As
|
5509
|
-
ports
|
5510
|
-
sequences
|
5509
|
+
As well as the standard Unicode properties described above, PCRE sup-
|
5510
|
+
ports four more that make it possible to convert traditional escape
|
5511
|
+
sequences such as \w and \s to use Unicode properties. PCRE uses these
|
5511
5512
|
non-standard, non-Perl properties internally when PCRE_UCP is set. How-
|
5512
5513
|
ever, they may also be used explicitly. These properties are:
|
5513
5514
|
|
@@ -5516,54 +5517,54 @@ BACKSLASH
|
|
5516
5517
|
Xsp Any Perl space character
|
5517
5518
|
Xwd Any Perl "word" character
|
5518
5519
|
|
5519
|
-
Xan
|
5520
|
-
ber)
|
5521
|
-
form
|
5522
|
-
(separator)
|
5523
|
-
tical
|
5524
|
-
lowed
|
5520
|
+
Xan matches characters that have either the L (letter) or the N (num-
|
5521
|
+
ber) property. Xps matches the characters tab, linefeed, vertical tab,
|
5522
|
+
form feed, or carriage return, and any other character that has the Z
|
5523
|
+
(separator) property. Xsp is the same as Xps; it used to exclude ver-
|
5524
|
+
tical tab, for Perl compatibility, but Perl changed, and so PCRE fol-
|
5525
|
+
lowed at release 8.34. Xwd matches the same characters as Xan, plus
|
5525
5526
|
underscore.
|
5526
5527
|
|
5527
|
-
There
|
5528
|
-
ter
|
5529
|
-
other
|
5530
|
-
accent),
|
5531
|
-
equal
|
5532
|
-
most
|
5533
|
-
are
|
5528
|
+
There is another non-standard property, Xuc, which matches any charac-
|
5529
|
+
ter that can be represented by a Universal Character Name in C++ and
|
5530
|
+
other programming languages. These are the characters $, @, ` (grave
|
5531
|
+
accent), and all characters with Unicode code points greater than or
|
5532
|
+
equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that
|
5533
|
+
most base (ASCII) characters are excluded. (Universal Character Names
|
5534
|
+
are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit.
|
5534
5535
|
Note that the Xuc property does not match these sequences but the char-
|
5535
5536
|
acters that they represent.)
|
5536
5537
|
|
5537
5538
|
Resetting the match start
|
5538
5539
|
|
5539
|
-
The
|
5540
|
+
The escape sequence \K causes any previously matched characters not to
|
5540
5541
|
be included in the final matched sequence. For example, the pattern:
|
5541
5542
|
|
5542
5543
|
foo\Kbar
|
5543
5544
|
|
5544
|
-
matches
|
5545
|
-
is
|
5546
|
-
this
|
5547
|
-
to
|
5548
|
-
not
|
5545
|
+
matches "foobar", but reports that it has matched "bar". This feature
|
5546
|
+
is similar to a lookbehind assertion (described below). However, in
|
5547
|
+
this case, the part of the subject before the real match does not have
|
5548
|
+
to be of fixed length, as lookbehind assertions do. The use of \K does
|
5549
|
+
not interfere with the setting of captured substrings. For example,
|
5549
5550
|
when the pattern
|
5550
5551
|
|
5551
5552
|
(foo)\Kbar
|
5552
5553
|
|
5553
5554
|
matches "foobar", the first substring is still set to "foo".
|
5554
5555
|
|
5555
|
-
Perl
|
5556
|
-
defined".
|
5557
|
-
assertions,
|
5558
|
-
pattern
|
5556
|
+
Perl documents that the use of \K within assertions is "not well
|
5557
|
+
defined". In PCRE, \K is acted upon when it occurs inside positive
|
5558
|
+
assertions, but is ignored in negative assertions. Note that when a
|
5559
|
+
pattern such as (?=ab\K) matches, the reported start of the match can
|
5559
5560
|
be greater than the end of the match.
|
5560
5561
|
|
5561
5562
|
Simple assertions
|
5562
5563
|
|
5563
|
-
The
|
5564
|
-
tion
|
5565
|
-
a
|
5566
|
-
use
|
5564
|
+
The final use of backslash is for certain simple assertions. An asser-
|
5565
|
+
tion specifies a condition that has to be met at a particular point in
|
5566
|
+
a match, without consuming any characters from the subject string. The
|
5567
|
+
use of subpatterns for more complicated assertions is described below.
|
5567
5568
|
The backslashed assertions are:
|
5568
5569
|
|
5569
5570
|
\b matches at a word boundary
|
@@ -5574,161 +5575,161 @@ BACKSLASH
|
|
5574
5575
|
\z matches only at the end of the subject
|
5575
5576
|
\G matches at the first matching position in the subject
|
5576
5577
|
|
5577
|
-
Inside
|
5578
|
-
backspace
|
5579
|
-
character
|
5578
|
+
Inside a character class, \b has a different meaning; it matches the
|
5579
|
+
backspace character. If any other of these assertions appears in a
|
5580
|
+
character class, by default it matches the corresponding literal char-
|
5580
5581
|
acter (for example, \B matches the letter B). However, if the
|
5581
|
-
PCRE_EXTRA
|
5582
|
+
PCRE_EXTRA option is set, an "invalid escape sequence" error is gener-
|
5582
5583
|
ated instead.
|
5583
5584
|
|
5584
|
-
A
|
5585
|
-
character
|
5586
|
-
one
|
5587
|
-
string
|
5588
|
-
UTF
|
5589
|
-
PCRE_UCP
|
5590
|
-
PCRE
|
5591
|
-
quence.
|
5585
|
+
A word boundary is a position in the subject string where the current
|
5586
|
+
character and the previous character do not both match \w or \W (i.e.
|
5587
|
+
one matches \w and the other matches \W), or the start or end of the
|
5588
|
+
string if the first or last character matches \w, respectively. In a
|
5589
|
+
UTF mode, the meanings of \w and \W can be changed by setting the
|
5590
|
+
PCRE_UCP option. When this is done, it also affects \b and \B. Neither
|
5591
|
+
PCRE nor Perl has a separate "start of word" or "end of word" metase-
|
5592
|
+
quence. However, whatever follows \b normally determines which it is.
|
5592
5593
|
For example, the fragment \ba matches "a" at the start of a word.
|
5593
5594
|
|
5594
|
-
The
|
5595
|
+
The \A, \Z, and \z assertions differ from the traditional circumflex
|
5595
5596
|
and dollar (described in the next section) in that they only ever match
|
5596
|
-
at
|
5597
|
-
set.
|
5597
|
+
at the very start and end of the subject string, whatever options are
|
5598
|
+
set. Thus, they are independent of multiline mode. These three asser-
|
5598
5599
|
tions are not affected by the PCRE_NOTBOL or PCRE_NOTEOL options, which
|
5599
|
-
affect
|
5600
|
-
However,
|
5600
|
+
affect only the behaviour of the circumflex and dollar metacharacters.
|
5601
|
+
However, if the startoffset argument of pcre_exec() is non-zero, indi-
|
5601
5602
|
cating that matching is to start at a point other than the beginning of
|
5602
|
-
the
|
5603
|
+
the subject, \A can never match. The difference between \Z and \z is
|
5603
5604
|
that \Z matches before a newline at the end of the string as well as at
|
5604
5605
|
the very end, whereas \z matches only at the end.
|
5605
5606
|
|
5606
|
-
The
|
5607
|
-
the
|
5608
|
-
of
|
5609
|
-
non-zero.
|
5607
|
+
The \G assertion is true only when the current matching position is at
|
5608
|
+
the start point of the match, as specified by the startoffset argument
|
5609
|
+
of pcre_exec(). It differs from \A when the value of startoffset is
|
5610
|
+
non-zero. By calling pcre_exec() multiple times with appropriate argu-
|
5610
5611
|
ments, you can mimic Perl's /g option, and it is in this kind of imple-
|
5611
5612
|
mentation where \G can be useful.
|
5612
5613
|
|
5613
|
-
Note,
|
5614
|
+
Note, however, that PCRE's interpretation of \G, as the start of the
|
5614
5615
|
current match, is subtly different from Perl's, which defines it as the
|
5615
|
-
end
|
5616
|
-
previously
|
5616
|
+
end of the previous match. In Perl, these can be different when the
|
5617
|
+
previously matched string was empty. Because PCRE does just one match
|
5617
5618
|
at a time, it cannot reproduce this behaviour.
|
5618
5619
|
|
5619
|
-
If
|
5620
|
+
If all the alternatives of a pattern begin with \G, the expression is
|
5620
5621
|
anchored to the starting match position, and the "anchored" flag is set
|
5621
5622
|
in the compiled regular expression.
|
5622
5623
|
|
5623
5624
|
|
5624
5625
|
CIRCUMFLEX AND DOLLAR
|
5625
5626
|
|
5626
|
-
The
|
5627
|
-
That
|
5627
|
+
The circumflex and dollar metacharacters are zero-width assertions.
|
5628
|
+
That is, they test for a particular condition being true without con-
|
5628
5629
|
suming any characters from the subject string.
|
5629
5630
|
|
5630
5631
|
Outside a character class, in the default matching mode, the circumflex
|
5631
|
-
character
|
5632
|
-
point
|
5633
|
-
ment
|
5634
|
-
PCRE_MULTILINE
|
5632
|
+
character is an assertion that is true only if the current matching
|
5633
|
+
point is at the start of the subject string. If the startoffset argu-
|
5634
|
+
ment of pcre_exec() is non-zero, circumflex can never match if the
|
5635
|
+
PCRE_MULTILINE option is unset. Inside a character class, circumflex
|
5635
5636
|
has an entirely different meaning (see below).
|
5636
5637
|
|
5637
|
-
Circumflex
|
5638
|
-
of
|
5639
|
-
alternative
|
5640
|
-
branch.
|
5641
|
-
if
|
5642
|
-
ject,
|
5638
|
+
Circumflex need not be the first character of the pattern if a number
|
5639
|
+
of alternatives are involved, but it should be the first thing in each
|
5640
|
+
alternative in which it appears if the pattern is ever to match that
|
5641
|
+
branch. If all possible alternatives start with a circumflex, that is,
|
5642
|
+
if the pattern is constrained to match only at the start of the sub-
|
5643
|
+
ject, it is said to be an "anchored" pattern. (There are also other
|
5643
5644
|
constructs that can cause a pattern to be anchored.)
|
5644
5645
|
|
5645
|
-
The
|
5646
|
-
matching
|
5647
|
-
before
|
5648
|
-
that
|
5646
|
+
The dollar character is an assertion that is true only if the current
|
5647
|
+
matching point is at the end of the subject string, or immediately
|
5648
|
+
before a newline at the end of the string (by default). Note, however,
|
5649
|
+
that it does not actually match the newline. Dollar need not be the
|
5649
5650
|
last character of the pattern if a number of alternatives are involved,
|
5650
|
-
but
|
5651
|
+
but it should be the last item in any branch in which it appears. Dol-
|
5651
5652
|
lar has no special meaning in a character class.
|
5652
5653
|
|
5653
|
-
The
|
5654
|
-
very
|
5654
|
+
The meaning of dollar can be changed so that it matches only at the
|
5655
|
+
very end of the string, by setting the PCRE_DOLLAR_ENDONLY option at
|
5655
5656
|
compile time. This does not affect the \Z assertion.
|
5656
5657
|
|
5657
5658
|
The meanings of the circumflex and dollar characters are changed if the
|
5658
|
-
PCRE_MULTILINE
|
5659
|
-
matches
|
5660
|
-
the
|
5661
|
-
string.
|
5662
|
-
at
|
5663
|
-
as
|
5659
|
+
PCRE_MULTILINE option is set. When this is the case, a circumflex
|
5660
|
+
matches immediately after internal newlines as well as at the start of
|
5661
|
+
the subject string. It does not match after a newline that ends the
|
5662
|
+
string. A dollar matches before any newlines in the string, as well as
|
5663
|
+
at the very end, when PCRE_MULTILINE is set. When newline is specified
|
5664
|
+
as the two-character sequence CRLF, isolated CR and LF characters do
|
5664
5665
|
not indicate newlines.
|
5665
5666
|
|
5666
|
-
For
|
5667
|
-
(where
|
5668
|
-
Consequently,
|
5669
|
-
all
|
5670
|
-
match
|
5671
|
-
pcre_exec()
|
5667
|
+
For example, the pattern /^abc$/ matches the subject string "def\nabc"
|
5668
|
+
(where \n represents a newline) in multiline mode, but not otherwise.
|
5669
|
+
Consequently, patterns that are anchored in single line mode because
|
5670
|
+
all branches start with ^ are not anchored in multiline mode, and a
|
5671
|
+
match for circumflex is possible when the startoffset argument of
|
5672
|
+
pcre_exec() is non-zero. The PCRE_DOLLAR_ENDONLY option is ignored if
|
5672
5673
|
PCRE_MULTILINE is set.
|
5673
5674
|
|
5674
|
-
Note
|
5675
|
-
and
|
5676
|
-
start
|
5675
|
+
Note that the sequences \A, \Z, and \z can be used to match the start
|
5676
|
+
and end of the subject in both modes, and if all branches of a pattern
|
5677
|
+
start with \A it is always anchored, whether or not PCRE_MULTILINE is
|
5677
5678
|
set.
|
5678
5679
|
|
5679
5680
|
|
5680
5681
|
FULL STOP (PERIOD, DOT) AND \N
|
5681
5682
|
|
5682
5683
|
Outside a character class, a dot in the pattern matches any one charac-
|
5683
|
-
ter
|
5684
|
+
ter in the subject string except (by default) a character that signi-
|
5684
5685
|
fies the end of a line.
|
5685
5686
|
|
5686
|
-
When
|
5687
|
-
that
|
5688
|
-
not
|
5689
|
-
matches
|
5690
|
-
code
|
5687
|
+
When a line ending is defined as a single character, dot never matches
|
5688
|
+
that character; when the two-character sequence CRLF is used, dot does
|
5689
|
+
not match CR if it is immediately followed by LF, but otherwise it
|
5690
|
+
matches all characters (including isolated CRs and LFs). When any Uni-
|
5691
|
+
code line endings are being recognized, dot does not match CR or LF or
|
5691
5692
|
any of the other line ending characters.
|
5692
5693
|
|
5693
|
-
The
|
5694
|
-
PCRE_DOTALL
|
5694
|
+
The behaviour of dot with regard to newlines can be changed. If the
|
5695
|
+
PCRE_DOTALL option is set, a dot matches any one character, without
|
5695
5696
|
exception. If the two-character sequence CRLF is present in the subject
|
5696
5697
|
string, it takes two dots to match it.
|
5697
5698
|
|
5698
|
-
The
|
5699
|
-
flex
|
5699
|
+
The handling of dot is entirely independent of the handling of circum-
|
5700
|
+
flex and dollar, the only relationship being that they both involve
|
5700
5701
|
newlines. Dot has no special meaning in a character class.
|
5701
5702
|
|
5702
|
-
The
|
5703
|
-
affected
|
5704
|
-
character
|
5703
|
+
The escape sequence \N behaves like a dot, except that it is not
|
5704
|
+
affected by the PCRE_DOTALL option. In other words, it matches any
|
5705
|
+
character except one that signifies the end of a line. Perl also uses
|
5705
5706
|
\N to match characters by name; PCRE does not support this.
|
5706
5707
|
|
5707
5708
|
|
5708
5709
|
MATCHING A SINGLE DATA UNIT
|
5709
5710
|
|
5710
|
-
Outside
|
5711
|
-
unit,
|
5712
|
-
unit
|
5713
|
-
32-bit
|
5714
|
-
line-ending
|
5711
|
+
Outside a character class, the escape sequence \C matches any one data
|
5712
|
+
unit, whether or not a UTF mode is set. In the 8-bit library, one data
|
5713
|
+
unit is one byte; in the 16-bit library it is a 16-bit unit; in the
|
5714
|
+
32-bit library it is a 32-bit unit. Unlike a dot, \C always matches
|
5715
|
+
line-ending characters. The feature is provided in Perl in order to
|
5715
5716
|
match individual bytes in UTF-8 mode, but it is unclear how it can use-
|
5716
|
-
fully
|
5717
|
-
units,
|
5717
|
+
fully be used. Because \C breaks up characters into individual data
|
5718
|
+
units, matching one unit with \C in a UTF mode means that the rest of
|
5718
5719
|
the string may start with a malformed UTF character. This has undefined
|
5719
5720
|
results, because PCRE assumes that it is dealing with valid UTF strings
|
5720
|
-
(and
|
5721
|
-
PCRE_NO_UTF8_CHECK,
|
5721
|
+
(and by default it checks this at the start of processing unless the
|
5722
|
+
PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK or PCRE_NO_UTF32_CHECK option
|
5722
5723
|
is used).
|
5723
5724
|
|
5724
|
-
PCRE
|
5725
|
-
below)
|
5725
|
+
PCRE does not allow \C to appear in lookbehind assertions (described
|
5726
|
+
below) in a UTF mode, because this would make it impossible to calcu-
|
5726
5727
|
late the length of the lookbehind.
|
5727
5728
|
|
5728
5729
|
In general, the \C escape sequence is best avoided. However, one way of
|
5729
|
-
using
|
5730
|
-
a
|
5731
|
-
tern,
|
5730
|
+
using it that avoids the problem of malformed UTF characters is to use
|
5731
|
+
a lookahead to check the length of the next character, as in this pat-
|
5732
|
+
tern, which could be used with a UTF-8 string (ignore white space and
|
5732
5733
|
line breaks):
|
5733
5734
|
|
5734
5735
|
(?| (?=[\x00-\x7f])(\C) |
|
@@ -5736,11 +5737,11 @@ MATCHING A SINGLE DATA UNIT
|
|
5736
5737
|
(?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
|
5737
5738
|
(?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
|
5738
5739
|
|
5739
|
-
A
|
5740
|
-
in
|
5741
|
-
assertions
|
5742
|
-
for
|
5743
|
-
character's
|
5740
|
+
A group that starts with (?| resets the capturing parentheses numbers
|
5741
|
+
in each alternative (see "Duplicate Subpattern Numbers" below). The
|
5742
|
+
assertions at the start of each branch check the next UTF-8 character
|
5743
|
+
for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The
|
5744
|
+
character's individual bytes are then captured by the appropriate num-
|
5744
5745
|
ber of groups.
|
5745
5746
|
|
5746
5747
|
|
@@ -5750,109 +5751,109 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|
5750
5751
|
closing square bracket. A closing square bracket on its own is not spe-
|
5751
5752
|
cial by default. However, if the PCRE_JAVASCRIPT_COMPAT option is set,
|
5752
5753
|
a lone closing square bracket causes a compile-time error. If a closing
|
5753
|
-
square
|
5754
|
-
first
|
5754
|
+
square bracket is required as a member of the class, it should be the
|
5755
|
+
first data character in the class (after an initial circumflex, if
|
5755
5756
|
present) or escaped with a backslash.
|
5756
5757
|
|
5757
|
-
A
|
5758
|
-
mode,
|
5758
|
+
A character class matches a single character in the subject. In a UTF
|
5759
|
+
mode, the character may be more than one data unit long. A matched
|
5759
5760
|
character must be in the set of characters defined by the class, unless
|
5760
|
-
the
|
5761
|
+
the first character in the class definition is a circumflex, in which
|
5761
5762
|
case the subject character must not be in the set defined by the class.
|
5762
|
-
If
|
5763
|
+
If a circumflex is actually required as a member of the class, ensure
|
5763
5764
|
it is not the first character, or escape it with a backslash.
|
5764
5765
|
|
5765
|
-
For
|
5766
|
-
while
|
5766
|
+
For example, the character class [aeiou] matches any lower case vowel,
|
5767
|
+
while [^aeiou] matches any character that is not a lower case vowel.
|
5767
5768
|
Note that a circumflex is just a convenient notation for specifying the
|
5768
|
-
characters
|
5769
|
-
class
|
5770
|
-
sumes
|
5769
|
+
characters that are in the class by enumerating those that are not. A
|
5770
|
+
class that starts with a circumflex is not an assertion; it still con-
|
5771
|
+
sumes a character from the subject string, and therefore it fails if
|
5771
5772
|
the current pointer is at the end of the string.
|
5772
5773
|
|
5773
5774
|
In UTF-8 (UTF-16, UTF-32) mode, characters with values greater than 255
|
5774
|
-
(0xffff)
|
5775
|
+
(0xffff) can be included in a class as a literal string of data units,
|
5775
5776
|
or by using the \x{ escaping mechanism.
|
5776
5777
|
|
5777
|
-
When
|
5778
|
-
their
|
5779
|
-
[aeiou]
|
5780
|
-
match
|
5781
|
-
understands
|
5782
|
-
than
|
5783
|
-
higher
|
5784
|
-
with
|
5785
|
-
caseless
|
5786
|
-
ensure
|
5778
|
+
When caseless matching is set, any letters in a class represent both
|
5779
|
+
their upper case and lower case versions, so for example, a caseless
|
5780
|
+
[aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
|
5781
|
+
match "A", whereas a caseful version would. In a UTF mode, PCRE always
|
5782
|
+
understands the concept of case for characters whose values are less
|
5783
|
+
than 128, so caseless matching is always possible. For characters with
|
5784
|
+
higher values, the concept of case is supported if PCRE is compiled
|
5785
|
+
with Unicode property support, but not otherwise. If you want to use
|
5786
|
+
caseless matching in a UTF mode for characters 128 and above, you must
|
5787
|
+
ensure that PCRE is compiled with Unicode property support as well as
|
5787
5788
|
with UTF support.
|
5788
5789
|
|
5789
|
-
Characters
|
5790
|
-
special
|
5791
|
-
sequence
|
5790
|
+
Characters that might indicate line breaks are never treated in any
|
5791
|
+
special way when matching character classes, whatever line-ending
|
5792
|
+
sequence is in use, and whatever setting of the PCRE_DOTALL and
|
5792
5793
|
PCRE_MULTILINE options is used. A class such as [^a] always matches one
|
5793
5794
|
of these characters.
|
5794
5795
|
|
5795
|
-
The
|
5796
|
-
ters
|
5797
|
-
between
|
5798
|
-
class,
|
5799
|
-
where
|
5796
|
+
The minus (hyphen) character can be used to specify a range of charac-
|
5797
|
+
ters in a character class. For example, [d-m] matches any letter
|
5798
|
+
between d and m, inclusive. If a minus character is required in a
|
5799
|
+
class, it must be escaped with a backslash or appear in a position
|
5800
|
+
where it cannot be interpreted as indicating a range, typically as the
|
5800
5801
|
first or last character in the class, or immediately after a range. For
|
5801
|
-
example,
|
5802
|
+
example, [b-d-z] matches letters in the range b to d, a hyphen charac-
|
5802
5803
|
ter, or z.
|
5803
5804
|
|
5804
5805
|
It is not possible to have the literal character "]" as the end charac-
|
5805
|
-
ter
|
5806
|
-
two
|
5807
|
-
would
|
5808
|
-
backslash
|
5809
|
-
preted
|
5810
|
-
The
|
5806
|
+
ter of a range. A pattern such as [W-]46] is interpreted as a class of
|
5807
|
+
two characters ("W" and "-") followed by a literal string "46]", so it
|
5808
|
+
would match "W46]" or "-46]". However, if the "]" is escaped with a
|
5809
|
+
backslash it is interpreted as the end of range, so [W-\]46] is inter-
|
5810
|
+
preted as a class containing a range followed by two other characters.
|
5811
|
+
The octal or hexadecimal representation of "]" can also be used to end
|
5811
5812
|
a range.
|
5812
5813
|
|
5813
|
-
An
|
5814
|
-
escape
|
5815
|
-
at
|
5814
|
+
An error is generated if a POSIX character class (see below) or an
|
5815
|
+
escape sequence other than one that defines a single character appears
|
5816
|
+
at a point where a range ending character is expected. For example,
|
5816
5817
|
[z-\xff] is valid, but [A-\d] and [A-[:digit:]] are not.
|
5817
5818
|
|
5818
|
-
Ranges
|
5819
|
-
also
|
5820
|
-
[\000-\037].
|
5819
|
+
Ranges operate in the collating sequence of character values. They can
|
5820
|
+
also be used for characters specified numerically, for example
|
5821
|
+
[\000-\037]. Ranges can include any characters that are valid for the
|
5821
5822
|
current mode.
|
5822
5823
|
|
5823
5824
|
If a range that includes letters is used when caseless matching is set,
|
5824
5825
|
it matches the letters in either case. For example, [W-c] is equivalent
|
5825
|
-
to
|
5826
|
-
character
|
5827
|
-
accented
|
5828
|
-
concept
|
5826
|
+
to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
|
5827
|
+
character tables for a French locale are in use, [\xc8-\xcb] matches
|
5828
|
+
accented E characters in both cases. In UTF modes, PCRE supports the
|
5829
|
+
concept of case for characters with values greater than 127 only when
|
5829
5830
|
it is compiled with Unicode property support.
|
5830
5831
|
|
5831
|
-
The
|
5832
|
+
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V,
|
5832
5833
|
\w, and \W may appear in a character class, and add the characters that
|
5833
|
-
they
|
5834
|
-
mal
|
5835
|
-
\d,
|
5836
|
-
appear
|
5834
|
+
they match to the class. For example, [\dABCDEF] matches any hexadeci-
|
5835
|
+
mal digit. In UTF modes, the PCRE_UCP option affects the meanings of
|
5836
|
+
\d, \s, \w and their upper case partners, just as it does when they
|
5837
|
+
appear outside a character class, as described in the section entitled
|
5837
5838
|
"Generic character types" above. The escape sequence \b has a different
|
5838
|
-
meaning
|
5839
|
-
The
|
5840
|
-
class.
|
5841
|
-
as
|
5839
|
+
meaning inside a character class; it matches the backspace character.
|
5840
|
+
The sequences \B, \N, \R, and \X are not special inside a character
|
5841
|
+
class. Like any other unrecognized escape sequences, they are treated
|
5842
|
+
as the literal characters "B", "N", "R", and "X" by default, but cause
|
5842
5843
|
an error if the PCRE_EXTRA option is set.
|
5843
5844
|
|
5844
|
-
A
|
5845
|
-
types
|
5846
|
-
lower
|
5845
|
+
A circumflex can conveniently be used with the upper case character
|
5846
|
+
types to specify a more restricted set of characters than the matching
|
5847
|
+
lower case type. For example, the class [^\W_] matches any letter or
|
5847
5848
|
digit, but not underscore, whereas [\w] includes underscore. A positive
|
5848
5849
|
character class should be read as "something OR something OR ..." and a
|
5849
5850
|
negative class as "NOT something AND NOT something AND NOT ...".
|
5850
5851
|
|
5851
|
-
The
|
5852
|
-
backslash,
|
5853
|
-
range),
|
5854
|
-
when
|
5855
|
-
special
|
5852
|
+
The only metacharacters that are recognized in character classes are
|
5853
|
+
backslash, hyphen (only where it can be interpreted as specifying a
|
5854
|
+
range), circumflex (only at the start), opening square bracket (only
|
5855
|
+
when it can be interpreted as introducing a POSIX class name, or for a
|
5856
|
+
special compatibility feature - see the next two sections), and the
|
5856
5857
|
terminating closing square bracket. However, escaping other non-
|
5857
5858
|
alphanumeric characters does no harm.
|
5858
5859
|
|
@@ -5860,7 +5861,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|
5860
5861
|
POSIX CHARACTER CLASSES
|
5861
5862
|
|
5862
5863
|
Perl supports the POSIX notation for character classes. This uses names
|
5863
|
-
enclosed
|
5864
|
+
enclosed by [: and :] within the enclosing square brackets. PCRE also
|
5864
5865
|
supports this notation. For example,
|
5865
5866
|
|
5866
5867
|
[01[:alpha:]%]
|
@@ -5883,28 +5884,28 @@ POSIX CHARACTER CLASSES
|
|
5883
5884
|
word "word" characters (same as \w)
|
5884
5885
|
xdigit hexadecimal digits
|
5885
5886
|
|
5886
|
-
The
|
5887
|
-
CR
|
5888
|
-
the
|
5887
|
+
The default "space" characters are HT (9), LF (10), VT (11), FF (12),
|
5888
|
+
CR (13), and space (32). If locale-specific matching is taking place,
|
5889
|
+
the list of space characters may be different; there may be fewer or
|
5889
5890
|
more of them. "Space" used to be different to \s, which did not include
|
5890
5891
|
VT, for Perl compatibility. However, Perl changed at release 5.18, and
|
5891
|
-
PCRE
|
5892
|
+
PCRE followed at release 8.34. "Space" and \s now match the same set
|
5892
5893
|
of characters.
|
5893
5894
|
|
5894
|
-
The
|
5895
|
-
from
|
5895
|
+
The name "word" is a Perl extension, and "blank" is a GNU extension
|
5896
|
+
from Perl 5.8. Another Perl extension is negation, which is indicated
|
5896
5897
|
by a ^ character after the colon. For example,
|
5897
5898
|
|
5898
5899
|
[12[:^digit:]]
|
5899
5900
|
|
5900
|
-
matches
|
5901
|
+
matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the
|
5901
5902
|
POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but
|
5902
5903
|
these are not supported, and an error is given if they are encountered.
|
5903
5904
|
|
5904
5905
|
By default, characters with values greater than 127 do not match any of
|
5905
|
-
the
|
5906
|
-
to
|
5907
|
-
character
|
5906
|
+
the POSIX character classes. However, if the PCRE_UCP option is passed
|
5907
|
+
to pcre_compile(), some of the classes are changed so that Unicode
|
5908
|
+
character properties are used. This is achieved by replacing certain
|
5908
5909
|
POSIX classes by other sequences, as follows:
|
5909
5910
|
|
5910
5911
|
[:alnum:] becomes \p{Xan}
|
@@ -5916,10 +5917,10 @@ POSIX CHARACTER CLASSES
|
|
5916
5917
|
[:upper:] becomes \p{Lu}
|
5917
5918
|
[:word:] becomes \p{Xwd}
|
5918
5919
|
|
5919
|
-
Negated
|
5920
|
+
Negated versions, such as [:^alpha:], use \P instead of \p. Three other
|
5920
5921
|
POSIX classes are handled specially in UCP mode:
|
5921
5922
|
|
5922
|
-
[:graph:] This
|
5923
|
+
[:graph:] This matches characters that have glyphs that mark the page
|
5923
5924
|
when printed. In Unicode property terms, it matches all char-
|
5924
5925
|
acters with the L, M, N, P, S, or Cf properties, except for:
|
5925
5926
|
|
@@ -5928,58 +5929,58 @@ POSIX CHARACTER CLASSES
|
|
5928
5929
|
U+2066 - U+2069 Various "isolate"s
|
5929
5930
|
|
5930
5931
|
|
5931
|
-
[:print:] This
|
5932
|
-
characters
|
5932
|
+
[:print:] This matches the same characters as [:graph:] plus space
|
5933
|
+
characters that are not controls, that is, characters with
|
5933
5934
|
the Zs property.
|
5934
5935
|
|
5935
5936
|
[:punct:] This matches all characters that have the Unicode P (punctua-
|
5936
|
-
tion)
|
5937
|
+
tion) property, plus those characters whose code points are
|
5937
5938
|
less than 128 that have the S (Symbol) property.
|
5938
5939
|
|
5939
|
-
The
|
5940
|
+
The other POSIX classes are unchanged, and match only characters with
|
5940
5941
|
code points less than 128.
|
5941
5942
|
|
5942
5943
|
|
5943
5944
|
COMPATIBILITY FEATURE FOR WORD BOUNDARIES
|
5944
5945
|
|
5945
|
-
In
|
5946
|
-
ugly
|
5946
|
+
In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
|
5947
|
+
ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
|
5947
5948
|
and "end of word". PCRE treats these items as follows:
|
5948
5949
|
|
5949
5950
|
[[:<:]] is converted to \b(?=\w)
|
5950
5951
|
[[:>:]] is converted to \b(?<=\w)
|
5951
5952
|
|
5952
5953
|
Only these exact character sequences are recognized. A sequence such as
|
5953
|
-
[a[:<:]b]
|
5954
|
-
support
|
5954
|
+
[a[:<:]b] provokes an error for an unrecognized POSIX class name. This
|
5955
|
+
support is not compatible with Perl. It is provided to help migrations
|
5955
5956
|
from other environments, and is best not used in any new patterns. Note
|
5956
|
-
that
|
5957
|
-
tions"
|
5958
|
-
character
|
5959
|
-
assertions
|
5957
|
+
that \b matches at the start and the end of a word (see "Simple asser-
|
5958
|
+
tions" above), and in a Perl-style pattern the preceding or following
|
5959
|
+
character normally shows which is wanted, without the need for the
|
5960
|
+
assertions that are used above in order to give exactly the POSIX be-
|
5960
5961
|
haviour.
|
5961
5962
|
|
5962
5963
|
|
5963
5964
|
VERTICAL BAR
|
5964
5965
|
|
5965
|
-
Vertical
|
5966
|
+
Vertical bar characters are used to separate alternative patterns. For
|
5966
5967
|
example, the pattern
|
5967
5968
|
|
5968
5969
|
gilbert|sullivan
|
5969
5970
|
|
5970
|
-
matches
|
5971
|
-
appear,
|
5971
|
+
matches either "gilbert" or "sullivan". Any number of alternatives may
|
5972
|
+
appear, and an empty alternative is permitted (matching the empty
|
5972
5973
|
string). The matching process tries each alternative in turn, from left
|
5973
|
-
to
|
5974
|
-
are
|
5974
|
+
to right, and the first one that succeeds is used. If the alternatives
|
5975
|
+
are within a subpattern (defined below), "succeeds" means matching the
|
5975
5976
|
rest of the main pattern as well as the alternative in the subpattern.
|
5976
5977
|
|
5977
5978
|
|
5978
5979
|
INTERNAL OPTION SETTING
|
5979
5980
|
|
5980
|
-
The
|
5981
|
-
PCRE_EXTENDED
|
5982
|
-
within
|
5981
|
+
The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
|
5982
|
+
PCRE_EXTENDED options (which are Perl-compatible) can be changed from
|
5983
|
+
within the pattern by a sequence of Perl option letters enclosed
|
5983
5984
|
between "(?" and ")". The option letters are
|
5984
5985
|
|
5985
5986
|
i for PCRE_CASELESS
|
@@ -5989,51 +5990,47 @@ INTERNAL OPTION SETTING
|
|
5989
5990
|
|
5990
5991
|
For example, (?im) sets caseless, multiline matching. It is also possi-
|
5991
5992
|
ble to unset these options by preceding the letter with a hyphen, and a
|
5992
|
-
combined
|
5993
|
-
LESS
|
5994
|
-
is
|
5993
|
+
combined setting and unsetting such as (?im-sx), which sets PCRE_CASE-
|
5994
|
+
LESS and PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED,
|
5995
|
+
is also permitted. If a letter appears both before and after the
|
5995
5996
|
hyphen, the option is unset.
|
5996
5997
|
|
5997
|
-
The
|
5998
|
-
can
|
5998
|
+
The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA
|
5999
|
+
can be changed in the same way as the Perl-compatible options by using
|
5999
6000
|
the characters J, U and X respectively.
|
6000
6001
|
|
6001
|
-
When
|
6002
|
-
inside
|
6003
|
-
the
|
6004
|
-
|
6005
|
-
|
6006
|
-
|
6007
|
-
An option change within a subpattern (see below for a description of
|
6008
|
-
subpatterns) affects only that part of the subpattern that follows it,
|
6009
|
-
so
|
6002
|
+
When one of these option changes occurs at top level (that is, not
|
6003
|
+
inside subpattern parentheses), the change applies to the remainder of
|
6004
|
+
the pattern that follows. An option change within a subpattern (see
|
6005
|
+
below for a description of subpatterns) affects only that part of the
|
6006
|
+
subpattern that follows it, so
|
6010
6007
|
|
6011
6008
|
(a(?i)b)c
|
6012
6009
|
|
6013
6010
|
matches abc and aBc and no other strings (assuming PCRE_CASELESS is not
|
6014
|
-
used).
|
6015
|
-
in
|
6016
|
-
do
|
6011
|
+
used). By this means, options can be made to have different settings
|
6012
|
+
in different parts of the pattern. Any changes made in one alternative
|
6013
|
+
do carry on into subsequent branches within the same subpattern. For
|
6017
6014
|
example,
|
6018
6015
|
|
6019
6016
|
(a(?i)b|c)
|
6020
6017
|
|
6021
|
-
matches
|
6022
|
-
first
|
6023
|
-
the
|
6018
|
+
matches "ab", "aB", "c", and "C", even though when matching "C" the
|
6019
|
+
first branch is abandoned before the option setting. This is because
|
6020
|
+
the effects of option settings happen at compile time. There would be
|
6024
6021
|
some very weird behaviour otherwise.
|
6025
6022
|
|
6026
|
-
Note:
|
6027
|
-
application
|
6028
|
-
some
|
6029
|
-
(*CRLF)
|
6030
|
-
defaulted.
|
6031
|
-
sequences"
|
6032
|
-
(*UCP)
|
6033
|
-
erty
|
6034
|
-
PCRE_UTF32
|
6035
|
-
is
|
6036
|
-
ever,
|
6023
|
+
Note: There are other PCRE-specific options that can be set by the
|
6024
|
+
application when the compiling or matching functions are called. In
|
6025
|
+
some cases the pattern can contain special leading sequences such as
|
6026
|
+
(*CRLF) to override what the application has set or what has been
|
6027
|
+
defaulted. Details are given in the section entitled "Newline
|
6028
|
+
sequences" above. There are also the (*UTF8), (*UTF16), (*UTF32), and
|
6029
|
+
(*UCP) leading sequences that can be used to set UTF and Unicode prop-
|
6030
|
+
erty modes; they are equivalent to setting the PCRE_UTF8, PCRE_UTF16,
|
6031
|
+
PCRE_UTF32 and the PCRE_UCP options, respectively. The (*UTF) sequence
|
6032
|
+
is a generic version that can be used with any of the libraries. How-
|
6033
|
+
ever, the application can set the PCRE_NEVER_UTF option, which locks
|
6037
6034
|
out the use of the (*UTF) sequences.
|
6038
6035
|
|
6039
6036
|
|
@@ -6046,18 +6043,18 @@ SUBPATTERNS
|
|
6046
6043
|
|
6047
6044
|
cat(aract|erpillar|)
|
6048
6045
|
|
6049
|
-
matches
|
6046
|
+
matches "cataract", "caterpillar", or "cat". Without the parentheses,
|
6050
6047
|
it would match "cataract", "erpillar" or an empty string.
|
6051
6048
|
|
6052
|
-
2.
|
6053
|
-
that,
|
6049
|
+
2. It sets up the subpattern as a capturing subpattern. This means
|
6050
|
+
that, when the whole pattern matches, that portion of the subject
|
6054
6051
|
string that matched the subpattern is passed back to the caller via the
|
6055
|
-
ovector
|
6056
|
-
traditional
|
6052
|
+
ovector argument of the matching function. (This applies only to the
|
6053
|
+
traditional matching functions; the DFA matching functions do not sup-
|
6057
6054
|
port capturing.)
|
6058
6055
|
|
6059
6056
|
Opening parentheses are counted from left to right (starting from 1) to
|
6060
|
-
obtain
|
6057
|
+
obtain numbers for the capturing subpatterns. For example, if the
|
6061
6058
|
string "the red king" is matched against the pattern
|
6062
6059
|
|
6063
6060
|
the ((red|white) (king|queen))
|
@@ -6065,12 +6062,12 @@ SUBPATTERNS
|
|
6065
6062
|
the captured substrings are "red king", "red", and "king", and are num-
|
6066
6063
|
bered 1, 2, and 3, respectively.
|
6067
6064
|
|
6068
|
-
The
|
6069
|
-
helpful.
|
6070
|
-
without
|
6071
|
-
by
|
6072
|
-
ing,
|
6073
|
-
capturing
|
6065
|
+
The fact that plain parentheses fulfil two functions is not always
|
6066
|
+
helpful. There are often times when a grouping subpattern is required
|
6067
|
+
without a capturing requirement. If an opening parenthesis is followed
|
6068
|
+
by a question mark and a colon, the subpattern does not do any captur-
|
6069
|
+
ing, and is not counted when computing the number of any subsequent
|
6070
|
+
capturing subpatterns. For example, if the string "the white queen" is
|
6074
6071
|
matched against the pattern
|
6075
6072
|
|
6076
6073
|
the ((?:red|white) (king|queen))
|
@@ -6078,37 +6075,37 @@ SUBPATTERNS
|
|
6078
6075
|
the captured substrings are "white queen" and "queen", and are numbered
|
6079
6076
|
1 and 2. The maximum number of capturing subpatterns is 65535.
|
6080
6077
|
|
6081
|
-
As
|
6082
|
-
start
|
6078
|
+
As a convenient shorthand, if any option settings are required at the
|
6079
|
+
start of a non-capturing subpattern, the option letters may appear
|
6083
6080
|
between the "?" and the ":". Thus the two patterns
|
6084
6081
|
|
6085
6082
|
(?i:saturday|sunday)
|
6086
6083
|
(?:(?i)saturday|sunday)
|
6087
6084
|
|
6088
6085
|
match exactly the same set of strings. Because alternative branches are
|
6089
|
-
tried
|
6090
|
-
the
|
6091
|
-
subsequent
|
6086
|
+
tried from left to right, and options are not reset until the end of
|
6087
|
+
the subpattern is reached, an option setting in one branch does affect
|
6088
|
+
subsequent branches, so the above patterns match "SUNDAY" as well as
|
6092
6089
|
"Saturday".
|
6093
6090
|
|
6094
6091
|
|
6095
6092
|
DUPLICATE SUBPATTERN NUMBERS
|
6096
6093
|
|
6097
6094
|
Perl 5.10 introduced a feature whereby each alternative in a subpattern
|
6098
|
-
uses
|
6099
|
-
starts
|
6095
|
+
uses the same numbers for its capturing parentheses. Such a subpattern
|
6096
|
+
starts with (?| and is itself a non-capturing subpattern. For example,
|
6100
6097
|
consider this pattern:
|
6101
6098
|
|
6102
6099
|
(?|(Sat)ur|(Sun))day
|
6103
6100
|
|
6104
|
-
Because
|
6105
|
-
turing
|
6106
|
-
you
|
6107
|
-
matched.
|
6101
|
+
Because the two alternatives are inside a (?| group, both sets of cap-
|
6102
|
+
turing parentheses are numbered one. Thus, when the pattern matches,
|
6103
|
+
you can look at captured substring number one, whichever alternative
|
6104
|
+
matched. This construct is useful when you want to capture part, but
|
6108
6105
|
not all, of one of a number of alternatives. Inside a (?| group, paren-
|
6109
|
-
theses
|
6110
|
-
each
|
6111
|
-
subpattern
|
6106
|
+
theses are numbered as usual, but the number is reset at the start of
|
6107
|
+
each branch. The numbers of any capturing parentheses that follow the
|
6108
|
+
subpattern start after the highest number used in any branch. The fol-
|
6112
6109
|
lowing example is taken from the Perl documentation. The numbers under-
|
6113
6110
|
neath show in which buffer the captured content will be stored.
|
6114
6111
|
|
@@ -6116,58 +6113,58 @@ DUPLICATE SUBPATTERN NUMBERS
|
|
6116
6113
|
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
|
6117
6114
|
# 1 2 2 3 2 3 4
|
6118
6115
|
|
6119
|
-
A
|
6120
|
-
that
|
6116
|
+
A back reference to a numbered subpattern uses the most recent value
|
6117
|
+
that is set for that number by any subpattern. The following pattern
|
6121
6118
|
matches "abcabc" or "defdef":
|
6122
6119
|
|
6123
6120
|
/(?|(abc)|(def))\1/
|
6124
6121
|
|
6125
|
-
In
|
6126
|
-
to
|
6122
|
+
In contrast, a subroutine call to a numbered subpattern always refers
|
6123
|
+
to the first one in the pattern with the given number. The following
|
6127
6124
|
pattern matches "abcabc" or "defabc":
|
6128
6125
|
|
6129
6126
|
/(?|(abc)|(def))(?1)/
|
6130
6127
|
|
6131
|
-
If
|
6132
|
-
unique
|
6128
|
+
If a condition test for a subpattern's having matched refers to a non-
|
6129
|
+
unique number, the test is true if any of the subpatterns of that num-
|
6133
6130
|
ber have matched.
|
6134
6131
|
|
6135
|
-
An
|
6132
|
+
An alternative approach to using this "branch reset" feature is to use
|
6136
6133
|
duplicate named subpatterns, as described in the next section.
|
6137
6134
|
|
6138
6135
|
|
6139
6136
|
NAMED SUBPATTERNS
|
6140
6137
|
|
6141
|
-
Identifying
|
6142
|
-
very
|
6143
|
-
sions.
|
6144
|
-
change.
|
6138
|
+
Identifying capturing parentheses by number is simple, but it can be
|
6139
|
+
very hard to keep track of the numbers in complicated regular expres-
|
6140
|
+
sions. Furthermore, if an expression is modified, the numbers may
|
6141
|
+
change. To help with this difficulty, PCRE supports the naming of sub-
|
6145
6142
|
patterns. This feature was not added to Perl until release 5.10. Python
|
6146
|
-
had
|
6147
|
-
the
|
6148
|
-
tax.
|
6143
|
+
had the feature earlier, and PCRE introduced it at release 4.0, using
|
6144
|
+
the Python syntax. PCRE now supports both the Perl and the Python syn-
|
6145
|
+
tax. Perl allows identically numbered subpatterns to have different
|
6149
6146
|
names, but PCRE does not.
|
6150
6147
|
|
6151
|
-
In
|
6152
|
-
or
|
6153
|
-
to
|
6154
|
-
references,
|
6148
|
+
In PCRE, a subpattern can be named in one of three ways: (?<name>...)
|
6149
|
+
or (?'name'...) as in Perl, or (?P<name>...) as in Python. References
|
6150
|
+
to capturing parentheses from other parts of the pattern, such as back
|
6151
|
+
references, recursion, and conditions, can be made by name as well as
|
6155
6152
|
by number.
|
6156
6153
|
|
6157
|
-
Names
|
6158
|
-
must
|
6159
|
-
allocated
|
6160
|
-
present.
|
6161
|
-
to-number
|
6154
|
+
Names consist of up to 32 alphanumeric characters and underscores, but
|
6155
|
+
must start with a non-digit. Named capturing parentheses are still
|
6156
|
+
allocated numbers as well as names, exactly as if the names were not
|
6157
|
+
present. The PCRE API provides function calls for extracting the name-
|
6158
|
+
to-number translation table from a compiled pattern. There is also a
|
6162
6159
|
convenience function for extracting a captured substring by name.
|
6163
6160
|
|
6164
|
-
By
|
6161
|
+
By default, a name must be unique within a pattern, but it is possible
|
6165
6162
|
to relax this constraint by setting the PCRE_DUPNAMES option at compile
|
6166
|
-
time.
|
6167
|
-
the
|
6168
|
-
cate
|
6169
|
-
named
|
6170
|
-
weekday,
|
6163
|
+
time. (Duplicate names are also always permitted for subpatterns with
|
6164
|
+
the same number, set up as described in the previous section.) Dupli-
|
6165
|
+
cate names can be useful for patterns where only one instance of the
|
6166
|
+
named parentheses can match. Suppose you want to match the name of a
|
6167
|
+
weekday, either as a 3-letter abbreviation or as the full name, and in
|
6171
6168
|
both cases you want to extract the abbreviation. This pattern (ignoring
|
6172
6169
|
the line breaks) does the job:
|
6173
6170
|
|
@@ -6177,18 +6174,18 @@ NAMED SUBPATTERNS
|
|
6177
6174
|
(?<DN>Thu)(?:rsday)?|
|
6178
6175
|
(?<DN>Sat)(?:urday)?
|
6179
6176
|
|
6180
|
-
There
|
6177
|
+
There are five capturing substrings, but only one is ever set after a
|
6181
6178
|
match. (An alternative way of solving this problem is to use a "branch
|
6182
6179
|
reset" subpattern, as described in the previous section.)
|
6183
6180
|
|
6184
|
-
The
|
6185
|
-
substring
|
6186
|
-
that
|
6181
|
+
The convenience function for extracting the data by name returns the
|
6182
|
+
substring for the first (and in this example, the only) subpattern of
|
6183
|
+
that name that matched. This saves searching to find which numbered
|
6187
6184
|
subpattern it was.
|
6188
6185
|
|
6189
|
-
If
|
6190
|
-
elsewhere
|
6191
|
-
checked
|
6186
|
+
If you make a back reference to a non-unique named subpattern from
|
6187
|
+
elsewhere in the pattern, the subpatterns to which the name refers are
|
6188
|
+
checked in the order in which they appear in the overall pattern. The
|
6192
6189
|
first one that is set is used for the reference. For example, this pat-
|
6193
6190
|
tern matches both "foofoo" and "barbar" but not "foobar" or "barfoo":
|
6194
6191
|
|
@@ -6196,29 +6193,29 @@ NAMED SUBPATTERNS
|
|
6196
6193
|
|
6197
6194
|
|
6198
6195
|
If you make a subroutine call to a non-unique named subpattern, the one
|
6199
|
-
that
|
6196
|
+
that corresponds to the first occurrence of the name is used. In the
|
6200
6197
|
absence of duplicate numbers (see the previous section) this is the one
|
6201
6198
|
with the lowest number.
|
6202
6199
|
|
6203
6200
|
If you use a named reference in a condition test (see the section about
|
6204
6201
|
conditions below), either to check whether a subpattern has matched, or
|
6205
|
-
to
|
6206
|
-
If
|
6207
|
-
true.
|
6208
|
-
details
|
6202
|
+
to check for recursion, all subpatterns with the same name are tested.
|
6203
|
+
If the condition is true for any one of them, the overall condition is
|
6204
|
+
true. This is the same behaviour as testing by number. For further
|
6205
|
+
details of the interfaces for handling named subpatterns, see the
|
6209
6206
|
pcreapi documentation.
|
6210
6207
|
|
6211
6208
|
Warning: You cannot use different names to distinguish between two sub-
|
6212
|
-
patterns
|
6209
|
+
patterns with the same number because PCRE uses only the numbers when
|
6213
6210
|
matching. For this reason, an error is given at compile time if differ-
|
6214
|
-
ent
|
6211
|
+
ent names are given to subpatterns with the same number. However, you
|
6215
6212
|
can always give the same name to subpatterns with the same number, even
|
6216
6213
|
when PCRE_DUPNAMES is not set.
|
6217
6214
|
|
6218
6215
|
|
6219
6216
|
REPETITION
|
6220
6217
|
|
6221
|
-
Repetition
|
6218
|
+
Repetition is specified by quantifiers, which can follow any of the
|
6222
6219
|
following items:
|
6223
6220
|
|
6224
6221
|
a literal data character
|
@@ -6232,17 +6229,17 @@ REPETITION
|
|
6232
6229
|
a parenthesized subpattern (including assertions)
|
6233
6230
|
a subroutine call to a subpattern (recursive or otherwise)
|
6234
6231
|
|
6235
|
-
The
|
6236
|
-
ber
|
6237
|
-
(braces),
|
6232
|
+
The general repetition quantifier specifies a minimum and maximum num-
|
6233
|
+
ber of permitted matches, by giving the two numbers in curly brackets
|
6234
|
+
(braces), separated by a comma. The numbers must be less than 65536,
|
6238
6235
|
and the first must be less than or equal to the second. For example:
|
6239
6236
|
|
6240
6237
|
z{2,4}
|
6241
6238
|
|
6242
|
-
matches
|
6243
|
-
special
|
6244
|
-
present,
|
6245
|
-
are
|
6239
|
+
matches "zz", "zzz", or "zzzz". A closing brace on its own is not a
|
6240
|
+
special character. If the second number is omitted, but the comma is
|
6241
|
+
present, there is no upper limit; if the second number and the comma
|
6242
|
+
are both omitted, the quantifier specifies an exact number of required
|
6246
6243
|
matches. Thus
|
6247
6244
|
|
6248
6245
|
[aeiou]{3,}
|
@@ -6251,50 +6248,50 @@ REPETITION
|
|
6251
6248
|
|
6252
6249
|
\d{8}
|
6253
6250
|
|
6254
|
-
matches
|
6255
|
-
position
|
6256
|
-
the
|
6251
|
+
matches exactly 8 digits. An opening curly bracket that appears in a
|
6252
|
+
position where a quantifier is not allowed, or one that does not match
|
6253
|
+
the syntax of a quantifier, is taken as a literal character. For exam-
|
6257
6254
|
ple, {,6} is not a quantifier, but a literal string of four characters.
|
6258
6255
|
|
6259
6256
|
In UTF modes, quantifiers apply to characters rather than to individual
|
6260
|
-
data
|
6257
|
+
data units. Thus, for example, \x{100}{2} matches two characters, each
|
6261
6258
|
of which is represented by a two-byte sequence in a UTF-8 string. Simi-
|
6262
|
-
larly,
|
6263
|
-
which
|
6259
|
+
larly, \X{3} matches three Unicode extended grapheme clusters, each of
|
6260
|
+
which may be several data units long (and they may be of different
|
6264
6261
|
lengths).
|
6265
6262
|
|
6266
6263
|
The quantifier {0} is permitted, causing the expression to behave as if
|
6267
6264
|
the previous item and the quantifier were not present. This may be use-
|
6268
|
-
ful
|
6265
|
+
ful for subpatterns that are referenced as subroutines from elsewhere
|
6269
6266
|
in the pattern (but see also the section entitled "Defining subpatterns
|
6270
|
-
for
|
6267
|
+
for use by reference only" below). Items other than subpatterns that
|
6271
6268
|
have a {0} quantifier are omitted from the compiled pattern.
|
6272
6269
|
|
6273
|
-
For
|
6270
|
+
For convenience, the three most common quantifiers have single-charac-
|
6274
6271
|
ter abbreviations:
|
6275
6272
|
|
6276
6273
|
* is equivalent to {0,}
|
6277
6274
|
+ is equivalent to {1,}
|
6278
6275
|
? is equivalent to {0,1}
|
6279
6276
|
|
6280
|
-
It
|
6277
|
+
It is possible to construct infinite loops by following a subpattern
|
6281
6278
|
that can match no characters with a quantifier that has no upper limit,
|
6282
6279
|
for example:
|
6283
6280
|
|
6284
6281
|
(a?)*
|
6285
6282
|
|
6286
6283
|
Earlier versions of Perl and PCRE used to give an error at compile time
|
6287
|
-
for
|
6288
|
-
useful,
|
6289
|
-
subpattern
|
6284
|
+
for such patterns. However, because there are cases where this can be
|
6285
|
+
useful, such patterns are now accepted, but if any repetition of the
|
6286
|
+
subpattern does in fact match no characters, the loop is forcibly bro-
|
6290
6287
|
ken.
|
6291
6288
|
|
6292
|
-
By
|
6293
|
-
as
|
6294
|
-
causing
|
6289
|
+
By default, the quantifiers are "greedy", that is, they match as much
|
6290
|
+
as possible (up to the maximum number of permitted times), without
|
6291
|
+
causing the rest of the pattern to fail. The classic example of where
|
6295
6292
|
this gives problems is in trying to match comments in C programs. These
|
6296
|
-
appear
|
6297
|
-
characters
|
6293
|
+
appear between /* and */ and within the comment, individual * and /
|
6294
|
+
characters may appear. An attempt to match C comments by applying the
|
6298
6295
|
pattern
|
6299
6296
|
|
6300
6297
|
/\*.*\*/
|
@@ -6303,19 +6300,19 @@ REPETITION
|
|
6303
6300
|
|
6304
6301
|
/* first comment */ not comment /* second comment */
|
6305
6302
|
|
6306
|
-
fails,
|
6303
|
+
fails, because it matches the entire string owing to the greediness of
|
6307
6304
|
the .* item.
|
6308
6305
|
|
6309
|
-
However,
|
6306
|
+
However, if a quantifier is followed by a question mark, it ceases to
|
6310
6307
|
be greedy, and instead matches the minimum number of times possible, so
|
6311
6308
|
the pattern
|
6312
6309
|
|
6313
6310
|
/\*.*?\*/
|
6314
6311
|
|
6315
|
-
does
|
6316
|
-
quantifiers
|
6317
|
-
matches.
|
6318
|
-
quantifier
|
6312
|
+
does the right thing with the C comments. The meaning of the various
|
6313
|
+
quantifiers is not otherwise changed, just the preferred number of
|
6314
|
+
matches. Do not confuse this use of question mark with its use as a
|
6315
|
+
quantifier in its own right. Because it has two uses, it can sometimes
|
6319
6316
|
appear doubled, as in
|
6320
6317
|
|
6321
6318
|
\d??\d
|
@@ -6323,45 +6320,45 @@ REPETITION
|
|
6323
6320
|
which matches one digit by preference, but can match two if that is the
|
6324
6321
|
only way the rest of the pattern matches.
|
6325
6322
|
|
6326
|
-
If
|
6327
|
-
Perl),
|
6328
|
-
can
|
6323
|
+
If the PCRE_UNGREEDY option is set (an option that is not available in
|
6324
|
+
Perl), the quantifiers are not greedy by default, but individual ones
|
6325
|
+
can be made greedy by following them with a question mark. In other
|
6329
6326
|
words, it inverts the default behaviour.
|
6330
6327
|
|
6331
|
-
When
|
6332
|
-
count
|
6333
|
-
required
|
6328
|
+
When a parenthesized subpattern is quantified with a minimum repeat
|
6329
|
+
count that is greater than 1 or with a limited maximum, more memory is
|
6330
|
+
required for the compiled pattern, in proportion to the size of the
|
6334
6331
|
minimum or maximum.
|
6335
6332
|
|
6336
6333
|
If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equiv-
|
6337
|
-
alent
|
6338
|
-
the
|
6339
|
-
tried
|
6340
|
-
is
|
6341
|
-
first.
|
6334
|
+
alent to Perl's /s) is set, thus allowing the dot to match newlines,
|
6335
|
+
the pattern is implicitly anchored, because whatever follows will be
|
6336
|
+
tried against every character position in the subject string, so there
|
6337
|
+
is no point in retrying the overall match at any position after the
|
6338
|
+
first. PCRE normally treats such a pattern as though it were preceded
|
6342
6339
|
by \A.
|
6343
6340
|
|
6344
|
-
In
|
6345
|
-
lines,
|
6341
|
+
In cases where it is known that the subject string contains no new-
|
6342
|
+
lines, it is worth setting PCRE_DOTALL in order to obtain this opti-
|
6346
6343
|
mization, or alternatively using ^ to indicate anchoring explicitly.
|
6347
6344
|
|
6348
|
-
However,
|
6345
|
+
However, there are some cases where the optimization cannot be used.
|
6349
6346
|
When .* is inside capturing parentheses that are the subject of a back
|
6350
6347
|
reference elsewhere in the pattern, a match at the start may fail where
|
6351
6348
|
a later one succeeds. Consider, for example:
|
6352
6349
|
|
6353
6350
|
(.*)abc\1
|
6354
6351
|
|
6355
|
-
If
|
6352
|
+
If the subject is "xyz123abc123" the match point is the fourth charac-
|
6356
6353
|
ter. For this reason, such a pattern is not implicitly anchored.
|
6357
6354
|
|
6358
|
-
Another
|
6359
|
-
ing
|
6355
|
+
Another case where implicit anchoring is not applied is when the lead-
|
6356
|
+
ing .* is inside an atomic group. Once again, a match at the start may
|
6360
6357
|
fail where a later one succeeds. Consider this pattern:
|
6361
6358
|
|
6362
6359
|
(?>.*?a)b
|
6363
6360
|
|
6364
|
-
It
|
6361
|
+
It matches "ab" in the subject "aab". The use of the backtracking con-
|
6365
6362
|
trol verbs (*PRUNE) and (*SKIP) also disables this optimization.
|
6366
6363
|
|
6367
6364
|
When a capturing subpattern is repeated, the value captured is the sub-
|
@@ -6370,8 +6367,8 @@ REPETITION
|
|
6370
6367
|
(tweedle[dume]{3}\s*)+
|
6371
6368
|
|
6372
6369
|
has matched "tweedledum tweedledee" the value of the captured substring
|
6373
|
-
is
|
6374
|
-
the
|
6370
|
+
is "tweedledee". However, if there are nested capturing subpatterns,
|
6371
|
+
the corresponding captured values may have been set in previous itera-
|
6375
6372
|
tions. For example, after
|
6376
6373
|
|
6377
6374
|
/(a|(b))+/
|
@@ -6381,53 +6378,53 @@ REPETITION
|
|
6381
6378
|
|
6382
6379
|
ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
|
6383
6380
|
|
6384
|
-
With
|
6385
|
-
repetition,
|
6386
|
-
to
|
6387
|
-
rest
|
6388
|
-
either
|
6389
|
-
than
|
6381
|
+
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
|
6382
|
+
repetition, failure of what follows normally causes the repeated item
|
6383
|
+
to be re-evaluated to see if a different number of repeats allows the
|
6384
|
+
rest of the pattern to match. Sometimes it is useful to prevent this,
|
6385
|
+
either to change the nature of the match, or to cause it to fail earlier
|
6386
|
+
than it otherwise might, when the author of the pattern knows there is
|
6390
6387
|
no point in carrying on.
|
6391
6388
|
|
6392
|
-
Consider,
|
6389
|
+
Consider, for example, the pattern \d+foo when applied to the subject
|
6393
6390
|
line
|
6394
6391
|
|
6395
6392
|
123456bar
|
6396
6393
|
|
6397
6394
|
After matching all 6 digits and then failing to match "foo", the normal
|
6398
|
-
action
|
6399
|
-
\d+
|
6400
|
-
"Atomic
|
6401
|
-
the
|
6395
|
+
action of the matcher is to try again with only 5 digits matching the
|
6396
|
+
\d+ item, and then with 4, and so on, before ultimately failing.
|
6397
|
+
"Atomic grouping" (a term taken from Jeffrey Friedl's book) provides
|
6398
|
+
the means for specifying that once a subpattern has matched, it is not
|
6402
6399
|
to be re-evaluated in this way.
|
6403
6400
|
|
6404
|
-
If
|
6405
|
-
up
|
6401
|
+
If we use atomic grouping for the previous example, the matcher gives
|
6402
|
+
up immediately on failing to match "foo" the first time. The notation
|
6406
6403
|
is a kind of special parenthesis, starting with (?> as in this example:
|
6407
6404
|
|
6408
6405
|
(?>\d+)foo
|
6409
6406
|
|
6410
|
-
This
|
6411
|
-
tains
|
6412
|
-
prevented
|
6407
|
+
This kind of parenthesis "locks up" the part of the pattern it con-
|
6408
|
+
tains once it has matched, and a failure further into the pattern is
|
6409
|
+
prevented from backtracking into it. Backtracking past it to previous
|
6413
6410
|
items, however, works as normal.
|
6414
6411
|
|
6415
|
-
An
|
6416
|
-
the
|
6412
|
+
An alternative description is that a subpattern of this type matches
|
6413
|
+
the string of characters that an identical standalone pattern would
|
6417
6414
|
match, if anchored at the current point in the subject string.
|
6418
6415
|
|
6419
6416
|
Atomic grouping subpatterns are not capturing subpatterns. Simple cases
|
6420
6417
|
such as the above example can be thought of as a maximizing repeat that
|
6421
|
-
must
|
6422
|
-
pared
|
6418
|
+
must swallow everything it can. So, while both \d+ and \d+? are pre-
|
6419
|
+
pared to adjust the number of digits they match in order to make the
|
6423
6420
|
rest of the pattern match, (?>\d+) can only match an entire sequence of
|
6424
6421
|
digits.
|
6425
6422
|
|
6426
|
-
Atomic
|
6427
|
-
subpatterns,
|
6423
|
+
Atomic groups in general can of course contain arbitrarily complicated
|
6424
|
+
subpatterns, and can be nested. However, when the subpattern for an
|
6428
6425
|
atomic group is just a single repeated item, as in the example above, a
|
6429
|
-
simpler
|
6430
|
-
consists
|
6426
|
+
simpler notation, called a "possessive quantifier", can be used. This
|
6427
|
+
consists of an additional + character following a quantifier. Using
|
6431
6428
|
this notation, the previous example can be rewritten as
|
6432
6429
|
|
6433
6430
|
\d++foo
|
@@ -6437,45 +6434,45 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
|
|
6437
6434
|
|
6438
6435
|
(abc|xyz){2,3}+
|
6439
6436
|
|
6440
|
-
Possessive
|
6437
|
+
Possessive quantifiers are always greedy; the setting of the
|
6441
6438
|
PCRE_UNGREEDY option is ignored. They are a convenient notation for the
|
6442
|
-
simpler
|
6443
|
-
meaning
|
6444
|
-
though
|
6439
|
+
simpler forms of atomic group. However, there is no difference in the
|
6440
|
+
meaning of a possessive quantifier and the equivalent atomic group,
|
6441
|
+
though there may be a performance difference; possessive quantifiers
|
6445
6442
|
should be slightly faster.
|
6446
6443
|
|
6447
|
-
The
|
6448
|
-
tax.
|
6444
|
+
The possessive quantifier syntax is an extension to the Perl 5.8 syn-
|
6445
|
+
tax. Jeffrey Friedl originated the idea (and the name) in the first
|
6449
6446
|
edition of his book. Mike McCloskey liked it, so implemented it when he
|
6450
|
-
built
|
6447
|
+
built Sun's Java package, and PCRE copied it from there. It ultimately
|
6451
6448
|
found its way into Perl at release 5.10.
|
6452
6449
|
|
6453
6450
|
PCRE has an optimization that automatically "possessifies" certain sim-
|
6454
|
-
ple
|
6455
|
-
A++B
|
6451
|
+
ple pattern constructs. For example, the sequence A+B is treated as
|
6452
|
+
A++B because there is no point in backtracking into a sequence of A's
|
6456
6453
|
when B must follow.
|
6457
6454
|
|
6458
|
-
When
|
6459
|
-
can
|
6460
|
-
atomic
|
6455
|
+
When a pattern contains an unlimited repeat inside a subpattern that
|
6456
|
+
can itself be repeated an unlimited number of times, the use of an
|
6457
|
+
atomic group is the only way to avoid some failing matches taking a
|
6461
6458
|
very long time indeed. The pattern
|
6462
6459
|
|
6463
6460
|
(\D+|<\d+>)*[!?]
|
6464
6461
|
|
6465
|
-
matches
|
6466
|
-
digits,
|
6462
|
+
matches an unlimited number of substrings that either consist of non-
|
6463
|
+
digits, or digits enclosed in <>, followed by either ! or ?. When it
|
6467
6464
|
matches, it runs quickly. However, if it is applied to
|
6468
6465
|
|
6469
6466
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
6470
6467
|
|
6471
|
-
it
|
6472
|
-
string
|
6473
|
-
*
|
6474
|
-
example
|
6475
|
-
both
|
6476
|
-
when
|
6477
|
-
ter
|
6478
|
-
in
|
6468
|
+
it takes a long time before reporting failure. This is because the
|
6469
|
+
string can be divided between the internal \D+ repeat and the external
|
6470
|
+
* repeat in a large number of ways, and all have to be tried. (The
|
6471
|
+
example uses [!?] rather than a single character at the end, because
|
6472
|
+
both PCRE and Perl have an optimization that allows for fast failure
|
6473
|
+
when a single character is used. They remember the last single charac-
|
6474
|
+
ter that is required for a match, and fail early if it is not present
|
6475
|
+
in the string.) If the pattern is changed so that it uses an atomic
|
6479
6476
|
group, like this:
|
6480
6477
|
|
6481
6478
|
((?>\D+)|<\d+>)*[!?]
|
@@ -6487,28 +6484,28 @@ BACK REFERENCES
|
|
6487
6484
|
|
6488
6485
|
Outside a character class, a backslash followed by a digit greater than
|
6489
6486
|
0 (and possibly further digits) is a back reference to a capturing sub-
|
6490
|
-
pattern
|
6487
|
+
pattern earlier (that is, to its left) in the pattern, provided there
|
6491
6488
|
have been that many previous capturing left parentheses.
|
6492
6489
|
|
6493
6490
|
However, if the decimal number following the backslash is less than 10,
|
6494
|
-
it
|
6495
|
-
there
|
6496
|
-
tern.
|
6497
|
-
to
|
6498
|
-
reference"
|
6499
|
-
and
|
6491
|
+
it is always taken as a back reference, and causes an error only if
|
6492
|
+
there are not that many capturing left parentheses in the entire pat-
|
6493
|
+
tern. In other words, the parentheses that are referenced need not be
|
6494
|
+
to the left of the reference for numbers less than 10. A "forward back
|
6495
|
+
reference" of this type can make sense when a repetition is involved
|
6496
|
+
and the subpattern to the right has participated in an earlier itera-
|
6500
6497
|
tion.
|
6501
6498
|
|
6502
|
-
It
|
6503
|
-
subpattern
|
6504
|
-
sequence
|
6499
|
+
It is not possible to have a numerical "forward back reference" to a
|
6500
|
+
subpattern whose number is 10 or more using this syntax because a
|
6501
|
+
sequence such as \50 is interpreted as a character defined in octal.
|
6505
6502
|
See the subsection entitled "Non-printing characters" above for further
|
6506
|
-
details
|
6507
|
-
such
|
6503
|
+
details of the handling of digits following a backslash. There is no
|
6504
|
+
such problem when named parentheses are used. A back reference to any
|
6508
6505
|
subpattern is possible using named parentheses (see below).
|
6509
6506
|
|
6510
|
-
Another
|
6511
|
-
following
|
6507
|
+
Another way of avoiding the ambiguity inherent in the use of digits
|
6508
|
+
following a backslash is to use the \g escape sequence. This escape
|
6512
6509
|
must be followed by an unsigned number or a negative number, optionally
|
6513
6510
|
enclosed in braces. These examples are all identical:
|
6514
6511
|
|
@@ -6516,7 +6513,7 @@ BACK REFERENCES
|
|
6516
6513
|
(ring), \g1
|
6517
6514
|
(ring), \g{1}
|
6518
6515
|
|
6519
|
-
An
|
6516
|
+
An unsigned number specifies an absolute reference without the ambigu-
|
6520
6517
|
ity that is present in the older syntax. It is also useful when literal
|
6521
6518
|
digits follow the reference. A negative number is a relative reference.
|
6522
6519
|
Consider this example:
|
@@ -6525,33 +6522,33 @@ BACK REFERENCES
|
|
6525
6522
|
|
6526
6523
|
The sequence \g{-1} is a reference to the most recently started captur-
|
6527
6524
|
ing subpattern before \g, that is, it is equivalent to \2 in this exam-
|
6528
|
-
ple.
|
6529
|
-
references
|
6530
|
-
are
|
6525
|
+
ple. Similarly, \g{-2} would be equivalent to \1. The use of relative
|
6526
|
+
references can be helpful in long patterns, and also in patterns that
|
6527
|
+
are created by joining together fragments that contain references
|
6531
6528
|
within themselves.
|
6532
6529
|
|
6533
|
-
A
|
6534
|
-
pattern
|
6530
|
+
A back reference matches whatever actually matched the capturing sub-
|
6531
|
+
pattern in the current subject string, rather than anything matching
|
6535
6532
|
the subpattern itself (see "Subpatterns as subroutines" below for a way
|
6536
6533
|
of doing that). So the pattern
|
6537
6534
|
|
6538
6535
|
(sens|respons)e and \1ibility
|
6539
6536
|
|
6540
|
-
matches
|
6541
|
-
not
|
6542
|
-
time
|
6537
|
+
matches "sense and sensibility" and "response and responsibility", but
|
6538
|
+
not "sense and responsibility". If caseful matching is in force at the
|
6539
|
+
time of the back reference, the case of letters is relevant. For exam-
|
6543
6540
|
ple,
|
6544
6541
|
|
6545
6542
|
((?i)rah)\s+\1
|
6546
6543
|
|
6547
|
-
matches
|
6544
|
+
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
|
6548
6545
|
original capturing subpattern is matched caselessly.
|
6549
6546
|
|
6550
|
-
There
|
6551
|
-
subpatterns.
|
6552
|
-
\k'name'
|
6547
|
+
There are several different ways of writing back references to named
|
6548
|
+
subpatterns. The .NET syntax \k{name} and the Perl syntax \k<name> or
|
6549
|
+
\k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's
|
6553
6550
|
unified back reference syntax, in which \g can be used for both numeric
|
6554
|
-
and
|
6551
|
+
and named references, is also supported. We could rewrite the above
|
6555
6552
|
example in any of the following ways:
|
6556
6553
|
|
6557
6554
|
(?<p1>(?i)rah)\s+\k<p1>
|
@@ -6559,84 +6556,92 @@ BACK REFERENCES
|
|
6559
6556
|
(?P<p1>(?i)rah)\s+(?P=p1)
|
6560
6557
|
(?<p1>(?i)rah)\s+\g{p1}
|
6561
6558
|
|
6562
|
-
A
|
6559
|
+
A subpattern that is referenced by name may appear in the pattern
|
6563
6560
|
before or after the reference.
|
6564
6561
|
|
6565
|
-
There
|
6566
|
-
subpattern
|
6562
|
+
There may be more than one back reference to the same subpattern. If a
|
6563
|
+
subpattern has not actually been used in a particular match, any back
|
6567
6564
|
references to it always fail by default. For example, the pattern
|
6568
6565
|
|
6569
6566
|
(a|(bc))\2
|
6570
6567
|
|
6571
|
-
always
|
6568
|
+
always fails if it starts to match "a" rather than "bc". However, if
|
6572
6569
|
the PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back refer-
|
6573
6570
|
ence to an unset value matches an empty string.
|
6574
6571
|
|
6575
|
-
Because
|
6576
|
-
its
|
6577
|
-
ence
|
6578
|
-
delimiter
|
6579
|
-
PCRE_EXTENDED
|
6572
|
+
Because there may be many capturing parentheses in a pattern, all dig-
|
6573
|
+
its following a backslash are taken as part of a potential back refer-
|
6574
|
+
ence number. If the pattern continues with a digit character, some
|
6575
|
+
delimiter must be used to terminate the back reference. If the
|
6576
|
+
PCRE_EXTENDED option is set, this can be white space. Otherwise, the
|
6580
6577
|
\g{ syntax or an empty comment (see "Comments" below) can be used.
|
6581
6578
|
|
6582
6579
|
Recursive back references
|
6583
6580
|
|
6584
|
-
A
|
6585
|
-
fails
|
6586
|
-
matches.
|
6581
|
+
A back reference that occurs inside the parentheses to which it refers
|
6582
|
+
fails when the subpattern is first used, so, for example, (a\1) never
|
6583
|
+
matches. However, such references can be useful inside repeated sub-
|
6587
6584
|
patterns. For example, the pattern
|
6588
6585
|
|
6589
6586
|
(a|b\1)+
|
6590
6587
|
|
6591
6588
|
matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
|
6592
|
-
ation
|
6593
|
-
string
|
6594
|
-
work,
|
6595
|
-
to
|
6589
|
+
ation of the subpattern, the back reference matches the character
|
6590
|
+
string corresponding to the previous iteration. In order for this to
|
6591
|
+
work, the pattern must be such that the first iteration does not need
|
6592
|
+
to match the back reference. This can be done using alternation, as in
|
6596
6593
|
the example above, or by a quantifier with a minimum of zero.
|
6597
6594
|
|
6598
|
-
Back
|
6599
|
-
treated
|
6600
|
-
subsequent
|
6595
|
+
Back references of this type cause the group that they reference to be
|
6596
|
+
treated as an atomic group. Once the whole group has been matched, a
|
6597
|
+
subsequent matching failure cannot cause backtracking into the middle
|
6601
6598
|
of the group.
|
6602
6599
|
|
6603
6600
|
|
6604
6601
|
ASSERTIONS
|
6605
6602
|
|
6606
|
-
An
|
6607
|
-
current
|
6608
|
-
The
|
6603
|
+
An assertion is a test on the characters following or preceding the
|
6604
|
+
current matching point that does not actually consume any characters.
|
6605
|
+
The simple assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are
|
6609
6606
|
described above.
|
6610
6607
|
|
6611
|
-
More
|
6612
|
-
kinds:
|
6613
|
-
string,
|
6614
|
-
matched
|
6608
|
+
More complicated assertions are coded as subpatterns. There are two
|
6609
|
+
kinds: those that look ahead of the current position in the subject
|
6610
|
+
string, and those that look behind it. An assertion subpattern is
|
6611
|
+
matched in the normal way, except that it does not cause the current
|
6615
6612
|
matching position to be changed.
|
6616
6613
|
|
6617
|
-
Assertion
|
6618
|
-
tion
|
6619
|
-
the
|
6620
|
-
tern.
|
6614
|
+
Assertion subpatterns are not capturing subpatterns. If such an asser-
|
6615
|
+
tion contains capturing subpatterns within it, these are counted for
|
6616
|
+
the purposes of numbering the capturing subpatterns in the whole pat-
|
6617
|
+
tern. However, substring capturing is carried out only for positive
|
6621
6618
|
assertions. (Perl sometimes, but not always, does do capturing in nega-
|
6622
6619
|
tive assertions.)
|
6623
6620
|
|
6624
|
-
|
6625
|
-
|
6626
|
-
|
6621
|
+
WARNING: If a positive assertion containing one or more capturing sub-
|
6622
|
+
patterns succeeds, but failure to match later in the pattern causes
|
6623
|
+
backtracking over this assertion, the captures within the assertion are
|
6624
|
+
reset only if no higher numbered captures are already set. This is,
|
6625
|
+
unfortunately, a fundamental limitation of the current implementation,
|
6626
|
+
and as PCRE1 is now in maintenance-only status, it is unlikely ever to
|
6627
|
+
change.
|
6628
|
+
|
6629
|
+
For compatibility with Perl, assertion subpatterns may be repeated;
|
6630
|
+
though it makes no sense to assert the same thing several times, the
|
6631
|
+
side effect of capturing parentheses may occasionally be useful. In
|
6627
6632
|
practice, there are only three cases:
|
6628
6633
|
|
6629
|
-
(1)
|
6630
|
-
matching.
|
6634
|
+
(1) If the quantifier is {0}, the assertion is never obeyed during
|
6635
|
+
matching. However, it may contain internal capturing parenthesized
|
6631
6636
|
groups that are called from elsewhere via the subroutine mechanism.
|
6632
6637
|
|
6633
|
-
(2)
|
6634
|
-
as
|
6638
|
+
(2) If the quantifier is {0,n} where n is greater than zero, it is treated
|
6639
|
+
as if it were {0,1}. At run time, the rest of the pattern match is
|
6635
6640
|
tried with and without the assertion, the order depending on the greed-
|
6636
6641
|
iness of the quantifier.
|
6637
6642
|
|
6638
|
-
(3)
|
6639
|
-
ignored.
|
6643
|
+
(3) If the minimum repetition is greater than zero, the quantifier is
|
6644
|
+
ignored. The assertion is obeyed just once when encountered during
|
6640
6645
|
matching.
|
6641
6646
|
|
6642
6647
|
Lookahead assertions
|
@@ -6646,38 +6651,38 @@ ASSERTIONS
|
|
6646
6651
|
|
6647
6652
|
\w+(?=;)
|
6648
6653
|
|
6649
|
-
matches
|
6654
|
+
matches a word followed by a semicolon, but does not include the semi-
|
6650
6655
|
colon in the match, and
|
6651
6656
|
|
6652
6657
|
foo(?!bar)
|
6653
6658
|
|
6654
|
-
matches
|
6659
|
+
matches any occurrence of "foo" that is not followed by "bar". Note
|
6655
6660
|
that the apparently similar pattern
|
6656
6661
|
|
6657
6662
|
(?!foo)bar
|
6658
6663
|
|
6659
|
-
does
|
6660
|
-
other
|
6664
|
+
does not find an occurrence of "bar" that is preceded by something
|
6665
|
+
other than "foo"; it finds any occurrence of "bar" whatsoever, because
|
6661
6666
|
the assertion (?!foo) is always true when the next three characters are
|
6662
6667
|
"bar". A lookbehind assertion is needed to achieve the other effect.
|
6663
6668
|
|
6664
6669
|
If you want to force a matching failure at some point in a pattern, the
|
6665
|
-
most
|
6666
|
-
always
|
6670
|
+
most convenient way to do it is with (?!) because an empty string
|
6671
|
+
always matches, so an assertion that requires there not to be an empty
|
6667
6672
|
string must always fail. The backtracking control verb (*FAIL) or (*F)
|
6668
6673
|
is a synonym for (?!).
|
6669
6674
|
|
6670
6675
|
Lookbehind assertions
|
6671
6676
|
|
6672
|
-
Lookbehind
|
6677
|
+
Lookbehind assertions start with (?<= for positive assertions and (?<!
|
6673
6678
|
for negative assertions. For example,
|
6674
6679
|
|
6675
6680
|
(?<!foo)bar
|
6676
6681
|
|
6677
|
-
does
|
6678
|
-
contents
|
6682
|
+
does find an occurrence of "bar" that is not preceded by "foo". The
|
6683
|
+
contents of a lookbehind assertion are restricted such that all the
|
6679
6684
|
strings it matches must have a fixed length. However, if there are sev-
|
6680
|
-
eral
|
6685
|
+
eral top-level alternatives, they do not all have to have the same
|
6681
6686
|
fixed length. Thus
|
6682
6687
|
|
6683
6688
|
(?<=bullock|donkey)
|
@@ -6686,62 +6691,62 @@ ASSERTIONS
|
|
6686
6691
|
|
6687
6692
|
(?<!dogs?|cats?)
|
6688
6693
|
|
6689
|
-
causes
|
6690
|
-
strings
|
6694
|
+
causes an error at compile time. Branches that match different length
|
6695
|
+
strings are permitted only at the top level of a lookbehind assertion.
|
6691
6696
|
This is an extension compared with Perl, which requires all branches to
|
6692
6697
|
match the same length of string. An assertion such as
|
6693
6698
|
|
6694
6699
|
(?<=ab(c|de))
|
6695
6700
|
|
6696
|
-
is
|
6701
|
+
is not permitted, because its single top-level branch can match two
|
6697
6702
|
different lengths, but it is acceptable to PCRE if rewritten to use two
|
6698
6703
|
top-level branches:
|
6699
6704
|
|
6700
6705
|
(?<=abc|abde)
|
6701
6706
|
|
6702
|
-
In
|
6707
|
+
In some cases, the escape sequence \K (see above) can be used instead
|
6703
6708
|
of a lookbehind assertion to get round the fixed-length restriction.
|
6704
6709
|
|
6705
|
-
The
|
6706
|
-
to
|
6710
|
+
The implementation of lookbehind assertions is, for each alternative,
|
6711
|
+
to temporarily move the current position back by the fixed length and
|
6707
6712
|
then try to match. If there are insufficient characters before the cur-
|
6708
6713
|
rent position, the assertion fails.
|
6709
6714
|
|
6710
|
-
In
|
6711
|
-
gle
|
6712
|
-
because
|
6713
|
-
hind.
|
6715
|
+
In a UTF mode, PCRE does not allow the \C escape (which matches a sin-
|
6716
|
+
gle data unit even in a UTF mode) to appear in lookbehind assertions,
|
6717
|
+
because it makes it impossible to calculate the length of the lookbe-
|
6718
|
+
hind. The \X and \R escapes, which can match different numbers of data
|
6714
6719
|
units, are also not permitted.
|
6715
6720
|
|
6716
|
-
"Subroutine"
|
6717
|
-
lookbehinds,
|
6721
|
+
"Subroutine" calls (see below) such as (?2) or (?&X) are permitted in
|
6722
|
+
lookbehinds, as long as the subpattern matches a fixed-length string.
|
6718
6723
|
Recursion, however, is not supported.
|
6719
6724
|
|
6720
|
-
Possessive
|
6725
|
+
Possessive quantifiers can be used in conjunction with lookbehind
|
6721
6726
|
assertions to specify efficient matching of fixed-length strings at the
|
6722
6727
|
end of subject strings. Consider a simple pattern such as
|
6723
6728
|
|
6724
6729
|
abcd$
|
6725
6730
|
|
6726
|
-
when
|
6731
|
+
when applied to a long string that does not match. Because matching
|
6727
6732
|
proceeds from left to right, PCRE will look for each "a" in the subject
|
6728
|
-
and
|
6733
|
+
and then see if what follows matches the rest of the pattern. If the
|
6729
6734
|
pattern is specified as
|
6730
6735
|
|
6731
6736
|
^.*abcd$
|
6732
6737
|
|
6733
|
-
the
|
6738
|
+
the initial .* matches the entire string at first, but when this fails
|
6734
6739
|
(because there is no following "a"), it backtracks to match all but the
|
6735
|
-
last
|
6736
|
-
again
|
6740
|
+
last character, then all but the last two characters, and so on. Once
|
6741
|
+
again the search for "a" covers the entire string, from right to left,
|
6737
6742
|
so we are no better off. However, if the pattern is written as
|
6738
6743
|
|
6739
6744
|
^.*+(?<=abcd)
|
6740
6745
|
|
6741
|
-
there
|
6742
|
-
entire
|
6743
|
-
on
|
6744
|
-
For
|
6746
|
+
there can be no backtracking for the .*+ item; it can match only the
|
6747
|
+
entire string. The subsequent lookbehind assertion does a single test
|
6748
|
+
on the last four characters. If it fails, the match fails immediately.
|
6749
|
+
For long strings, this approach makes a significant difference to the
|
6745
6750
|
processing time.
|
6746
6751
|
|
6747
6752
|
Using multiple assertions
|
@@ -6750,18 +6755,18 @@ ASSERTIONS
|
|
6750
6755
|
|
6751
6756
|
(?<=\d{3})(?<!999)foo
|
6752
6757
|
|
6753
|
-
matches
|
6754
|
-
each
|
6755
|
-
the
|
6756
|
-
characters
|
6758
|
+
matches "foo" preceded by three digits that are not "999". Notice that
|
6759
|
+
each of the assertions is applied independently at the same point in
|
6760
|
+
the subject string. First there is a check that the previous three
|
6761
|
+
characters are all digits, and then there is a check that the same
|
6757
6762
|
three characters are not "999". This pattern does not match "foo" pre-
|
6758
|
-
ceded
|
6759
|
-
three
|
6763
|
+
ceded by six characters, the first three of which are digits and the last
|
6764
|
+
three of which are not "999". For example, it doesn't match "123abc-
|
6760
6765
|
foo". A pattern to do that is
|
6761
6766
|
|
6762
6767
|
(?<=\d{3}...)(?<!999)foo
|
6763
6768
|
|
6764
|
-
This
|
6769
|
+
This time the first assertion looks at the preceding six characters,
|
6765
6770
|
checking that the first three are digits, and then the second assertion
|
6766
6771
|
checks that the preceding three characters are not "999".
|
6767
6772
|
|
@@ -6769,29 +6774,29 @@ ASSERTIONS
|
|
6769
6774
|
|
6770
6775
|
(?<=(?<!foo)bar)baz
|
6771
6776
|
|
6772
|
-
matches
|
6777
|
+
matches an occurrence of "baz" that is preceded by "bar" which in turn
|
6773
6778
|
is not preceded by "foo", while
|
6774
6779
|
|
6775
6780
|
(?<=\d{3}(?!999)...)foo
|
6776
6781
|
|
6777
|
-
is
|
6782
|
+
is another pattern that matches "foo" preceded by three digits and any
|
6778
6783
|
three characters that are not "999".
|
6779
6784
|
|
6780
6785
|
|
6781
6786
|
CONDITIONAL SUBPATTERNS
|
6782
6787
|
|
6783
|
-
It
|
6784
|
-
ditionally
|
6785
|
-
on
|
6786
|
-
tern
|
6788
|
+
It is possible to cause the matching process to obey a subpattern con-
|
6789
|
+
ditionally or to choose between two alternative subpatterns, depending
|
6790
|
+
on the result of an assertion, or whether a specific capturing subpat-
|
6791
|
+
tern has already been matched. The two possible forms of conditional
|
6787
6792
|
subpattern are:
|
6788
6793
|
|
6789
6794
|
(?(condition)yes-pattern)
|
6790
6795
|
(?(condition)yes-pattern|no-pattern)
|
6791
6796
|
|
6792
|
-
If
|
6793
|
-
no-pattern
|
6794
|
-
tives
|
6797
|
+
If the condition is satisfied, the yes-pattern is used; otherwise the
|
6798
|
+
no-pattern (if present) is used. If there are more than two alterna-
|
6799
|
+
tives in the subpattern, a compile-time error occurs. Each of the two
|
6795
6800
|
alternatives may itself contain nested subpatterns of any form, includ-
|
6796
6801
|
ing conditional subpatterns; the restriction to two alternatives
|
6797
6802
|
applies only at the level of the condition. This pattern fragment is an
|
@@ -6800,68 +6805,68 @@ CONDITIONAL SUBPATTERNS
|
|
6800
6805
|
(?(1) (A|B|C) | (D | (?(2)E|F) | E) )
|
6801
6806
|
|
6802
6807
|
|
6803
|
-
There
|
6808
|
+
There are four kinds of condition: references to subpatterns, refer-
|
6804
6809
|
ences to recursion, a pseudo-condition called DEFINE, and assertions.
|
6805
6810
|
|
6806
6811
|
Checking for a used subpattern by number
|
6807
6812
|
|
6808
|
-
If
|
6813
|
+
If the text between the parentheses consists of a sequence of digits,
|
6809
6814
|
the condition is true if a capturing subpattern of that number has pre-
|
6810
|
-
viously
|
6811
|
-
the
|
6812
|
-
numbers),
|
6813
|
-
native
|
6814
|
-
this
|
6815
|
-
most
|
6816
|
-
most
|
6815
|
+
viously matched. If there is more than one capturing subpattern with
|
6816
|
+
the same number (see the earlier section about duplicate subpattern
|
6817
|
+
numbers), the condition is true if any of them have matched. An alter-
|
6818
|
+
native notation is to precede the digits with a plus or minus sign. In
|
6819
|
+
this case, the subpattern number is relative rather than absolute. The
|
6820
|
+
most recently opened parentheses can be referenced by (?(-1), the next
|
6821
|
+
most recent by (?(-2), and so on. Inside loops it can also make sense
|
6817
6822
|
to refer to subsequent groups. The next parentheses to be opened can be
|
6818
|
-
referenced
|
6823
|
+
referenced as (?(+1), and so on. (The value zero in any of these forms
|
6819
6824
|
is not used; it provokes a compile-time error.)
|
6820
6825
|
|
6821
|
-
Consider
|
6826
|
+
Consider the following pattern, which contains non-significant white
|
6822
6827
|
space to make it more readable (assume the PCRE_EXTENDED option) and to
|
6823
6828
|
divide it into three parts for ease of discussion:
|
6824
6829
|
|
6825
6830
|
( \( )? [^()]+ (?(1) \) )
|
6826
6831
|
|
6827
|
-
The
|
6832
|
+
The first part matches an optional opening parenthesis, and if that
|
6828
6833
|
character is present, sets it as the first captured substring. The sec-
|
6829
|
-
ond
|
6830
|
-
third
|
6831
|
-
first
|
6832
|
-
started
|
6833
|
-
yes-pattern
|
6834
|
-
wise,
|
6835
|
-
In
|
6834
|
+
ond part matches one or more characters that are not parentheses. The
|
6835
|
+
third part is a conditional subpattern that tests whether or not the
|
6836
|
+
first set of parentheses matched. If they did, that is, if the subject
|
6837
|
+
started with an opening parenthesis, the condition is true, and so the
|
6838
|
+
yes-pattern is executed and a closing parenthesis is required. Other-
|
6839
|
+
wise, since no-pattern is not present, the subpattern matches nothing.
|
6840
|
+
In other words, this pattern matches a sequence of non-parentheses,
|
6836
6841
|
optionally enclosed in parentheses.
|
6837
6842
|
|
6838
|
-
If
|
6843
|
+
If you were embedding this pattern in a larger one, you could use a
|
6839
6844
|
relative reference:
|
6840
6845
|
|
6841
6846
|
...other stuff... ( \( )? [^()]+ (?(-1) \) ) ...
|
6842
6847
|
|
6843
|
-
This
|
6848
|
+
This makes the fragment independent of the parentheses in the larger
|
6844
6849
|
pattern.
|
6845
6850
|
|
6846
6851
|
Checking for a used subpattern by name
|
6847
6852
|
|
6848
|
-
Perl
|
6849
|
-
used
|
6850
|
-
PCRE,
|
6853
|
+
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
|
6854
|
+
used subpattern by name. For compatibility with earlier versions of
|
6855
|
+
PCRE, which had this facility before Perl, the syntax (?(name)...) is
|
6851
6856
|
also recognized.
|
6852
6857
|
|
6853
6858
|
Rewriting the above example to use a named subpattern gives this:
|
6854
6859
|
|
6855
6860
|
(?<OPEN> \( )? [^()]+ (?(<OPEN>) \) )
|
6856
6861
|
|
6857
|
-
If
|
6858
|
-
is
|
6862
|
+
If the name used in a condition of this kind is a duplicate, the test
|
6863
|
+
is applied to all subpatterns of the same name, and is true if any one
|
6859
6864
|
of them has matched.
|
6860
6865
|
|
6861
6866
|
Checking for pattern recursion
|
6862
6867
|
|
6863
6868
|
If the condition is the string (R), and there is no subpattern with the
|
6864
|
-
name
|
6869
|
+
name R, the condition is true if a recursive call to the whole pattern
|
6865
6870
|
or any subpattern has been made. If digits or a name preceded by amper-
|
6866
6871
|
sand follow the letter R, for example:
|
6867
6872
|
|
@@ -6869,51 +6874,51 @@ CONDITIONAL SUBPATTERNS
|
|
6869
6874
|
|
6870
6875
|
the condition is true if the most recent recursion is into a subpattern
|
6871
6876
|
whose number or name is given. This condition does not check the entire
|
6872
|
-
recursion
|
6877
|
+
recursion stack. If the name used in a condition of this kind is a
|
6873
6878
|
duplicate, the test is applied to all subpatterns of the same name, and
|
6874
6879
|
is true if any one of them is the most recent recursion.
|
6875
6880
|
|
6876
|
-
At
|
6881
|
+
At "top level", all these recursion test conditions are false. The
|
6877
6882
|
syntax for recursive patterns is described below.
|
6878
6883
|
|
6879
6884
|
Defining subpatterns for use by reference only
|
6880
6885
|
|
6881
|
-
If
|
6882
|
-
with
|
6883
|
-
there
|
6884
|
-
skipped
|
6885
|
-
DEFINE
|
6886
|
-
enced
|
6887
|
-
example,
|
6886
|
+
If the condition is the string (DEFINE), and there is no subpattern
|
6887
|
+
with the name DEFINE, the condition is always false. In this case,
|
6888
|
+
there may be only one alternative in the subpattern. It is always
|
6889
|
+
skipped if control reaches this point in the pattern; the idea of
|
6890
|
+
DEFINE is that it can be used to define subroutines that can be refer-
|
6891
|
+
enced from elsewhere. (The use of subroutines is described below.) For
|
6892
|
+
example, a pattern to match an IPv4 address such as "192.168.23.245"
|
6888
6893
|
could be written like this (ignore white space and line breaks):
|
6889
6894
|
|
6890
6895
|
(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
|
6891
6896
|
\b (?&byte) (\.(?&byte)){3} \b
|
6892
6897
|
|
6893
|
-
The
|
6894
|
-
group
|
6895
|
-
an
|
6896
|
-
this
|
6897
|
-
condition.
|
6898
|
-
to
|
6898
|
+
The first part of the pattern is a DEFINE group inside which another
|
6899
|
+
group named "byte" is defined. This matches an individual component of
|
6900
|
+
an IPv4 address (a number less than 256). When matching takes place,
|
6901
|
+
this part of the pattern is skipped because DEFINE acts like a false
|
6902
|
+
condition. The rest of the pattern uses references to the named group
|
6903
|
+
to match the four dot-separated components of an IPv4 address, insist-
|
6899
6904
|
ing on a word boundary at each end.
|
6900
6905
|
|
6901
6906
|
Assertion conditions
|
6902
6907
|
|
6903
|
-
If
|
6904
|
-
assertion.
|
6905
|
-
assertion.
|
6908
|
+
If the condition is not in any of the above formats, it must be an
|
6909
|
+
assertion. This may be a positive or negative lookahead or lookbehind
|
6910
|
+
assertion. Consider this pattern, again containing non-significant
|
6906
6911
|
white space, and with the two alternatives on the second line:
|
6907
6912
|
|
6908
6913
|
(?(?=[^a-z]*[a-z])
|
6909
6914
|
\d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )
|
6910
6915
|
|
6911
|
-
The
|
6912
|
-
optional
|
6913
|
-
it
|
6914
|
-
letter
|
6915
|
-
otherwise
|
6916
|
-
strings
|
6916
|
+
The condition is a positive lookahead assertion that matches an
|
6917
|
+
optional sequence of non-letters followed by a letter. In other words,
|
6918
|
+
it tests for the presence of at least one letter in the subject. If a
|
6919
|
+
letter is found, the subject is matched against the first alternative;
|
6920
|
+
otherwise it is matched against the second. This pattern matches
|
6921
|
+
strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
|
6917
6922
|
letters and dd are digits.
|
6918
6923
|
|
6919
6924
|
|
@@ -6922,41 +6927,41 @@ COMMENTS
|
|
6922
6927
|
There are two ways of including comments in patterns that are processed
|
6923
6928
|
by PCRE. In both cases, the start of the comment must not be in a char-
|
6924
6929
|
acter class, nor in the middle of any other sequence of related charac-
|
6925
|
-
ters
|
6930
|
+
ters such as (?: or a subpattern name or number. The characters that
|
6926
6931
|
make up a comment play no part in the pattern matching.
|
6927
6932
|
|
6928
|
-
The
|
6929
|
-
next
|
6933
|
+
The sequence (?# marks the start of a comment that continues up to the
|
6934
|
+
next closing parenthesis. Nested parentheses are not permitted. If the
|
6930
6935
|
PCRE_EXTENDED option is set, an unescaped # character also introduces a
|
6931
|
-
comment,
|
6932
|
-
newline
|
6936
|
+
comment, which in this case continues to immediately after the next
|
6937
|
+
newline character or character sequence in the pattern. Which charac-
|
6933
6938
|
ters are interpreted as newlines is controlled by the options passed to
|
6934
|
-
a
|
6939
|
+
a compiling function or by a special sequence at the start of the pat-
|
6935
6940
|
tern, as described in the section entitled "Newline conventions" above.
|
6936
6941
|
Note that the end of this type of comment is a literal newline sequence
|
6937
|
-
in
|
6938
|
-
not
|
6942
|
+
in the pattern; escape sequences that happen to represent a newline do
|
6943
|
+
not count. For example, consider this pattern when PCRE_EXTENDED is
|
6939
6944
|
set, and the default newline convention is in force:
|
6940
6945
|
|
6941
6946
|
abc #comment \n still comment
|
6942
6947
|
|
6943
|
-
On
|
6944
|
-
for
|
6945
|
-
stage,
|
6948
|
+
On encountering the # character, pcre_compile() skips along, looking
|
6949
|
+
for a newline in the pattern. The sequence \n is still literal at this
|
6950
|
+
stage, so it does not terminate the comment. Only an actual character
|
6946
6951
|
with the code value 0x0a (the default newline) does so.
|
6947
6952
|
|
6948
6953
|
|
6949
6954
|
RECURSIVE PATTERNS
|
6950
6955
|
|
6951
|
-
Consider
|
6952
|
-
unlimited
|
6953
|
-
that
|
6954
|
-
depth
|
6956
|
+
Consider the problem of matching a string in parentheses, allowing for
|
6957
|
+
unlimited nested parentheses. Without the use of recursion, the best
|
6958
|
+
that can be done is to use a pattern that matches up to some fixed
|
6959
|
+
depth of nesting. It is not possible to handle an arbitrary nesting
|
6955
6960
|
depth.
|
6956
6961
|
|
6957
6962
|
For some time, Perl has provided a facility that allows regular expres-
|
6958
|
-
sions
|
6959
|
-
Perl
|
6963
|
+
sions to recurse (amongst other things). It does this by interpolating
|
6964
|
+
Perl code in the expression at run time, and the code can refer to the
|
6960
6965
|
expression itself. A Perl pattern using code interpolation to solve the
|
6961
6966
|
parentheses problem can be created like this:
|
6962
6967
|
|
@@ -6966,201 +6971,201 @@ RECURSIVE PATTERNS
|
|
6966
6971
|
refers recursively to the pattern in which it appears.
|
6967
6972
|
|
6968
6973
|
Obviously, PCRE cannot support the interpolation of Perl code. Instead,
|
6969
|
-
it
|
6970
|
-
also
|
6971
|
-
PCRE
|
6974
|
+
it supports special syntax for recursion of the entire pattern, and
|
6975
|
+
also for individual subpattern recursion. After its introduction in
|
6976
|
+
PCRE and Python, this kind of recursion was subsequently introduced
|
6972
6977
|
into Perl at release 5.10.
|
6973
6978
|
|
6974
|
-
A
|
6975
|
-
zero
|
6976
|
-
subpattern
|
6977
|
-
subpattern.
|
6978
|
-
described
|
6979
|
+
A special item that consists of (? followed by a number greater than
|
6980
|
+
zero and a closing parenthesis is a recursive subroutine call of the
|
6981
|
+
subpattern of the given number, provided that it occurs inside that
|
6982
|
+
subpattern. (If not, it is a non-recursive subroutine call, which is
|
6983
|
+
described in the next section.) The special item (?R) or (?0) is a
|
6979
6984
|
recursive call of the entire regular expression.
|
6980
6985
|
|
6981
|
-
This
|
6986
|
+
This PCRE pattern solves the nested parentheses problem (assume the
|
6982
6987
|
PCRE_EXTENDED option is set so that white space is ignored):
|
6983
6988
|
|
6984
6989
|
\( ( [^()]++ | (?R) )* \)
|
6985
6990
|
|
6986
|
-
First
|
6987
|
-
substrings
|
6988
|
-
recursive
|
6991
|
+
First it matches an opening parenthesis. Then it matches any number of
|
6992
|
+
substrings which can either be a sequence of non-parentheses, or a
|
6993
|
+
recursive match of the pattern itself (that is, a correctly parenthe-
|
6989
6994
|
sized substring). Finally there is a closing parenthesis. Note the use
|
6990
6995
|
of a possessive quantifier to avoid backtracking into sequences of non-
|
6991
6996
|
parentheses.
|
6992
6997
|
|
6993
|
-
If
|
6998
|
+
If this were part of a larger pattern, you would not want to recurse
|
6994
6999
|
the entire pattern, so instead you could use this:
|
6995
7000
|
|
6996
7001
|
( \( ( [^()]++ | (?1) )* \) )
|
6997
7002
|
|
6998
|
-
We
|
7003
|
+
We have put the pattern into parentheses, and caused the recursion to
|
6999
7004
|
refer to them instead of the whole pattern.
|
7000
7005
|
|
7001
|
-
In
|
7002
|
-
tricky.
|
7006
|
+
In a larger pattern, keeping track of parenthesis numbers can be
|
7007
|
+
tricky. This is made easier by the use of relative references. Instead
|
7003
7008
|
of (?1) in the pattern above you can write (?-2) to refer to the second
|
7004
|
-
most
|
7005
|
-
words,
|
7009
|
+
most recently opened parentheses preceding the recursion. In other
|
7010
|
+
words, a negative number counts capturing parentheses leftwards from
|
7006
7011
|
the point at which it is encountered.
|
7007
7012
|
|
7008
|
-
It
|
7009
|
-
writing
|
7010
|
-
because
|
7011
|
-
enced.
|
7013
|
+
It is also possible to refer to subsequently opened parentheses, by
|
7014
|
+
writing references such as (?+2). However, these cannot be recursive
|
7015
|
+
because the reference is not inside the parentheses that are refer-
|
7016
|
+
enced. They are always non-recursive subroutine calls, as described in
|
7012
7017
|
the next section.
|
7013
7018
|
|
7014
|
-
An
|
7015
|
-
syntax
|
7019
|
+
An alternative approach is to use named parentheses instead. The Perl
|
7020
|
+
syntax for this is (?&name); PCRE's earlier syntax (?P>name) is also
|
7016
7021
|
supported. We could rewrite the above example as follows:
|
7017
7022
|
|
7018
7023
|
(?<pn> \( ( [^()]++ | (?&pn) )* \) )
|
7019
7024
|
|
7020
|
-
If
|
7025
|
+
If there is more than one subpattern with the same name, the earliest
|
7021
7026
|
one is used.
|
7022
7027
|
|
7023
|
-
This
|
7028
|
+
This particular example pattern that we have been looking at contains
|
7024
7029
|
nested unlimited repeats, and so the use of a possessive quantifier for
|
7025
7030
|
matching strings of non-parentheses is important when applying the pat-
|
7026
|
-
tern
|
7031
|
+
tern to strings that do not match. For example, when this pattern is
|
7027
7032
|
applied to
|
7028
7033
|
|
7029
7034
|
(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
|
7030
7035
|
|
7031
|
-
it
|
7032
|
-
not
|
7033
|
-
so
|
7036
|
+
it yields "no match" quickly. However, if a possessive quantifier is
|
7037
|
+
not used, the match runs for a very long time indeed because there are
|
7038
|
+
so many different ways the + and * repeats can carve up the subject,
|
7034
7039
|
and all have to be tested before failure can be reported.
|
7035
7040
|
|
7036
|
-
At
|
7037
|
-
from
|
7038
|
-
callout
|
7041
|
+
At the end of a match, the values of capturing parentheses are those
|
7042
|
+
from the outermost level. If you want to obtain intermediate values, a
|
7043
|
+
callout function can be used (see below and the pcrecallout documenta-
|
7039
7044
|
tion). If the pattern above is matched against
|
7040
7045
|
|
7041
7046
|
(ab(cd)ef)
|
7042
7047
|
|
7043
|
-
the
|
7044
|
-
which
|
7045
|
-
pattern
|
7046
|
-
unset,
|
7048
|
+
the value for the inner capturing parentheses (numbered 2) is "ef",
|
7049
|
+
which is the last value taken on at the top level. If a capturing sub-
|
7050
|
+
pattern is not matched at the top level, its final captured value is
|
7051
|
+
unset, even if it was (temporarily) set at a deeper level during the
|
7047
7052
|
matching process.
|
7048
7053
|
|
7049
|
-
If
|
7050
|
-
to
|
7054
|
+
If there are more than 15 capturing parentheses in a pattern, PCRE has
|
7055
|
+
to obtain extra memory to store data during a recursion, which it does
|
7051
7056
|
by using pcre_malloc, freeing it via pcre_free afterwards. If no memory
|
7052
7057
|
can be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.
|
7053
7058
|
|
7054
|
-
Do
|
7055
|
-
recursion.
|
7056
|
-
ets,
|
7057
|
-
brackets
|
7059
|
+
Do not confuse the (?R) item with the condition (R), which tests for
|
7060
|
+
recursion. Consider this pattern, which matches text in angle brack-
|
7061
|
+
ets, allowing for arbitrary nesting. Only digits are allowed in nested
|
7062
|
+
brackets (that is, when recursing), whereas any characters are permit-
|
7058
7063
|
ted at the outer level.
|
7059
7064
|
|
7060
7065
|
< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >
|
7061
7066
|
|
7062
|
-
In
|
7063
|
-
two
|
7067
|
+
In this pattern, (?(R) is the start of a conditional subpattern, with
|
7068
|
+
two different alternatives for the recursive and non-recursive cases.
|
7064
7069
|
The (?R) item is the actual recursive call.
|
7065
7070
|
|
7066
7071
|
Differences in recursion processing between PCRE and Perl
|
7067
7072
|
|
7068
|
-
Recursion
|
7069
|
-
In
|
7073
|
+
Recursion processing in PCRE differs from Perl in two important ways.
|
7074
|
+
In PCRE (like Python, but unlike Perl), a recursive subpattern call is
|
7070
7075
|
always treated as an atomic group. That is, once it has matched some of
|
7071
7076
|
the subject string, it is never re-entered, even if it contains untried
|
7072
|
-
alternatives
|
7073
|
-
illustrated
|
7074
|
-
dromic
|
7077
|
+
alternatives and there is a subsequent matching failure. This can be
|
7078
|
+
illustrated by the following pattern, which purports to match a palin-
|
7079
|
+
dromic string that contains an odd number of characters (for example,
|
7075
7080
|
"a", "aba", "abcba", "abcdcba"):
|
7076
7081
|
|
7077
7082
|
^(.|(.)(?1)\2)$
|
7078
7083
|
|
7079
7084
|
The idea is that it either matches a single character, or two identical
|
7080
|
-
characters
|
7081
|
-
in
|
7085
|
+
characters surrounding a sub-palindrome. In Perl, this pattern works;
|
7086
|
+
in PCRE it does not if the subject string is longer than three characters.
|
7082
7087
|
Consider the subject string "abcba":
|
7083
7088
|
|
7084
|
-
At
|
7089
|
+
At the top level, the first character is matched, but as it is not at
|
7085
7090
|
the end of the string, the first alternative fails; the second alterna-
|
7086
7091
|
tive is taken and the recursion kicks in. The recursive call to subpat-
|
7087
|
-
tern
|
7092
|
+
tern 1 successfully matches the next character ("b"). (Note that the
|
7088
7093
|
beginning and end of line tests are not part of the recursion.)
|
7089
7094
|
|
7090
|
-
Back
|
7091
|
-
subpattern
|
7092
|
-
is
|
7093
|
-
and
|
7094
|
-
enter
|
7095
|
+
Back at the top level, the next character ("c") is compared with what
|
7096
|
+
subpattern 2 matched, which was "a". This fails. Because the recursion
|
7097
|
+
is treated as an atomic group, there are now no backtracking points,
|
7098
|
+
and so the entire match fails. (Perl is able, at this point, to re-
|
7099
|
+
enter the recursion and try the second alternative.) However, if the
|
7095
7100
|
pattern is written with the alternatives in the other order, things are
|
7096
7101
|
different:
|
7097
7102
|
|
7098
7103
|
^((.)(?1)\2|.)$
|
7099
7104
|
|
7100
|
-
This
|
7101
|
-
recurse
|
7102
|
-
fails.
|
7103
|
-
higher
|
7105
|
+
This time, the recursing alternative is tried first, and continues to
|
7106
|
+
recurse until it runs out of characters, at which point the recursion
|
7107
|
+
fails. But this time we do have another alternative to try at the
|
7108
|
+
higher level. That is the big difference: in the previous case the
|
7104
7109
|
remaining alternative is at a deeper recursion level, which PCRE cannot
|
7105
7110
|
use.
|
7106
7111
|
|
7107
|
-
To
|
7108
|
-
just
|
7112
|
+
To change the pattern so that it matches all palindromic strings, not
|
7113
|
+
just those with an odd number of characters, it is tempting to change
|
7109
7114
|
the pattern to this:
|
7110
7115
|
|
7111
7116
|
^((.)(?1)\2|.?)$
|
7112
7117
|
|
7113
|
-
Again,
|
7114
|
-
When
|
7115
|
-
entered
|
7116
|
-
separate
|
7118
|
+
Again, this works in Perl, but not in PCRE, and for the same reason.
|
7119
|
+
When a deeper recursion has matched a single character, it cannot be
|
7120
|
+
entered again in order to match an empty string. The solution is to
|
7121
|
+
separate the two cases, and write out the odd and even cases as alter-
|
7117
7122
|
natives at the higher level:
|
7118
7123
|
|
7119
7124
|
^(?:((.)(?1)\2|)|((.)(?3)\4|.))
|
7120
7125
|
|
7121
|
-
If
|
7126
|
+
If you want to match typical palindromic phrases, the pattern has to
|
7122
7127
|
ignore all non-word characters, which can be done like this:
|
7123
7128
|
|
7124
7129
|
^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$
|
7125
7130
|
|
7126
7131
|
If run with the PCRE_CASELESS option, this pattern matches phrases such
|
7127
7132
|
as "A man, a plan, a canal: Panama!" and it works well in both PCRE and
|
7128
|
-
Perl.
|
7129
|
-
ing
|
7130
|
-
great
|
7133
|
+
Perl. Note the use of the possessive quantifier *+ to avoid backtrack-
|
7134
|
+
ing into sequences of non-word characters. Without this, PCRE takes a
|
7135
|
+
great deal longer (ten times or more) to match typical phrases, and
|
7131
7136
|
Perl takes so long that you think it has gone into a loop.
|
7132
7137
|
|
7133
|
-
WARNING:
|
7134
|
-
ject
|
7135
|
-
entire
|
7136
|
-
the
|
7137
|
-
then
|
7138
|
-
Once
|
7138
|
+
WARNING: The palindrome-matching patterns above work only if the sub-
|
7139
|
+
ject string does not start with a palindrome that is shorter than the
|
7140
|
+
entire string. For example, although "abcba" is correctly matched, if
|
7141
|
+
the subject is "ababa", PCRE finds the palindrome "aba" at the start,
|
7142
|
+
then fails at top level because the end of the string does not follow.
|
7143
|
+
Once again, it cannot jump back into the recursion to try other alter-
|
7139
7144
|
natives, so the entire match fails.
|
7140
7145
|
|
7141
|
-
The
|
7142
|
-
cessing
|
7143
|
-
tern
|
7144
|
-
it
|
7145
|
-
sion,
|
7146
|
+
The second way in which PCRE and Perl differ in their recursion pro-
|
7147
|
+
cessing is in the handling of captured values. In Perl, when a subpat-
|
7148
|
+
tern is called recursively or as a subroutine (see the next section),
|
7149
|
+
it has no access to any values that were captured outside the recur-
|
7150
|
+
sion, whereas in PCRE these values can be referenced. Consider this
|
7146
7151
|
pattern:
|
7147
7152
|
|
7148
7153
|
^(.)(\1|a(?2))
|
7149
7154
|
|
7150
|
-
In
|
7151
|
-
match
|
7152
|
-
to
|
7153
|
-
the
|
7154
|
-
In
|
7155
|
+
In PCRE, this pattern matches "bab". The first capturing parentheses
|
7156
|
+
match "b", then in the second group, when the back reference \1 fails
|
7157
|
+
to match "b", the second alternative matches "a" and then recurses. In
|
7158
|
+
the recursion, \1 does now match "b" and so the whole match succeeds.
|
7159
|
+
In Perl, the pattern fails to match because inside the recursive call
|
7155
7160
|
\1 cannot access the externally set value.
|
7156
7161
|
|
7157
7162
|
|
7158
7163
|
SUBPATTERNS AS SUBROUTINES
|
7159
7164
|
|
7160
|
-
If
|
7161
|
-
name)
|
7162
|
-
like
|
7163
|
-
be
|
7165
|
+
If the syntax for a recursive subpattern call (either by number or by
|
7166
|
+
name) is used outside the parentheses to which it refers, it operates
|
7167
|
+
like a subroutine in a programming language. The called subpattern may
|
7168
|
+
be defined before or after the reference. A numbered reference can be
|
7164
7169
|
absolute or relative, as in these examples:
|
7165
7170
|
|
7166
7171
|
(...(absolute)...)...(?2)...
|
@@ -7171,79 +7176,79 @@ SUBPATTERNS AS SUBROUTINES
|
|
7171
7176
|
|
7172
7177
|
(sens|respons)e and \1ibility
|
7173
7178
|
|
7174
|
-
matches
|
7179
|
+
matches "sense and sensibility" and "response and responsibility", but
|
7175
7180
|
not "sense and responsibility". If instead the pattern
|
7176
7181
|
|
7177
7182
|
(sens|respons)e and (?1)ibility
|
7178
7183
|
|
7179
|
-
is
|
7180
|
-
two
|
7184
|
+
is used, it does match "sense and responsibility" as well as the other
|
7185
|
+
two strings. Another example is given in the discussion of DEFINE
|
7181
7186
|
above.
|
7182
7187
|
|
7183
|
-
All
|
7184
|
-
atomic
|
7188
|
+
All subroutine calls, whether recursive or not, are always treated as
|
7189
|
+
atomic groups. That is, once a subroutine has matched some of the sub-
|
7185
7190
|
ject string, it is never re-entered, even if it contains untried alter-
|
7186
|
-
natives
|
7187
|
-
parentheses
|
7191
|
+
natives and there is a subsequent matching failure. Any capturing
|
7192
|
+
parentheses that are set during the subroutine call revert to their
|
7188
7193
|
previous values afterwards.
|
7189
7194
|
|
7190
|
-
Processing
|
7191
|
-
tern
|
7195
|
+
Processing options such as case-independence are fixed when a subpat-
|
7196
|
+
tern is defined, so if it is used as a subroutine, such options cannot
|
7192
7197
|
be changed for different calls. For example, consider this pattern:
|
7193
7198
|
|
7194
7199
|
(abc)(?i:(?-1))
|
7195
7200
|
|
7196
|
-
It
|
7201
|
+
It matches "abcabc". It does not match "abcABC" because the change of
|
7197
7202
|
processing option does not affect the called subpattern.
|
7198
7203
|
|
7199
7204
|
|
7200
7205
|
ONIGURUMA SUBROUTINE SYNTAX
|
7201
7206
|
|
7202
|
-
For
|
7207
|
+
For compatibility with Oniguruma, the non-Perl syntax \g followed by a
|
7203
7208
|
name or a number enclosed either in angle brackets or single quotes, is
|
7204
|
-
an
|
7205
|
-
possibly
|
7209
|
+
an alternative syntax for referencing a subpattern as a subroutine,
|
7210
|
+
possibly recursively. Here are two of the examples used above, rewrit-
|
7206
7211
|
ten using this syntax:
|
7207
7212
|
|
7208
7213
|
(?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
|
7209
7214
|
(sens|respons)e and \g'1'ibility
|
7210
7215
|
|
7211
|
-
PCRE
|
7216
|
+
PCRE supports an extension to Oniguruma: if a number is preceded by a
|
7212
7217
|
plus or a minus sign it is taken as a relative reference. For example:
|
7213
7218
|
|
7214
7219
|
(abc)(?i:\g<-1>)
|
7215
7220
|
|
7216
|
-
Note
|
7217
|
-
synonymous.
|
7221
|
+
Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
|
7222
|
+
synonymous. The former is a back reference; the latter is a subroutine
|
7218
7223
|
call.
|
7219
7224
|
|
7220
7225
|
|
7221
7226
|
CALLOUTS
|
7222
7227
|
|
7223
7228
|
Perl has a feature whereby using the sequence (?{...}) causes arbitrary
|
7224
|
-
Perl
|
7229
|
+
Perl code to be obeyed in the middle of matching a regular expression.
|
7225
7230
|
This makes it possible, amongst other things, to extract different sub-
|
7226
7231
|
strings that match the same pair of parentheses when there is a repeti-
|
7227
7232
|
tion.
|
7228
7233
|
|
7229
7234
|
PCRE provides a similar feature, but of course it cannot obey arbitrary
|
7230
7235
|
Perl code. The feature is called "callout". The caller of PCRE provides
|
7231
|
-
an
|
7232
|
-
pcre_callout
|
7233
|
-
library).
|
7236
|
+
an external function by putting its entry point in the global variable
|
7237
|
+
pcre_callout (8-bit library) or pcre[16|32]_callout (16-bit or 32-bit
|
7238
|
+
library). By default, this variable contains NULL, which disables all
|
7234
7239
|
calling out.
|
7235
7240
|
|
7236
|
-
Within
|
7237
|
-
external
|
7238
|
-
callout
|
7239
|
-
The
|
7241
|
+
Within a regular expression, (?C) indicates the points at which the
|
7242
|
+
external function is to be called. If you want to identify different
|
7243
|
+
callout points, you can put a number less than 256 after the letter C.
|
7244
|
+
The default value is zero. For example, this pattern has two callout
|
7240
7245
|
points:
|
7241
7246
|
|
7242
7247
|
(?C1)abc(?C2)def
|
7243
7248
|
|
7244
|
-
If
|
7245
|
-
outs
|
7246
|
-
are
|
7249
|
+
If the PCRE_AUTO_CALLOUT flag is passed to a compiling function, call-
|
7250
|
+
outs are automatically installed before each item in the pattern. They
|
7251
|
+
are all numbered 255. If there is a conditional group in the pattern
|
7247
7252
|
whose condition is an assertion, an additional callout is inserted just
|
7248
7253
|
before the condition. An explicit callout may also be set at this posi-
|
7249
7254
|
tion, as in this example:
|
@@ -7253,120 +7258,120 @@ CALLOUTS
|
|
7253
7258
|
Note that this applies only to assertion conditions, not to other types
|
7254
7259
|
of condition.
|
7255
7260
|
|
7256
|
-
During
|
7257
|
-
tion
|
7258
|
-
position
|
7259
|
-
supplied
|
7261
|
+
During matching, when PCRE reaches a callout point, the external func-
|
7262
|
+
tion is called. It is provided with the number of the callout, the
|
7263
|
+
position in the pattern, and, optionally, one item of data originally
|
7264
|
+
supplied by the caller of the matching function. The callout function
|
7260
7265
|
may cause matching to proceed, to backtrack, or to fail altogether.
|
7261
7266
|
|
7262
|
-
By
|
7263
|
-
and
|
7264
|
-
skipped.
|
7265
|
-
options
|
7266
|
-
complete
|
7267
|
+
By default, PCRE implements a number of optimizations at compile time
|
7268
|
+
and matching time, and one side-effect is that sometimes callouts are
|
7269
|
+
skipped. If you need all possible callouts to happen, you need to set
|
7270
|
+
options that disable the relevant optimizations. More details, and a
|
7271
|
+
complete description of the interface to the callout function, are
|
7267
7272
|
given in the pcrecallout documentation.
|
7268
7273
|
|
7269
7274
|
|
7270
7275
|
BACKTRACKING CONTROL
|
7271
7276
|
|
7272
|
-
Perl
|
7273
|
-
which
|
7274
|
-
and
|
7275
|
-
on
|
7276
|
-
problems
|
7277
|
+
Perl 5.10 introduced a number of "Special Backtracking Control Verbs",
|
7278
|
+
which are still described in the Perl documentation as "experimental
|
7279
|
+
and subject to change or removal in a future version of Perl". It goes
|
7280
|
+
on to say: "Their usage in production code should be noted to avoid
|
7281
|
+
problems during upgrades." The same remarks apply to the PCRE features
|
7277
7282
|
described in this section.
|
7278
7283
|
|
7279
|
-
The
|
7284
|
+
The new verbs make use of what was previously invalid syntax: an open-
|
7280
7285
|
ing parenthesis followed by an asterisk. They are generally of the form
|
7281
|
-
(*VERB)
|
7282
|
-
differently
|
7286
|
+
(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving
|
7287
|
+
differently depending on whether or not a name is present. A name is
|
7283
7288
|
any sequence of characters that does not include a closing parenthesis.
|
7284
7289
|
The maximum length of name is 255 in the 8-bit library and 65535 in the
|
7285
|
-
16-bit
|
7286
|
-
closing
|
7287
|
-
the
|
7290
|
+
16-bit and 32-bit libraries. If the name is empty, that is, if the
|
7291
|
+
closing parenthesis immediately follows the colon, the effect is as if
|
7292
|
+
the colon were not there. Any number of these verbs may occur in a
|
7288
7293
|
pattern.
|
7289
7294
|
|
7290
|
-
Since
|
7291
|
-
them
|
7292
|
-
the
|
7293
|
-
algorithm.
|
7294
|
-
negative
|
7295
|
+
Since these verbs are specifically related to backtracking, most of
|
7296
|
+
them can be used only when the pattern is to be matched using one of
|
7297
|
+
the traditional matching functions, because these use a backtracking
|
7298
|
+
algorithm. With the exception of (*FAIL), which behaves like a failing
|
7299
|
+
negative assertion, the backtracking control verbs cause an error if
|
7295
7300
|
encountered by a DFA matching function.
|
7296
7301
|
|
7297
|
-
The
|
7302
|
+
The behaviour of these verbs in repeated groups, assertions, and in
|
7298
7303
|
subpatterns called as subroutines (whether or not recursively) is docu-
|
7299
7304
|
mented below.
|
7300
7305
|
|
7301
7306
|
Optimizations that affect backtracking verbs
|
7302
7307
|
|
7303
|
-
PCRE
|
7308
|
+
PCRE contains some optimizations that are used to speed up matching by
|
7304
7309
|
running some checks at the start of each match attempt. For example, it
|
7305
|
-
may
|
7310
|
+
may know the minimum length of matching subject, or that a particular
|
7306
7311
|
character must be present. When one of these optimizations bypasses the
|
7307
|
-
running
|
7312
|
+
running of a match, any included backtracking verbs will not, of
|
7308
7313
|
course, be processed. You can suppress the start-of-match optimizations
|
7309
|
-
by
|
7314
|
+
by setting the PCRE_NO_START_OPTIMIZE option when calling pcre_com-
|
7310
7315
|
pile() or pcre_exec(), or by starting the pattern with (*NO_START_OPT).
|
7311
7316
|
There is more discussion of this option in the section entitled "Option
|
7312
7317
|
bits for pcre_exec()" in the pcreapi documentation.
|
7313
7318
|
|
7314
|
-
Experiments
|
7319
|
+
Experiments with Perl suggest that it too has similar optimizations,
|
7315
7320
|
sometimes leading to anomalous results.
|
7316
7321
|
|
7317
7322
|
Verbs that act immediately
|
7318
7323
|
|
7319
|
-
The
|
7324
|
+
The following verbs act as soon as they are encountered. They may not
|
7320
7325
|
be followed by a name.
|
7321
7326
|
|
7322
7327
|
(*ACCEPT)
|
7323
7328
|
|
7324
|
-
This
|
7325
|
-
of
|
7326
|
-
as
|
7329
|
+
This verb causes the match to end successfully, skipping the remainder
|
7330
|
+
of the pattern. However, when it is inside a subpattern that is called
|
7331
|
+
as a subroutine, only that subpattern is ended successfully. Matching
|
7327
7332
|
then continues at the outer level. If (*ACCEPT) is triggered in a posi-
|
7328
|
-
tive
|
7333
|
+
tive assertion, the assertion succeeds; in a negative assertion, the
|
7329
7334
|
assertion fails.
|
7330
7335
|
|
7331
|
-
If
|
7336
|
+
If (*ACCEPT) is inside capturing parentheses, the data so far is cap-
|
7332
7337
|
tured. For example:
|
7333
7338
|
|
7334
7339
|
A((?:A|B(*ACCEPT)|C)D)
|
7335
7340
|
|
7336
|
-
This
|
7341
|
+
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
|
7337
7342
|
tured by the outer parentheses.
|
7338
7343
|
|
7339
7344
|
(*FAIL) or (*F)
|
7340
7345
|
|
7341
|
-
This
|
7342
|
-
is
|
7343
|
-
that
|
7344
|
-
Those
|
7345
|
-
nearest
|
7346
|
+
This verb causes a matching failure, forcing backtracking to occur. It
|
7347
|
+
is equivalent to (?!) but easier to read. The Perl documentation notes
|
7348
|
+
that it is probably useful only when combined with (?{}) or (??{}).
|
7349
|
+
Those are, of course, Perl features that are not present in PCRE. The
|
7350
|
+
nearest equivalent is the callout feature, as for example in this pat-
|
7346
7351
|
tern:
|
7347
7352
|
|
7348
7353
|
a+(?C)(*FAIL)
|
7349
7354
|
|
7350
|
-
A
|
7355
|
+
A match with the string "aaaa" always fails, but the callout is taken
|
7351
7356
|
before each backtrack happens (in this example, 10 times).
|
7352
7357
|
|
7353
7358
|
Recording which path was taken
|
7354
7359
|
|
7355
|
-
There
|
7356
|
-
arrived
|
7360
|
+
There is one verb whose main purpose is to track how a match was
|
7361
|
+
arrived at, though it also has a secondary use in conjunction with
|
7357
7362
|
advancing the match starting point (see (*SKIP) below).
|
7358
7363
|
|
7359
7364
|
(*MARK:NAME) or (*:NAME)
|
7360
7365
|
|
7361
|
-
A
|
7362
|
-
instances
|
7366
|
+
A name is always required with this verb. There may be as many
|
7367
|
+
instances of (*MARK) as you like in a pattern, and their names do not
|
7363
7368
|
have to be unique.
|
7364
7369
|
|
7365
|
-
When
|
7366
|
-
(*PRUNE:NAME),
|
7367
|
-
the
|
7368
|
-
pcre_exec()"
|
7369
|
-
pcretest
|
7370
|
+
When a match succeeds, the name of the last-encountered (*MARK:NAME),
|
7371
|
+
(*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to
|
7372
|
+
the caller as described in the section entitled "Extra data for
|
7373
|
+
pcre_exec()" in the pcreapi documentation. Here is an example of
|
7374
|
+
pcretest output, where the /K modifier requests the retrieval and out-
|
7370
7375
|
putting of (*MARK) data:
|
7371
7376
|
|
7372
7377
|
re> /X(*MARK:A)Y|X(*MARK:B)Z/K
|
@@ -7378,73 +7383,73 @@ BACKTRACKING CONTROL
|
|
7378
7383
|
MK: B
|
7379
7384
|
|
7380
7385
|
The (*MARK) name is tagged with "MK:" in this output, and in this exam-
|
7381
|
-
ple
|
7382
|
-
efficient
|
7386
|
+
ple it indicates which of the two alternatives matched. This is a more
|
7387
|
+
efficient way of obtaining this information than putting each alterna-
|
7383
7388
|
tive in its own capturing parentheses.
|
7384
7389
|
|
7385
|
-
If
|
7386
|
-
true,
|
7390
|
+
If a verb with a name is encountered in a positive assertion that is
|
7391
|
+
true, the name is recorded and passed back if it is the last-encoun-
|
7387
7392
|
tered. This does not happen for negative assertions or failing positive
|
7388
7393
|
assertions.
|
7389
7394
|
|
7390
|
-
After
|
7395
|
+
After a partial match or a failed match, the last encountered name in
|
7391
7396
|
the entire match process is returned. For example:
|
7392
7397
|
|
7393
7398
|
re> /X(*MARK:A)Y|X(*MARK:B)Z/K
|
7394
7399
|
data> XP
|
7395
7400
|
No match, mark = B
|
7396
7401
|
|
7397
|
-
Note
|
7402
|
+
Note that in this unanchored example the mark is retained from the
|
7398
7403
|
match attempt that started at the letter "X" in the subject. Subsequent
|
7399
7404
|
match attempts starting at "P" and then with an empty string do not get
|
7400
7405
|
as far as the (*MARK) item, but nevertheless do not reset it.
|
7401
7406
|
|
7402
|
-
If
|
7403
|
-
should
|
7407
|
+
If you are interested in (*MARK) values after failed matches, you
|
7408
|
+
should probably set the PCRE_NO_START_OPTIMIZE option (see above) to
|
7404
7409
|
ensure that the match is always attempted.
|
7405
7410
|
|
7406
7411
|
Verbs that act after backtracking
|
7407
7412
|
|
7408
7413
|
The following verbs do nothing when they are encountered. Matching con-
|
7409
|
-
tinues
|
7410
|
-
a
|
7411
|
-
cannot
|
7414
|
+
tinues with what follows, but if there is no subsequent match, causing
|
7415
|
+
a backtrack to the verb, a failure is forced. That is, backtracking
|
7416
|
+
cannot pass to the left of the verb. However, when one of these verbs
|
7412
7417
|
appears inside an atomic group or an assertion that is true, its effect
|
7413
|
-
is
|
7414
|
-
there
|
7415
|
-
ing
|
7416
|
-
tion.
|
7418
|
+
is confined to that group, because once the group has been matched,
|
7419
|
+
there is never any backtracking into it. In this situation, backtrack-
|
7420
|
+
ing can "jump back" to the left of the entire atomic group or asser-
|
7421
|
+
tion. (Remember, as stated above, that this localization also
|
7417
7422
|
applies in subroutine calls.)
|
7418
7423
|
|
7419
|
-
These
|
7420
|
-
tracking
|
7421
|
-
when
|
7424
|
+
These verbs differ in exactly what kind of failure occurs when back-
|
7425
|
+
tracking reaches them. The behaviour described below is what happens
|
7426
|
+
when the verb is not in a subroutine or an assertion. Subsequent sec-
|
7422
7427
|
tions cover these special cases.
|
7423
7428
|
|
7424
7429
|
(*COMMIT)
|
7425
7430
|
|
7426
|
-
This
|
7431
|
+
This verb, which may not be followed by a name, causes the whole match
|
7427
7432
|
to fail outright if there is a later matching failure that causes back-
|
7428
|
-
tracking
|
7433
|
+
tracking to reach it. Even if the pattern is unanchored, no further
|
7429
7434
|
attempts to find a match by advancing the starting point take place. If
|
7430
|
-
(*COMMIT)
|
7435
|
+
(*COMMIT) is the only backtracking verb that is encountered, once it
|
7431
7436
|
has been passed pcre_exec() is committed to finding a match at the cur-
|
7432
7437
|
rent starting point, or not at all. For example:
|
7433
7438
|
|
7434
7439
|
a+(*COMMIT)b
|
7435
7440
|
|
7436
|
-
This
|
7441
|
+
This matches "xxaab" but not "aacaab". It can be thought of as a kind
|
7437
7442
|
of dynamic anchor, or "I've started, so I must finish." The name of the
|
7438
|
-
most
|
7443
|
+
most recently passed (*MARK) in the path is passed back when (*COMMIT)
|
7439
7444
|
forces a match failure.
|
7440
7445
|
|
7441
|
-
If
|
7442
|
-
one
|
7446
|
+
If there is more than one backtracking verb in a pattern, a different
|
7447
|
+
one that follows (*COMMIT) may be triggered first, so merely passing
|
7443
7448
|
(*COMMIT) during a match does not always guarantee that a match must be
|
7444
7449
|
at this starting point.
|
7445
7450
|
|
7446
|
-
Note
|
7447
|
-
anchor,
|
7451
|
+
Note that (*COMMIT) at the start of a pattern is not the same as an
|
7452
|
+
anchor, unless PCRE's start-of-match optimizations are turned off, as
|
7448
7453
|
shown in this output from pcretest:
|
7449
7454
|
|
7450
7455
|
re> /(*COMMIT)abc/
|
@@ -7455,207 +7460,207 @@ BACKTRACKING CONTROL
|
|
7455
7460
|
|
7456
7461
|
For this pattern, PCRE knows that any match must start with "a", so the
|
7457
7462
|
optimization skips along the subject to "a" before applying the pattern
|
7458
|
-
to
|
7459
|
-
ond
|
7460
|
-
program.
|
7463
|
+
to the first set of data. The match attempt then succeeds. In the sec-
|
7464
|
+
ond set of data, the escape sequence \Y is interpreted by the pcretest
|
7465
|
+
program. It causes the PCRE_NO_START_OPTIMIZE option to be set when
|
7461
7466
|
pcre_exec() is called. This disables the optimization that skips along
|
7462
7467
|
to the first character. The pattern is now applied starting at "x", and
|
7463
|
-
so
|
7468
|
+
so the (*COMMIT) causes the match to fail without trying any other
|
7464
7469
|
starting points.
|
7465
7470
|
|
7466
7471
|
(*PRUNE) or (*PRUNE:NAME)
|
7467
7472
|
|
7468
|
-
This
|
7473
|
+
This verb causes the match to fail at the current starting position in
|
7469
7474
|
the subject if there is a later matching failure that causes backtrack-
|
7470
|
-
ing
|
7471
|
-
advance
|
7472
|
-
occur
|
7473
|
-
matching
|
7474
|
-
right,
|
7475
|
-
(*PRUNE)
|
7475
|
+
ing to reach it. If the pattern is unanchored, the normal "bumpalong"
|
7476
|
+
advance to the next starting character then happens. Backtracking can
|
7477
|
+
occur as usual to the left of (*PRUNE), before it is reached, or when
|
7478
|
+
matching to the right of (*PRUNE), but if there is no match to the
|
7479
|
+
right, backtracking cannot cross (*PRUNE). In simple cases, the use of
|
7480
|
+
(*PRUNE) is just an alternative to an atomic group or possessive quan-
|
7476
7481
|
tifier, but there are some uses of (*PRUNE) that cannot be expressed in
|
7477
|
-
any
|
7482
|
+
any other way. In an anchored pattern (*PRUNE) has the same effect as
|
7478
7483
|
(*COMMIT).
|
7479
7484
|
|
7480
7485
|
The behaviour of (*PRUNE:NAME) is not the same as
|
7481
|
-
(*MARK:NAME)(*PRUNE).
|
7482
|
-
remembered
|
7486
|
+
(*MARK:NAME)(*PRUNE). It is like (*MARK:NAME) in that the name is
|
7487
|
+
remembered for passing back to the caller. However, (*SKIP:NAME)
|
7483
7488
|
searches only for names set with (*MARK).
|
7484
7489
|
|
7485
7490
|
(*SKIP)
|
7486
7491
|
|
7487
|
-
This
|
7488
|
-
the
|
7492
|
+
This verb, when given without a name, is like (*PRUNE), except that if
|
7493
|
+
the pattern is unanchored, the "bumpalong" advance is not to the next
|
7489
7494
|
character, but to the position in the subject where (*SKIP) was encoun-
|
7490
|
-
tered.
|
7495
|
+
tered. (*SKIP) signifies that whatever text was matched leading up to
|
7491
7496
|
it cannot be part of a successful match. Consider:
|
7492
7497
|
|
7493
7498
|
a+(*SKIP)b
|
7494
7499
|
|
7495
|
-
If
|
7496
|
-
(starting
|
7500
|
+
If the subject is "aaaac...", after the first match attempt fails
|
7501
|
+
(starting at the first character in the string), the starting point
|
7497
7502
|
skips on to start the next attempt at "c". Note that a possessive quan-
|
7498
|
-
tifer
|
7499
|
-
suppress
|
7500
|
-
attempt
|
7503
|
+
tifier does not have the same effect as this example; although it would
|
7504
|
+
suppress backtracking during the first match attempt, the second
|
7505
|
+
attempt would start at the second character instead of skipping on to
|
7501
7506
|
"c".
|
7502
7507
|
|
7503
7508
|
(*SKIP:NAME)
|
7504
7509
|
|
7505
7510
|
When (*SKIP) has an associated name, its behaviour is modified. When it
|
7506
7511
|
is triggered, the previous path through the pattern is searched for the
|
7507
|
-
most
|
7512
|
+
most recent (*MARK) that has the same name. If one is found, the
|
7508
7513
|
"bumpalong" advance is to the subject position that corresponds to that
|
7509
7514
|
(*MARK) instead of to where (*SKIP) was encountered. If no (*MARK) with
|
7510
7515
|
a matching name is found, the (*SKIP) is ignored.
|
7511
7516
|
|
7512
|
-
Note
|
7517
|
+
Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
|
7513
7518
|
ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME).
|
7514
7519
|
|
7515
7520
|
(*THEN) or (*THEN:NAME)
|
7516
7521
|
|
7517
|
-
This
|
7518
|
-
tracking
|
7519
|
-
within
|
7522
|
+
This verb causes a skip to the next innermost alternative when back-
|
7523
|
+
tracking reaches it. That is, it cancels any further backtracking
|
7524
|
+
within the current alternative. Its name comes from the observation
|
7520
7525
|
that it can be used for a pattern-based if-then-else block:
|
7521
7526
|
|
7522
7527
|
( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
|
7523
7528
|
|
7524
|
-
If
|
7525
|
-
after
|
7526
|
-
skips
|
7527
|
-
into
|
7528
|
-
quently
|
7529
|
-
track
|
7529
|
+
If the COND1 pattern matches, FOO is tried (and possibly further items
|
7530
|
+
after the end of the group if FOO succeeds); on failure, the matcher
|
7531
|
+
skips to the second alternative and tries COND2, without backtracking
|
7532
|
+
into COND1. If that succeeds and BAR fails, COND3 is tried. If subse-
|
7533
|
+
quently BAZ fails, there are no more alternatives, so there is a back-
|
7534
|
+
track to whatever came before the entire group. If (*THEN) is not
|
7530
7535
|
inside an alternation, it acts like (*PRUNE).
|
7531
7536
|
|
7532
|
-
The
|
7533
|
-
(*MARK:NAME)(*THEN).
|
7534
|
-
remembered
|
7537
|
+
The behaviour of (*THEN:NAME) is not the same as
|
7538
|
+
(*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is
|
7539
|
+
remembered for passing back to the caller. However, (*SKIP:NAME)
|
7535
7540
|
searches only for names set with (*MARK).
|
7536
7541
|
|
7537
|
-
A
|
7538
|
-
enclosing
|
7539
|
-
alternative.
|
7540
|
-
the
|
7541
|
-
complex
|
7542
|
+
A subpattern that does not contain a | character is just a part of the
|
7543
|
+
enclosing alternative; it is not a nested alternation with only one
|
7544
|
+
alternative. The effect of (*THEN) extends beyond such a subpattern to
|
7545
|
+
the enclosing alternative. Consider this pattern, where A, B, etc. are
|
7546
|
+
complex pattern fragments that do not contain any | characters at this
|
7542
7547
|
level:
|
7543
7548
|
|
7544
7549
|
A (B(*THEN)C) | D
|
7545
7550
|
|
7546
|
-
If
|
7551
|
+
If A and B are matched, but there is a failure in C, matching does not
|
7547
7552
|
backtrack into A; instead it moves to the next alternative, that is, D.
|
7548
|
-
However,
|
7553
|
+
However, if the subpattern containing (*THEN) is given an alternative,
|
7549
7554
|
it behaves differently:
|
7550
7555
|
|
7551
7556
|
A (B(*THEN)C | (*FAIL)) | D
|
7552
7557
|
|
7553
|
-
The
|
7558
|
+
The effect of (*THEN) is now confined to the inner subpattern. After a
|
7554
7559
|
failure in C, matching moves to (*FAIL), which causes the whole subpat-
|
7555
|
-
tern
|
7560
|
+
tern to fail because there are no more alternatives to try. In this
|
7556
7561
|
case, matching does now backtrack into A.
|
7557
7562
|
|
7558
|
-
Note
|
7559
|
-
alternatives,
|
7563
|
+
Note that a conditional subpattern is not considered as having two
|
7564
|
+
alternatives, because only one is ever used. In other words, the |
|
7560
7565
|
character in a conditional subpattern has a different meaning. Ignoring
|
7561
7566
|
white space, consider:
|
7562
7567
|
|
7563
7568
|
^.*? (?(?=a) a | b(*THEN)c )
|
7564
7569
|
|
7565
|
-
If
|
7566
|
-
ungreedy,
|
7567
|
-
then
|
7568
|
-
point,
|
7569
|
-
from
|
7570
|
+
If the subject is "ba", this pattern does not match. Because .*? is
|
7571
|
+
ungreedy, it initially matches zero characters. The condition (?=a)
|
7572
|
+
then fails, the character "b" is matched, but "c" is not. At this
|
7573
|
+
point, matching does not backtrack to .*? as might perhaps be expected
|
7574
|
+
from the presence of the | character. The conditional subpattern is
|
7570
7575
|
part of the single alternative that comprises the whole pattern, and so
|
7571
|
-
the
|
7576
|
+
the match fails. (If there was a backtrack into .*?, allowing it to
|
7572
7577
|
match "b", the match would succeed.)
|
7573
7578
|
|
7574
|
-
The
|
7579
|
+
The verbs just described provide four different "strengths" of control
|
7575
7580
|
when subsequent matching fails. (*THEN) is the weakest, carrying on the
|
7576
|
-
match
|
7577
|
-
at
|
7578
|
-
character
|
7581
|
+
match at the next alternative. (*PRUNE) comes next, failing the match
|
7582
|
+
at the current starting position, but allowing an advance to the next
|
7583
|
+
character (for an unanchored pattern). (*SKIP) is similar, except that
|
7579
7584
|
the advance may be more than one character. (*COMMIT) is the strongest,
|
7580
7585
|
causing the entire match to fail.
|
7581
7586
|
|
7582
7587
|
More than one backtracking verb
|
7583
7588
|
|
7584
|
-
If
|
7585
|
-
that
|
7589
|
+
If more than one backtracking verb is present in a pattern, the one
|
7590
|
+
that is backtracked onto first acts. For example, consider this pat-
|
7586
7591
|
tern, where A, B, etc. are complex pattern fragments:
|
7587
7592
|
|
7588
7593
|
(A(*COMMIT)B(*THEN)C|ABD)
|
7589
7594
|
|
7590
|
-
If
|
7595
|
+
If A matches but B fails, the backtrack to (*COMMIT) causes the entire
|
7591
7596
|
match to fail. However, if A and B match, but C fails, the backtrack to
|
7592
|
-
(*THEN)
|
7593
|
-
is
|
7594
|
-
two
|
7597
|
+
(*THEN) causes the next alternative (ABD) to be tried. This behaviour
|
7598
|
+
is consistent, but is not always the same as Perl's. It means that if
|
7599
|
+
two or more backtracking verbs appear in succession, all but the last
|
7595
7600
|
of them has no effect. Consider this example:
|
7596
7601
|
|
7597
7602
|
...(*COMMIT)(*PRUNE)...
|
7598
7603
|
|
7599
7604
|
If there is a matching failure to the right, backtracking onto (*PRUNE)
|
7600
|
-
causes
|
7605
|
+
causes it to be triggered, and its action is taken. There can never be
|
7601
7606
|
a backtrack onto (*COMMIT).
|
7602
7607
|
|
7603
7608
|
Backtracking verbs in repeated groups
|
7604
7609
|
|
7605
|
-
PCRE
|
7610
|
+
PCRE differs from Perl in its handling of backtracking verbs in
|
7606
7611
|
repeated groups. For example, consider:
|
7607
7612
|
|
7608
7613
|
/(a(*COMMIT)b)+ac/
|
7609
7614
|
|
7610
|
-
If
|
7615
|
+
If the subject is "abac", Perl matches, but PCRE fails because the
|
7611
7616
|
(*COMMIT) in the second repeat of the group acts.
|
7612
7617
|
|
7613
7618
|
Backtracking verbs in assertions
|
7614
7619
|
|
7615
|
-
(*FAIL)
|
7620
|
+
(*FAIL) in an assertion has its normal effect: it forces an immediate
|
7616
7621
|
backtrack.
|
7617
7622
|
|
7618
7623
|
(*ACCEPT) in a positive assertion causes the assertion to succeed with-
|
7619
|
-
out
|
7624
|
+
out any further processing. In a negative assertion, (*ACCEPT) causes
|
7620
7625
|
the assertion to fail without any further processing.
|
7621
7626
|
|
7622
|
-
The
|
7623
|
-
in
|
7624
|
-
alternative
|
7627
|
+
The other backtracking verbs are not treated specially if they appear
|
7628
|
+
in a positive assertion. In particular, (*THEN) skips to the next
|
7629
|
+
alternative in the innermost enclosing group that has alternations,
|
7625
7630
|
whether or not this is within the assertion.
|
7626
7631
|
|
7627
|
-
Negative
|
7628
|
-
changing
|
7632
|
+
Negative assertions are, however, different, in order to ensure that
|
7633
|
+
changing a positive assertion into a negative assertion changes its
|
7629
7634
|
result. Backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes a neg-
|
7630
7635
|
ative assertion to be true, without considering any further alternative
|
7631
7636
|
branches in the assertion. Backtracking into (*THEN) causes it to skip
|
7632
|
-
to
|
7633
|
-
haviour),
|
7637
|
+
to the next enclosing alternative within the assertion (the normal be-
|
7638
|
+
haviour), but if the assertion does not have such an alternative,
|
7634
7639
|
(*THEN) behaves like (*PRUNE).
|
7635
7640
|
|
7636
7641
|
Backtracking verbs in subroutines
|
7637
7642
|
|
7638
|
-
These
|
7643
|
+
These behaviours occur whether or not the subpattern is called recur-
|
7639
7644
|
sively. Perl's treatment of subroutines is different in some cases.
|
7640
7645
|
|
7641
|
-
(*FAIL)
|
7646
|
+
(*FAIL) in a subpattern called as a subroutine has its normal effect:
|
7642
7647
|
it forces an immediate backtrack.
|
7643
7648
|
|
7644
|
-
(*ACCEPT)
|
7645
|
-
match
|
7649
|
+
(*ACCEPT) in a subpattern called as a subroutine causes the subroutine
|
7650
|
+
match to succeed without any further processing. Matching then contin-
|
7646
7651
|
ues after the subroutine call.
|
7647
7652
|
|
7648
7653
|
(*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine
|
7649
7654
|
cause the subroutine match to fail.
|
7650
7655
|
|
7651
|
-
(*THEN)
|
7652
|
-
within
|
7656
|
+
(*THEN) skips to the next alternative in the innermost enclosing group
|
7657
|
+
within the subpattern that has alternatives. If there is no such group
|
7653
7658
|
within the subpattern, (*THEN) causes the subroutine match to fail.
|
7654
7659
|
|
7655
7660
|
|
7656
7661
|
SEE ALSO
|
7657
7662
|
|
7658
|
-
pcreapi(3),
|
7663
|
+
pcreapi(3), pcrecallout(3), pcrematching(3), pcresyntax(3), pcre(3),
|
7659
7664
|
pcre16(3), pcre32(3).
|
7660
7665
|
|
7661
7666
|
|
@@ -7668,8 +7673,8 @@ AUTHOR
|
|
7668
7673
|
|
7669
7674
|
REVISION
|
7670
7675
|
|
7671
|
-
Last updated:
|
7672
|
-
Copyright (c) 1997-
|
7676
|
+
Last updated: 23 October 2016
|
7677
|
+
Copyright (c) 1997-2016 University of Cambridge.
|
7673
7678
|
------------------------------------------------------------------------------
|
7674
7679
|
|
7675
7680
|
|
@@ -8360,7 +8365,11 @@ AVAILABILITY OF JIT SUPPORT
|
|
8360
8365
|
If your program may sometimes be linked with versions of PCRE that are
|
8361
8366
|
older than 8.20, but you want to use JIT when it is available, you can
|
8362
8367
|
test the values of PCRE_MAJOR and PCRE_MINOR, or the existence of a JIT
|
8363
|
-
macro such as PCRE_CONFIG_JIT, for compile-time control of
|
8368
|
+
macro such as PCRE_CONFIG_JIT, for compile-time control of your code.
|
8369
|
+
Also beware that the pcre_jit_exec() function was not available at all
|
8370
|
+
before 8.32, and may not be available at all if PCRE isn't compiled
|
8371
|
+
with --enable-jit. See the "JIT FAST PATH API" section below for
|
8372
|
+
details.
|
8364
8373
|
|
8365
8374
|
|
8366
8375
|
SIMPLE USE OF JIT
|
@@ -8402,6 +8411,18 @@ SIMPLE USE OF JIT
|
|
8402
8411
|
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
|
8403
8412
|
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
|
8404
8413
|
|
8414
|
+
If using pcre_jit_exec() and supporting a pre-8.32 version of PCRE, you
|
8415
|
+
can insert:
|
8416
|
+
|
8417
|
+
#if PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32)
|
8418
|
+
pcre_jit_exec(...);
|
8419
|
+
#else
|
8420
|
+
pcre_exec(...);
|
8421
|
+
#endif
|
8422
|
+
|
8423
|
+
but as described in the "JIT FAST PATH API" section below this assumes
|
8424
|
+
versions 8.32 and later are compiled with --enable-jit, which may break.
|
8425
|
+
|
8405
8426
|
The JIT compiler generates different optimized code for each of the
|
8406
8427
|
three modes (normal, soft partial, hard partial). When pcre_exec() is
|
8407
8428
|
called, the appropriate code is run if it is available. Otherwise, the
|
@@ -8691,6 +8712,33 @@ JIT FAST PATH API
|
|
8691
8712
|
Bypassing the sanity checks and the pcre_exec() wrapping can give
|
8692
8713
|
speedups of more than 10%.
|
8693
8714
|
|
8715
|
+
Note that the pcre_jit_exec() function is not available in versions of
|
8716
|
+
PCRE before 8.32 (released in November 2012). If you need to support
|
8717
|
+
versions that old you must either use the slower pcre_exec(), or switch
|
8718
|
+
between the two codepaths by checking the values of PCRE_MAJOR and
|
8719
|
+
PCRE_MINOR.
|
8720
|
+
|
8721
|
+
Due to an unfortunate implementation oversight, even in versions 8.32
|
8722
|
+
and later there will be no pcre_jit_exec() stub function defined when
|
8723
|
+
PCRE is compiled with --disable-jit, which is the default, and there's
|
8724
|
+
no way to detect whether PCRE was compiled with --enable-jit via a
|
8725
|
+
macro.
|
8726
|
+
|
8727
|
+
If you need to support versions older than 8.32, or versions that may
|
8728
|
+
not build with --enable-jit, you must either use the slower
|
8729
|
+
pcre_exec(), or switch between the two codepaths by checking the values
|
8730
|
+
of PCRE_MAJOR and PCRE_MINOR.
|
8731
|
+
|
8732
|
+
Switching between the two by checking the version assumes that all the
|
8733
|
+
versions being targeted are built with --enable-jit. To also support
|
8734
|
+
builds that may use --disable-jit either pcre_exec() must be used, or a
|
8735
|
+
compile-time check for JIT via pcre_config() (which assumes the runtime
|
8736
|
+
environment will be the same), or as the Git project decided to do,
|
8737
|
+
simply assume that pcre_jit_exec() is present in 8.32 or later unless a
|
8738
|
+
compile-time flag is provided, see the "grep: un-break building with
|
8739
|
+
PCRE >= 8.32 without --enable-jit" commit in git.git for an example of
|
8740
|
+
that.
|
8741
|
+
|
8694
8742
|
|
8695
8743
|
SEE ALSO
|
8696
8744
|
|
@@ -8706,8 +8754,8 @@ AUTHOR
|
|
8706
8754
|
|
8707
8755
|
REVISION
|
8708
8756
|
|
8709
|
-
Last updated:
|
8710
|
-
Copyright (c) 1997-
|
8757
|
+
Last updated: 05 July 2017
|
8758
|
+
Copyright (c) 1997-2017 University of Cambridge.
|
8711
8759
|
------------------------------------------------------------------------------
|
8712
8760
|
|
8713
8761
|
|