glib2 0.90.2-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +3115 -0
- data/README +40 -0
- data/Rakefile +69 -0
- data/ext/glib2/depend +10 -0
- data/ext/glib2/extconf.rb +61 -0
- data/ext/glib2/glib2.def +89 -0
- data/ext/glib2/rbgcompat.h +30 -0
- data/ext/glib2/rbglib.c +330 -0
- data/ext/glib2/rbglib.h +96 -0
- data/ext/glib2/rbglib_bookmarkfile.c +595 -0
- data/ext/glib2/rbglib_completion.c +192 -0
- data/ext/glib2/rbglib_convert.c +195 -0
- data/ext/glib2/rbglib_error.c +95 -0
- data/ext/glib2/rbglib_fileutils.c +83 -0
- data/ext/glib2/rbglib_i18n.c +44 -0
- data/ext/glib2/rbglib_int64.c +157 -0
- data/ext/glib2/rbglib_iochannel.c +883 -0
- data/ext/glib2/rbglib_keyfile.c +846 -0
- data/ext/glib2/rbglib_maincontext.c +915 -0
- data/ext/glib2/rbglib_mainloop.c +87 -0
- data/ext/glib2/rbglib_messages.c +150 -0
- data/ext/glib2/rbglib_pollfd.c +111 -0
- data/ext/glib2/rbglib_shell.c +68 -0
- data/ext/glib2/rbglib_source.c +190 -0
- data/ext/glib2/rbglib_spawn.c +345 -0
- data/ext/glib2/rbglib_threads.c +51 -0
- data/ext/glib2/rbglib_timer.c +127 -0
- data/ext/glib2/rbglib_unicode.c +611 -0
- data/ext/glib2/rbglib_utils.c +386 -0
- data/ext/glib2/rbglib_win32.c +136 -0
- data/ext/glib2/rbgobj_boxed.c +251 -0
- data/ext/glib2/rbgobj_closure.c +337 -0
- data/ext/glib2/rbgobj_convert.c +167 -0
- data/ext/glib2/rbgobj_enums.c +961 -0
- data/ext/glib2/rbgobj_fundamental.c +30 -0
- data/ext/glib2/rbgobj_object.c +892 -0
- data/ext/glib2/rbgobj_param.c +390 -0
- data/ext/glib2/rbgobj_paramspecs.c +305 -0
- data/ext/glib2/rbgobj_signal.c +963 -0
- data/ext/glib2/rbgobj_strv.c +61 -0
- data/ext/glib2/rbgobj_type.c +851 -0
- data/ext/glib2/rbgobj_typeinstance.c +121 -0
- data/ext/glib2/rbgobj_typeinterface.c +148 -0
- data/ext/glib2/rbgobj_typemodule.c +66 -0
- data/ext/glib2/rbgobj_typeplugin.c +49 -0
- data/ext/glib2/rbgobj_value.c +313 -0
- data/ext/glib2/rbgobj_valuearray.c +59 -0
- data/ext/glib2/rbgobj_valuetypes.c +298 -0
- data/ext/glib2/rbgobject.c +406 -0
- data/ext/glib2/rbgobject.h +265 -0
- data/ext/glib2/rbgprivate.h +88 -0
- data/ext/glib2/rbgutil.c +316 -0
- data/ext/glib2/rbgutil.h +82 -0
- data/ext/glib2/rbgutil_callback.c +231 -0
- data/extconf.rb +49 -0
- data/lib/1.8/glib2.so +0 -0
- data/lib/1.9/glib2.so +0 -0
- data/lib/glib-mkenums.rb +199 -0
- data/lib/glib2.rb +239 -0
- data/lib/gnome2-win32-binary-downloader.rb +92 -0
- data/lib/mkmf-gnome2.rb +377 -0
- data/sample/bookmarkfile.rb +66 -0
- data/sample/completion.rb +45 -0
- data/sample/idle.rb +41 -0
- data/sample/iochannel.rb +44 -0
- data/sample/keyfile.rb +62 -0
- data/sample/shell.rb +36 -0
- data/sample/spawn.rb +25 -0
- data/sample/timeout.rb +28 -0
- data/sample/timeout2.rb +35 -0
- data/sample/timer.rb +40 -0
- data/sample/type-register.rb +103 -0
- data/sample/type-register2.rb +104 -0
- data/sample/utils.rb +54 -0
- data/test/glib-test-init.rb +6 -0
- data/test/glib-test-utils.rb +12 -0
- data/test/run-test.rb +21 -0
- data/test/test_enum.rb +99 -0
- data/test/test_file_utils.rb +15 -0
- data/test/test_glib2.rb +120 -0
- data/test/test_iochannel.rb +275 -0
- data/test/test_key_file.rb +38 -0
- data/test/test_mkenums.rb +25 -0
- data/test/test_signal.rb +20 -0
- data/test/test_timeout.rb +28 -0
- data/test/test_unicode.rb +369 -0
- data/test/test_utils.rb +37 -0
- data/test/test_win32.rb +13 -0
- data/test-unit/COPYING +56 -0
- data/test-unit/GPL +340 -0
- data/test-unit/History.txt +232 -0
- data/test-unit/Manifest.txt +110 -0
- data/test-unit/PSFL +271 -0
- data/test-unit/README.txt +75 -0
- data/test-unit/Rakefile +53 -0
- data/test-unit/TODO +5 -0
- data/test-unit/bin/testrb +5 -0
- data/test-unit/html/bar.png +0 -0
- data/test-unit/html/bar.svg +153 -0
- data/test-unit/html/developer.png +0 -0
- data/test-unit/html/developer.svg +469 -0
- data/test-unit/html/famfamfam-logo.png +0 -0
- data/test-unit/html/favicon.ico +0 -0
- data/test-unit/html/favicon.png +0 -0
- data/test-unit/html/favicon.svg +82 -0
- data/test-unit/html/heading-mark.png +0 -0
- data/test-unit/html/heading-mark.svg +393 -0
- data/test-unit/html/index.html +247 -0
- data/test-unit/html/index.html.ja +270 -0
- data/test-unit/html/install.png +0 -0
- data/test-unit/html/install.svg +636 -0
- data/test-unit/html/jp.png +0 -0
- data/test-unit/html/kinotan-failure.png +0 -0
- data/test-unit/html/kinotan-pass.png +0 -0
- data/test-unit/html/logo.png +0 -0
- data/test-unit/html/logo.svg +483 -0
- data/test-unit/html/reference.png +0 -0
- data/test-unit/html/rubyforge.png +0 -0
- data/test-unit/html/tango-logo.png +0 -0
- data/test-unit/html/test-unit.css +339 -0
- data/test-unit/html/tutorial.png +0 -0
- data/test-unit/html/tutorial.svg +559 -0
- data/test-unit/html/us.png +0 -0
- data/test-unit/images/color-diff.png +0 -0
- data/test-unit/lib/test/unit/assertionfailederror.rb +25 -0
- data/test-unit/lib/test/unit/assertions.rb +1334 -0
- data/test-unit/lib/test/unit/attribute.rb +125 -0
- data/test-unit/lib/test/unit/autorunner.rb +363 -0
- data/test-unit/lib/test/unit/collector/descendant.rb +23 -0
- data/test-unit/lib/test/unit/collector/dir.rb +108 -0
- data/test-unit/lib/test/unit/collector/load.rb +144 -0
- data/test-unit/lib/test/unit/collector/objectspace.rb +34 -0
- data/test-unit/lib/test/unit/collector.rb +36 -0
- data/test-unit/lib/test/unit/color-scheme.rb +106 -0
- data/test-unit/lib/test/unit/color.rb +96 -0
- data/test-unit/lib/test/unit/diff.rb +740 -0
- data/test-unit/lib/test/unit/error.rb +130 -0
- data/test-unit/lib/test/unit/exceptionhandler.rb +39 -0
- data/test-unit/lib/test/unit/failure.rb +136 -0
- data/test-unit/lib/test/unit/fixture.rb +176 -0
- data/test-unit/lib/test/unit/notification.rb +129 -0
- data/test-unit/lib/test/unit/omission.rb +191 -0
- data/test-unit/lib/test/unit/pending.rb +150 -0
- data/test-unit/lib/test/unit/priority.rb +180 -0
- data/test-unit/lib/test/unit/runner/console.rb +52 -0
- data/test-unit/lib/test/unit/runner/emacs.rb +8 -0
- data/test-unit/lib/test/unit/runner/tap.rb +8 -0
- data/test-unit/lib/test/unit/testcase.rb +483 -0
- data/test-unit/lib/test/unit/testresult.rb +121 -0
- data/test-unit/lib/test/unit/testsuite.rb +110 -0
- data/test-unit/lib/test/unit/ui/console/outputlevel.rb +14 -0
- data/test-unit/lib/test/unit/ui/console/testrunner.rb +430 -0
- data/test-unit/lib/test/unit/ui/emacs/testrunner.rb +63 -0
- data/test-unit/lib/test/unit/ui/tap/testrunner.rb +82 -0
- data/test-unit/lib/test/unit/ui/testrunner.rb +53 -0
- data/test-unit/lib/test/unit/ui/testrunnermediator.rb +77 -0
- data/test-unit/lib/test/unit/ui/testrunnerutilities.rb +41 -0
- data/test-unit/lib/test/unit/util/backtracefilter.rb +42 -0
- data/test-unit/lib/test/unit/util/method-owner-finder.rb +28 -0
- data/test-unit/lib/test/unit/util/observable.rb +90 -0
- data/test-unit/lib/test/unit/util/output.rb +31 -0
- data/test-unit/lib/test/unit/util/procwrapper.rb +48 -0
- data/test-unit/lib/test/unit/version.rb +7 -0
- data/test-unit/lib/test/unit.rb +328 -0
- data/test-unit/sample/adder.rb +13 -0
- data/test-unit/sample/subtracter.rb +12 -0
- data/test-unit/sample/test_adder.rb +20 -0
- data/test-unit/sample/test_subtracter.rb +20 -0
- data/test-unit/sample/test_user.rb +23 -0
- data/test-unit/test/collector/test-descendant.rb +133 -0
- data/test-unit/test/collector/test-load.rb +442 -0
- data/test-unit/test/collector/test_dir.rb +406 -0
- data/test-unit/test/collector/test_objectspace.rb +100 -0
- data/test-unit/test/run-test.rb +15 -0
- data/test-unit/test/test-attribute.rb +86 -0
- data/test-unit/test/test-color-scheme.rb +69 -0
- data/test-unit/test/test-color.rb +47 -0
- data/test-unit/test/test-diff.rb +518 -0
- data/test-unit/test/test-emacs-runner.rb +60 -0
- data/test-unit/test/test-fixture.rb +287 -0
- data/test-unit/test/test-notification.rb +33 -0
- data/test-unit/test/test-omission.rb +81 -0
- data/test-unit/test/test-pending.rb +70 -0
- data/test-unit/test/test-priority.rb +119 -0
- data/test-unit/test/test-testcase.rb +544 -0
- data/test-unit/test/test_assertions.rb +1197 -0
- data/test-unit/test/test_error.rb +26 -0
- data/test-unit/test/test_failure.rb +33 -0
- data/test-unit/test/test_testresult.rb +113 -0
- data/test-unit/test/test_testsuite.rb +129 -0
- data/test-unit/test/testunit-test-util.rb +14 -0
- data/test-unit/test/ui/test_tap.rb +33 -0
- data/test-unit/test/ui/test_testrunmediator.rb +20 -0
- data/test-unit/test/util/test-method-owner-finder.rb +38 -0
- data/test-unit/test/util/test-output.rb +11 -0
- data/test-unit/test/util/test_backtracefilter.rb +41 -0
- data/test-unit/test/util/test_observable.rb +102 -0
- data/test-unit/test/util/test_procwrapper.rb +36 -0
- data/vendor/local/bin/envsubst.exe +0 -0
- data/vendor/local/bin/gettext.exe +0 -0
- data/vendor/local/bin/gettext.sh +123 -0
- data/vendor/local/bin/glib-genmarshal.exe +0 -0
- data/vendor/local/bin/glib-gettextize +188 -0
- data/vendor/local/bin/glib-mkenums +511 -0
- data/vendor/local/bin/gobject-query.exe +0 -0
- data/vendor/local/bin/gspawn-win32-helper-console.exe +0 -0
- data/vendor/local/bin/gspawn-win32-helper.exe +0 -0
- data/vendor/local/bin/intl.dll +0 -0
- data/vendor/local/bin/libasprintf-0.dll +0 -0
- data/vendor/local/bin/libgcc_s_dw2-1.dll +0 -0
- data/vendor/local/bin/libgio-2.0-0.dll +0 -0
- data/vendor/local/bin/libglib-2.0-0.dll +0 -0
- data/vendor/local/bin/libgmodule-2.0-0.dll +0 -0
- data/vendor/local/bin/libgobject-2.0-0.dll +0 -0
- data/vendor/local/bin/libgthread-2.0-0.dll +0 -0
- data/vendor/local/bin/ngettext.exe +0 -0
- data/vendor/local/include/autosprintf.h +66 -0
- data/vendor/local/include/glib-2.0/gio/gappinfo.h +237 -0
- data/vendor/local/include/glib-2.0/gio/gasyncinitable.h +119 -0
- data/vendor/local/include/glib-2.0/gio/gasyncresult.h +73 -0
- data/vendor/local/include/glib-2.0/gio/gbufferedinputstream.h +123 -0
- data/vendor/local/include/glib-2.0/gio/gbufferedoutputstream.h +82 -0
- data/vendor/local/include/glib-2.0/gio/gcancellable.h +103 -0
- data/vendor/local/include/glib-2.0/gio/gcharsetconverter.h +60 -0
- data/vendor/local/include/glib-2.0/gio/gcontenttype.h +57 -0
- data/vendor/local/include/glib-2.0/gio/gconverter.h +95 -0
- data/vendor/local/include/glib-2.0/gio/gconverterinputstream.h +80 -0
- data/vendor/local/include/glib-2.0/gio/gconverteroutputstream.h +80 -0
- data/vendor/local/include/glib-2.0/gio/gdatainputstream.h +133 -0
- data/vendor/local/include/glib-2.0/gio/gdataoutputstream.h +116 -0
- data/vendor/local/include/glib-2.0/gio/gdrive.h +225 -0
- data/vendor/local/include/glib-2.0/gio/gemblem.h +58 -0
- data/vendor/local/include/glib-2.0/gio/gemblemedicon.h +62 -0
- data/vendor/local/include/glib-2.0/gio/gfile.h +1017 -0
- data/vendor/local/include/glib-2.0/gio/gfileattribute.h +77 -0
- data/vendor/local/include/glib-2.0/gio/gfileenumerator.h +133 -0
- data/vendor/local/include/glib-2.0/gio/gfileicon.h +56 -0
- data/vendor/local/include/glib-2.0/gio/gfileinfo.h +951 -0
- data/vendor/local/include/glib-2.0/gio/gfileinputstream.h +112 -0
- data/vendor/local/include/glib-2.0/gio/gfileiostream.h +118 -0
- data/vendor/local/include/glib-2.0/gio/gfilemonitor.h +95 -0
- data/vendor/local/include/glib-2.0/gio/gfilenamecompleter.h +76 -0
- data/vendor/local/include/glib-2.0/gio/gfileoutputstream.h +119 -0
- data/vendor/local/include/glib-2.0/gio/gfilterinputstream.h +76 -0
- data/vendor/local/include/glib-2.0/gio/gfilteroutputstream.h +76 -0
- data/vendor/local/include/glib-2.0/gio/gicon.h +91 -0
- data/vendor/local/include/glib-2.0/gio/ginetaddress.h +103 -0
- data/vendor/local/include/glib-2.0/gio/ginetsocketaddress.h +69 -0
- data/vendor/local/include/glib-2.0/gio/ginitable.h +96 -0
- data/vendor/local/include/glib-2.0/gio/ginputstream.h +172 -0
- data/vendor/local/include/glib-2.0/gio/gio.h +101 -0
- data/vendor/local/include/glib-2.0/gio/gioenums.h +703 -0
- data/vendor/local/include/glib-2.0/gio/gioenumtypes.h +79 -0
- data/vendor/local/include/glib-2.0/gio/gioerror.h +48 -0
- data/vendor/local/include/glib-2.0/gio/giomodule.h +132 -0
- data/vendor/local/include/glib-2.0/gio/gioscheduler.h +52 -0
- data/vendor/local/include/glib-2.0/gio/giostream.h +112 -0
- data/vendor/local/include/glib-2.0/gio/giotypes.h +339 -0
- data/vendor/local/include/glib-2.0/gio/gloadableicon.h +97 -0
- data/vendor/local/include/glib-2.0/gio/gmemoryinputstream.h +82 -0
- data/vendor/local/include/glib-2.0/gio/gmemoryoutputstream.h +97 -0
- data/vendor/local/include/glib-2.0/gio/gmount.h +242 -0
- data/vendor/local/include/glib-2.0/gio/gmountoperation.h +123 -0
- data/vendor/local/include/glib-2.0/gio/gnativevolumemonitor.h +62 -0
- data/vendor/local/include/glib-2.0/gio/gnetworkaddress.h +69 -0
- data/vendor/local/include/glib-2.0/gio/gnetworkservice.h +69 -0
- data/vendor/local/include/glib-2.0/gio/goutputstream.h +207 -0
- data/vendor/local/include/glib-2.0/gio/gresolver.h +167 -0
- data/vendor/local/include/glib-2.0/gio/gseekable.h +99 -0
- data/vendor/local/include/glib-2.0/gio/gsimpleasyncresult.h +125 -0
- data/vendor/local/include/glib-2.0/gio/gsocket.h +176 -0
- data/vendor/local/include/glib-2.0/gio/gsocketaddress.h +79 -0
- data/vendor/local/include/glib-2.0/gio/gsocketaddressenumerator.h +89 -0
- data/vendor/local/include/glib-2.0/gio/gsocketclient.h +130 -0
- data/vendor/local/include/glib-2.0/gio/gsocketconnectable.h +68 -0
- data/vendor/local/include/glib-2.0/gio/gsocketconnection.h +91 -0
- data/vendor/local/include/glib-2.0/gio/gsocketcontrolmessage.h +105 -0
- data/vendor/local/include/glib-2.0/gio/gsocketlistener.h +138 -0
- data/vendor/local/include/glib-2.0/gio/gsocketservice.h +88 -0
- data/vendor/local/include/glib-2.0/gio/gsrvtarget.h +52 -0
- data/vendor/local/include/glib-2.0/gio/gtcpconnection.h +68 -0
- data/vendor/local/include/glib-2.0/gio/gthemedicon.h +63 -0
- data/vendor/local/include/glib-2.0/gio/gthreadedsocketservice.h +81 -0
- data/vendor/local/include/glib-2.0/gio/gvfs.h +125 -0
- data/vendor/local/include/glib-2.0/gio/gvolume.h +211 -0
- data/vendor/local/include/glib-2.0/gio/gvolumemonitor.h +151 -0
- data/vendor/local/include/glib-2.0/gio/gzlibcompressor.h +55 -0
- data/vendor/local/include/glib-2.0/gio/gzlibdecompressor.h +54 -0
- data/vendor/local/include/glib-2.0/glib/galloca.h +63 -0
- data/vendor/local/include/glib-2.0/glib/garray.h +179 -0
- data/vendor/local/include/glib-2.0/glib/gasyncqueue.h +120 -0
- data/vendor/local/include/glib-2.0/glib/gatomic.h +85 -0
- data/vendor/local/include/glib-2.0/glib/gbacktrace.h +68 -0
- data/vendor/local/include/glib-2.0/glib/gbase64.h +57 -0
- data/vendor/local/include/glib-2.0/glib/gbitlock.h +43 -0
- data/vendor/local/include/glib-2.0/glib/gbookmarkfile.h +191 -0
- data/vendor/local/include/glib-2.0/glib/gcache.h +69 -0
- data/vendor/local/include/glib-2.0/glib/gchecksum.h +86 -0
- data/vendor/local/include/glib-2.0/glib/gcompletion.h +77 -0
- data/vendor/local/include/glib-2.0/glib/gconvert.h +138 -0
- data/vendor/local/include/glib-2.0/glib/gdataset.h +122 -0
- data/vendor/local/include/glib-2.0/glib/gdate.h +263 -0
- data/vendor/local/include/glib-2.0/glib/gdir.h +52 -0
- data/vendor/local/include/glib-2.0/glib/gerror.h +98 -0
- data/vendor/local/include/glib-2.0/glib/gfileutils.h +128 -0
- data/vendor/local/include/glib-2.0/glib/ghash.h +166 -0
- data/vendor/local/include/glib-2.0/glib/ghook.h +181 -0
- data/vendor/local/include/glib-2.0/glib/ghostutils.h +40 -0
- data/vendor/local/include/glib-2.0/glib/gi18n-lib.h +38 -0
- data/vendor/local/include/glib-2.0/glib/gi18n.h +34 -0
- data/vendor/local/include/glib-2.0/glib/giochannel.h +366 -0
- data/vendor/local/include/glib-2.0/glib/gkeyfile.h +250 -0
- data/vendor/local/include/glib-2.0/glib/glist.h +120 -0
- data/vendor/local/include/glib-2.0/glib/gmacros.h +277 -0
- data/vendor/local/include/glib-2.0/glib/gmain.h +304 -0
- data/vendor/local/include/glib-2.0/glib/gmappedfile.h +49 -0
- data/vendor/local/include/glib-2.0/glib/gmarkup.h +163 -0
- data/vendor/local/include/glib-2.0/glib/gmem.h +203 -0
- data/vendor/local/include/glib-2.0/glib/gmessages.h +343 -0
- data/vendor/local/include/glib-2.0/glib/gnode.h +288 -0
- data/vendor/local/include/glib-2.0/glib/goption.h +370 -0
- data/vendor/local/include/glib-2.0/glib/gpattern.h +49 -0
- data/vendor/local/include/glib-2.0/glib/gpoll.h +93 -0
- data/vendor/local/include/glib-2.0/glib/gprimes.h +51 -0
- data/vendor/local/include/glib-2.0/glib/gprintf.h +52 -0
- data/vendor/local/include/glib-2.0/glib/gqsort.h +46 -0
- data/vendor/local/include/glib-2.0/glib/gquark.h +52 -0
- data/vendor/local/include/glib-2.0/glib/gqueue.h +127 -0
- data/vendor/local/include/glib-2.0/glib/grand.h +85 -0
- data/vendor/local/include/glib-2.0/glib/gregex.h +243 -0
- data/vendor/local/include/glib-2.0/glib/grel.h +97 -0
- data/vendor/local/include/glib-2.0/glib/gscanner.h +278 -0
- data/vendor/local/include/glib-2.0/glib/gsequence.h +128 -0
- data/vendor/local/include/glib-2.0/glib/gshell.h +55 -0
- data/vendor/local/include/glib-2.0/glib/gslice.h +86 -0
- data/vendor/local/include/glib-2.0/glib/gslist.h +114 -0
- data/vendor/local/include/glib-2.0/glib/gspawn.h +139 -0
- data/vendor/local/include/glib-2.0/glib/gstdio.h +159 -0
- data/vendor/local/include/glib-2.0/glib/gstrfuncs.h +267 -0
- data/vendor/local/include/glib-2.0/glib/gstring.h +178 -0
- data/vendor/local/include/glib-2.0/glib/gtestutils.h +292 -0
- data/vendor/local/include/glib-2.0/glib/gthread.h +408 -0
- data/vendor/local/include/glib-2.0/glib/gthreadpool.h +114 -0
- data/vendor/local/include/glib-2.0/glib/gtimer.h +65 -0
- data/vendor/local/include/glib-2.0/glib/gtree.h +91 -0
- data/vendor/local/include/glib-2.0/glib/gtypes.h +451 -0
- data/vendor/local/include/glib-2.0/glib/gunicode.h +404 -0
- data/vendor/local/include/glib-2.0/glib/gurifuncs.h +81 -0
- data/vendor/local/include/glib-2.0/glib/gutils.h +490 -0
- data/vendor/local/include/glib-2.0/glib/gvariant.h +224 -0
- data/vendor/local/include/glib-2.0/glib/gvarianttype.h +305 -0
- data/vendor/local/include/glib-2.0/glib/gwin32.h +114 -0
- data/vendor/local/include/glib-2.0/glib-object.h +41 -0
- data/vendor/local/include/glib-2.0/glib.h +97 -0
- data/vendor/local/include/glib-2.0/gmodule.h +101 -0
- data/vendor/local/include/glib-2.0/gobject/gboxed.h +236 -0
- data/vendor/local/include/glib-2.0/gobject/gclosure.h +251 -0
- data/vendor/local/include/glib-2.0/gobject/genums.h +261 -0
- data/vendor/local/include/glib-2.0/gobject/gmarshal.h +169 -0
- data/vendor/local/include/glib-2.0/gobject/gobject.h +562 -0
- data/vendor/local/include/glib-2.0/gobject/gobjectnotifyqueue.c +166 -0
- data/vendor/local/include/glib-2.0/gobject/gparam.h +410 -0
- data/vendor/local/include/glib-2.0/gobject/gparamspecs.h +1083 -0
- data/vendor/local/include/glib-2.0/gobject/gsignal.h +509 -0
- data/vendor/local/include/glib-2.0/gobject/gsourceclosure.h +51 -0
- data/vendor/local/include/glib-2.0/gobject/gtype.h +1608 -0
- data/vendor/local/include/glib-2.0/gobject/gtypemodule.h +263 -0
- data/vendor/local/include/glib-2.0/gobject/gtypeplugin.h +134 -0
- data/vendor/local/include/glib-2.0/gobject/gvalue.h +159 -0
- data/vendor/local/include/glib-2.0/gobject/gvaluearray.h +77 -0
- data/vendor/local/include/glib-2.0/gobject/gvaluecollector.h +222 -0
- data/vendor/local/include/glib-2.0/gobject/gvaluetypes.h +243 -0
- data/vendor/local/include/libintl.h +464 -0
- data/vendor/local/lib/GNU.Gettext.dll +0 -0
- data/vendor/local/lib/gio-2.0.def +800 -0
- data/vendor/local/lib/gio-2.0.lib +0 -0
- data/vendor/local/lib/glib-2.0/include/glibconfig.h +284 -0
- data/vendor/local/lib/glib-2.0.def +1381 -0
- data/vendor/local/lib/glib-2.0.lib +0 -0
- data/vendor/local/lib/gmodule-2.0.def +11 -0
- data/vendor/local/lib/gmodule-2.0.lib +0 -0
- data/vendor/local/lib/gobject-2.0.def +349 -0
- data/vendor/local/lib/gobject-2.0.lib +0 -0
- data/vendor/local/lib/gthread-2.0.def +3 -0
- data/vendor/local/lib/gthread-2.0.lib +0 -0
- data/vendor/local/lib/intl.lib +0 -0
- data/vendor/local/lib/libasprintf.dll.a +0 -0
- data/vendor/local/lib/libgio-2.0.dll.a +0 -0
- data/vendor/local/lib/libglib-2.0.dll.a +0 -0
- data/vendor/local/lib/libgmodule-2.0.dll.a +0 -0
- data/vendor/local/lib/libgobject-2.0.dll.a +0 -0
- data/vendor/local/lib/libgthread-2.0.dll.a +0 -0
- data/vendor/local/lib/libintl.def +31 -0
- data/vendor/local/lib/libintl.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/gio-2.0.pc +14 -0
- data/vendor/local/lib/pkgconfig/glib-2.0.pc +15 -0
- data/vendor/local/lib/pkgconfig/gmodule-2.0.pc +14 -0
- data/vendor/local/lib/pkgconfig/gmodule-no-export-2.0.pc +14 -0
- data/vendor/local/lib/pkgconfig/gobject-2.0.pc +11 -0
- data/vendor/local/lib/pkgconfig/gthread-2.0.pc +11 -0
- data/vendor/local/manifest/gettext-runtime-dev_0.18.1.1-2_win32.mft +81 -0
- data/vendor/local/manifest/gettext-runtime_0.18.1.1-2_win32.mft +3 -0
- data/vendor/local/manifest/glib-dev_2.24.2-2_win32.mft +491 -0
- data/vendor/local/manifest/glib_2.24.2-2_win32.mft +101 -0
- data/vendor/local/share/aclocal/glib-2.0.m4 +211 -0
- data/vendor/local/share/aclocal/glib-gettext.m4 +432 -0
- data/vendor/local/share/doc/gettext/bind_textdomain_codeset.3.html +165 -0
- data/vendor/local/share/doc/gettext/bindtextdomain.3.html +160 -0
- data/vendor/local/share/doc/gettext/csharpdoc/GNU_Gettext.html +8 -0
- data/vendor/local/share/doc/gettext/csharpdoc/GNU_Gettext_GettextResourceManager.html +305 -0
- data/vendor/local/share/doc/gettext/csharpdoc/GNU_Gettext_GettextResourceSet.html +356 -0
- data/vendor/local/share/doc/gettext/csharpdoc/begin.html +11 -0
- data/vendor/local/share/doc/gettext/csharpdoc/index.html +10 -0
- data/vendor/local/share/doc/gettext/csharpdoc/namespaces.html +6 -0
- data/vendor/local/share/doc/gettext/envsubst.1.html +213 -0
- data/vendor/local/share/doc/gettext/gettext.1.html +266 -0
- data/vendor/local/share/doc/gettext/gettext.3.html +186 -0
- data/vendor/local/share/doc/gettext/ngettext.1.html +280 -0
- data/vendor/local/share/doc/gettext/ngettext.3.html +143 -0
- data/vendor/local/share/doc/gettext/textdomain.3.html +150 -0
- data/vendor/local/share/doc/glib-2.24.2/COPYING +482 -0
- data/vendor/local/share/doc/glib-dev-2.24.2/COPYING +482 -0
- data/vendor/local/share/doc/libasprintf/autosprintf_all.html +174 -0
- data/vendor/local/share/glib-2.0/gdb/glib.py +252 -0
- data/vendor/local/share/glib-2.0/gdb/gobject.py +305 -0
- data/vendor/local/share/glib-2.0/gettext/mkinstalldirs +111 -0
- data/vendor/local/share/glib-2.0/gettext/po/Makefile.in.in +277 -0
- data/vendor/local/share/gtk-doc/html/gio/GAppInfo.html +1390 -0
- data/vendor/local/share/gtk-doc/html/gio/GAsyncInitable.html +531 -0
- data/vendor/local/share/gtk-doc/html/gio/GAsyncResult.html +343 -0
- data/vendor/local/share/gtk-doc/html/gio/GBufferedInputStream.html +568 -0
- data/vendor/local/share/gtk-doc/html/gio/GBufferedOutputStream.html +295 -0
- data/vendor/local/share/gtk-doc/html/gio/GCancellable.html +620 -0
- data/vendor/local/share/gtk-doc/html/gio/GCharsetConverter.html +245 -0
- data/vendor/local/share/gtk-doc/html/gio/GConverter.html +388 -0
- data/vendor/local/share/gtk-doc/html/gio/GDataInputStream.html +993 -0
- data/vendor/local/share/gtk-doc/html/gio/GDataOutputStream.html +544 -0
- data/vendor/local/share/gtk-doc/html/gio/GDrive.html +1416 -0
- data/vendor/local/share/gtk-doc/html/gio/GEmblem.html +272 -0
- data/vendor/local/share/gtk-doc/html/gio/GEmblemedIcon.html +206 -0
- data/vendor/local/share/gtk-doc/html/gio/GFile.html +7450 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileDescriptorBased.html +115 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileEnumerator.html +520 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileIOStream.html +312 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileIcon.html +155 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileInfo.html +3116 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileInputStream.html +262 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileMonitor.html +353 -0
- data/vendor/local/share/gtk-doc/html/gio/GFileOutputStream.html +301 -0
- data/vendor/local/share/gtk-doc/html/gio/GFilenameCompleter.html +217 -0
- data/vendor/local/share/gtk-doc/html/gio/GFilterInputStream.html +182 -0
- data/vendor/local/share/gtk-doc/html/gio/GFilterOutputStream.html +183 -0
- data/vendor/local/share/gtk-doc/html/gio/GIOModule.html +261 -0
- data/vendor/local/share/gtk-doc/html/gio/GIOStream.html +479 -0
- data/vendor/local/share/gtk-doc/html/gio/GIcon.html +331 -0
- data/vendor/local/share/gtk-doc/html/gio/GInetAddress.html +717 -0
- data/vendor/local/share/gtk-doc/html/gio/GInetSocketAddress.html +195 -0
- data/vendor/local/share/gtk-doc/html/gio/GInitable.html +388 -0
- data/vendor/local/share/gtk-doc/html/gio/GInputStream.html +808 -0
- data/vendor/local/share/gtk-doc/html/gio/GLoadableIcon.html +301 -0
- data/vendor/local/share/gtk-doc/html/gio/GMemoryInputStream.html +185 -0
- data/vendor/local/share/gtk-doc/html/gio/GMemoryOutputStream.html +362 -0
- data/vendor/local/share/gtk-doc/html/gio/GMount.html +1465 -0
- data/vendor/local/share/gtk-doc/html/gio/GMountOperation.html +848 -0
- data/vendor/local/share/gtk-doc/html/gio/GNetworkAddress.html +263 -0
- data/vendor/local/share/gtk-doc/html/gio/GNetworkService.html +248 -0
- data/vendor/local/share/gtk-doc/html/gio/GOutputStream.html +1006 -0
- data/vendor/local/share/gtk-doc/html/gio/GResolver.html +787 -0
- data/vendor/local/share/gtk-doc/html/gio/GSeekable.html +350 -0
- data/vendor/local/share/gtk-doc/html/gio/GSimpleAsyncResult.html +979 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocket.html +2152 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketAddress.html +297 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketClient.html +930 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketConnectable.html +428 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketConnection.html +548 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketControlMessage.html +274 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketListener.html +741 -0
- data/vendor/local/share/gtk-doc/html/gio/GSocketService.html +258 -0
- data/vendor/local/share/gtk-doc/html/gio/GThemedIcon.html +377 -0
- data/vendor/local/share/gtk-doc/html/gio/GThreadedSocketService.html +197 -0
- data/vendor/local/share/gtk-doc/html/gio/GUnixFDList.html +381 -0
- data/vendor/local/share/gtk-doc/html/gio/GUnixFDMessage.html +280 -0
- data/vendor/local/share/gtk-doc/html/gio/GUnixInputStream.html +237 -0
- data/vendor/local/share/gtk-doc/html/gio/GUnixOutputStream.html +237 -0
- data/vendor/local/share/gtk-doc/html/gio/GUnixSocketAddress.html +306 -0
- data/vendor/local/share/gtk-doc/html/gio/GVfs.html +274 -0
- data/vendor/local/share/gtk-doc/html/gio/GVolume.html +1028 -0
- data/vendor/local/share/gtk-doc/html/gio/GVolumeMonitor.html +709 -0
- data/vendor/local/share/gtk-doc/html/gio/GZlibCompressor.html +180 -0
- data/vendor/local/share/gtk-doc/html/gio/GZlibDecompressor.html +128 -0
- data/vendor/local/share/gtk-doc/html/gio/api-index-2-18.html +153 -0
- data/vendor/local/share/gtk-doc/html/gio/api-index-2-20.html +155 -0
- data/vendor/local/share/gtk-doc/html/gio/api-index-2-22.html +1088 -0
- data/vendor/local/share/gtk-doc/html/gio/api-index-2-24.html +206 -0
- data/vendor/local/share/gtk-doc/html/gio/api-index-deprecated.html +97 -0
- data/vendor/local/share/gtk-doc/html/gio/api-index-full.html +4499 -0
- data/vendor/local/share/gtk-doc/html/gio/async.html +44 -0
- data/vendor/local/share/gtk-doc/html/gio/ch01.html +171 -0
- data/vendor/local/share/gtk-doc/html/gio/ch02.html +45 -0
- data/vendor/local/share/gtk-doc/html/gio/ch03.html +80 -0
- data/vendor/local/share/gtk-doc/html/gio/ch19.html +75 -0
- data/vendor/local/share/gtk-doc/html/gio/ch20.html +217 -0
- data/vendor/local/share/gtk-doc/html/gio/ch20s02.html +41 -0
- data/vendor/local/share/gtk-doc/html/gio/ch20s03.html +39 -0
- data/vendor/local/share/gtk-doc/html/gio/conversion.html +44 -0
- data/vendor/local/share/gtk-doc/html/gio/extending-gio.html +96 -0
- data/vendor/local/share/gtk-doc/html/gio/extending.html +41 -0
- data/vendor/local/share/gtk-doc/html/gio/failable_initialization.html +38 -0
- data/vendor/local/share/gtk-doc/html/gio/file_mon.html +33 -0
- data/vendor/local/share/gtk-doc/html/gio/file_ops.html +50 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-Desktop-file-based-GAppInfo.html +339 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-Extension-Points.html +465 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GContentType.html +381 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GConverterInputstream.html +163 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GConverterOutputstream.html +163 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GFileAttribute.html +927 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GIOError.html +328 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GIOScheduler.html +297 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-GSrvTarget.html +313 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-Unix-Mounts.html +918 -0
- data/vendor/local/share/gtk-doc/html/gio/gio-hierarchy.html +107 -0
- data/vendor/local/share/gtk-doc/html/gio/gio.devhelp +1235 -0
- data/vendor/local/share/gtk-doc/html/gio/gio.devhelp2 +1384 -0
- data/vendor/local/share/gtk-doc/html/gio/gvfs-overview.png +0 -0
- data/vendor/local/share/gtk-doc/html/gio/highlevel-socket.html +47 -0
- data/vendor/local/share/gtk-doc/html/gio/home.png +0 -0
- data/vendor/local/share/gtk-doc/html/gio/icons.html +50 -0
- data/vendor/local/share/gtk-doc/html/gio/index.html +318 -0
- data/vendor/local/share/gtk-doc/html/gio/index.sgml +1795 -0
- data/vendor/local/share/gtk-doc/html/gio/left.png +0 -0
- data/vendor/local/share/gtk-doc/html/gio/migrating.html +42 -0
- data/vendor/local/share/gtk-doc/html/gio/networking.html +57 -0
- data/vendor/local/share/gtk-doc/html/gio/pt01.html +39 -0
- data/vendor/local/share/gtk-doc/html/gio/pt02.html +293 -0
- data/vendor/local/share/gtk-doc/html/gio/resolver.html +47 -0
- data/vendor/local/share/gtk-doc/html/gio/right.png +0 -0
- data/vendor/local/share/gtk-doc/html/gio/streaming.html +92 -0
- data/vendor/local/share/gtk-doc/html/gio/style.css +257 -0
- data/vendor/local/share/gtk-doc/html/gio/types.html +41 -0
- data/vendor/local/share/gtk-doc/html/gio/up.png +0 -0
- data/vendor/local/share/gtk-doc/html/gio/utils.html +33 -0
- data/vendor/local/share/gtk-doc/html/gio/volume_mon.html +47 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-10.html +180 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-12.html +312 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-14.html +563 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-16.html +362 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-18.html +102 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-2.html +124 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-20.html +83 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-22.html +194 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-24.html +419 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-4.html +389 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-6.html +413 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-2-8.html +158 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-deprecated.html +340 -0
- data/vendor/local/share/gtk-doc/html/glib/api-index-full.html +7835 -0
- data/vendor/local/share/gtk-doc/html/glib/file-name-encodings.png +0 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Arrays.html +893 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Asynchronous-Queues.html +818 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Atomic-Operations.html +413 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Automatic-String-Completion.html +430 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Balanced-Binary-Trees.html +802 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Base64-Encoding.html +372 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Basic-Types.html +500 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Bookmark-file-parser.html +2054 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Byte-Arrays.html +560 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Byte-Order-Macros.html +1686 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Caches.html +398 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Character-Set-Conversion.html +1187 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Commandline-option-parser.html +1665 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Data-Checksums.html +460 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Datasets.html +528 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Date-and-Time-Functions.html +2042 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Double-ended-Queues.html +1278 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Doubly-Linked-Lists.html +1406 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Dynamic-Loading-of-Modules.html +548 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Error-Reporting.html +1023 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-File-Utilities.html +1810 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-GVariant.html +3798 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-GVariantType.html +1815 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Glob-style-pattern-matching.html +295 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Hash-Tables.html +1512 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Hook-Functions.html +1367 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Hostname-Utilities.html +229 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-I18N.html +629 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-IO-Channels.html +2196 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Key-value-file-parser.html +2556 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Keyed-Data-Lists.html +612 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Lexical-Scanner.html +1451 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Limits-of-Basic-Types.html +389 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Memory-Allocation.html +1029 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Memory-Allocators.html +141 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Memory-Chunks.html +706 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Memory-Slices.html +510 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Message-Logging.html +703 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Miscellaneous-Macros.html +1045 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Miscellaneous-Utility-Functions.html +1523 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-N-ary-Trees.html +1466 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Numerical-Definitions.html +204 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Perl-compatible-regular-expressions.html +2648 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Pointer-Arrays.html +752 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Quarks.html +269 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Random-Numbers.html +578 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Relations-and-Tuples.html +483 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Sequences.html +1353 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Shell-related-Utilities.html +238 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Simple-XML-Subset-Parser.html +1283 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Singly-Linked-Lists.html +1212 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Spawning-Processes.html +924 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Standard-Macros.html +466 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-String-Chunks.html +301 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-String-Utility-Functions.html +2947 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Strings.html +1290 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Testing.html +1719 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-The-Main-Event-Loop.html +3379 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Thread-Pools.html +606 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Threads.html +3411 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Timers.html +245 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Trash-Stacks.html +188 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Type-Conversion-Macros.html +263 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-URI-Functions.html +314 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Unicode-Manipulation.html +3356 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Version-Information.html +150 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Warnings-and-Assertions.html +438 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-Windows-Compatibility-Functions.html +476 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-building.html +438 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-changes.html +159 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-compiling.html +118 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-core.html +64 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-cross-compiling.html +160 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-data-types.html +120 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-fundamentals.html +59 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-gettextize.html +93 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-regex-syntax.html +2395 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-resources.html +121 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-running.html +302 -0
- data/vendor/local/share/gtk-doc/html/glib/glib-utilities.html +112 -0
- data/vendor/local/share/gtk-doc/html/glib/glib.devhelp +2084 -0
- data/vendor/local/share/gtk-doc/html/glib/glib.devhelp2 +2556 -0
- data/vendor/local/share/gtk-doc/html/glib/glib.html +80 -0
- data/vendor/local/share/gtk-doc/html/glib/gtester-report.html +72 -0
- data/vendor/local/share/gtk-doc/html/glib/gtester.html +131 -0
- data/vendor/local/share/gtk-doc/html/glib/gvariant-format-strings.html +1171 -0
- data/vendor/local/share/gtk-doc/html/glib/home.png +0 -0
- data/vendor/local/share/gtk-doc/html/glib/index.html +336 -0
- data/vendor/local/share/gtk-doc/html/glib/index.sgml +2717 -0
- data/vendor/local/share/gtk-doc/html/glib/left.png +0 -0
- data/vendor/local/share/gtk-doc/html/glib/mainloop-states.gif +0 -0
- data/vendor/local/share/gtk-doc/html/glib/right.png +0 -0
- data/vendor/local/share/gtk-doc/html/glib/style.css +257 -0
- data/vendor/local/share/gtk-doc/html/glib/tools.html +41 -0
- data/vendor/local/share/gtk-doc/html/glib/up.png +0 -0
- data/vendor/local/share/gtk-doc/html/gobject/GTypeModule.html +710 -0
- data/vendor/local/share/gtk-doc/html/gobject/GTypePlugin.html +453 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-10.html +85 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-12.html +55 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-14.html +51 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-18.html +46 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-2.html +38 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-22.html +46 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-24.html +72 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-4.html +178 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-6.html +42 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-2-8.html +42 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-deprecated.html +50 -0
- data/vendor/local/share/gtk-doc/html/gobject/api-index-full.html +2577 -0
- data/vendor/local/share/gtk-doc/html/gobject/ch01s02.html +136 -0
- data/vendor/local/share/gtk-doc/html/gobject/ch06s03.html +113 -0
- data/vendor/local/share/gtk-doc/html/gobject/chapter-gobject.html +293 -0
- data/vendor/local/share/gtk-doc/html/gobject/chapter-gtype.html +263 -0
- data/vendor/local/share/gtk-doc/html/gobject/chapter-intro.html +92 -0
- data/vendor/local/share/gtk-doc/html/gobject/chapter-signal.html +214 -0
- data/vendor/local/share/gtk-doc/html/gobject/glib-genmarshal.html +355 -0
- data/vendor/local/share/gtk-doc/html/gobject/glib-mkenums.html +295 -0
- data/vendor/local/share/gtk-doc/html/gobject/glue.png +0 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Boxed-Types.html +406 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Closures.html +2355 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Enumeration-and-Flag-Types.html +847 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-GParamSpec.html +1423 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Generic-values.html +776 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Signals.html +2741 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Standard-Parameter-and-Value-Types.html +5242 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-The-Base-Object-Type.html +2609 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Type-Information.html +4227 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Value-arrays.html +413 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-Varargs-Value-Collection.html +230 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-memory.html +234 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-properties.html +270 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject-query.html +117 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject.devhelp +723 -0
- data/vendor/local/share/gtk-doc/html/gobject/gobject.devhelp2 +757 -0
- data/vendor/local/share/gtk-doc/html/gobject/gtype-conventions.html +143 -0
- data/vendor/local/share/gtk-doc/html/gobject/gtype-instantiable-classed.html +287 -0
- data/vendor/local/share/gtk-doc/html/gobject/gtype-non-instantiable-classed.html +316 -0
- data/vendor/local/share/gtk-doc/html/gobject/gtype-non-instantiable.html +76 -0
- data/vendor/local/share/gtk-doc/html/gobject/home.png +0 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-chainup.html +100 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-code.html +86 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-construction.html +113 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-destruction.html +122 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-gobject-methods.html +257 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-gobject.html +283 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-interface-implement.html +125 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-interface-properties.html +167 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-interface.html +160 -0
- data/vendor/local/share/gtk-doc/html/gobject/howto-signals.html +121 -0
- data/vendor/local/share/gtk-doc/html/gobject/index.html +187 -0
- data/vendor/local/share/gtk-doc/html/gobject/index.sgml +734 -0
- data/vendor/local/share/gtk-doc/html/gobject/left.png +0 -0
- data/vendor/local/share/gtk-doc/html/gobject/pr01.html +72 -0
- data/vendor/local/share/gtk-doc/html/gobject/pt01.html +80 -0
- data/vendor/local/share/gtk-doc/html/gobject/pt02.html +66 -0
- data/vendor/local/share/gtk-doc/html/gobject/pt03.html +55 -0
- data/vendor/local/share/gtk-doc/html/gobject/right.png +0 -0
- data/vendor/local/share/gtk-doc/html/gobject/rn01.html +82 -0
- data/vendor/local/share/gtk-doc/html/gobject/rn02.html +47 -0
- data/vendor/local/share/gtk-doc/html/gobject/signal.html +377 -0
- data/vendor/local/share/gtk-doc/html/gobject/style.css +257 -0
- data/vendor/local/share/gtk-doc/html/gobject/tools-ginspector.html +35 -0
- data/vendor/local/share/gtk-doc/html/gobject/tools-gob.html +40 -0
- data/vendor/local/share/gtk-doc/html/gobject/tools-gtkdoc.html +63 -0
- data/vendor/local/share/gtk-doc/html/gobject/tools-refdb.html +55 -0
- data/vendor/local/share/gtk-doc/html/gobject/tools-vala.html +43 -0
- data/vendor/local/share/gtk-doc/html/gobject/up.png +0 -0
- data/vendor/local/share/locale/af/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/am/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ar/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/as/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ast/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/az/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/be/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/be/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/be@latin/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/bg/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/bg/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/bn/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/bn_IN/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/bs/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ca/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/ca/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ca@valencia/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/cs/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/cs/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/cy/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/da/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/da/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/de/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/de/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/dz/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/el/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/el/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/en@boldquot/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/en@quot/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/en@shaw/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/en_CA/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/en_GB/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/eo/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/eo/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/es/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/es/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/et/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/et/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/eu/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/fa/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/fi/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/fi/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/fr/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/fr/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ga/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/ga/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/gl/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/gl/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/gu/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/he/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/hi/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/hr/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/hu/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/hy/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/id/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/id/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/is/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/it/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/it/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ja/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/ja/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ka/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/kn/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ko/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/ko/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ku/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/locale.alias +77 -0
- data/vendor/local/share/locale/lt/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/lv/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/mai/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/mg/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/mk/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ml/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/mn/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/mr/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ms/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/nb/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/nb/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/nds/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ne/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/nl/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/nl/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/nn/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/nn/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/oc/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/or/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/pa/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/pl/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/pl/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ps/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/pt/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/pt/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/pt_BR/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/pt_BR/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ro/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/ro/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ru/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/ru/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/rw/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/si/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sk/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/sk/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sl/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/sl/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sq/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sr/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/sr/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sr@ije/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sr@latin/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/sv/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/sv/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/ta/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/te/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/th/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/tl/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/tr/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/tr/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/tt/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/uk/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/uk/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/vi/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/vi/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/wa/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/xh/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/yi/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/zh_CN/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/zh_CN/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/zh_HK/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/zh_HK/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/locale/zh_TW/LC_MESSAGES/gettext-runtime.mo +0 -0
- data/vendor/local/share/locale/zh_TW/LC_MESSAGES/glib20.mo +0 -0
- data/vendor/local/share/man/man1/envsubst.1 +54 -0
- data/vendor/local/share/man/man1/gettext.1 +69 -0
- data/vendor/local/share/man/man1/glib-genmarshal.1 +307 -0
- data/vendor/local/share/man/man1/glib-mkenums.1 +234 -0
- data/vendor/local/share/man/man1/gobject-query.1 +83 -0
- data/vendor/local/share/man/man1/ngettext.1 +68 -0
- data/vendor/local/share/man/man3/bind_textdomain_codeset.3 +72 -0
- data/vendor/local/share/man/man3/bindtextdomain.3 +69 -0
- data/vendor/local/share/man/man3/dcgettext.3 +1 -0
- data/vendor/local/share/man/man3/dcngettext.3 +1 -0
- data/vendor/local/share/man/man3/dgettext.3 +1 -0
- data/vendor/local/share/man/man3/dngettext.3 +1 -0
- data/vendor/local/share/man/man3/gettext.3 +99 -0
- data/vendor/local/share/man/man3/ngettext.3 +60 -0
- data/vendor/local/share/man/man3/textdomain.3 +57 -0
- data/vendor/local/src/tml/packaging/gettext_0.18.1.1-2_win32.log +10423 -0
- data/vendor/local/src/tml/packaging/gettext_0.18.1.1-2_win32.sh +457 -0
- data/vendor/local/src/tml/packaging/glib_2.24.2-2_win32.log +2602 -0
- data/vendor/local/src/tml/packaging/glib_2.24.2-2_win32.sh +290 -0
- metadata +940 -0
@@ -0,0 +1,2395 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
5
|
+
<title>Regular expression syntax</title>
|
6
|
+
<meta name="generator" content="DocBook XSL Stylesheets V1.75.2">
|
7
|
+
<link rel="home" href="index.html" title="GLib Reference Manual">
|
8
|
+
<link rel="up" href="glib.html" title="GLib Overview">
|
9
|
+
<link rel="prev" href="glib-changes.html" title="Changes to GLib">
|
10
|
+
<link rel="next" href="glib-resources.html" title="Mailing lists and bug reports">
|
11
|
+
<meta name="generator" content="GTK-Doc V1.14 (XML mode)">
|
12
|
+
<link rel="stylesheet" href="style.css" type="text/css">
|
13
|
+
</head>
|
14
|
+
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
|
15
|
+
<table class="navigation" id="top" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle">
|
16
|
+
<td><a accesskey="p" href="glib-changes.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></a></td>
|
17
|
+
<td><a accesskey="u" href="glib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></a></td>
|
18
|
+
<td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></a></td>
|
19
|
+
<th width="100%" align="center">GLib Reference Manual</th>
|
20
|
+
<td><a accesskey="n" href="glib-resources.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></a></td>
|
21
|
+
</tr></table>
|
22
|
+
<div class="refentry" title="Regular expression syntax">
|
23
|
+
<a name="glib-regex-syntax"></a><div class="titlepage"></div>
|
24
|
+
<div class="refnamediv"><table width="100%"><tr>
|
25
|
+
<td valign="top">
|
26
|
+
<h2><span class="refentrytitle">Regular expression syntax</span></h2>
|
27
|
+
<p>Regular expression syntax —
|
28
|
+
Syntax and semantics of the regular expressions supported by GRegex
|
29
|
+
</p>
|
30
|
+
</td>
|
31
|
+
<td valign="top" align="right"></td>
|
32
|
+
</tr></table></div>
|
33
|
+
<div class="refsect1" title="GRegex regular expression details">
|
34
|
+
<a name="id539370"></a><h2>GRegex regular expression details</h2>
|
35
|
+
<p>
|
36
|
+
A regular expression is a pattern that is matched against a
|
37
|
+
string from left to right. Most characters stand for themselves in a
|
38
|
+
pattern, and match the corresponding characters in the string. As a
|
39
|
+
trivial example, the pattern
|
40
|
+
</p>
|
41
|
+
<pre class="programlisting">
|
42
|
+
The quick brown fox
|
43
|
+
</pre>
|
44
|
+
<p>
|
45
|
+
matches a portion of a string that is identical to itself. When
|
46
|
+
caseless matching is specified (the <code class="varname">G_REGEX_CASELESS</code> flag), letters are
|
47
|
+
matched independently of case.
|
48
|
+
</p>
|
49
|
+
<p>
|
50
|
+
The power of regular expressions comes from the ability to include
|
51
|
+
alternatives and repetitions in the pattern. These are encoded in the
|
52
|
+
pattern by the use of metacharacters, which do not stand for themselves
|
53
|
+
but instead are interpreted in some special way.
|
54
|
+
</p>
|
55
|
+
<p>
|
56
|
+
There are two different sets of metacharacters: those that are recognized
|
57
|
+
anywhere in the pattern except within square brackets, and those
|
58
|
+
that are recognized in square brackets. Outside square brackets, the
|
59
|
+
metacharacters are as follows:
|
60
|
+
</p>
|
61
|
+
<div class="table">
|
62
|
+
<a name="id520990"></a><p class="title"><b>Table 1. Metacharacters outside square brackets</b></p>
|
63
|
+
<div class="table-contents"><table summary="Metacharacters outside square brackets" border="1">
|
64
|
+
<colgroup>
|
65
|
+
<col align="center">
|
66
|
+
<col>
|
67
|
+
</colgroup>
|
68
|
+
<thead><tr>
|
69
|
+
<th align="center">Character</th>
|
70
|
+
<th>Meaning</th>
|
71
|
+
</tr></thead>
|
72
|
+
<tbody>
|
73
|
+
<tr>
|
74
|
+
<td align="center">\</td>
|
75
|
+
<td>general escape character with several uses</td>
|
76
|
+
</tr>
|
77
|
+
<tr>
|
78
|
+
<td align="center">^</td>
|
79
|
+
<td>assert start of string (or line, in multiline mode)</td>
|
80
|
+
</tr>
|
81
|
+
<tr>
|
82
|
+
<td align="center">$</td>
|
83
|
+
<td>assert end of string (or line, in multiline mode)</td>
|
84
|
+
</tr>
|
85
|
+
<tr>
|
86
|
+
<td align="center">.</td>
|
87
|
+
<td>match any character except newline (by default)</td>
|
88
|
+
</tr>
|
89
|
+
<tr>
|
90
|
+
<td align="center">[</td>
|
91
|
+
<td>start character class definition</td>
|
92
|
+
</tr>
|
93
|
+
<tr>
|
94
|
+
<td align="center">|</td>
|
95
|
+
<td>start of alternative branch</td>
|
96
|
+
</tr>
|
97
|
+
<tr>
|
98
|
+
<td align="center">(</td>
|
99
|
+
<td>start subpattern</td>
|
100
|
+
</tr>
|
101
|
+
<tr>
|
102
|
+
<td align="center">)</td>
|
103
|
+
<td>end subpattern</td>
|
104
|
+
</tr>
|
105
|
+
<tr>
|
106
|
+
<td align="center">?</td>
|
107
|
+
<td>extends the meaning of (, or 0/1 quantifier, or quantifier minimizer</td>
|
108
|
+
</tr>
|
109
|
+
<tr>
|
110
|
+
<td align="center">*</td>
|
111
|
+
<td>0 or more quantifier</td>
|
112
|
+
</tr>
|
113
|
+
<tr>
|
114
|
+
<td align="center">+</td>
|
115
|
+
<td>1 or more quantifier, also "possessive quantifier"</td>
|
116
|
+
</tr>
|
117
|
+
<tr>
|
118
|
+
<td align="center">{</td>
|
119
|
+
<td>start min/max quantifier</td>
|
120
|
+
</tr>
|
121
|
+
</tbody>
|
122
|
+
</table></div>
|
123
|
+
</div>
|
124
|
+
<br class="table-break"><p>
|
125
|
+
Part of a pattern that is in square brackets is called a "character
|
126
|
+
class". In a character class the only metacharacters are:
|
127
|
+
</p>
|
128
|
+
<div class="table">
|
129
|
+
<a name="id509847"></a><p class="title"><b>Table 2. Metacharacters inside square brackets</b></p>
|
130
|
+
<div class="table-contents"><table summary="Metacharacters inside square brackets" border="1">
|
131
|
+
<colgroup>
|
132
|
+
<col align="center">
|
133
|
+
<col>
|
134
|
+
</colgroup>
|
135
|
+
<thead><tr>
|
136
|
+
<th align="center">Character</th>
|
137
|
+
<th>Meaning</th>
|
138
|
+
</tr></thead>
|
139
|
+
<tbody>
|
140
|
+
<tr>
|
141
|
+
<td align="center">\</td>
|
142
|
+
<td>general escape character</td>
|
143
|
+
</tr>
|
144
|
+
<tr>
|
145
|
+
<td align="center">^</td>
|
146
|
+
<td>negate the class, but only if it is the first character</td>
|
147
|
+
</tr>
|
148
|
+
<tr>
|
149
|
+
<td align="center">-</td>
|
150
|
+
<td>indicates character range</td>
|
151
|
+
</tr>
|
152
|
+
<tr>
|
153
|
+
<td align="center">[</td>
|
154
|
+
<td>POSIX character class (only if followed by POSIX syntax)</td>
|
155
|
+
</tr>
|
156
|
+
<tr>
|
157
|
+
<td align="center">]</td>
|
158
|
+
<td>terminates the character class</td>
|
159
|
+
</tr>
|
160
|
+
</tbody>
|
161
|
+
</table></div>
|
162
|
+
</div>
|
163
|
+
<br class="table-break">
|
164
|
+
</div>
|
165
|
+
<div class="refsect1" title="Backslash">
|
166
|
+
<a name="id554614"></a><h2>Backslash</h2>
|
167
|
+
<p>
|
168
|
+
The backslash character has several uses. Firstly, if it is followed by
|
169
|
+
a non-alphanumeric character, it takes away any special meaning that
|
170
|
+
character may have. This use of backslash as an escape character
|
171
|
+
applies both inside and outside character classes.
|
172
|
+
</p>
|
173
|
+
<p>
|
174
|
+
For example, if you want to match a * character, you write \* in the
|
175
|
+
pattern. This escaping action applies whether or not the following
|
176
|
+
character would otherwise be interpreted as a metacharacter, so it is
|
177
|
+
always safe to precede a non-alphanumeric character with a backslash to specify
|
178
|
+
that it stands for itself. In particular, if you want to match a
|
179
|
+
backslash, you write \\.
|
180
|
+
</p>
|
181
|
+
<p>
|
182
|
+
If a pattern is compiled with the <code class="varname">G_REGEX_EXTENDED</code>
|
183
|
+
option, whitespace in the pattern (other than in a character class) and
|
184
|
+
characters between a # outside a character class and the next newline
|
185
|
+
are ignored.
|
186
|
+
An escaping backslash can be used to include a whitespace or # character
|
187
|
+
as part of the pattern.
|
188
|
+
</p>
|
189
|
+
<p>
|
190
|
+
If you want to remove the special meaning from a sequence of characters,
|
191
|
+
you can do so by putting them between \Q and \E.
|
192
|
+
The \Q...\E sequence is recognized both inside and outside character
|
193
|
+
classes.
|
194
|
+
</p>
|
195
|
+
<div class="refsect2" title="Non-printing characters">
|
196
|
+
<a name="id554647"></a><h3>Non-printing characters</h3>
|
197
|
+
<p>
|
198
|
+
A second use of backslash provides a way of encoding non-printing
|
199
|
+
characters in patterns in a visible manner. There is no restriction on the
|
200
|
+
appearance of non-printing characters, apart from the binary zero that
|
201
|
+
terminates a pattern, but when a pattern is being prepared by text
|
202
|
+
editing, it is usually easier to use one of the following escape
|
203
|
+
sequences than the binary character it represents:
|
204
|
+
</p>
|
205
|
+
<div class="table">
|
206
|
+
<a name="id511381"></a><p class="title"><b>Table 3. Non-printing characters</b></p>
|
207
|
+
<div class="table-contents"><table summary="Non-printing characters" border="1">
|
208
|
+
<colgroup>
|
209
|
+
<col align="center">
|
210
|
+
<col>
|
211
|
+
</colgroup>
|
212
|
+
<thead><tr>
|
213
|
+
<th align="center">Escape</th>
|
214
|
+
<th>Meaning</th>
|
215
|
+
</tr></thead>
|
216
|
+
<tbody>
|
217
|
+
<tr>
|
218
|
+
<td align="center">\a</td>
|
219
|
+
<td>alarm, that is, the BEL character (hex 07)</td>
|
220
|
+
</tr>
|
221
|
+
<tr>
|
222
|
+
<td align="center">\cx</td>
|
223
|
+
<td>"control-x", where x is any character</td>
|
224
|
+
</tr>
|
225
|
+
<tr>
|
226
|
+
<td align="center">\e</td>
|
227
|
+
<td>escape (hex 1B)</td>
|
228
|
+
</tr>
|
229
|
+
<tr>
|
230
|
+
<td align="center">\f</td>
|
231
|
+
<td>formfeed (hex 0C)</td>
|
232
|
+
</tr>
|
233
|
+
<tr>
|
234
|
+
<td align="center">\n</td>
|
235
|
+
<td>newline (hex 0A)</td>
|
236
|
+
</tr>
|
237
|
+
<tr>
|
238
|
+
<td align="center">\r</td>
|
239
|
+
<td>carriage return (hex 0D)</td>
|
240
|
+
</tr>
|
241
|
+
<tr>
|
242
|
+
<td align="center">\t</td>
|
243
|
+
<td>tab (hex 09)</td>
|
244
|
+
</tr>
|
245
|
+
<tr>
|
246
|
+
<td align="center">\ddd</td>
|
247
|
+
<td>character with octal code ddd, or backreference</td>
|
248
|
+
</tr>
|
249
|
+
<tr>
|
250
|
+
<td align="center">\xhh</td>
|
251
|
+
<td>character with hex code hh</td>
|
252
|
+
</tr>
|
253
|
+
<tr>
|
254
|
+
<td align="center">\x{hhh..}</td>
|
255
|
+
<td>character with hex code hhh..</td>
|
256
|
+
</tr>
|
257
|
+
</tbody>
|
258
|
+
</table></div>
|
259
|
+
</div>
|
260
|
+
<br class="table-break"><p>
|
261
|
+
The precise effect of \cx is as follows: if x is a lower case letter,
|
262
|
+
it is converted to upper case. Then bit 6 of the character (hex 40) is
|
263
|
+
inverted. Thus \cz becomes hex 1A, but \c{ becomes hex 3B, while \c;
|
264
|
+
becomes hex 7B.
|
265
|
+
</p>
|
266
|
+
<p>
|
267
|
+
After \x, from zero to two hexadecimal digits are read (letters can be
|
268
|
+
in upper or lower case). Any number of hexadecimal digits may appear
|
269
|
+
between \x{ and }, but the value of the character code
|
270
|
+
must be less than 2**31 (that is, the maximum hexadecimal value is
|
271
|
+
7FFFFFFF). If characters other than hexadecimal digits appear between
|
272
|
+
\x{ and }, or if there is no terminating }, this form of escape is not
|
273
|
+
recognized. Instead, the initial \x will be interpreted as a basic hexadecimal
|
274
|
+
escape, with no following digits, giving a character whose
|
275
|
+
value is zero.
|
276
|
+
</p>
|
277
|
+
<p>
|
278
|
+
Characters whose value is less than 256 can be defined by either of the
|
279
|
+
two syntaxes for \x. There is no difference
|
280
|
+
in the way they are handled. For example, \xdc is exactly the same as
|
281
|
+
\x{dc}.
|
282
|
+
</p>
|
283
|
+
<p>
|
284
|
+
After \0 up to two further octal digits are read. If there are fewer
|
285
|
+
than two digits, just those that are present are used.
|
286
|
+
Thus the sequence \0\x\07 specifies two binary zeros followed by a BEL
|
287
|
+
character (code value 7). Make sure you supply two digits after the
|
288
|
+
initial zero if the pattern character that follows is itself an octal
|
289
|
+
digit.
|
290
|
+
</p>
|
291
|
+
<p>
|
292
|
+
The handling of a backslash followed by a digit other than 0 is complicated.
|
293
|
+
Outside a character class, GRegex reads it and any following digits as a
|
294
|
+
decimal number. If the number is less than 10, or if there
|
295
|
+
have been at least that many previous capturing left parentheses in the
|
296
|
+
expression, the entire sequence is taken as a back reference. A
|
297
|
+
description of how this works is given later, following the discussion
|
298
|
+
of parenthesized subpatterns.
|
299
|
+
</p>
|
300
|
+
<p>
|
301
|
+
Inside a character class, or if the decimal number is greater than 9
|
302
|
+
and there have not been that many capturing subpatterns, GRegex re-reads
|
303
|
+
up to three octal digits following the backslash, and uses them to generate
|
304
|
+
a data character. Any subsequent digits stand for themselves. For example:
|
305
|
+
</p>
|
306
|
+
<div class="table">
|
307
|
+
<a name="id555026"></a><p class="title"><b>Table 4. Non-printing characters</b></p>
|
308
|
+
<div class="table-contents"><table summary="Non-printing characters" border="1">
|
309
|
+
<colgroup>
|
310
|
+
<col align="center">
|
311
|
+
<col>
|
312
|
+
</colgroup>
|
313
|
+
<thead><tr>
|
314
|
+
<th align="center">Escape</th>
|
315
|
+
<th>Meaning</th>
|
316
|
+
</tr></thead>
|
317
|
+
<tbody>
|
318
|
+
<tr>
|
319
|
+
<td align="center">\040</td>
|
320
|
+
<td>is another way of writing a space</td>
|
321
|
+
</tr>
|
322
|
+
<tr>
|
323
|
+
<td align="center">\40</td>
|
324
|
+
<td>is the same, provided there are fewer than 40 previous capturing subpatterns</td>
|
325
|
+
</tr>
|
326
|
+
<tr>
|
327
|
+
<td align="center">\7</td>
|
328
|
+
<td>is always a back reference</td>
|
329
|
+
</tr>
|
330
|
+
<tr>
|
331
|
+
<td align="center">\11</td>
|
332
|
+
<td>might be a back reference, or another way of writing a tab</td>
|
333
|
+
</tr>
|
334
|
+
<tr>
|
335
|
+
<td align="center">\011</td>
|
336
|
+
<td>is always a tab</td>
|
337
|
+
</tr>
|
338
|
+
<tr>
|
339
|
+
<td align="center">\0113</td>
|
340
|
+
<td>is a tab followed by the character "3"</td>
|
341
|
+
</tr>
|
342
|
+
<tr>
|
343
|
+
<td align="center">\113</td>
|
344
|
+
<td>might be a back reference, otherwise the character with octal code 113</td>
|
345
|
+
</tr>
|
346
|
+
<tr>
|
347
|
+
<td align="center">\377</td>
|
348
|
+
<td>might be a back reference, otherwise the byte consisting entirely of 1 bits</td>
|
349
|
+
</tr>
|
350
|
+
<tr>
|
351
|
+
<td align="center">\81</td>
|
352
|
+
<td>is either a back reference, or a binary zero followed by the two characters "8" and "1"</td>
|
353
|
+
</tr>
|
354
|
+
</tbody>
|
355
|
+
</table></div>
|
356
|
+
</div>
|
357
|
+
<br class="table-break"><p>
|
358
|
+
Note that octal values of 100 or greater must not be introduced by a
|
359
|
+
leading zero, because no more than three octal digits are ever read.
|
360
|
+
</p>
|
361
|
+
<p>
|
362
|
+
All the sequences that define a single character can be used both inside
|
363
|
+
and outside character classes. In addition, inside a character class, the
|
364
|
+
sequence \b is interpreted as the backspace character (hex 08), and the
|
365
|
+
sequences \R and \X are interpreted as the characters "R" and "X", respectively.
|
366
|
+
Outside a character class, these sequences have different meanings (see below).
|
367
|
+
</p>
|
368
|
+
</div>
|
369
|
+
<hr>
|
370
|
+
<div class="refsect2" title="Absolute and relative back references">
|
371
|
+
<a name="id555185"></a><h3>Absolute and relative back references</h3>
|
372
|
+
<p>
|
373
|
+
The sequence \g followed by a positive or negative number, optionally enclosed
|
374
|
+
in braces, is an absolute or relative back reference. Back references are
|
375
|
+
discussed later, following the discussion of parenthesized subpatterns.
|
376
|
+
</p>
|
377
|
+
</div>
|
378
|
+
<hr>
|
379
|
+
<div class="refsect2" title="Generic character types">
|
380
|
+
<a name="id555198"></a><h3>Generic character types</h3>
|
381
|
+
<p>
|
382
|
+
Another use of backslash is for specifying generic character types.
|
383
|
+
The following are always recognized:
|
384
|
+
</p>
|
385
|
+
<div class="table">
|
386
|
+
<a name="id555209"></a><p class="title"><b>Table 5. Generic characters</b></p>
|
387
|
+
<div class="table-contents"><table summary="Generic characters" border="1">
|
388
|
+
<colgroup>
|
389
|
+
<col align="center">
|
390
|
+
<col>
|
391
|
+
</colgroup>
|
392
|
+
<thead><tr>
|
393
|
+
<th align="center">Escape</th>
|
394
|
+
<th>Meaning</th>
|
395
|
+
</tr></thead>
|
396
|
+
<tbody>
|
397
|
+
<tr>
|
398
|
+
<td align="center">\d</td>
|
399
|
+
<td>any decimal digit</td>
|
400
|
+
</tr>
|
401
|
+
<tr>
|
402
|
+
<td align="center">\D</td>
|
403
|
+
<td>any character that is not a decimal digit</td>
|
404
|
+
</tr>
|
405
|
+
<tr>
|
406
|
+
<td align="center">\s</td>
|
407
|
+
<td>any whitespace character</td>
|
408
|
+
</tr>
|
409
|
+
<tr>
|
410
|
+
<td align="center">\S</td>
|
411
|
+
<td>any character that is not a whitespace character</td>
|
412
|
+
</tr>
|
413
|
+
<tr>
|
414
|
+
<td align="center">\w</td>
|
415
|
+
<td>any "word" character</td>
|
416
|
+
</tr>
|
417
|
+
<tr>
|
418
|
+
<td align="center">\W</td>
|
419
|
+
<td>any "non-word" character</td>
|
420
|
+
</tr>
|
421
|
+
</tbody>
|
422
|
+
</table></div>
|
423
|
+
</div>
|
424
|
+
<br class="table-break"><p>
|
425
|
+
Each pair of escape sequences partitions the complete set of characters
|
426
|
+
into two disjoint sets. Any given character matches one, and only one,
|
427
|
+
of each pair.
|
428
|
+
</p>
|
429
|
+
<p>
|
430
|
+
These character type sequences can appear both inside and outside character
|
431
|
+
classes. They each match one character of the appropriate type.
|
432
|
+
If the current matching point is at the end of the passed string, all
|
433
|
+
of them fail, since there is no character to match.
|
434
|
+
</p>
|
435
|
+
<p>
|
436
|
+
For compatibility with Perl, \s does not match the VT character (code
|
437
|
+
11). This makes it different from the POSIX "space" class. The \s
|
438
|
+
characters are HT (9), LF (10), FF (12), CR (13), and space (32).
|
439
|
+
</p>
|
440
|
+
<p>
|
441
|
+
A "word" character is an underscore or any character less than 256 that
|
442
|
+
is a letter or digit.</p>
|
443
|
+
<p>
|
444
|
+
Characters with values greater than 128 never match \d,
|
445
|
+
\s, or \w, and always match \D, \S, and \W.
|
446
|
+
</p>
|
447
|
+
</div>
|
448
|
+
<hr>
|
449
|
+
<div class="refsect2" title="Newline sequences">
|
450
|
+
<a name="id555348"></a><h3>Newline sequences</h3>
|
451
|
+
<p>Outside a character class, the escape sequence \R matches any Unicode
|
452
|
+
newline sequence.
|
453
|
+
This particular group matches either the two-character sequence CR followed by
|
454
|
+
LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab,
|
455
|
+
U+000B), FF (formfeed, U+000C), CR (carriage return, U+000D), NEL (next
|
456
|
+
line, U+0085), LS (line separator, U+2028), or PS (paragraph separator, U+2029).
|
457
|
+
The two-character sequence is treated as a single unit that
|
458
|
+
cannot be split. Inside a character class, \R matches the letter "R".</p>
|
459
|
+
</div>
|
460
|
+
<hr>
|
461
|
+
<div class="refsect2" title="Unicode character properties">
|
462
|
+
<a name="id555363"></a><h3>Unicode character properties</h3>
|
463
|
+
<p>
|
464
|
+
To support generic character types there are three additional escape
|
465
|
+
sequences:
|
466
|
+
</p>
|
467
|
+
<div class="table">
|
468
|
+
<a name="id555374"></a><p class="title"><b>Table 6. Generic character types</b></p>
|
469
|
+
<div class="table-contents"><table summary="Generic character types" border="1">
|
470
|
+
<colgroup>
|
471
|
+
<col align="center">
|
472
|
+
<col>
|
473
|
+
</colgroup>
|
474
|
+
<thead><tr>
|
475
|
+
<th align="center">Escape</th>
|
476
|
+
<th>Meaning</th>
|
477
|
+
</tr></thead>
|
478
|
+
<tbody>
|
479
|
+
<tr>
|
480
|
+
<td align="center">\p{xx}</td>
|
481
|
+
<td>a character with the xx property</td>
|
482
|
+
</tr>
|
483
|
+
<tr>
|
484
|
+
<td align="center">\P{xx}</td>
|
485
|
+
<td>a character without the xx property</td>
|
486
|
+
</tr>
|
487
|
+
<tr>
|
488
|
+
<td align="center">\X</td>
|
489
|
+
<td>an extended Unicode sequence</td>
|
490
|
+
</tr>
|
491
|
+
</tbody>
|
492
|
+
</table></div>
|
493
|
+
</div>
|
494
|
+
<br class="table-break"><p>
|
495
|
+
The property names represented by xx above are limited to the Unicode
|
496
|
+
script names, the general category properties, and "Any", which matches
|
497
|
+
any character (including newline). Other properties such as "InMusicalSymbols"
|
498
|
+
are not currently supported. Note that \P{Any} does not match any characters,
|
499
|
+
so always causes a match failure.
|
500
|
+
</p>
|
501
|
+
<p>
|
502
|
+
Sets of Unicode characters are defined as belonging to certain scripts. A
|
503
|
+
character from one of these sets can be matched using a script name. For
|
504
|
+
example, \p{Greek} or \P{Han}.
|
505
|
+
</p>
|
506
|
+
<p>
|
507
|
+
Those that are not part of an identified script are lumped together as
|
508
|
+
"Common". The current list of scripts is:
|
509
|
+
</p>
|
510
|
+
<div class="itemizedlist"><ul class="itemizedlist" type="disc">
|
511
|
+
<li class="listitem"><p>Arabic</p></li>
|
512
|
+
<li class="listitem"><p>Armenian</p></li>
|
513
|
+
<li class="listitem"><p>Balinese</p></li>
|
514
|
+
<li class="listitem"><p>Bengali</p></li>
|
515
|
+
<li class="listitem"><p>Bopomofo</p></li>
|
516
|
+
<li class="listitem"><p>Braille</p></li>
|
517
|
+
<li class="listitem"><p>Buginese</p></li>
|
518
|
+
<li class="listitem"><p>Buhid</p></li>
|
519
|
+
<li class="listitem"><p>Canadian_Aboriginal</p></li>
|
520
|
+
<li class="listitem"><p>Cherokee</p></li>
|
521
|
+
<li class="listitem"><p>Common</p></li>
|
522
|
+
<li class="listitem"><p>Coptic</p></li>
|
523
|
+
<li class="listitem"><p>Cuneiform</p></li>
|
524
|
+
<li class="listitem"><p>Cypriot</p></li>
|
525
|
+
<li class="listitem"><p>Cyrillic</p></li>
|
526
|
+
<li class="listitem"><p>Deseret</p></li>
|
527
|
+
<li class="listitem"><p>Devanagari</p></li>
|
528
|
+
<li class="listitem"><p>Ethiopic</p></li>
|
529
|
+
<li class="listitem"><p>Georgian</p></li>
|
530
|
+
<li class="listitem"><p>Glagolitic</p></li>
|
531
|
+
<li class="listitem"><p>Gothic</p></li>
|
532
|
+
<li class="listitem"><p>Greek</p></li>
|
533
|
+
<li class="listitem"><p>Gujarati</p></li>
|
534
|
+
<li class="listitem"><p>Gurmukhi</p></li>
|
535
|
+
<li class="listitem"><p>Han</p></li>
|
536
|
+
<li class="listitem"><p>Hangul</p></li>
|
537
|
+
<li class="listitem"><p>Hanunoo</p></li>
|
538
|
+
<li class="listitem"><p>Hebrew</p></li>
|
539
|
+
<li class="listitem"><p>Hiragana</p></li>
|
540
|
+
<li class="listitem"><p>Inherited</p></li>
|
541
|
+
<li class="listitem"><p>Kannada</p></li>
|
542
|
+
<li class="listitem"><p>Katakana</p></li>
|
543
|
+
<li class="listitem"><p>Kharoshthi</p></li>
|
544
|
+
<li class="listitem"><p>Khmer</p></li>
|
545
|
+
<li class="listitem"><p>Lao</p></li>
|
546
|
+
<li class="listitem"><p>Latin</p></li>
|
547
|
+
<li class="listitem"><p>Limbu</p></li>
|
548
|
+
<li class="listitem"><p>Linear_B</p></li>
|
549
|
+
<li class="listitem"><p>Malayalam</p></li>
|
550
|
+
<li class="listitem"><p>Mongolian</p></li>
|
551
|
+
<li class="listitem"><p>Myanmar</p></li>
|
552
|
+
<li class="listitem"><p>New_Tai_Lue</p></li>
|
553
|
+
<li class="listitem"><p>Nko</p></li>
|
554
|
+
<li class="listitem"><p>Ogham</p></li>
|
555
|
+
<li class="listitem"><p>Old_Italic</p></li>
|
556
|
+
<li class="listitem"><p>Old_Persian</p></li>
|
557
|
+
<li class="listitem"><p>Oriya</p></li>
|
558
|
+
<li class="listitem"><p>Osmanya</p></li>
|
559
|
+
<li class="listitem"><p>Phags_Pa</p></li>
|
560
|
+
<li class="listitem"><p>Phoenician</p></li>
|
561
|
+
<li class="listitem"><p>Runic</p></li>
|
562
|
+
<li class="listitem"><p>Shavian</p></li>
|
563
|
+
<li class="listitem"><p>Sinhala</p></li>
|
564
|
+
<li class="listitem"><p>Syloti_Nagri</p></li>
|
565
|
+
<li class="listitem"><p>Syriac</p></li>
|
566
|
+
<li class="listitem"><p>Tagalog</p></li>
|
567
|
+
<li class="listitem"><p>Tagbanwa</p></li>
|
568
|
+
<li class="listitem"><p>Tai_Le</p></li>
|
569
|
+
<li class="listitem"><p>Tamil</p></li>
|
570
|
+
<li class="listitem"><p>Telugu</p></li>
|
571
|
+
<li class="listitem"><p>Thaana</p></li>
|
572
|
+
<li class="listitem"><p>Thai</p></li>
|
573
|
+
<li class="listitem"><p>Tibetan</p></li>
|
574
|
+
<li class="listitem"><p>Tifinagh</p></li>
|
575
|
+
<li class="listitem"><p>Ugaritic</p></li>
|
576
|
+
<li class="listitem"><p>Yi</p></li>
|
577
|
+
</ul></div>
|
578
|
+
<p>
|
579
|
+
Each character has exactly one general category property, specified by a
|
580
|
+
two-letter abbreviation. For compatibility with Perl, negation can be specified
|
581
|
+
by including a circumflex between the opening brace and the property name. For
|
582
|
+
example, \p{^Lu} is the same as \P{Lu}.
|
583
|
+
</p>
|
584
|
+
<p>
|
585
|
+
If only one letter is specified with \p or \P, it includes all the general
|
586
|
+
category properties that start with that letter. In this case, in the absence
|
587
|
+
of negation, the curly brackets in the escape sequence are optional; these two
|
588
|
+
examples have the same effect:
|
589
|
+
</p>
|
590
|
+
<pre class="programlisting">
|
591
|
+
\p{L}
|
592
|
+
\pL
|
593
|
+
</pre>
|
594
|
+
<p>
|
595
|
+
The following general category property codes are supported:
|
596
|
+
</p>
|
597
|
+
<div class="table">
|
598
|
+
<a name="id555812"></a><p class="title"><b>Table 7. Property codes</b></p>
|
599
|
+
<div class="table-contents"><table summary="Property codes" border="1">
|
600
|
+
<colgroup>
|
601
|
+
<col align="center">
|
602
|
+
<col>
|
603
|
+
</colgroup>
|
604
|
+
<thead><tr>
|
605
|
+
<th align="center">Code</th>
|
606
|
+
<th>Meaning</th>
|
607
|
+
</tr></thead>
|
608
|
+
<tbody>
|
609
|
+
<tr>
|
610
|
+
<td align="center">C</td>
|
611
|
+
<td>Other</td>
|
612
|
+
</tr>
|
613
|
+
<tr>
|
614
|
+
<td align="center">Cc</td>
|
615
|
+
<td>Control</td>
|
616
|
+
</tr>
|
617
|
+
<tr>
|
618
|
+
<td align="center">Cf</td>
|
619
|
+
<td>Format</td>
|
620
|
+
</tr>
|
621
|
+
<tr>
|
622
|
+
<td align="center">Cn</td>
|
623
|
+
<td>Unassigned</td>
|
624
|
+
</tr>
|
625
|
+
<tr>
|
626
|
+
<td align="center">Co</td>
|
627
|
+
<td>Private use</td>
|
628
|
+
</tr>
|
629
|
+
<tr>
|
630
|
+
<td align="center">Cs</td>
|
631
|
+
<td>Surrogate</td>
|
632
|
+
</tr>
|
633
|
+
<tr>
|
634
|
+
<td align="center">L</td>
|
635
|
+
<td>Letter</td>
|
636
|
+
</tr>
|
637
|
+
<tr>
|
638
|
+
<td align="center">Ll</td>
|
639
|
+
<td>Lower case letter</td>
|
640
|
+
</tr>
|
641
|
+
<tr>
|
642
|
+
<td align="center">Lm</td>
|
643
|
+
<td>Modifier letter</td>
|
644
|
+
</tr>
|
645
|
+
<tr>
|
646
|
+
<td align="center">Lo</td>
|
647
|
+
<td>Other letter</td>
|
648
|
+
</tr>
|
649
|
+
<tr>
|
650
|
+
<td align="center">Lt</td>
|
651
|
+
<td>Title case letter</td>
|
652
|
+
</tr>
|
653
|
+
<tr>
|
654
|
+
<td align="center">Lu</td>
|
655
|
+
<td>Upper case letter</td>
|
656
|
+
</tr>
|
657
|
+
<tr>
|
658
|
+
<td align="center">M</td>
|
659
|
+
<td>Mark</td>
|
660
|
+
</tr>
|
661
|
+
<tr>
|
662
|
+
<td align="center">Mc</td>
|
663
|
+
<td>Spacing mark</td>
|
664
|
+
</tr>
|
665
|
+
<tr>
|
666
|
+
<td align="center">Me</td>
|
667
|
+
<td>Enclosing mark</td>
|
668
|
+
</tr>
|
669
|
+
<tr>
|
670
|
+
<td align="center">Mn</td>
|
671
|
+
<td>Non-spacing mark</td>
|
672
|
+
</tr>
|
673
|
+
<tr>
|
674
|
+
<td align="center">N</td>
|
675
|
+
<td>Number</td>
|
676
|
+
</tr>
|
677
|
+
<tr>
|
678
|
+
<td align="center">Nd</td>
|
679
|
+
<td>Decimal number</td>
|
680
|
+
</tr>
|
681
|
+
<tr>
|
682
|
+
<td align="center">Nl</td>
|
683
|
+
<td>Letter number</td>
|
684
|
+
</tr>
|
685
|
+
<tr>
|
686
|
+
<td align="center">No</td>
|
687
|
+
<td>Other number</td>
|
688
|
+
</tr>
|
689
|
+
<tr>
|
690
|
+
<td align="center">P</td>
|
691
|
+
<td>Punctuation</td>
|
692
|
+
</tr>
|
693
|
+
<tr>
|
694
|
+
<td align="center">Pc</td>
|
695
|
+
<td>Connector punctuation</td>
|
696
|
+
</tr>
|
697
|
+
<tr>
|
698
|
+
<td align="center">Pd</td>
|
699
|
+
<td>Dash punctuation</td>
|
700
|
+
</tr>
|
701
|
+
<tr>
|
702
|
+
<td align="center">Pe</td>
|
703
|
+
<td>Close punctuation</td>
|
704
|
+
</tr>
|
705
|
+
<tr>
|
706
|
+
<td align="center">Pf</td>
|
707
|
+
<td>Final punctuation</td>
|
708
|
+
</tr>
|
709
|
+
<tr>
|
710
|
+
<td align="center">Pi</td>
|
711
|
+
<td>Initial punctuation</td>
|
712
|
+
</tr>
|
713
|
+
<tr>
|
714
|
+
<td align="center">Po</td>
|
715
|
+
<td>Other punctuation</td>
|
716
|
+
</tr>
|
717
|
+
<tr>
|
718
|
+
<td align="center">Ps</td>
|
719
|
+
<td>Open punctuation</td>
|
720
|
+
</tr>
|
721
|
+
<tr>
|
722
|
+
<td align="center">S</td>
|
723
|
+
<td>Symbol</td>
|
724
|
+
</tr>
|
725
|
+
<tr>
|
726
|
+
<td align="center">Sc</td>
|
727
|
+
<td>Currency symbol</td>
|
728
|
+
</tr>
|
729
|
+
<tr>
|
730
|
+
<td align="center">Sk</td>
|
731
|
+
<td>Modifier symbol</td>
|
732
|
+
</tr>
|
733
|
+
<tr>
|
734
|
+
<td align="center">Sm</td>
|
735
|
+
<td>Mathematical symbol</td>
|
736
|
+
</tr>
|
737
|
+
<tr>
|
738
|
+
<td align="center">So</td>
|
739
|
+
<td>Other symbol</td>
|
740
|
+
</tr>
|
741
|
+
<tr>
|
742
|
+
<td align="center">Z</td>
|
743
|
+
<td>Separator</td>
|
744
|
+
</tr>
|
745
|
+
<tr>
|
746
|
+
<td align="center">Zl</td>
|
747
|
+
<td>Line separator</td>
|
748
|
+
</tr>
|
749
|
+
<tr>
|
750
|
+
<td align="center">Zp</td>
|
751
|
+
<td>Paragraph separator</td>
|
752
|
+
</tr>
|
753
|
+
<tr>
|
754
|
+
<td align="center">Zs</td>
|
755
|
+
<td>Space separator</td>
|
756
|
+
</tr>
|
757
|
+
</tbody>
|
758
|
+
</table></div>
|
759
|
+
</div>
|
760
|
+
<br class="table-break"><p>
|
761
|
+
The special property L& is also supported: it matches a character that has
|
762
|
+
the Lu, Ll, or Lt property, in other words, a letter that is not classified as
|
763
|
+
a modifier or "other".
|
764
|
+
</p>
|
765
|
+
<p>
|
766
|
+
The long synonyms for these properties that Perl supports (such as \p{Letter})
|
767
|
+
are not supported by GRegex, nor is it permitted to prefix any of these
|
768
|
+
properties with "Is".
|
769
|
+
</p>
|
770
|
+
<p>
|
771
|
+
No character that is in the Unicode table has the Cn (unassigned) property.
|
772
|
+
Instead, this property is assumed for any code point that is not in the
|
773
|
+
Unicode table.
|
774
|
+
</p>
|
775
|
+
<p>
|
776
|
+
Specifying caseless matching does not affect these escape sequences.
|
777
|
+
For example, \p{Lu} always matches only upper case letters.
|
778
|
+
</p>
|
779
|
+
<p>
|
780
|
+
The \X escape matches any number of Unicode characters that form an
|
781
|
+
extended Unicode sequence. \X is equivalent to
|
782
|
+
</p>
|
783
|
+
<pre class="programlisting">
|
784
|
+
(?>\PM\pM*)
|
785
|
+
</pre>
|
786
|
+
<p>
|
787
|
+
That is, it matches a character without the "mark" property, followed
|
788
|
+
by zero or more characters with the "mark" property, and treats the
|
789
|
+
sequence as an atomic group (see below). Characters with the "mark"
|
790
|
+
property are typically accents that affect the preceding character.
|
791
|
+
</p>
|
792
|
+
<p>
|
793
|
+
Matching characters by Unicode property is not fast, because GRegex has
|
794
|
+
to search a structure that contains data for over fifteen thousand
|
795
|
+
characters. That is why the traditional escape sequences such as \d and
|
796
|
+
\w do not use Unicode properties.
|
797
|
+
</p>
|
798
|
+
</div>
|
799
|
+
<hr>
|
800
|
+
<div class="refsect2" title="Simple assertions">
|
801
|
+
<a name="id556316"></a><h3>Simple assertions</h3>
|
802
|
+
<p>
|
803
|
+
The final use of backslash is for certain simple assertions. An
|
804
|
+
assertion specifies a condition that has to be met at a particular point in
|
805
|
+
a match, without consuming any characters from the string. The
|
806
|
+
use of subpatterns for more complicated assertions is described below.
|
807
|
+
The backslashed assertions are:
|
808
|
+
</p>
|
809
|
+
<div class="table">
|
810
|
+
<a name="id556328"></a><p class="title"><b>Table 8. Simple assertions</b></p>
|
811
|
+
<div class="table-contents"><table summary="Simple assertions" border="1">
|
812
|
+
<colgroup>
|
813
|
+
<col align="center">
|
814
|
+
<col>
|
815
|
+
</colgroup>
|
816
|
+
<thead><tr>
|
817
|
+
<th align="center">Escape</th>
|
818
|
+
<th>Meaning</th>
|
819
|
+
</tr></thead>
|
820
|
+
<tbody>
|
821
|
+
<tr>
|
822
|
+
<td align="center">\b</td>
|
823
|
+
<td>matches at a word boundary</td>
|
824
|
+
</tr>
|
825
|
+
<tr>
|
826
|
+
<td align="center">\B</td>
|
827
|
+
<td>matches when not at a word boundary</td>
|
828
|
+
</tr>
|
829
|
+
<tr>
|
830
|
+
<td align="center">\A</td>
|
831
|
+
<td>matches at the start of the string</td>
|
832
|
+
</tr>
|
833
|
+
<tr>
|
834
|
+
<td align="center">\Z</td>
|
835
|
+
<td>matches at the end of the string or before a newline at the end of the string</td>
|
836
|
+
</tr>
|
837
|
+
<tr>
|
838
|
+
<td align="center">\z</td>
|
839
|
+
<td>matches only at the end of the string</td>
|
840
|
+
</tr>
|
841
|
+
<tr>
|
842
|
+
<td align="center">\G</td>
|
843
|
+
<td>matches at first matching position in the string</td>
|
844
|
+
</tr>
|
845
|
+
</tbody>
|
846
|
+
</table></div>
|
847
|
+
</div>
|
848
|
+
<br class="table-break"><p>
|
849
|
+
These assertions may not appear in character classes (but note that \b
|
850
|
+
has a different meaning, namely the backspace character, inside a
|
851
|
+
character class).
|
852
|
+
</p>
|
853
|
+
<p>
|
854
|
+
A word boundary is a position in the string where the current
|
855
|
+
character and the previous character do not both match \w or \W (i.e.
|
856
|
+
one matches \w and the other matches \W), or the start or end of the
|
857
|
+
string if the first or last character matches \w, respectively.
|
858
|
+
</p>
|
859
|
+
<p>
|
860
|
+
The \A, \Z, and \z assertions differ from the traditional circumflex
|
861
|
+
and dollar (described in the next section) in that they only ever match
|
862
|
+
at the very start and end of the string, whatever options are
|
863
|
+
set. Thus, they are independent of multiline mode. These three assertions
|
864
|
+
are not affected by the <code class="varname">G_REGEX_MATCH_NOTBOL</code> or <code class="varname">G_REGEX_MATCH_NOTEOL</code> options,
|
865
|
+
which affect only the behaviour of the circumflex and dollar metacharacters.
|
866
|
+
However, if the start_position argument of a matching function is non-zero,
|
867
|
+
indicating that matching is to start at a point other than the beginning of
|
868
|
+
the string, \A can never match. The difference between \Z and \z is
|
869
|
+
that \Z matches before a newline at the end of the string as well as at the
|
870
|
+
very end, whereas \z matches only at the end.
|
871
|
+
</p>
|
872
|
+
<p>
|
873
|
+
The \G assertion is true only when the current matching position is at
|
874
|
+
the start point of the match, as specified by the start_position argument
|
875
|
+
to the matching functions. It differs from \A when the value of start_position is
|
876
|
+
non-zero.
|
877
|
+
</p>
|
878
|
+
<p>
|
879
|
+
Note, however, that the interpretation of \G, as the start of the
|
880
|
+
current match, is subtly different from Perl's, which defines it as the
|
881
|
+
end of the previous match. In Perl, these can be different when the
|
882
|
+
previously matched string was empty.
|
883
|
+
</p>
|
884
|
+
<p>
|
885
|
+
If all the alternatives of a pattern begin with \G, the expression is
|
886
|
+
anchored to the starting match position, and the "anchored" flag is set
|
887
|
+
in the compiled regular expression.
|
888
|
+
</p>
|
889
|
+
</div>
|
890
|
+
</div>
|
891
|
+
<div class="refsect1" title="Circumflex and dollar">
|
892
|
+
<a name="id550608"></a><h2>Circumflex and dollar</h2>
|
893
|
+
<p>
|
894
|
+
Outside a character class, in the default matching mode, the circumflex
|
895
|
+
character is an assertion that is true only if the current matching
|
896
|
+
point is at the start of the string. If the start_position argument to
|
897
|
+
the matching functions is non-zero, circumflex can never match if the
|
898
|
+
<code class="varname">G_REGEX_MULTILINE</code> option is unset. Inside a character class, circumflex
|
899
|
+
has an entirely different meaning (see below).
|
900
|
+
</p>
|
901
|
+
<p>
|
902
|
+
Circumflex need not be the first character of the pattern if a number
|
903
|
+
of alternatives are involved, but it should be the first thing in each
|
904
|
+
alternative in which it appears if the pattern is ever to match that
|
905
|
+
branch. If all possible alternatives start with a circumflex, that is,
|
906
|
+
if the pattern is constrained to match only at the start of the string,
|
907
|
+
it is said to be an "anchored" pattern. (There are also other
|
908
|
+
constructs that can cause a pattern to be anchored.)
|
909
|
+
</p>
|
910
|
+
<p>
|
911
|
+
A dollar character is an assertion that is true only if the current
|
912
|
+
matching point is at the end of the string, or immediately
|
913
|
+
before a newline at the end of the string (by default). Dollar need not
|
914
|
+
be the last character of the pattern if a number of alternatives are
|
915
|
+
involved, but it should be the last item in any branch in which it
|
916
|
+
appears. Dollar has no special meaning in a character class.
|
917
|
+
</p>
|
918
|
+
<p>
|
919
|
+
The meaning of dollar can be changed so that it matches only at the
|
920
|
+
very end of the string, by setting the <code class="varname">G_REGEX_DOLLAR_ENDONLY</code> option at
|
921
|
+
compile time. This does not affect the \Z assertion.
|
922
|
+
</p>
|
923
|
+
<p>
|
924
|
+
The meanings of the circumflex and dollar characters are changed if the
|
925
|
+
<code class="varname">G_REGEX_MULTILINE</code> option is set. When this is the case,
|
926
|
+
a circumflex matches immediately after internal newlines as well as at the
|
927
|
+
start of the string. It does not match after a newline that ends the string.
|
928
|
+
A dollar matches before any newlines in the string, as well as at the very
|
929
|
+
end, when <code class="varname">G_REGEX_MULTILINE</code> is set. When newline is
|
930
|
+
specified as the two-character sequence CRLF, isolated CR and LF characters
|
931
|
+
do not indicate newlines.
|
932
|
+
</p>
|
933
|
+
<p>
|
934
|
+
For example, the pattern /^abc$/ matches the string "def\nabc" (where
|
935
|
+
\n represents a newline) in multiline mode, but not otherwise. Consequently,
|
936
|
+
patterns that are anchored in single line mode because all branches start with
|
937
|
+
^ are not anchored in multiline mode, and a match for circumflex is possible
|
938
|
+
when the <code class="varname">start_position</code> argument of a matching function
|
939
|
+
is non-zero. The <code class="varname">G_REGEX_DOLLAR_ENDONLY</code> option is ignored
|
940
|
+
if <code class="varname">G_REGEX_MULTILINE</code> is set.
|
941
|
+
</p>
|
942
|
+
<p>
|
943
|
+
Note that the sequences \A, \Z, and \z can be used to match the start and
|
944
|
+
end of the string in both modes, and if all branches of a pattern start with
|
945
|
+
\A it is always anchored, whether or not <code class="varname">G_REGEX_MULTILINE</code>
|
946
|
+
is set.
|
947
|
+
</p>
|
948
|
+
</div>
|
949
|
+
<div class="refsect1" title="Full stop (period, dot)">
|
950
|
+
<a name="id550689"></a><h2>Full stop (period, dot)</h2>
|
951
|
+
<p>
|
952
|
+
Outside a character class, a dot in the pattern matches any one character
|
953
|
+
in the string, including a non-printing character, but not (by
|
954
|
+
default) newline. In UTF-8 a character might be more than one byte long.
|
955
|
+
</p>
|
956
|
+
<p>
|
957
|
+
When a line ending is defined as a single character, dot never matches that
|
958
|
+
character; when the two-character sequence CRLF is used, dot does not match CR
|
959
|
+
if it is immediately followed by LF, but otherwise it matches all characters
|
960
|
+
(including isolated CRs and LFs). When any Unicode line endings are being
|
961
|
+
recognized, dot does not match CR or LF or any of the other line ending
|
962
|
+
characters.
|
963
|
+
</p>
|
964
|
+
<p>
|
965
|
+
If the <code class="varname">G_REGEX_DOTALL</code> flag is set, dots match newlines
|
966
|
+
as well. The handling of dot is entirely independent of the handling of circumflex
|
967
|
+
and dollar, the only relationship being that they both involve newline
|
968
|
+
characters. Dot has no special meaning in a character class.
|
969
|
+
</p>
|
970
|
+
<p>
|
971
|
+
The behaviour of dot with regard to newlines can be changed. If the
|
972
|
+
<code class="varname">G_REGEX_DOTALL</code> option is set, a dot matches any one
|
973
|
+
character, without exception. If newline is defined as the two-character
|
974
|
+
sequence CRLF, it takes two dots to match it.
|
975
|
+
</p>
|
976
|
+
<p>
|
977
|
+
The handling of dot is entirely independent of the handling of circumflex and
|
978
|
+
dollar, the only relationship being that they both involve newlines. Dot has no
|
979
|
+
special meaning in a character class.
|
980
|
+
</p>
|
981
|
+
</div>
|
982
|
+
<div class="refsect1" title="Matching a single byte">
|
983
|
+
<a name="id550732"></a><h2>Matching a single byte</h2>
|
984
|
+
<p>
|
985
|
+
Outside a character class, the escape sequence \C matches any one byte,
|
986
|
+
both in and out of UTF-8 mode. Unlike a dot, it always matches any line
|
987
|
+
ending characters.
|
988
|
+
The feature is provided in Perl in order to match individual bytes in
|
989
|
+
UTF-8 mode. Because it breaks up UTF-8 characters into individual
|
990
|
+
bytes, what remains in the string may be a malformed UTF-8 string. For
|
991
|
+
this reason, the \C escape sequence is best avoided.
|
992
|
+
</p>
|
993
|
+
<p>
|
994
|
+
GRegex does not allow \C to appear in lookbehind assertions (described
|
995
|
+
below), because in UTF-8 mode this would make it impossible to calculate
|
996
|
+
the length of the lookbehind.
|
997
|
+
</p>
|
998
|
+
</div>
|
999
|
+
<div class="refsect1" title="Square brackets and character classes">
|
1000
|
+
<a name="id550752"></a><h2>Square brackets and character classes</h2>
|
1001
|
+
<p>
|
1002
|
+
An opening square bracket introduces a character class, terminated by a
|
1003
|
+
closing square bracket. A closing square bracket on its own is not special. If a closing square bracket is required as a member of the class,
|
1004
|
+
it should be the first data character in the class (after an initial
|
1005
|
+
circumflex, if present) or escaped with a backslash.
|
1006
|
+
</p>
|
1007
|
+
<p>
|
1008
|
+
A character class matches a single character in the string. A matched character
|
1009
|
+
must be in the set of characters defined by the class, unless the first
|
1010
|
+
character in the class definition is a circumflex, in which case the
|
1011
|
+
string character must not be in the set defined by the class. If a
|
1012
|
+
circumflex is actually required as a member of the class, ensure it is
|
1013
|
+
not the first character, or escape it with a backslash.
|
1014
|
+
</p>
|
1015
|
+
<p>
|
1016
|
+
For example, the character class [aeiou] matches any lower case vowel,
|
1017
|
+
while [^aeiou] matches any character that is not a lower case vowel.
|
1018
|
+
Note that a circumflex is just a convenient notation for specifying the
|
1019
|
+
characters that are in the class by enumerating those that are not. A
|
1020
|
+
class that starts with a circumflex is not an assertion: it still consumes
|
1021
|
+
a character from the string, and therefore it fails if the current pointer
|
1022
|
+
is at the end of the string.
|
1023
|
+
</p>
|
1024
|
+
<p>
|
1025
|
+
In UTF-8 mode, characters with values greater than 255 can be included
|
1026
|
+
in a class as a literal string of bytes, or by using the \x{ escaping
|
1027
|
+
mechanism.
|
1028
|
+
</p>
|
1029
|
+
<p>
|
1030
|
+
When caseless matching is set, any letters in a class represent both
|
1031
|
+
their upper case and lower case versions, so for example, a caseless
|
1032
|
+
[aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
|
1033
|
+
match "A", whereas a caseful version would.
|
1034
|
+
</p>
|
1035
|
+
<p>
|
1036
|
+
Characters that might indicate line breaks are never treated
|
1037
|
+
in any special way when matching character classes, whatever line-ending
|
1038
|
+
sequence is in use, and whatever setting of the <code class="varname">G_REGEX_DOTALL</code>
|
1039
|
+
and <code class="varname">G_REGEX_MULTILINE</code> options is used. A class such as [^a]
|
1040
|
+
always matches one of these characters.
|
1041
|
+
</p>
|
1042
|
+
<p>
|
1043
|
+
The minus (hyphen) character can be used to specify a range of characters in
|
1044
|
+
a character class. For example, [d-m] matches any letter
|
1045
|
+
between d and m, inclusive. If a minus character is required in a
|
1046
|
+
class, it must be escaped with a backslash or appear in a position
|
1047
|
+
where it cannot be interpreted as indicating a range, typically as the
|
1048
|
+
first or last character in the class.
|
1049
|
+
</p>
|
1050
|
+
<p>
|
1051
|
+
It is not possible to have the literal character "]" as the end character
|
1052
|
+
of a range. A pattern such as [W-]46] is interpreted as a class of
|
1053
|
+
two characters ("W" and "-") followed by a literal string "46]", so it
|
1054
|
+
would match "W46]" or "-46]". However, if the "]" is escaped with a
|
1055
|
+
backslash it is interpreted as the end of range, so [W-\]46] is interpreted
|
1056
|
+
as a class containing a range followed by two other characters.
|
1057
|
+
The octal or hexadecimal representation of "]" can also be used to end
|
1058
|
+
a range.
|
1059
|
+
</p>
|
1060
|
+
<p>
|
1061
|
+
Ranges operate in the collating sequence of character values. They can
|
1062
|
+
also be used for characters specified numerically, for example
|
1063
|
+
[\000-\037]. In UTF-8 mode, ranges can include characters whose values
|
1064
|
+
are greater than 255, for example [\x{100}-\x{2ff}].
|
1065
|
+
</p>
|
1066
|
+
<p>
|
1067
|
+
The character types \d, \D, \p, \P, \s, \S, \w, and \W may also appear
|
1068
|
+
in a character class, and add the characters that they match to the
|
1069
|
+
class. For example, [\dABCDEF] matches any hexadecimal digit. A
|
1070
|
+
circumflex can conveniently be used with the upper case character types to
|
1071
|
+
specify a more restricted set of characters than the matching lower
|
1072
|
+
case type. For example, the class [^\W_] matches any letter or digit,
|
1073
|
+
but not underscore.
|
1074
|
+
</p>
|
1075
|
+
<p>
|
1076
|
+
The only metacharacters that are recognized in character classes are
|
1077
|
+
backslash, hyphen (only where it can be interpreted as specifying a
|
1078
|
+
range), circumflex (only at the start), opening square bracket (only
|
1079
|
+
when it can be interpreted as introducing a POSIX class name - see the
|
1080
|
+
next section), and the terminating closing square bracket. However,
|
1081
|
+
escaping other non-alphanumeric characters does no harm.
|
1082
|
+
</p>
|
1083
|
+
</div>
|
1084
|
+
<div class="refsect1" title="Posix character classes">
|
1085
|
+
<a name="id550838"></a><h2>Posix character classes</h2>
|
1086
|
+
<p>
|
1087
|
+
GRegex supports the POSIX notation for character classes. This uses names
|
1088
|
+
enclosed by [: and :] within the enclosing square brackets. For example,
|
1089
|
+
</p>
|
1090
|
+
<pre class="programlisting">
|
1091
|
+
[01[:alpha:]%]
|
1092
|
+
</pre>
|
1093
|
+
<p>
|
1094
|
+
matches "0", "1", any alphabetic character, or "%". The supported class
|
1095
|
+
names are
|
1096
|
+
</p>
|
1097
|
+
<div class="table">
|
1098
|
+
<a name="id550858"></a><p class="title"><b>Table 9. Posix classes</b></p>
|
1099
|
+
<div class="table-contents"><table summary="Posix classes" border="1">
|
1100
|
+
<colgroup>
|
1101
|
+
<col align="center">
|
1102
|
+
<col>
|
1103
|
+
</colgroup>
|
1104
|
+
<thead><tr>
|
1105
|
+
<th align="center">Name</th>
|
1106
|
+
<th>Meaning</th>
|
1107
|
+
</tr></thead>
|
1108
|
+
<tbody>
|
1109
|
+
<tr>
|
1110
|
+
<td align="center">alnum</td>
|
1111
|
+
<td>letters and digits</td>
|
1112
|
+
</tr>
|
1113
|
+
<tr>
|
1114
|
+
<td align="center">alpha</td>
|
1115
|
+
<td>letters</td>
|
1116
|
+
</tr>
|
1117
|
+
<tr>
|
1118
|
+
<td align="center">ascii</td>
|
1119
|
+
<td>character codes 0 - 127</td>
|
1120
|
+
</tr>
|
1121
|
+
<tr>
|
1122
|
+
<td align="center">blank</td>
|
1123
|
+
<td>space or tab only</td>
|
1124
|
+
</tr>
|
1125
|
+
<tr>
|
1126
|
+
<td align="center">cntrl</td>
|
1127
|
+
<td>control characters</td>
|
1128
|
+
</tr>
|
1129
|
+
<tr>
|
1130
|
+
<td align="center">digit</td>
|
1131
|
+
<td>decimal digits (same as \d)</td>
|
1132
|
+
</tr>
|
1133
|
+
<tr>
|
1134
|
+
<td align="center">graph</td>
|
1135
|
+
<td>printing characters, excluding space</td>
|
1136
|
+
</tr>
|
1137
|
+
<tr>
|
1138
|
+
<td align="center">lower</td>
|
1139
|
+
<td>lower case letters</td>
|
1140
|
+
</tr>
|
1141
|
+
<tr>
|
1142
|
+
<td align="center">print</td>
|
1143
|
+
<td>printing characters, including space</td>
|
1144
|
+
</tr>
|
1145
|
+
<tr>
|
1146
|
+
<td align="center">punct</td>
|
1147
|
+
<td>printing characters, excluding letters and digits</td>
|
1148
|
+
</tr>
|
1149
|
+
<tr>
|
1150
|
+
<td align="center">space</td>
|
1151
|
+
<td>white space (not quite the same as \s)</td>
|
1152
|
+
</tr>
|
1153
|
+
<tr>
|
1154
|
+
<td align="center">upper</td>
|
1155
|
+
<td>upper case letters</td>
|
1156
|
+
</tr>
|
1157
|
+
<tr>
|
1158
|
+
<td align="center">word</td>
|
1159
|
+
<td>"word" characters (same as \w)</td>
|
1160
|
+
</tr>
|
1161
|
+
<tr>
|
1162
|
+
<td align="center">xdigit</td>
|
1163
|
+
<td>hexadecimal digits</td>
|
1164
|
+
</tr>
|
1165
|
+
</tbody>
|
1166
|
+
</table></div>
|
1167
|
+
</div>
|
1168
|
+
<br class="table-break"><p>
|
1169
|
+
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13),
|
1170
|
+
and space (32). Notice that this list includes the VT character (code
|
1171
|
+
11). This makes "space" different to \s, which does not include VT (for
|
1172
|
+
Perl compatibility).
|
1173
|
+
</p>
|
1174
|
+
<p>
|
1175
|
+
The name "word" is a Perl extension, and "blank" is a GNU extension.
|
1176
|
+
Another Perl extension is negation, which is indicated by a ^ character
|
1177
|
+
after the colon. For example,
|
1178
|
+
</p>
|
1179
|
+
<pre class="programlisting">
|
1180
|
+
[12[:^digit:]]
|
1181
|
+
</pre>
|
1182
|
+
<p>
|
1183
|
+
matches "1", "2", or any non-digit. GRegex also recognizes the
|
1184
|
+
POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but
|
1185
|
+
these are not supported, and an error is given if they are encountered.
|
1186
|
+
</p>
|
1187
|
+
<p>
|
1188
|
+
In UTF-8 mode, characters with values greater than 128 do not match any
|
1189
|
+
of the POSIX character classes.
|
1190
|
+
</p>
|
1191
|
+
</div>
|
1192
|
+
<div class="refsect1" title="Vertical bar">
|
1193
|
+
<a name="id551089"></a><h2>Vertical bar</h2>
|
1194
|
+
<p>
|
1195
|
+
Vertical bar characters are used to separate alternative patterns. For
|
1196
|
+
example, the pattern
|
1197
|
+
</p>
|
1198
|
+
<pre class="programlisting">
|
1199
|
+
gilbert|sullivan
|
1200
|
+
</pre>
|
1201
|
+
<p>
|
1202
|
+
matches either "gilbert" or "sullivan". Any number of alternatives may
|
1203
|
+
appear, and an empty alternative is permitted (matching the empty
|
1204
|
+
string). The matching process tries each alternative in turn, from
|
1205
|
+
left to right, and the first one that succeeds is used. If the alternatives are within a subpattern (defined below), "succeeds" means matching the rest of the main pattern as well as the alternative in the subpattern.
|
1206
|
+
</p>
|
1207
|
+
</div>
|
1208
|
+
<div class="refsect1" title="Internal option setting">
|
1209
|
+
<a name="id551114"></a><h2>Internal option setting</h2>
|
1210
|
+
<p>
|
1211
|
+
The settings of the <code class="varname">G_REGEX_CASELESS</code>, <code class="varname">G_REGEX_MULTILINE</code>, <code class="varname">G_REGEX_DOTALL</code>,
|
1212
|
+
and <code class="varname">G_REGEX_EXTENDED</code> options can be changed from within the pattern by a
|
1213
|
+
sequence of Perl-style option letters enclosed between "(?" and ")". The
|
1214
|
+
option letters are
|
1215
|
+
</p>
|
1216
|
+
<div class="table">
|
1217
|
+
<a name="id551140"></a><p class="title"><b>Table 10. Option settings</b></p>
|
1218
|
+
<div class="table-contents"><table summary="Option settings" border="1">
|
1219
|
+
<colgroup>
|
1220
|
+
<col align="center">
|
1221
|
+
<col>
|
1222
|
+
</colgroup>
|
1223
|
+
<thead><tr>
|
1224
|
+
<th align="center">Option</th>
|
1225
|
+
<th>Flag</th>
|
1226
|
+
</tr></thead>
|
1227
|
+
<tbody>
|
1228
|
+
<tr>
|
1229
|
+
<td align="center">i</td>
|
1230
|
+
<td><code class="varname">G_REGEX_CASELESS</code></td>
|
1231
|
+
</tr>
|
1232
|
+
<tr>
|
1233
|
+
<td align="center">m</td>
|
1234
|
+
<td><code class="varname">G_REGEX_MULTILINE</code></td>
|
1235
|
+
</tr>
|
1236
|
+
<tr>
|
1237
|
+
<td align="center">s</td>
|
1238
|
+
<td><code class="varname">G_REGEX_DOTALL</code></td>
|
1239
|
+
</tr>
|
1240
|
+
<tr>
|
1241
|
+
<td align="center">x</td>
|
1242
|
+
<td><code class="varname">G_REGEX_EXTENDED</code></td>
|
1243
|
+
</tr>
|
1244
|
+
</tbody>
|
1245
|
+
</table></div>
|
1246
|
+
</div>
|
1247
|
+
<br class="table-break"><p>
|
1248
|
+
For example, (?im) sets caseless, multiline matching. It is also
|
1249
|
+
possible to unset these options by preceding the letter with a hyphen, and a
|
1250
|
+
combined setting and unsetting such as (?im-sx), which sets <code class="varname">G_REGEX_CASELESS</code>
|
1251
|
+
and <code class="varname">G_REGEX_MULTILINE</code> while unsetting <code class="varname">G_REGEX_DOTALL</code> and <code class="varname">G_REGEX_EXTENDED</code>,
|
1252
|
+
is also permitted. If a letter appears both before and after the
|
1253
|
+
hyphen, the option is unset.
|
1254
|
+
</p>
|
1255
|
+
<p>
|
1256
|
+
When an option change occurs at top level (that is, not inside subpattern
|
1257
|
+
parentheses), the change applies to the remainder of the pattern
|
1258
|
+
that follows.
|
1259
|
+
</p>
|
1260
|
+
<p>
|
1261
|
+
An option change within a subpattern (see below for a description of subpatterns)
|
1262
|
+
affects only that part of the current pattern that follows it, so
|
1263
|
+
</p>
|
1264
|
+
<pre class="programlisting">
|
1265
|
+
(a(?i)b)c
|
1266
|
+
</pre>
|
1267
|
+
<p>
|
1268
|
+
matches abc and aBc and no other strings (assuming <code class="varname">G_REGEX_CASELESS</code> is not
|
1269
|
+
used). By this means, options can be made to have different settings
|
1270
|
+
in different parts of the pattern. Any changes made in one alternative
|
1271
|
+
do carry on into subsequent branches within the same subpattern. For
|
1272
|
+
example,
|
1273
|
+
</p>
|
1274
|
+
<pre class="programlisting">
|
1275
|
+
(a(?i)b|c)
|
1276
|
+
</pre>
|
1277
|
+
<p>
|
1278
|
+
matches "ab", "aB", "c", and "C", even though when matching "C" the
|
1279
|
+
first branch is abandoned before the option setting. This is because
|
1280
|
+
the effects of option settings happen at compile time. There would be
|
1281
|
+
some very weird behaviour otherwise.
|
1282
|
+
</p>
|
1283
|
+
<p>
|
1284
|
+
The options <code class="varname">G_REGEX_UNGREEDY</code>,
|
1285
|
+
<code class="varname">G_REGEX_EXTRA</code> and <code class="varname">G_REGEX_DUPNAMES</code>
|
1286
|
+
can be changed in the same way as the Perl-compatible options by using
|
1287
|
+
the characters U, X and J respectively.
|
1288
|
+
</p>
|
1289
|
+
</div>
|
1290
|
+
<div class="refsect1" title="Subpatterns">
|
1291
|
+
<a name="id551309"></a><h2>Subpatterns</h2>
|
1292
|
+
<p>
|
1293
|
+
Subpatterns are delimited by parentheses (round brackets), which can be
|
1294
|
+
nested. Turning part of a pattern into a subpattern does two things:
|
1295
|
+
</p>
|
1296
|
+
<div class="itemizedlist"><ul class="itemizedlist" type="disc">
|
1297
|
+
<li class="listitem"><p>
|
1298
|
+
It localizes a set of alternatives. For example, the pattern
|
1299
|
+
cat(aract|erpillar|) matches one of the words "cat", "cataract", or
|
1300
|
+
"caterpillar". Without the parentheses, it would match "cataract",
|
1301
|
+
"erpillar" or an empty string.
|
1302
|
+
</p></li>
|
1303
|
+
<li class="listitem"><p>
|
1304
|
+
It sets up the subpattern as a capturing subpattern. This means
|
1305
|
+
that, when the whole pattern matches, that portion of the
|
1306
|
+
string that matched the subpattern can be obtained using <code class="function">g_regex_fetch()</code>.
|
1307
|
+
Opening parentheses are counted from left to right (starting from 1, as
|
1308
|
+
subpattern 0 is the whole matched string) to obtain numbers for the
|
1309
|
+
capturing subpatterns.
|
1310
|
+
</p></li>
|
1311
|
+
</ul></div>
|
1312
|
+
<p>
|
1313
|
+
For example, if the string "the red king" is matched against the pattern
|
1314
|
+
</p>
|
1315
|
+
<pre class="programlisting">
|
1316
|
+
the ((red|white) (king|queen))
|
1317
|
+
</pre>
|
1318
|
+
<p>
|
1319
|
+
the captured substrings are "red king", "red", and "king", and are numbered 1, 2, and 3, respectively.
|
1320
|
+
</p>
|
1321
|
+
<p>
|
1322
|
+
The fact that plain parentheses fulfil two functions is not always
|
1323
|
+
helpful. There are often times when a grouping subpattern is required
|
1324
|
+
without a capturing requirement. If an opening parenthesis is followed
|
1325
|
+
by a question mark and a colon, the subpattern does not do any capturing,
|
1326
|
+
and is not counted when computing the number of any subsequent
|
1327
|
+
capturing subpatterns. For example, if the string "the white queen" is
|
1328
|
+
matched against the pattern
|
1329
|
+
</p>
|
1330
|
+
<pre class="programlisting">
|
1331
|
+
the ((?:red|white) (king|queen))
|
1332
|
+
</pre>
|
1333
|
+
<p>
|
1334
|
+
the captured substrings are "white queen" and "queen", and are numbered
|
1335
|
+
1 and 2. The maximum number of capturing subpatterns is 65535.
|
1336
|
+
</p>
|
1337
|
+
<p>
|
1338
|
+
As a convenient shorthand, if any option settings are required at the
|
1339
|
+
start of a non-capturing subpattern, the option letters may appear
|
1340
|
+
between the "?" and the ":". Thus the two patterns
|
1341
|
+
</p>
|
1342
|
+
<pre class="programlisting">
|
1343
|
+
(?i:saturday|sunday)
|
1344
|
+
(?:(?i)saturday|sunday)
|
1345
|
+
</pre>
|
1346
|
+
<p>
|
1347
|
+
match exactly the same set of strings. Because alternative branches are
|
1348
|
+
tried from left to right, and options are not reset until the end of
|
1349
|
+
the subpattern is reached, an option setting in one branch does affect
|
1350
|
+
subsequent branches, so the above patterns match "SUNDAY" as well as
|
1351
|
+
"Saturday".
|
1352
|
+
</p>
|
1353
|
+
</div>
|
1354
|
+
<div class="refsect1" title="Named subpatterns">
|
1355
|
+
<a name="id551394"></a><h2>Named subpatterns</h2>
|
1356
|
+
<p>
|
1357
|
+
Identifying capturing parentheses by number is simple, but it can be
|
1358
|
+
very hard to keep track of the numbers in complicated regular expressions.
|
1359
|
+
Furthermore, if an expression is modified, the numbers may
|
1360
|
+
change. To help with this difficulty, GRegex supports the naming of
|
1361
|
+
subpatterns. A subpattern can be named in one of three ways: (?<name>...) or
|
1362
|
+
(?'name'...) as in Perl, or (?P<name>...) as in Python.
|
1363
|
+
References to capturing parentheses from other
|
1364
|
+
parts of the pattern, such as backreferences, recursion, and conditions,
|
1365
|
+
can be made by name as well as by number.
|
1366
|
+
</p>
|
1367
|
+
<p>
|
1368
|
+
Names consist of up to 32 alphanumeric characters and underscores. Named
|
1369
|
+
capturing parentheses are still allocated numbers as well as names, exactly as
|
1370
|
+
if the names were not present.
|
1371
|
+
By default, a name must be unique within a pattern, but it is possible to relax
|
1372
|
+
this constraint by setting the <code class="varname">G_REGEX_DUPNAMES</code> option at
|
1373
|
+
compile time. This can be useful for patterns where only one instance of the
|
1374
|
+
named parentheses can match. Suppose you want to match the name of a weekday,
|
1375
|
+
either as a 3-letter abbreviation or as the full name, and in both cases you
|
1376
|
+
want to extract the abbreviation. This pattern (ignoring the line breaks) does
|
1377
|
+
the job:
|
1378
|
+
</p>
|
1379
|
+
<pre class="programlisting">
|
1380
|
+
(?<DN>Mon|Fri|Sun)(?:day)?|
|
1381
|
+
(?<DN>Tue)(?:sday)?|
|
1382
|
+
(?<DN>Wed)(?:nesday)?|
|
1383
|
+
(?<DN>Thu)(?:rsday)?|
|
1384
|
+
(?<DN>Sat)(?:urday)?
|
1385
|
+
</pre>
|
1386
|
+
<p>
|
1387
|
+
There are five capturing substrings, but only one is ever set after a match.
|
1388
|
+
The function for extracting the data by name returns the substring
|
1389
|
+
for the first (and in this example, the only) subpattern of that name that
|
1390
|
+
matched. This saves searching to find which numbered subpattern it was. If you
|
1391
|
+
make a reference to a non-unique named subpattern from elsewhere in the
|
1392
|
+
pattern, the one that corresponds to the lowest number is used.
|
1393
|
+
</p>
|
1394
|
+
</div>
|
1395
|
+
<div class="refsect1" title="Repetition">
|
1396
|
+
<a name="id551437"></a><h2>Repetition</h2>
|
1397
|
+
<p>
|
1398
|
+
Repetition is specified by quantifiers, which can follow any of the
|
1399
|
+
following items:
|
1400
|
+
</p>
|
1401
|
+
<div class="itemizedlist"><ul class="itemizedlist" type="disc">
|
1402
|
+
<li class="listitem"><p>a literal data character</p></li>
|
1403
|
+
<li class="listitem"><p>the dot metacharacter</p></li>
|
1404
|
+
<li class="listitem"><p>the \C escape sequence</p></li>
|
1405
|
+
<li class="listitem"><p>the \X escape sequence (in UTF-8 mode)</p></li>
|
1406
|
+
<li class="listitem"><p>the \R escape sequence</p></li>
|
1407
|
+
<li class="listitem"><p>an escape such as \d that matches a single character</p></li>
|
1408
|
+
<li class="listitem"><p>a character class</p></li>
|
1409
|
+
<li class="listitem"><p>a back reference (see next section)</p></li>
|
1410
|
+
<li class="listitem"><p>a parenthesized subpattern (unless it is an assertion)</p></li>
|
1411
|
+
</ul></div>
|
1412
|
+
<p>
|
1413
|
+
The general repetition quantifier specifies a minimum and maximum number
|
1414
|
+
of permitted matches, by giving the two numbers in curly brackets
|
1415
|
+
(braces), separated by a comma. The numbers must be less than 65536,
|
1416
|
+
and the first must be less than or equal to the second. For example:
|
1417
|
+
</p>
|
1418
|
+
<pre class="programlisting">
|
1419
|
+
z{2,4}
|
1420
|
+
</pre>
|
1421
|
+
<p>
|
1422
|
+
matches "zz", "zzz", or "zzzz". A closing brace on its own is not a
|
1423
|
+
special character. If the second number is omitted, but the comma is
|
1424
|
+
present, there is no upper limit; if the second number and the comma
|
1425
|
+
are both omitted, the quantifier specifies an exact number of required
|
1426
|
+
matches. Thus
|
1427
|
+
</p>
|
1428
|
+
<pre class="programlisting">
|
1429
|
+
[aeiou]{3,}
|
1430
|
+
</pre>
|
1431
|
+
<p>
|
1432
|
+
matches at least 3 successive vowels, but may match many more, while
|
1433
|
+
</p>
|
1434
|
+
<pre class="programlisting">
|
1435
|
+
\d{8}
|
1436
|
+
</pre>
|
1437
|
+
<p>
|
1438
|
+
matches exactly 8 digits. An opening curly bracket that appears in a
|
1439
|
+
position where a quantifier is not allowed, or one that does not match
|
1440
|
+
the syntax of a quantifier, is taken as a literal character. For example,
|
1441
|
+
{,6} is not a quantifier, but a literal string of four characters.
|
1442
|
+
</p>
|
1443
|
+
<p>
|
1444
|
+
In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to
|
1445
|
+
individual bytes. Thus, for example, \x{100}{2} matches two UTF-8
|
1446
|
+
characters, each of which is represented by a two-byte sequence. Similarly,
|
1447
|
+
\X{3} matches three Unicode extended sequences, each of which may be
|
1448
|
+
several bytes long (and they may be of different lengths).
|
1449
|
+
</p>
|
1450
|
+
<p>
|
1451
|
+
The quantifier {0} is permitted, causing the expression to behave as if
|
1452
|
+
the previous item and the quantifier were not present.
|
1453
|
+
</p>
|
1454
|
+
<p>
|
1455
|
+
For convenience, the three most common quantifiers have single-character
|
1456
|
+
abbreviations:
|
1457
|
+
</p>
|
1458
|
+
<div class="table">
|
1459
|
+
<a name="id551549"></a><p class="title"><b>Table 11. Abbreviations for quantifiers</b></p>
|
1460
|
+
<div class="table-contents"><table summary="Abbreviations for quantifiers" border="1">
|
1461
|
+
<colgroup>
|
1462
|
+
<col align="center">
|
1463
|
+
<col>
|
1464
|
+
</colgroup>
|
1465
|
+
<thead><tr>
|
1466
|
+
<th align="center">Abbreviation</th>
|
1467
|
+
<th>Meaning</th>
|
1468
|
+
</tr></thead>
|
1469
|
+
<tbody>
|
1470
|
+
<tr>
|
1471
|
+
<td align="center">*</td>
|
1472
|
+
<td>is equivalent to {0,}</td>
|
1473
|
+
</tr>
|
1474
|
+
<tr>
|
1475
|
+
<td align="center">+</td>
|
1476
|
+
<td>is equivalent to {1,}</td>
|
1477
|
+
</tr>
|
1478
|
+
<tr>
|
1479
|
+
<td align="center">?</td>
|
1480
|
+
<td>is equivalent to {0,1}</td>
|
1481
|
+
</tr>
|
1482
|
+
</tbody>
|
1483
|
+
</table></div>
|
1484
|
+
</div>
|
1485
|
+
<br class="table-break"><p>
|
1486
|
+
It is possible to construct infinite loops by following a subpattern
|
1487
|
+
that can match no characters with a quantifier that has no upper limit,
|
1488
|
+
for example:
|
1489
|
+
</p>
|
1490
|
+
<pre class="programlisting">
|
1491
|
+
(a?)*
|
1492
|
+
</pre>
|
1493
|
+
<p>
|
1494
|
+
Because there are cases where this can be useful, such patterns are
|
1495
|
+
accepted, but if any repetition of the subpattern does in fact match
|
1496
|
+
no characters, the loop is forcibly broken.
|
1497
|
+
</p>
|
1498
|
+
<p>
|
1499
|
+
By default, the quantifiers are "greedy", that is, they match as much
|
1500
|
+
as possible (up to the maximum number of permitted times), without
|
1501
|
+
causing the rest of the pattern to fail. The classic example of where
|
1502
|
+
this gives problems is in trying to match comments in C programs. These
|
1503
|
+
appear between /* and */ and within the comment, individual * and /
|
1504
|
+
characters may appear. An attempt to match C comments by applying the
|
1505
|
+
pattern
|
1506
|
+
</p>
|
1507
|
+
<pre class="programlisting">
|
1508
|
+
/\*.*\*/
|
1509
|
+
</pre>
|
1510
|
+
<p>
|
1511
|
+
to the string
|
1512
|
+
</p>
|
1513
|
+
<pre class="programlisting">
|
1514
|
+
/* first comment */ not comment /* second comment */
|
1515
|
+
</pre>
|
1516
|
+
<p>
|
1517
|
+
fails, because it matches the entire string owing to the greediness of
|
1518
|
+
the .* item.
|
1519
|
+
</p>
|
1520
|
+
<p>
|
1521
|
+
However, if a quantifier is followed by a question mark, it ceases to
|
1522
|
+
be greedy, and instead matches the minimum number of times possible, so
|
1523
|
+
the pattern
|
1524
|
+
</p>
|
1525
|
+
<pre class="programlisting">
|
1526
|
+
/\*.*?\*/
|
1527
|
+
</pre>
|
1528
|
+
<p>
|
1529
|
+
does the right thing with the C comments. The meaning of the various
|
1530
|
+
quantifiers is not otherwise changed, just the preferred number of
|
1531
|
+
matches. Do not confuse this use of question mark with its use as a
|
1532
|
+
quantifier in its own right. Because it has two uses, it can sometimes
|
1533
|
+
appear doubled, as in
|
1534
|
+
</p>
|
1535
|
+
<pre class="programlisting">
|
1536
|
+
\d??\d
|
1537
|
+
</pre>
|
1538
|
+
<p>
|
1539
|
+
which matches one digit by preference, but can match two if that is the
|
1540
|
+
only way the rest of the pattern matches.
|
1541
|
+
</p>
|
1542
|
+
<p>
|
1543
|
+
If the <code class="varname">G_REGEX_UNGREEDY</code> flag is set, the quantifiers are not greedy
|
1544
|
+
by default, but individual ones can be made greedy by following them with
|
1545
|
+
a question mark. In other words, it inverts the default behaviour.
|
1546
|
+
</p>
|
1547
|
+
<p>
|
1548
|
+
When a parenthesized subpattern is quantified with a minimum repeat
|
1549
|
+
count that is greater than 1 or with a limited maximum, more memory is
|
1550
|
+
required for the compiled pattern, in proportion to the size of the
|
1551
|
+
minimum or maximum.
|
1552
|
+
</p>
|
1553
|
+
<p>
|
1554
|
+
If a pattern starts with .* or .{0,} and the <code class="varname">G_REGEX_DOTALL</code> flag
|
1555
|
+
is set, thus allowing the dot to match newlines, the
|
1556
|
+
pattern is implicitly anchored, because whatever follows will be tried
|
1557
|
+
against every character position in the string, so there is no
|
1558
|
+
point in retrying the overall match at any position after the first.
|
1559
|
+
GRegex normally treats such a pattern as though it were preceded by \A.
|
1560
|
+
</p>
|
1561
|
+
<p>
|
1562
|
+
In cases where it is known that the string contains no newlines, it
|
1563
|
+
is worth setting <code class="varname">G_REGEX_DOTALL</code> in order to obtain this optimization,
|
1564
|
+
or alternatively using ^ to indicate anchoring explicitly.
|
1565
|
+
</p>
|
1566
|
+
<p>
|
1567
|
+
However, there is one situation where the optimization cannot be used.
|
1568
|
+
When .* is inside capturing parentheses that are the subject of a
|
1569
|
+
backreference elsewhere in the pattern, a match at the start may fail
|
1570
|
+
where a later one succeeds. Consider, for example:
|
1571
|
+
</p>
|
1572
|
+
<pre class="programlisting">
|
1573
|
+
(.*)abc\1
|
1574
|
+
</pre>
|
1575
|
+
<p>
|
1576
|
+
If the string is "xyz123abc123" the match point is the fourth character.
|
1577
|
+
For this reason, such a pattern is not implicitly anchored.
|
1578
|
+
</p>
|
1579
|
+
<p>
|
1580
|
+
When a capturing subpattern is repeated, the value captured is the
|
1581
|
+
substring that matched the final iteration. For example, after
|
1582
|
+
</p>
|
1583
|
+
<pre class="programlisting">
|
1584
|
+
(tweedle[dume]{3}\s*)+
|
1585
|
+
</pre>
|
1586
|
+
<p>
|
1587
|
+
has matched "tweedledum tweedledee" the value of the captured substring
|
1588
|
+
is "tweedledee". However, if there are nested capturing subpatterns,
|
1589
|
+
the corresponding captured values may have been set in previous iterations.
|
1590
|
+
For example, after
|
1591
|
+
</p>
|
1592
|
+
<pre class="programlisting">
|
1593
|
+
/(a|(b))+/
|
1594
|
+
</pre>
|
1595
|
+
<p>
|
1596
|
+
matches "aba" the value of the second captured substring is "b".
|
1597
|
+
</p>
|
1598
|
+
</div>
|
1599
|
+
<div class="refsect1" title="Atomic grouping and possessive quantifiers">
|
1600
|
+
<a name="id551776"></a><h2>Atomic grouping and possessive quantifiers</h2>
|
1601
|
+
<p>
|
1602
|
+
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
|
1603
|
+
repetition, failure of what follows normally causes the repeated
|
1604
|
+
item to be re-evaluated to see if a different number
|
1605
|
+
of repeats allows the rest of the pattern to match. Sometimes it
|
1606
|
+
is useful to prevent this, either to change the nature of the
|
1607
|
+
match, or to cause it to fail earlier than it otherwise might, when the
|
1608
|
+
author of the pattern knows there is no point in carrying on.
|
1609
|
+
</p>
|
1610
|
+
<p>
|
1611
|
+
Consider, for example, the pattern \d+foo when applied to the string
|
1612
|
+
</p>
|
1613
|
+
<pre class="programlisting">
|
1614
|
+
123456bar
|
1615
|
+
</pre>
|
1616
|
+
<p>
|
1617
|
+
After matching all 6 digits and then failing to match "foo", the normal
|
1618
|
+
action of the matcher is to try again with only 5 digits matching the
|
1619
|
+
\d+ item, and then with 4, and so on, before ultimately failing.
|
1620
|
+
"Atomic grouping" (a term taken from Jeffrey Friedl’s book) provides
|
1621
|
+
the means for specifying that once a subpattern has matched, it is not
|
1622
|
+
to be re-evaluated in this way.
|
1623
|
+
</p>
|
1624
|
+
<p>
|
1625
|
+
If we use atomic grouping for the previous example, the matcher
|
1626
|
+
gives up immediately on failing to match "foo" the first time. The notation
|
1627
|
+
is a kind of special parenthesis, starting with (?> as in this
|
1628
|
+
example:
|
1629
|
+
</p>
|
1630
|
+
<pre class="programlisting">
|
1631
|
+
(?>\d+)foo
|
1632
|
+
</pre>
|
1633
|
+
<p>
|
1634
|
+
This kind of parenthesis "locks up" the part of the pattern it contains
|
1635
|
+
once it has matched, and a failure further into the pattern is
|
1636
|
+
prevented from backtracking into it. Backtracking past it to previous
|
1637
|
+
items, however, works as normal.
|
1638
|
+
</p>
|
1639
|
+
<p>
|
1640
|
+
An alternative description is that a subpattern of this type matches
|
1641
|
+
the string of characters that an identical standalone pattern would
|
1642
|
+
match, if anchored at the current point in the string.
|
1643
|
+
</p>
|
1644
|
+
<p>
|
1645
|
+
Atomic grouping subpatterns are not capturing subpatterns. Simple cases
|
1646
|
+
such as the above example can be thought of as a maximizing repeat that
|
1647
|
+
must swallow everything it can. So, while both \d+ and \d+? are prepared
|
1648
|
+
to adjust the number of digits they match in order to make the
|
1649
|
+
rest of the pattern match, (?>\d+) can only match an entire sequence of
|
1650
|
+
digits.
|
1651
|
+
</p>
|
1652
|
+
<p>
|
1653
|
+
Atomic groups in general can of course contain arbitrarily complicated
|
1654
|
+
subpatterns, and can be nested. However, when the subpattern for an
|
1655
|
+
atomic group is just a single repeated item, as in the example above, a
|
1656
|
+
simpler notation, called a "possessive quantifier" can be used. This
|
1657
|
+
consists of an additional + character following a quantifier. Using
|
1658
|
+
this notation, the previous example can be rewritten as
|
1659
|
+
</p>
|
1660
|
+
<pre class="programlisting">
|
1661
|
+
\d++foo
|
1662
|
+
</pre>
|
1663
|
+
<p>
|
1664
|
+
Possessive quantifiers are always greedy; the setting of the
|
1665
|
+
<code class="varname">G_REGEX_UNGREEDY</code> option is ignored. They are a convenient notation for the
|
1666
|
+
simpler forms of atomic group. However, there is no difference in the
|
1667
|
+
meaning of a possessive quantifier and the equivalent
|
1668
|
+
atomic group, though there may be a performance difference;
|
1669
|
+
possessive quantifiers should be slightly faster.
|
1670
|
+
</p>
|
1671
|
+
<p>
|
1672
|
+
The possessive quantifier syntax is an extension to the Perl syntax.
|
1673
|
+
It was invented by Jeffrey Friedl in the first edition of his book and
|
1674
|
+
then implemented by Mike McCloskey in Sun's Java package.
|
1675
|
+
It ultimately found its way into Perl at release 5.10.
|
1676
|
+
</p>
|
1677
|
+
<p>
|
1678
|
+
GRegex has an optimization that automatically "possessifies" certain simple
|
1679
|
+
pattern constructs. For example, the sequence A+B is treated as A++B because
|
1680
|
+
there is no point in backtracking into a sequence of A's when B must follow.
|
1681
|
+
</p>
|
1682
|
+
<p>
|
1683
|
+
When a pattern contains an unlimited repeat inside a subpattern that
|
1684
|
+
can itself be repeated an unlimited number of times, the use of an
|
1685
|
+
atomic group is the only way to avoid some failing matches taking a
|
1686
|
+
very long time indeed. The pattern
|
1687
|
+
</p>
|
1688
|
+
<pre class="programlisting">
|
1689
|
+
(\D+|<\d+>)*[!?]
|
1690
|
+
</pre>
|
1691
|
+
<p>
|
1692
|
+
matches an unlimited number of substrings that either consist of
|
1693
|
+
non-digits, or digits enclosed in <>, followed by either ! or ?. When it
|
1694
|
+
matches, it runs quickly. However, if it is applied to
|
1695
|
+
</p>
|
1696
|
+
<pre class="programlisting">
|
1697
|
+
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
1698
|
+
</pre>
|
1699
|
+
<p>
|
1700
|
+
it takes a long time before reporting failure. This is because the
|
1701
|
+
string can be divided between the internal \D+ repeat and the external
|
1702
|
+
* repeat in a large number of ways, and all have to be tried. (The
|
1703
|
+
example uses [!?] rather than a single character at the end, because
|
1704
|
+
GRegex has an optimization that allows for fast failure
|
1705
|
+
when a single character is used. It remembers the last single character
|
1706
|
+
that is required for a match, and fails early if it is not present
|
1707
|
+
in the string.) If the pattern is changed so that it uses an atomic
|
1708
|
+
group, like this:
|
1709
|
+
</p>
|
1710
|
+
<pre class="programlisting">
|
1711
|
+
((?>\D+)|<\d+>)*[!?]
|
1712
|
+
</pre>
|
1713
|
+
<p>
|
1714
|
+
sequences of non-digits cannot be broken, and failure happens quickly.
|
1715
|
+
</p>
|
1716
|
+
</div>
|
1717
|
+
<div class="refsect1" title="Back references">
|
1718
|
+
<a name="id551911"></a><h2>Back references</h2>
|
1719
|
+
<p>
|
1720
|
+
Outside a character class, a backslash followed by a digit greater than
|
1721
|
+
0 (and possibly further digits) is a back reference to a capturing subpattern
|
1722
|
+
earlier (that is, to its left) in the pattern, provided there have been that
|
1723
|
+
many previous capturing left parentheses.
|
1724
|
+
</p>
|
1725
|
+
<p>
|
1726
|
+
However, if the decimal number following the backslash is less than 10,
|
1727
|
+
it is always taken as a back reference, and causes an error only if
|
1728
|
+
there are not that many capturing left parentheses in the entire pattern.
|
1729
|
+
In other words, the parentheses that are referenced need not be
|
1730
|
+
to the left of the reference for numbers less than 10. A "forward back
|
1731
|
+
reference" of this type can make sense when a repetition is involved and
|
1732
|
+
the subpattern to the right has participated in an earlier iteration.
|
1733
|
+
</p>
|
1734
|
+
<p>
|
1735
|
+
It is not possible to have a numerical "forward back reference" to a subpattern
|
1736
|
+
whose number is 10 or more using this syntax because a sequence such as \50 is
|
1737
|
+
interpreted as a character defined in octal. See the subsection entitled
|
1738
|
+
"Non-printing characters" above for further details of the handling of digits
|
1739
|
+
following a backslash. There is no such problem when named parentheses are used.
|
1740
|
+
A back reference to any subpattern is possible using named parentheses (see below).
|
1741
|
+
</p>
|
1742
|
+
<p>
|
1743
|
+
Another way of avoiding the ambiguity inherent in the use of digits following a
|
1744
|
+
backslash is to use the \g escape sequence (introduced in Perl 5.10.)
|
1745
|
+
This escape must be followed by a positive or a negative number,
|
1746
|
+
optionally enclosed in braces.
|
1747
|
+
</p>
|
1748
|
+
<p>
|
1749
|
+
A positive number specifies an absolute reference without the ambiguity that is
|
1750
|
+
present in the older syntax. It is also useful when literal digits follow the
|
1751
|
+
reference. A negative number is a relative reference. Consider "(abc(def)ghi)\g{-1}",
|
1752
|
+
the sequence \g{-1} is a reference to the most recently started capturing
|
1753
|
+
subpattern before \g, that is, it is equivalent to \2. Similarly, \g{-2}
|
1754
|
+
would be equivalent to \1. The use of relative references can be helpful in
|
1755
|
+
long patterns, and also in patterns that are created by joining together
|
1756
|
+
fragments that contain references within themselves.
|
1757
|
+
</p>
|
1758
|
+
<p>
|
1759
|
+
A back reference matches whatever actually matched the capturing subpattern
|
1760
|
+
in the current string, rather than anything matching
|
1761
|
+
the subpattern itself (see "Subpatterns as subroutines" below for a way
|
1762
|
+
of doing that). So the pattern
|
1763
|
+
</p>
|
1764
|
+
<pre class="programlisting">
|
1765
|
+
(sens|respons)e and \1ibility
|
1766
|
+
</pre>
|
1767
|
+
<p>
|
1768
|
+
matches "sense and sensibility" and "response and responsibility", but
|
1769
|
+
not "sense and responsibility". If caseful matching is in force at the
|
1770
|
+
time of the back reference, the case of letters is relevant. For example,
|
1771
|
+
</p>
|
1772
|
+
<pre class="programlisting">
|
1773
|
+
((?i)rah)\s+\1
|
1774
|
+
</pre>
|
1775
|
+
<p>
|
1776
|
+
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
|
1777
|
+
original capturing subpattern is matched caselessly.
|
1778
|
+
</p>
|
1779
|
+
<p>
|
1780
|
+
Back references to named subpatterns use the Perl syntax \k<name> or \k'name'
|
1781
|
+
or the Python syntax (?P=name). We could rewrite the above example in either of
|
1782
|
+
the following ways:
|
1783
|
+
</p>
|
1784
|
+
<pre class="programlisting">
|
1785
|
+
(?<p1>(?i)rah)\s+\k<p1>
|
1786
|
+
(?P<p1>(?i)rah)\s+(?P=p1)
|
1787
|
+
</pre>
|
1788
|
+
<p>
|
1789
|
+
A subpattern that is referenced by name may appear in the pattern before or
|
1790
|
+
after the reference.
|
1791
|
+
</p>
|
1792
|
+
<p>
|
1793
|
+
There may be more than one back reference to the same subpattern. If a
|
1794
|
+
subpattern has not actually been used in a particular match, any back
|
1795
|
+
references to it always fail. For example, the pattern
|
1796
|
+
</p>
|
1797
|
+
<pre class="programlisting">
|
1798
|
+
(a|(bc))\2
|
1799
|
+
</pre>
|
1800
|
+
<p>
|
1801
|
+
always fails if it starts to match "a" rather than "bc". Because there
|
1802
|
+
may be many capturing parentheses in a pattern, all digits following
|
1803
|
+
the backslash are taken as part of a potential back reference number.
|
1804
|
+
If the pattern continues with a digit character, some delimiter must be
|
1805
|
+
used to terminate the back reference. If the <code class="varname">G_REGEX_EXTENDED</code> flag is
|
1806
|
+
set, this can be whitespace. Otherwise an empty comment (see "Comments" below) can be used.
|
1807
|
+
</p>
|
1808
|
+
<p>
|
1809
|
+
A back reference that occurs inside the parentheses to which it refers
|
1810
|
+
fails when the subpattern is first used, so, for example, (a\1) never
|
1811
|
+
matches. However, such references can be useful inside repeated subpatterns.
|
1812
|
+
For example, the pattern
|
1813
|
+
</p>
|
1814
|
+
<pre class="programlisting">
|
1815
|
+
(a|b\1)+
|
1816
|
+
</pre>
|
1817
|
+
<p>
|
1818
|
+
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration
|
1819
|
+
of the subpattern, the back reference matches the character
|
1820
|
+
string corresponding to the previous iteration. In order for this to
|
1821
|
+
work, the pattern must be such that the first iteration does not need
|
1822
|
+
to match the back reference. This can be done using alternation, as in
|
1823
|
+
the example above, or by a quantifier with a minimum of zero.
|
1824
|
+
</p>
|
1825
|
+
</div>
|
1826
|
+
<div class="refsect1" title="Assertions">
|
1827
|
+
<a name="id552038"></a><h2>Assertions</h2>
|
1828
|
+
<p>
|
1829
|
+
An assertion is a test on the characters following or preceding the
|
1830
|
+
current matching point that does not actually consume any characters.
|
1831
|
+
The simple assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are
|
1832
|
+
described above.
|
1833
|
+
</p>
|
1834
|
+
<p>
|
1835
|
+
More complicated assertions are coded as subpatterns. There are two
|
1836
|
+
kinds: those that look ahead of the current position in the
|
1837
|
+
string, and those that look behind it. An assertion subpattern is
|
1838
|
+
matched in the normal way, except that it does not cause the current
|
1839
|
+
matching position to be changed.
|
1840
|
+
</p>
|
1841
|
+
<p>
|
1842
|
+
Assertion subpatterns are not capturing subpatterns, and may not be
|
1843
|
+
repeated, because it makes no sense to assert the same thing several
|
1844
|
+
times. If any kind of assertion contains capturing subpatterns within
|
1845
|
+
it, these are counted for the purposes of numbering the capturing
|
1846
|
+
subpatterns in the whole pattern. However, substring capturing is carried
|
1847
|
+
out only for positive assertions, because it does not make sense for
|
1848
|
+
negative assertions.
|
1849
|
+
</p>
|
1850
|
+
<div class="refsect2" title="Lookahead assertions">
|
1851
|
+
<a name="id552063"></a><h3>Lookahead assertions</h3>
|
1852
|
+
<p>
|
1853
|
+
Lookahead assertions start with (?= for positive assertions and (?! for
|
1854
|
+
negative assertions. For example,
|
1855
|
+
</p>
|
1856
|
+
<pre class="programlisting">
|
1857
|
+
\w+(?=;)
|
1858
|
+
</pre>
|
1859
|
+
<p>
|
1860
|
+
matches a word followed by a semicolon, but does not include the semicolon
|
1861
|
+
in the match, and
|
1862
|
+
</p>
|
1863
|
+
<pre class="programlisting">
|
1864
|
+
foo(?!bar)
|
1865
|
+
</pre>
|
1866
|
+
<p>
|
1867
|
+
matches any occurrence of "foo" that is not followed by "bar". Note
|
1868
|
+
that the apparently similar pattern
|
1869
|
+
</p>
|
1870
|
+
<pre class="programlisting">
|
1871
|
+
(?!foo)bar
|
1872
|
+
</pre>
|
1873
|
+
<p>
|
1874
|
+
does not find an occurrence of "bar" that is preceded by something
|
1875
|
+
other than "foo"; it finds any occurrence of "bar" whatsoever, because
|
1876
|
+
the assertion (?!foo) is always true when the next three characters are
|
1877
|
+
"bar". A lookbehind assertion is needed to achieve the other effect.
|
1878
|
+
</p>
|
1879
|
+
<p>
|
1880
|
+
If you want to force a matching failure at some point in a pattern, the
|
1881
|
+
most convenient way to do it is with (?!) because an empty string
|
1882
|
+
always matches, so an assertion that requires there not to be an empty
|
1883
|
+
string must always fail.
|
1884
|
+
</p>
|
1885
|
+
</div>
|
1886
|
+
<hr>
|
1887
|
+
<div class="refsect2" title="Lookbehind assertions">
|
1888
|
+
<a name="id552113"></a><h3>Lookbehind assertions</h3>
|
1889
|
+
<p>
|
1890
|
+
Lookbehind assertions start with (?<= for positive assertions and (?<!
|
1891
|
+
for negative assertions. For example,
|
1892
|
+
</p>
|
1893
|
+
<pre class="programlisting">
|
1894
|
+
(?<!foo)bar
|
1895
|
+
</pre>
|
1896
|
+
<p>
|
1897
|
+
does find an occurrence of "bar" that is not preceded by "foo". The
|
1898
|
+
contents of a lookbehind assertion are restricted such that all the
|
1899
|
+
strings it matches must have a fixed length. However, if there are
|
1900
|
+
several top-level alternatives, they do not all have to have the same
|
1901
|
+
fixed length. Thus
|
1902
|
+
</p>
|
1903
|
+
<pre class="programlisting">
|
1904
|
+
(?<=bullock|donkey)
|
1905
|
+
</pre>
|
1906
|
+
<p>
|
1907
|
+
is permitted, but
|
1908
|
+
</p>
|
1909
|
+
<pre class="programlisting">
|
1910
|
+
(?<!dogs?|cats?)
|
1911
|
+
</pre>
|
1912
|
+
<p>
|
1913
|
+
causes an error at compile time. Branches that match different length
|
1914
|
+
strings are permitted only at the top level of a lookbehind assertion.
|
1915
|
+
An assertion such as
|
1916
|
+
</p>
|
1917
|
+
<pre class="programlisting">
|
1918
|
+
(?<=ab(c|de))
|
1919
|
+
</pre>
|
1920
|
+
<p>
|
1921
|
+
is not permitted, because its single top-level branch can match two
|
1922
|
+
different lengths, but it is acceptable if rewritten to use two
|
1923
|
+
top-level branches:
|
1924
|
+
</p>
|
1925
|
+
<pre class="programlisting">
|
1926
|
+
(?<=abc|abde)
|
1927
|
+
</pre>
|
1928
|
+
<p>
|
1929
|
+
The implementation of lookbehind assertions is, for each alternative,
|
1930
|
+
to temporarily move the current position back by the fixed length and
|
1931
|
+
then try to match. If there are insufficient characters before the
|
1932
|
+
current position, the assertion fails.
|
1933
|
+
</p>
|
1934
|
+
<p>
|
1935
|
+
GRegex does not allow the \C escape (which matches a single byte in UTF-8
|
1936
|
+
mode) to appear in lookbehind assertions, because it makes it impossible
|
1937
|
+
to calculate the length of the lookbehind. The \X and \R escapes, which can
|
1938
|
+
match different numbers of bytes, are also not permitted.
|
1939
|
+
</p>
|
1940
|
+
<p>
|
1941
|
+
Possessive quantifiers can be used in conjunction with lookbehind assertions to
|
1942
|
+
specify efficient matching at the end of the subject string. Consider a simple
|
1943
|
+
pattern such as
|
1944
|
+
</p>
|
1945
|
+
<pre class="programlisting">
|
1946
|
+
abcd$
|
1947
|
+
</pre>
|
1948
|
+
<p>
|
1949
|
+
when applied to a long string that does not match. Because matching
|
1950
|
+
proceeds from left to right, GRegex will look for each "a" in the string
|
1951
|
+
and then see if what follows matches the rest of the pattern. If the
|
1952
|
+
pattern is specified as
|
1953
|
+
</p>
|
1954
|
+
<pre class="programlisting">
|
1955
|
+
^.*abcd$
|
1956
|
+
</pre>
|
1957
|
+
<p>
|
1958
|
+
the initial .* matches the entire string at first, but when this fails
|
1959
|
+
(because there is no following "a"), it backtracks to match all but the
|
1960
|
+
last character, then all but the last two characters, and so on. Once
|
1961
|
+
again the search for "a" covers the entire string, from right to left,
|
1962
|
+
so we are no better off. However, if the pattern is written as
|
1963
|
+
</p>
|
1964
|
+
<pre class="programlisting">
|
1965
|
+
^.*+(?<=abcd)
|
1966
|
+
</pre>
|
1967
|
+
<p>
|
1968
|
+
there can be no backtracking for the .*+ item; it can match only the
|
1969
|
+
entire string. The subsequent lookbehind assertion does a single test
|
1970
|
+
on the last four characters. If it fails, the match fails immediately.
|
1971
|
+
For long strings, this approach makes a significant difference to the
|
1972
|
+
processing time.
|
1973
|
+
</p>
|
1974
|
+
</div>
|
1975
|
+
<hr>
|
1976
|
+
<div class="refsect2" title="Using multiple assertions">
|
1977
|
+
<a name="id552228"></a><h3>Using multiple assertions</h3>
|
1978
|
+
<p>
|
1979
|
+
Several assertions (of any sort) may occur in succession. For example,
|
1980
|
+
</p>
|
1981
|
+
<pre class="programlisting">
|
1982
|
+
(?<=\d{3})(?<!999)foo
|
1983
|
+
</pre>
|
1984
|
+
<p>
|
1985
|
+
matches "foo" preceded by three digits that are not "999". Notice that
|
1986
|
+
each of the assertions is applied independently at the same point in
|
1987
|
+
the string. First there is a check that the previous three
|
1988
|
+
characters are all digits, and then there is a check that the same
|
1989
|
+
three characters are not "999". This pattern does not match "foo" preceded
|
1990
|
+
by six characters, the first three of which are digits and the last
|
1991
|
+
three of which are not "999". For example, it doesn’t match "123abcfoo".
|
1992
|
+
A pattern to do that is
|
1993
|
+
</p>
|
1994
|
+
<pre class="programlisting">
|
1995
|
+
(?<=\d{3}...)(?<!999)foo
|
1996
|
+
</pre>
|
1997
|
+
<p>
|
1998
|
+
This time the first assertion looks at the preceding six characters,
|
1999
|
+
checking that the first three are digits, and then the second assertion
|
2000
|
+
checks that the preceding three characters are not "999".
|
2001
|
+
</p>
|
2002
|
+
<p>
|
2003
|
+
Assertions can be nested in any combination. For example,
|
2004
|
+
</p>
|
2005
|
+
<pre class="programlisting">
|
2006
|
+
(?<=(?<!foo)bar)baz
|
2007
|
+
</pre>
|
2008
|
+
<p>
|
2009
|
+
matches an occurrence of "baz" that is preceded by "bar" which in turn
|
2010
|
+
is not preceded by "foo", while
|
2011
|
+
</p>
|
2012
|
+
<pre class="programlisting">
|
2013
|
+
(?<=\d{3}(?!999)...)foo
|
2014
|
+
</pre>
|
2015
|
+
<p>
|
2016
|
+
is another pattern that matches "foo" preceded by three digits and any
|
2017
|
+
three characters that are not "999".
|
2018
|
+
</p>
|
2019
|
+
</div>
|
2020
|
+
</div>
|
2021
|
+
<div class="refsect1" title="Conditional subpatterns">
|
2022
|
+
<a name="id552292"></a><h2>Conditional subpatterns</h2>
|
2023
|
+
<p>
|
2024
|
+
It is possible to cause the matching process to obey a subpattern
|
2025
|
+
conditionally or to choose between two alternative subpatterns, depending
|
2026
|
+
on the result of an assertion, or whether a previous capturing subpattern
|
2027
|
+
matched or not. The two possible forms of conditional subpattern are
|
2028
|
+
</p>
|
2029
|
+
<pre class="programlisting">
|
2030
|
+
(?(condition)yes-pattern)
|
2031
|
+
(?(condition)yes-pattern|no-pattern)
|
2032
|
+
</pre>
|
2033
|
+
<p>
|
2034
|
+
If the condition is satisfied, the yes-pattern is used; otherwise the
|
2035
|
+
no-pattern (if present) is used. If there are more than two alternatives
|
2036
|
+
in the subpattern, a compile-time error occurs.
|
2037
|
+
</p>
|
2038
|
+
<p>
|
2039
|
+
There are four kinds of condition: references to subpatterns, references to
|
2040
|
+
recursion, a pseudo-condition called DEFINE, and assertions.
|
2041
|
+
</p>
|
2042
|
+
<div class="refsect2" title="Checking for a used subpattern by number">
|
2043
|
+
<a name="id552320"></a><h3>Checking for a used subpattern by number</h3>
|
2044
|
+
<p>
|
2045
|
+
If the text between the parentheses consists of a sequence of digits, the
|
2046
|
+
condition is true if the capturing subpattern of that number has previously
|
2047
|
+
matched.
|
2048
|
+
</p>
|
2049
|
+
<p>
|
2050
|
+
Consider the following pattern, which contains non-significant white space
|
2051
|
+
to make it more readable (assume the <code class="varname">G_REGEX_EXTENDED</code> option)
|
2052
|
+
and to divide it into three parts for ease of discussion:
|
2053
|
+
</p>
|
2054
|
+
<pre class="programlisting">
|
2055
|
+
( \( )? [^()]+ (?(1) \) )
|
2056
|
+
</pre>
|
2057
|
+
<p>
|
2058
|
+
The first part matches an optional opening parenthesis, and if that
|
2059
|
+
character is present, sets it as the first captured substring. The second
|
2060
|
+
part matches one or more characters that are not parentheses. The
|
2061
|
+
third part is a conditional subpattern that tests whether the first set
|
2062
|
+
of parentheses matched or not. If they did, that is, if the string started
|
2063
|
+
with an opening parenthesis, the condition is true, and so the yes-pattern
|
2064
|
+
is executed and a closing parenthesis is required. Otherwise,
|
2065
|
+
since no-pattern is not present, the subpattern matches nothing. In
|
2066
|
+
other words, this pattern matches a sequence of non-parentheses,
|
2067
|
+
optionally enclosed in parentheses.
|
2068
|
+
</p>
|
2069
|
+
</div>
|
2070
|
+
<hr>
|
2071
|
+
<div class="refsect2" title="Checking for a used subpattern by name">
|
2072
|
+
<a name="id552356"></a><h3>Checking for a used subpattern by name</h3>
|
2073
|
+
<p>
|
2074
|
+
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
2075
|
+
subpattern by name; the Python syntax (?(name)...) is also recognized. However,
|
2076
|
+
there is a possible ambiguity with this syntax, because subpattern names may
|
2077
|
+
consist entirely of digits. GRegex looks first for a named subpattern; if it
|
2078
|
+
cannot find one and the name consists entirely of digits, GRegex looks for a
|
2079
|
+
subpattern of that number, which must be greater than zero. Using subpattern
|
2080
|
+
names that consist entirely of digits is not recommended.
|
2081
|
+
</p>
|
2082
|
+
<p>
|
2083
|
+
Rewriting the above example to use a named subpattern gives this:
|
2084
|
+
</p>
|
2085
|
+
<pre class="programlisting">
|
2086
|
+
(?<OPEN> \( )? [^()]+ (?(<OPEN>) \) )
|
2087
|
+
</pre>
|
2088
|
+
</div>
|
2089
|
+
<hr>
|
2090
|
+
<div class="refsect2" title="Checking for pattern recursion">
|
2091
|
+
<a name="id552382"></a><h3>Checking for pattern recursion</h3>
|
2092
|
+
<p>
|
2093
|
+
If the condition is the string (R), and there is no subpattern with the name R,
|
2094
|
+
the condition is true if a recursive call to the whole pattern or any
|
2095
|
+
subpattern has been made. If digits or a name preceded by ampersand follow the
|
2096
|
+
letter R, for example:
|
2097
|
+
</p>
|
2098
|
+
<pre class="programlisting">
|
2099
|
+
(?(R3)...)
|
2100
|
+
(?(R&name)...)
|
2101
|
+
</pre>
|
2102
|
+
<p>
|
2103
|
+
the condition is true if the most recent recursion is into the subpattern whose
|
2104
|
+
number or name is given. This condition does not check the entire recursion
|
2105
|
+
stack.
|
2106
|
+
</p>
|
2107
|
+
<p>
|
2108
|
+
At "top level", all these recursion test conditions are false. Recursive
|
2109
|
+
patterns are described below.
|
2110
|
+
</p>
|
2111
|
+
</div>
|
2112
|
+
<hr>
|
2113
|
+
<div class="refsect2" title="Defining subpatterns for use by reference only">
|
2114
|
+
<a name="id552411"></a><h3>Defining subpatterns for use by reference only</h3>
|
2115
|
+
<p>
|
2116
|
+
If the condition is the string (DEFINE), and there is no subpattern with the
|
2117
|
+
name DEFINE, the condition is always false. In this case, there may be only one
|
2118
|
+
alternative in the subpattern. It is always skipped if control reaches this
|
2119
|
+
point in the pattern; the idea of DEFINE is that it can be used to define
|
2120
|
+
"subroutines" that can be referenced from elsewhere. (The use of "subroutines"
|
2121
|
+
is described below.) For example, a pattern to match an IPv4 address could be
|
2122
|
+
written like this (ignore whitespace and line breaks):
|
2123
|
+
</p>
|
2124
|
+
<pre class="programlisting">
|
2125
|
+
(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
|
2126
|
+
\b (?&byte) (\.(?&byte)){3} \b
|
2127
|
+
</pre>
|
2128
|
+
<p>
|
2129
|
+
The first part of the pattern is a DEFINE group inside which another group
|
2130
|
+
named "byte" is defined. This matches an individual component of an IPv4
|
2131
|
+
address (a number less than 256). When matching takes place, this part of the
|
2132
|
+
pattern is skipped because DEFINE acts like a false condition.
|
2133
|
+
</p>
|
2134
|
+
<p>
|
2135
|
+
The rest of the pattern uses references to the named group to match the four
|
2136
|
+
dot-separated components of an IPv4 address, insisting on a word boundary at
|
2137
|
+
each end.
|
2138
|
+
</p>
|
2139
|
+
</div>
|
2140
|
+
<hr>
|
2141
|
+
<div class="refsect2" title="Assertion conditions">
|
2142
|
+
<a name="id552443"></a><h3>Assertion conditions</h3>
|
2143
|
+
<p>
|
2144
|
+
If the condition is not in any of the above formats, it must be an
|
2145
|
+
assertion. This may be a positive or negative lookahead or lookbehind
|
2146
|
+
assertion. Consider this pattern, again containing non-significant
|
2147
|
+
white space, and with the two alternatives on the second line:
|
2148
|
+
</p>
|
2149
|
+
<pre class="programlisting">
|
2150
|
+
(?(?=[^a-z]*[a-z])
|
2151
|
+
\d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )
|
2152
|
+
</pre>
|
2153
|
+
<p>
|
2154
|
+
The condition is a positive lookahead assertion that matches an
|
2155
|
+
optional sequence of non-letters followed by a letter. In other words,
|
2156
|
+
it tests for the presence of at least one letter in the string. If a
|
2157
|
+
letter is found, the string is matched against the first alternative;
|
2158
|
+
otherwise it is matched against the second. This pattern matches
|
2159
|
+
strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
|
2160
|
+
letters and dd are digits.
|
2161
|
+
</p>
|
2162
|
+
</div>
|
2163
|
+
</div>
|
2164
|
+
<div class="refsect1" title="Comments">
|
2165
|
+
<a name="id552472"></a><h2>Comments</h2>
|
2166
|
+
<p>
|
2167
|
+
The sequence (?# marks the start of a comment that continues up to the
|
2168
|
+
next closing parenthesis. Nested parentheses are not permitted. The
|
2169
|
+
characters that make up a comment play no part in the pattern matching
|
2170
|
+
at all.
|
2171
|
+
</p>
|
2172
|
+
<p>
|
2173
|
+
If the <code class="varname">G_REGEX_EXTENDED</code> option is set, an unescaped #
|
2174
|
+
character outside a character class introduces a comment that continues to
|
2175
|
+
immediately after the next newline in the pattern.
|
2176
|
+
</p>
|
2177
|
+
</div>
|
2178
|
+
<div class="refsect1" title="Recursive patterns">
|
2179
|
+
<a name="id552493"></a><h2>Recursive patterns</h2>
|
2180
|
+
<p>
|
2181
|
+
Consider the problem of matching a string in parentheses, allowing for
|
2182
|
+
unlimited nested parentheses. Without the use of recursion, the best
|
2183
|
+
that can be done is to use a pattern that matches up to some fixed
|
2184
|
+
depth of nesting. It is not possible to handle an arbitrary nesting
|
2185
|
+
depth.
|
2186
|
+
</p>
|
2187
|
+
<p>
|
2188
|
+
For some time, Perl has provided a facility that allows regular expressions to
|
2189
|
+
recurse (amongst other things). It does this by interpolating Perl code in the
|
2190
|
+
expression at run time, and the code can refer to the expression itself. A Perl
|
2191
|
+
pattern using code interpolation to solve the parentheses problem can be
|
2192
|
+
created like this:
|
2193
|
+
</p>
|
2194
|
+
<pre class="programlisting">
|
2195
|
+
$re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x;
|
2196
|
+
</pre>
|
2197
|
+
<p>
|
2198
|
+
The (?p{...}) item interpolates Perl code at run time, and in this case refers
|
2199
|
+
recursively to the pattern in which it appears.
|
2200
|
+
</p>
|
2201
|
+
<p>
|
2202
|
+
Obviously, GRegex cannot support the interpolation of Perl code. Instead, it
|
2203
|
+
supports special syntax for recursion of the entire pattern, and also for
|
2204
|
+
individual subpattern recursion. This kind of recursion was introduced into
|
2205
|
+
Perl at release 5.10.
|
2206
|
+
</p>
|
2207
|
+
<p>
|
2208
|
+
A special item that consists of (? followed by a number greater than zero and a
|
2209
|
+
closing parenthesis is a recursive call of the subpattern of the given number,
|
2210
|
+
provided that it occurs inside that subpattern. (If not, it is a "subroutine"
|
2211
|
+
call, which is described in the next section.) The special item (?R) or (?0) is
|
2212
|
+
a recursive call of the entire regular expression.
|
2213
|
+
</p>
|
2214
|
+
<p>
|
2215
|
+
In GRegex (like Python, but unlike Perl), a recursive subpattern call is always
|
2216
|
+
treated as an atomic group. That is, once it has matched some of the subject
|
2217
|
+
string, it is never re-entered, even if it contains untried alternatives and
|
2218
|
+
there is a subsequent matching failure.
|
2219
|
+
</p>
|
2220
|
+
<p>
|
2221
|
+
This pattern solves the nested parentheses problem (assume the
|
2222
|
+
<code class="varname">G_REGEX_EXTENDED</code> option is set so that white space is
|
2223
|
+
ignored):
|
2224
|
+
</p>
|
2225
|
+
<pre class="programlisting">
|
2226
|
+
\( ( (?>[^()]+) | (?R) )* \)
|
2227
|
+
</pre>
|
2228
|
+
<p>
|
2229
|
+
First it matches an opening parenthesis. Then it matches any number of
|
2230
|
+
substrings which can either be a sequence of non-parentheses, or a
|
2231
|
+
recursive match of the pattern itself (that is, a correctly parenthesized
|
2232
|
+
substring). Finally there is a closing parenthesis.
|
2233
|
+
</p>
|
2234
|
+
<p>
|
2235
|
+
If this were part of a larger pattern, you would not want to recurse
|
2236
|
+
the entire pattern, so instead you could use this:
|
2237
|
+
</p>
|
2238
|
+
<pre class="programlisting">
|
2239
|
+
( \( ( (?>[^()]+) | (?1) )* \) )
|
2240
|
+
</pre>
|
2241
|
+
<p>
|
2242
|
+
We have put the pattern into parentheses, and caused the recursion to
|
2243
|
+
refer to them instead of the whole pattern. In a larger pattern, keeping
|
2244
|
+
track of parenthesis numbers can be tricky. It may be more convenient to
|
2245
|
+
use named parentheses instead.
|
2246
|
+
The Perl syntax for this is (?&name); GRegex also supports the (?P>name)
|
2247
|
+
syntax. We could rewrite the above example as follows:
|
2248
|
+
</p>
|
2249
|
+
<pre class="programlisting">
|
2250
|
+
(?<pn> \( ( (?>[^()]+) | (?&pn) )* \) )
|
2251
|
+
</pre>
|
2252
|
+
<p>
|
2253
|
+
If there is more than one subpattern with the same name, the earliest one is
|
2254
|
+
used. This particular example pattern contains nested unlimited repeats, and so
|
2255
|
+
the use of atomic grouping for matching strings of non-parentheses is important
|
2256
|
+
when applying the pattern to strings that do not match.
|
2257
|
+
For example, when this pattern is applied to
|
2258
|
+
</p>
|
2259
|
+
<pre class="programlisting">
|
2260
|
+
(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
|
2261
|
+
</pre>
|
2262
|
+
<p>
|
2263
|
+
it yields "no match" quickly. However, if atomic grouping is not used,
|
2264
|
+
the match runs for a very long time indeed because there are so many
|
2265
|
+
different ways the + and * repeats can carve up the string, and all
|
2266
|
+
have to be tested before failure can be reported.
|
2267
|
+
</p>
|
2268
|
+
<p>
|
2269
|
+
At the end of a match, the values set for any capturing subpatterns are
|
2270
|
+
those from the outermost level of the recursion at which the subpattern
|
2271
|
+
value is set.
|
2272
|
+
|
2273
|
+
|
2274
|
+
|
2275
|
+
If the pattern above is matched against
|
2276
|
+
</p>
|
2277
|
+
<pre class="programlisting">
|
2278
|
+
(ab(cd)ef)
|
2279
|
+
</pre>
|
2280
|
+
<p>
|
2281
|
+
the value for the capturing parentheses is "ef", which is the last
|
2282
|
+
value taken on at the top level. If additional parentheses are added,
|
2283
|
+
giving
|
2284
|
+
</p>
|
2285
|
+
<pre class="programlisting">
|
2286
|
+
\( ( ( (?>[^()]+) | (?R) )* ) \)
|
2287
|
+
   ^                        ^
|
2288
|
+
   ^                        ^
|
2289
|
+
</pre>
|
2290
|
+
<p>
|
2291
|
+
the string they capture is "ab(cd)ef", the contents of the top level
|
2292
|
+
parentheses.
|
2293
|
+
</p>
|
2294
|
+
<p>
|
2295
|
+
Do not confuse the (?R) item with the condition (R), which tests for
|
2296
|
+
recursion. Consider this pattern, which matches text in angle brackets,
|
2297
|
+
allowing for arbitrary nesting. Only digits are allowed in nested
|
2298
|
+
brackets (that is, when recursing), whereas any characters are permitted
|
2299
|
+
at the outer level.
|
2300
|
+
</p>
|
2301
|
+
<pre class="programlisting">
|
2302
|
+
< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >
|
2303
|
+
</pre>
|
2304
|
+
<p>
|
2305
|
+
In this pattern, (?(R) is the start of a conditional subpattern, with
|
2306
|
+
two different alternatives for the recursive and non-recursive cases.
|
2307
|
+
The (?R) item is the actual recursive call.
|
2308
|
+
</p>
|
2309
|
+
</div>
|
2310
|
+
<div class="refsect1" title="Subpatterns as subroutines">
|
2311
|
+
<a name="id563035"></a><h2>Subpatterns as subroutines</h2>
|
2312
|
+
<p>
|
2313
|
+
If the syntax for a recursive subpattern reference (either by number or
|
2314
|
+
by name) is used outside the parentheses to which it refers, it operates
|
2315
|
+
like a subroutine in a programming language. The "called" subpattern may
|
2316
|
+
be defined before or after the reference. An earlier example pointed out
|
2317
|
+
that the pattern
|
2318
|
+
</p>
|
2319
|
+
<pre class="programlisting">
|
2320
|
+
(sens|respons)e and \1ibility
|
2321
|
+
</pre>
|
2322
|
+
<p>
|
2323
|
+
matches "sense and sensibility" and "response and responsibility", but
|
2324
|
+
not "sense and responsibility". If instead the pattern
|
2325
|
+
</p>
|
2326
|
+
<pre class="programlisting">
|
2327
|
+
(sens|respons)e and (?1)ibility
|
2328
|
+
</pre>
|
2329
|
+
<p>
|
2330
|
+
is used, it does match "sense and responsibility" as well as the other
|
2331
|
+
two strings. Another example is given in the discussion of DEFINE above.
|
2332
|
+
</p>
|
2333
|
+
<p>
|
2334
|
+
Like recursive subpatterns, a "subroutine" call is always treated as an atomic
|
2335
|
+
group. That is, once it has matched some of the string, it is never
|
2336
|
+
re-entered, even if it contains untried alternatives and there is a subsequent
|
2337
|
+
matching failure.
|
2338
|
+
</p>
|
2339
|
+
<p>
|
2340
|
+
When a subpattern is used as a subroutine, processing options such as
|
2341
|
+
case-independence are fixed when the subpattern is defined. They cannot be
|
2342
|
+
changed for different calls. For example, consider this pattern:
|
2343
|
+
</p>
|
2344
|
+
<pre class="programlisting">
|
2345
|
+
(abc)(?i:(?1))
|
2346
|
+
</pre>
|
2347
|
+
<p>
|
2348
|
+
It matches "abcabc". It does not match "abcABC" because the change of
|
2349
|
+
processing option does not affect the called subpattern.
|
2350
|
+
</p>
|
2351
|
+
</div>
|
2352
|
+
<div class="refsect1" title="Copyright">
|
2353
|
+
<a name="id563103"></a><h2>Copyright</h2>
|
2354
|
+
<p>
|
2355
|
+
This document was copied and adapted from the PCRE documentation,
|
2356
|
+
specifically from the man page for pcrepattern.
|
2357
|
+
The original copyright notice is:
|
2358
|
+
</p>
|
2359
|
+
<pre class="programlisting">
|
2360
|
+
Copyright (c) 1997-2006 University of Cambridge.
|
2361
|
+
|
2362
|
+
Redistribution and use in source and binary forms, with or without
|
2363
|
+
modification, are permitted provided that the following conditions are met:
|
2364
|
+
|
2365
|
+
* Redistributions of source code must retain the above copyright notice,
|
2366
|
+
this list of conditions and the following disclaimer.
|
2367
|
+
|
2368
|
+
* Redistributions in binary form must reproduce the above copyright
|
2369
|
+
notice, this list of conditions and the following disclaimer in the
|
2370
|
+
documentation and/or other materials provided with the distribution.
|
2371
|
+
|
2372
|
+
* Neither the name of the University of Cambridge nor the name of Google
|
2373
|
+
Inc. nor the names of their contributors may be used to endorse or
|
2374
|
+
promote products derived from this software without specific prior
|
2375
|
+
written permission.
|
2376
|
+
|
2377
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
2378
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
2379
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
2380
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
2381
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
2382
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
2383
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
2384
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
2385
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
2386
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2387
|
+
POSSIBILITY OF SUCH DAMAGE.
|
2388
|
+
</pre>
|
2389
|
+
</div>
|
2390
|
+
</div>
|
2391
|
+
<div class="footer">
|
2392
|
+
<hr>
|
2393
|
+
Generated by GTK-Doc V1.14</div>
|
2394
|
+
</body>
|
2395
|
+
</html>
|