pango 2.2.0-x86-mingw32 → 2.2.1-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +21 -2
- data/lib/2.0/pango.so +0 -0
- data/vendor/local/bin/derb.exe +0 -0
- data/vendor/local/bin/genbrk.exe +0 -0
- data/vendor/local/bin/genccode.exe +0 -0
- data/vendor/local/bin/gencfu.exe +0 -0
- data/vendor/local/bin/gencmn.exe +0 -0
- data/vendor/local/bin/gencnval.exe +0 -0
- data/vendor/local/bin/gendict.exe +0 -0
- data/vendor/local/bin/gennorm2.exe +0 -0
- data/vendor/local/bin/genrb.exe +0 -0
- data/vendor/local/bin/gensprep.exe +0 -0
- data/vendor/local/bin/hb-ot-shape-closure.exe +0 -0
- data/vendor/local/bin/hb-shape.exe +0 -0
- data/vendor/local/bin/hb-view.exe +0 -0
- data/vendor/local/bin/icu-config +820 -0
- data/vendor/local/bin/icuinfo.exe +0 -0
- data/vendor/local/bin/icupkg.exe +0 -0
- data/vendor/local/bin/libharfbuzz-0.dll +0 -0
- data/vendor/local/bin/libpango-1.0-0.dll +0 -0
- data/vendor/local/bin/libpangocairo-1.0-0.dll +0 -0
- data/vendor/local/bin/libpangoft2-1.0-0.dll +0 -0
- data/vendor/local/bin/libpangowin32-1.0-0.dll +0 -0
- data/vendor/local/bin/libstdc++-6.dll +0 -0
- data/vendor/local/bin/makeconv.exe +0 -0
- data/vendor/local/bin/pango-querymodules.exe +0 -0
- data/vendor/local/bin/pango-view.exe +0 -0
- data/vendor/local/bin/pkgdata.exe +0 -0
- data/vendor/local/bin/uconv.exe +0 -0
- data/vendor/local/etc/pango/pango.modules +3 -3
- data/vendor/local/include/harfbuzz/hb-blob.h +3 -4
- data/vendor/local/include/harfbuzz/hb-buffer.h +22 -1
- data/vendor/local/include/harfbuzz/hb-common.h +23 -18
- data/vendor/local/include/harfbuzz/hb-icu.h +52 -0
- data/vendor/local/include/harfbuzz/hb-ot-font.h +41 -0
- data/vendor/local/include/harfbuzz/hb-ot-layout.h +11 -3
- data/vendor/local/include/harfbuzz/hb-ot-shape.h +10 -11
- data/vendor/local/include/harfbuzz/hb-ot.h +1 -0
- data/vendor/local/include/harfbuzz/hb-version.h +7 -7
- data/vendor/local/include/layout/LEFontInstance.h +550 -0
- data/vendor/local/include/layout/LEGlyphFilter.h +45 -0
- data/vendor/local/include/layout/LEGlyphStorage.h +546 -0
- data/vendor/local/include/layout/LEInsertionList.h +177 -0
- data/vendor/local/include/layout/LELanguages.h +112 -0
- data/vendor/local/include/layout/LEScripts.h +263 -0
- data/vendor/local/include/layout/LESwaps.h +100 -0
- data/vendor/local/include/layout/LETableReference.h +418 -0
- data/vendor/local/include/layout/LETypes.h +728 -0
- data/vendor/local/include/layout/LayoutEngine.h +518 -0
- data/vendor/local/include/layout/ParagraphLayout.h +747 -0
- data/vendor/local/include/layout/RunArrays.h +676 -0
- data/vendor/local/include/layout/loengine.h +225 -0
- data/vendor/local/include/layout/playout.h +466 -0
- data/vendor/local/include/layout/plruns.h +441 -0
- data/vendor/local/include/pango-1.0/pango/pango-features.h +2 -2
- data/vendor/local/include/pango-1.0/pango/pango-font.h +3 -1
- data/vendor/local/include/pango-1.0/pango/pango-item.h +11 -2
- data/vendor/local/include/unicode/alphaindex.h +752 -0
- data/vendor/local/include/unicode/appendable.h +232 -0
- data/vendor/local/include/unicode/basictz.h +214 -0
- data/vendor/local/include/unicode/brkiter.h +655 -0
- data/vendor/local/include/unicode/bytestream.h +257 -0
- data/vendor/local/include/unicode/bytestrie.h +519 -0
- data/vendor/local/include/unicode/bytestriebuilder.h +181 -0
- data/vendor/local/include/unicode/calendar.h +2519 -0
- data/vendor/local/include/unicode/caniter.h +208 -0
- data/vendor/local/include/unicode/chariter.h +722 -0
- data/vendor/local/include/unicode/choicfmt.h +594 -0
- data/vendor/local/include/unicode/coleitr.h +404 -0
- data/vendor/local/include/unicode/coll.h +1267 -0
- data/vendor/local/include/unicode/compactdecimalformat.h +330 -0
- data/vendor/local/include/unicode/curramt.h +130 -0
- data/vendor/local/include/unicode/currpinf.h +258 -0
- data/vendor/local/include/unicode/currunit.h +110 -0
- data/vendor/local/include/unicode/datefmt.h +883 -0
- data/vendor/local/include/unicode/dbbi.h +40 -0
- data/vendor/local/include/unicode/dcfmtsym.h +482 -0
- data/vendor/local/include/unicode/decimfmt.h +2479 -0
- data/vendor/local/include/unicode/docmain.h +215 -0
- data/vendor/local/include/unicode/dtfmtsym.h +912 -0
- data/vendor/local/include/unicode/dtintrv.h +158 -0
- data/vendor/local/include/unicode/dtitvfmt.h +985 -0
- data/vendor/local/include/unicode/dtitvinf.h +514 -0
- data/vendor/local/include/unicode/dtptngen.h +498 -0
- data/vendor/local/include/unicode/dtrule.h +250 -0
- data/vendor/local/include/unicode/enumset.h +64 -0
- data/vendor/local/include/unicode/errorcode.h +137 -0
- data/vendor/local/include/unicode/fieldpos.h +291 -0
- data/vendor/local/include/unicode/filteredbrk.h +131 -0
- data/vendor/local/include/unicode/fmtable.h +760 -0
- data/vendor/local/include/unicode/format.h +305 -0
- data/vendor/local/include/unicode/fpositer.h +117 -0
- data/vendor/local/include/unicode/gender.h +111 -0
- data/vendor/local/include/unicode/gregocal.h +777 -0
- data/vendor/local/include/unicode/icudataver.h +41 -0
- data/vendor/local/include/unicode/icuplug.h +371 -0
- data/vendor/local/include/unicode/idna.h +323 -0
- data/vendor/local/include/unicode/listformatter.h +167 -0
- data/vendor/local/include/unicode/localpointer.h +304 -0
- data/vendor/local/include/unicode/locdspnm.h +204 -0
- data/vendor/local/include/unicode/locid.h +815 -0
- data/vendor/local/include/unicode/measfmt.h +389 -0
- data/vendor/local/include/unicode/measunit.h +1443 -0
- data/vendor/local/include/unicode/measure.h +159 -0
- data/vendor/local/include/unicode/messagepattern.h +943 -0
- data/vendor/local/include/unicode/msgfmt.h +1093 -0
- data/vendor/local/include/unicode/normalizer2.h +658 -0
- data/vendor/local/include/unicode/normlzr.h +797 -0
- data/vendor/local/include/unicode/numfmt.h +1187 -0
- data/vendor/local/include/unicode/numsys.h +208 -0
- data/vendor/local/include/unicode/parseerr.h +92 -0
- data/vendor/local/include/unicode/parsepos.h +230 -0
- data/vendor/local/include/unicode/platform.h +751 -0
- data/vendor/local/include/unicode/plurfmt.h +615 -0
- data/vendor/local/include/unicode/plurrule.h +501 -0
- data/vendor/local/include/unicode/ptypes.h +126 -0
- data/vendor/local/include/unicode/putil.h +181 -0
- data/vendor/local/include/unicode/rbbi.h +782 -0
- data/vendor/local/include/unicode/rbnf.h +1032 -0
- data/vendor/local/include/unicode/rbtz.h +362 -0
- data/vendor/local/include/unicode/regex.h +1857 -0
- data/vendor/local/include/unicode/region.h +228 -0
- data/vendor/local/include/unicode/reldatefmt.h +498 -0
- data/vendor/local/include/unicode/rep.h +261 -0
- data/vendor/local/include/unicode/resbund.h +490 -0
- data/vendor/local/include/unicode/schriter.h +187 -0
- data/vendor/local/include/unicode/scientificformathelper.h +139 -0
- data/vendor/local/include/unicode/search.h +575 -0
- data/vendor/local/include/unicode/selfmt.h +367 -0
- data/vendor/local/include/unicode/simpletz.h +928 -0
- data/vendor/local/include/unicode/smpdtfmt.h +1592 -0
- data/vendor/local/include/unicode/sortkey.h +338 -0
- data/vendor/local/include/unicode/std_string.h +37 -0
- data/vendor/local/include/unicode/strenum.h +276 -0
- data/vendor/local/include/unicode/stringpiece.h +224 -0
- data/vendor/local/include/unicode/stringtriebuilder.h +402 -0
- data/vendor/local/include/unicode/stsearch.h +504 -0
- data/vendor/local/include/unicode/symtable.h +112 -0
- data/vendor/local/include/unicode/tblcoll.h +873 -0
- data/vendor/local/include/unicode/timezone.h +948 -0
- data/vendor/local/include/unicode/tmunit.h +129 -0
- data/vendor/local/include/unicode/tmutamt.h +168 -0
- data/vendor/local/include/unicode/tmutfmt.h +243 -0
- data/vendor/local/include/unicode/translit.h +1342 -0
- data/vendor/local/include/unicode/tzfmt.h +1098 -0
- data/vendor/local/include/unicode/tznames.h +404 -0
- data/vendor/local/include/unicode/tzrule.h +828 -0
- data/vendor/local/include/unicode/tztrans.h +195 -0
- data/vendor/local/include/unicode/ubidi.h +2186 -0
- data/vendor/local/include/unicode/ubrk.h +540 -0
- data/vendor/local/include/unicode/ucal.h +1560 -0
- data/vendor/local/include/unicode/ucasemap.h +423 -0
- data/vendor/local/include/unicode/ucat.h +158 -0
- data/vendor/local/include/unicode/uchar.h +3426 -0
- data/vendor/local/include/unicode/ucharstrie.h +576 -0
- data/vendor/local/include/unicode/ucharstriebuilder.h +185 -0
- data/vendor/local/include/unicode/uchriter.h +381 -0
- data/vendor/local/include/unicode/uclean.h +258 -0
- data/vendor/local/include/unicode/ucnv.h +2036 -0
- data/vendor/local/include/unicode/ucnv_cb.h +162 -0
- data/vendor/local/include/unicode/ucnv_err.h +463 -0
- data/vendor/local/include/unicode/ucnvsel.h +187 -0
- data/vendor/local/include/unicode/ucol.h +1474 -0
- data/vendor/local/include/unicode/ucoleitr.h +266 -0
- data/vendor/local/include/unicode/uconfig.h +430 -0
- data/vendor/local/include/unicode/ucsdet.h +413 -0
- data/vendor/local/include/unicode/ucurr.h +424 -0
- data/vendor/local/include/unicode/udat.h +1536 -0
- data/vendor/local/include/unicode/udata.h +430 -0
- data/vendor/local/include/unicode/udateintervalformat.h +181 -0
- data/vendor/local/include/unicode/udatpg.h +588 -0
- data/vendor/local/include/unicode/udisplaycontext.h +150 -0
- data/vendor/local/include/unicode/uenum.h +206 -0
- data/vendor/local/include/unicode/uformattable.h +280 -0
- data/vendor/local/include/unicode/ugender.h +82 -0
- data/vendor/local/include/unicode/uidna.h +762 -0
- data/vendor/local/include/unicode/uiter.h +707 -0
- data/vendor/local/include/unicode/uldnames.h +302 -0
- data/vendor/local/include/unicode/uloc.h +1256 -0
- data/vendor/local/include/unicode/ulocdata.h +277 -0
- data/vendor/local/include/unicode/umachine.h +356 -0
- data/vendor/local/include/unicode/umisc.h +60 -0
- data/vendor/local/include/unicode/umsg.h +623 -0
- data/vendor/local/include/unicode/unifilt.h +120 -0
- data/vendor/local/include/unicode/unifunct.h +125 -0
- data/vendor/local/include/unicode/unimatch.h +163 -0
- data/vendor/local/include/unicode/unirepl.h +97 -0
- data/vendor/local/include/unicode/uniset.h +1691 -0
- data/vendor/local/include/unicode/unistr.h +4470 -0
- data/vendor/local/include/unicode/unorm.h +561 -0
- data/vendor/local/include/unicode/unorm2.h +528 -0
- data/vendor/local/include/unicode/unum.h +1328 -0
- data/vendor/local/include/unicode/unumsys.h +170 -0
- data/vendor/local/include/unicode/uobject.h +320 -0
- data/vendor/local/include/unicode/upluralrules.h +145 -0
- data/vendor/local/include/unicode/uregex.h +1591 -0
- data/vendor/local/include/unicode/uregion.h +248 -0
- data/vendor/local/include/unicode/urename.h +1784 -0
- data/vendor/local/include/unicode/urep.h +155 -0
- data/vendor/local/include/unicode/ures.h +887 -0
- data/vendor/local/include/unicode/uscript.h +642 -0
- data/vendor/local/include/unicode/usearch.h +885 -0
- data/vendor/local/include/unicode/uset.h +1126 -0
- data/vendor/local/include/unicode/usetiter.h +318 -0
- data/vendor/local/include/unicode/ushape.h +474 -0
- data/vendor/local/include/unicode/uspoof.h +1064 -0
- data/vendor/local/include/unicode/usprep.h +269 -0
- data/vendor/local/include/unicode/ustdio.h +1018 -0
- data/vendor/local/include/unicode/ustream.h +68 -0
- data/vendor/local/include/unicode/ustring.h +1700 -0
- data/vendor/local/include/unicode/ustringtrie.h +95 -0
- data/vendor/local/include/unicode/utext.h +1600 -0
- data/vendor/local/include/unicode/utf.h +223 -0
- data/vendor/local/include/unicode/utf16.h +623 -0
- data/vendor/local/include/unicode/utf32.h +23 -0
- data/vendor/local/include/unicode/utf8.h +824 -0
- data/vendor/local/include/unicode/utf_old.h +1169 -0
- data/vendor/local/include/unicode/utmscale.h +481 -0
- data/vendor/local/include/unicode/utrace.h +359 -0
- data/vendor/local/include/unicode/utrans.h +658 -0
- data/vendor/local/include/unicode/utypes.h +723 -0
- data/vendor/local/include/unicode/uvernum.h +170 -0
- data/vendor/local/include/unicode/uversion.h +193 -0
- data/vendor/local/include/unicode/vtzone.h +455 -0
- data/vendor/local/lib/girepository-1.0/Pango-1.0.typelib +0 -0
- data/vendor/local/lib/girepository-1.0/PangoCairo-1.0.typelib +0 -0
- data/vendor/local/lib/girepository-1.0/PangoFT2-1.0.typelib +0 -0
- data/vendor/local/lib/icu/54.1/Makefile.inc +293 -0
- data/vendor/local/lib/icu/54.1/pkgdata.inc +17 -0
- data/vendor/local/lib/icu/Makefile.inc +293 -0
- data/vendor/local/lib/icu/pkgdata.inc +17 -0
- data/vendor/local/lib/icudt.dll +0 -0
- data/vendor/local/lib/icudt54.dll +0 -0
- data/vendor/local/lib/icuin.dll +0 -0
- data/vendor/local/lib/icuin54.dll +0 -0
- data/vendor/local/lib/icuio.dll +0 -0
- data/vendor/local/lib/icuio54.dll +0 -0
- data/vendor/local/lib/icule.dll +0 -0
- data/vendor/local/lib/icule54.dll +0 -0
- data/vendor/local/lib/iculx.dll +0 -0
- data/vendor/local/lib/iculx54.dll +0 -0
- data/vendor/local/lib/icutest.dll +0 -0
- data/vendor/local/lib/icutest54.dll +0 -0
- data/vendor/local/lib/icutu.dll +0 -0
- data/vendor/local/lib/icutu54.dll +0 -0
- data/vendor/local/lib/icuuc.dll +0 -0
- data/vendor/local/lib/icuuc54.dll +0 -0
- data/vendor/local/lib/libharfbuzz-icu.a +0 -0
- data/vendor/local/lib/libharfbuzz-icu.la +41 -0
- data/vendor/local/lib/libharfbuzz.dll.a +0 -0
- data/vendor/local/lib/libharfbuzz.la +3 -3
- data/vendor/local/lib/libicudt.dll.a +0 -0
- data/vendor/local/lib/libicuin.dll.a +0 -0
- data/vendor/local/lib/libicuio.dll.a +0 -0
- data/vendor/local/lib/libicule.dll.a +0 -0
- data/vendor/local/lib/libiculx.dll.a +0 -0
- data/vendor/local/lib/libicutest.dll.a +0 -0
- data/vendor/local/lib/libicutu.dll.a +0 -0
- data/vendor/local/lib/libicuuc.dll.a +0 -0
- data/vendor/local/lib/libpango-1.0.dll.a +0 -0
- data/vendor/local/lib/libpango-1.0.la +2 -2
- data/vendor/local/lib/libpangocairo-1.0.dll.a +0 -0
- data/vendor/local/lib/libpangocairo-1.0.la +2 -2
- data/vendor/local/lib/libpangoft2-1.0.dll.a +0 -0
- data/vendor/local/lib/libpangoft2-1.0.la +2 -2
- data/vendor/local/lib/libpangowin32-1.0.dll.a +0 -0
- data/vendor/local/lib/libpangowin32-1.0.la +2 -2
- data/vendor/local/lib/pango/1.8.0/modules/pango-arabic-lang.dll +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-arabic-lang.dll.a +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-arabic-lang.la +1 -1
- data/vendor/local/lib/pango/1.8.0/modules/pango-basic-fc.dll +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-basic-fc.dll.a +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-basic-fc.la +1 -1
- data/vendor/local/lib/pango/1.8.0/modules/pango-basic-win32.dll +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-basic-win32.dll.a +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-basic-win32.la +1 -1
- data/vendor/local/lib/pango/1.8.0/modules/pango-indic-lang.dll +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-indic-lang.dll.a +0 -0
- data/vendor/local/lib/pango/1.8.0/modules/pango-indic-lang.la +1 -1
- data/vendor/local/lib/pkgconfig/harfbuzz-icu.pc +13 -0
- data/vendor/local/lib/pkgconfig/harfbuzz.pc +1 -1
- data/vendor/local/lib/pkgconfig/icu-i18n.pc +38 -0
- data/vendor/local/lib/pkgconfig/icu-io.pc +38 -0
- data/vendor/local/lib/pkgconfig/icu-le.pc +38 -0
- data/vendor/local/lib/pkgconfig/icu-lx.pc +38 -0
- data/vendor/local/lib/pkgconfig/icu-uc.pc +38 -0
- data/vendor/local/lib/pkgconfig/pango.pc +1 -1
- data/vendor/local/lib/pkgconfig/pangocairo.pc +1 -1
- data/vendor/local/lib/pkgconfig/pangoft2.pc +1 -1
- data/vendor/local/lib/pkgconfig/pangowin32.pc +1 -1
- data/vendor/local/share/gir-1.0/Pango-1.0.gir +3009 -462
- data/vendor/local/share/gir-1.0/PangoCairo-1.0.gir +64 -22
- data/vendor/local/share/gir-1.0/PangoFT2-1.0.gir +4 -2
- data/vendor/local/share/gtk-doc/html/harfbuzz/annotation-glossary.html +30 -26
- data/vendor/local/share/gtk-doc/html/harfbuzz/api-index-full.html +343 -313
- data/vendor/local/share/gtk-doc/html/harfbuzz/ch01.html +5 -5
- data/vendor/local/share/gtk-doc/html/harfbuzz/deprecated-api-index.html +6 -8
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-blob.html +442 -247
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-buffer.html +1389 -654
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-common.html +954 -698
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-coretext.html +95 -18
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-deprecated.html +36 -27
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-face.html +477 -191
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-font.html +1951 -1039
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ft.html +123 -65
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-glib.html +66 -21
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-gobject.html +11 -13
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-graphite2.html +67 -23
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-icu.html +66 -21
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ot-layout.html +574 -347
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ot-tag.html +104 -37
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ot.html +10 -27
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-set.html +744 -318
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-shape-plan.html +294 -153
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-shape.html +196 -104
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-unicode.html +1100 -757
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-uniscribe.html +51 -18
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-version.html +110 -84
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb.html +11 -13
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz.devhelp2 +289 -503
- data/vendor/local/share/gtk-doc/html/harfbuzz/index.html +3 -3
- data/vendor/local/share/gtk-doc/html/harfbuzz/index.sgml +161 -316
- data/vendor/local/share/gtk-doc/html/harfbuzz/object-tree.html +31 -4
- data/vendor/local/share/gtk-doc/html/harfbuzz/style.css +260 -105
- data/vendor/local/share/gtk-doc/html/pango/PangoEngineLang.html +143 -105
- data/vendor/local/share/gtk-doc/html/pango/PangoEngineShape.html +150 -108
- data/vendor/local/share/gtk-doc/html/pango/PangoFcDecoder.html +163 -112
- data/vendor/local/share/gtk-doc/html/pango/PangoFcFont.html +348 -229
- data/vendor/local/share/gtk-doc/html/pango/PangoFcFontMap.html +746 -514
- data/vendor/local/share/gtk-doc/html/pango/PangoMarkupFormat.html +9 -9
- data/vendor/local/share/gtk-doc/html/pango/PangoRenderer.html +853 -623
- data/vendor/local/share/gtk-doc/html/pango/annotation-glossary.html +29 -13
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-10.html +31 -33
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-12.html +10 -12
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-14.html +12 -14
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-16.html +53 -55
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-18.html +28 -30
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-2.html +29 -31
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-20.html +17 -19
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-22.html +28 -30
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-24.html +17 -19
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-26.html +9 -11
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-30.html +6 -6
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-31-0.html +6 -6
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-32-4.html +11 -13
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-32.html +7 -9
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-34.html +5 -5
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-4.html +49 -51
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-6.html +36 -38
- data/vendor/local/share/gtk-doc/html/pango/api-index-1-8.html +37 -39
- data/vendor/local/share/gtk-doc/html/pango/api-index-deprecated.html +68 -70
- data/vendor/local/share/gtk-doc/html/pango/api-index-full.html +568 -570
- data/vendor/local/share/gtk-doc/html/pango/index.html +3 -3
- data/vendor/local/share/gtk-doc/html/pango/index.sgml +229 -399
- data/vendor/local/share/gtk-doc/html/pango/lowlevel.html +4 -4
- data/vendor/local/share/gtk-doc/html/pango/pango-Bidirectional-Text.html +345 -259
- data/vendor/local/share/gtk-doc/html/pango/pango-Cairo-Rendering.html +979 -664
- data/vendor/local/share/gtk-doc/html/pango/pango-CoreText-Fonts.html +70 -43
- data/vendor/local/share/gtk-doc/html/pango/pango-Coverage-Maps.html +349 -229
- data/vendor/local/share/gtk-doc/html/pango/pango-Engines.html +236 -153
- data/vendor/local/share/gtk-doc/html/pango/pango-Fonts.html +3100 -2159
- data/vendor/local/share/gtk-doc/html/pango/pango-FreeType-Fonts-and-Rendering.html +659 -472
- data/vendor/local/share/gtk-doc/html/pango/pango-Glyph-Storage.html +1952 -1384
- data/vendor/local/share/gtk-doc/html/pango/pango-Layout-Objects.html +3050 -2135
- data/vendor/local/share/gtk-doc/html/pango/pango-Miscellaneous-Utilities.html +676 -463
- data/vendor/local/share/gtk-doc/html/pango/pango-Modules.html +223 -146
- data/vendor/local/share/gtk-doc/html/pango/pango-OpenType-Font-Handling.html +1589 -1199
- data/vendor/local/share/gtk-doc/html/pango/pango-Scripts-and-Languages.html +1072 -842
- data/vendor/local/share/gtk-doc/html/pango/pango-Tab-Stops.html +406 -276
- data/vendor/local/share/gtk-doc/html/pango/pango-Text-Attributes.html +2541 -1792
- data/vendor/local/share/gtk-doc/html/pango/pango-Text-Processing.html +1479 -1033
- data/vendor/local/share/gtk-doc/html/pango/pango-Version-Checking.html +214 -154
- data/vendor/local/share/gtk-doc/html/pango/pango-Vertical-Text.html +374 -291
- data/vendor/local/share/gtk-doc/html/pango/pango-Win32-Fonts-and-Rendering.html +733 -498
- data/vendor/local/share/gtk-doc/html/pango/pango-Xft-Fonts-and-Rendering.html +893 -617
- data/vendor/local/share/gtk-doc/html/pango/pango-hierarchy.html +59 -59
- data/vendor/local/share/gtk-doc/html/pango/pango-querymodules.html +5 -5
- data/vendor/local/share/gtk-doc/html/pango/pango.devhelp2 +578 -782
- data/vendor/local/share/gtk-doc/html/pango/pango.html +4 -4
- data/vendor/local/share/gtk-doc/html/pango/rendering.html +4 -4
- data/vendor/local/share/gtk-doc/html/pango/style.css +260 -105
- data/vendor/local/share/gtk-doc/html/pango/tools.html +4 -4
- data/vendor/local/share/icu/54.1/config/mh-mingw +158 -0
- data/vendor/local/share/icu/54.1/install-sh +251 -0
- data/vendor/local/share/icu/54.1/license.html +385 -0
- data/vendor/local/share/icu/54.1/mkinstalldirs +43 -0
- data/vendor/local/share/license/harfbuzz/AUTHORS +1 -0
- data/vendor/local/share/man/man1/derb.1 +196 -0
- data/vendor/local/share/man/man1/genbrk.1 +112 -0
- data/vendor/local/share/man/man1/gencfu.1 +91 -0
- data/vendor/local/share/man/man1/gencnval.1 +91 -0
- data/vendor/local/share/man/man1/gendict.1 +131 -0
- data/vendor/local/share/man/man1/genrb.1 +146 -0
- data/vendor/local/share/man/man1/icu-config.1 +288 -0
- data/vendor/local/share/man/man1/makeconv.1 +112 -0
- data/vendor/local/share/man/man1/pango-querymodules.1 +2 -2
- data/vendor/local/share/man/man1/pango-view.1 +1 -118
- data/vendor/local/share/man/man1/pkgdata.1 +258 -0
- data/vendor/local/share/man/man1/uconv.1 +443 -0
- data/vendor/local/share/man/man8/genccode.8 +106 -0
- data/vendor/local/share/man/man8/gencmn.8 +129 -0
- data/vendor/local/share/man/man8/gensprep.8 +102 -0
- data/vendor/local/share/man/man8/icupkg.8 +204 -0
- metadata +266 -14
- data/lib/1.9/pango.so +0 -0
- data/lib/2.1/pango.so +0 -0
@@ -0,0 +1,1691 @@
|
|
1
|
+
/*
|
2
|
+
***************************************************************************
|
3
|
+
* Copyright (C) 1999-2014, International Business Machines Corporation
|
4
|
+
* and others. All Rights Reserved.
|
5
|
+
***************************************************************************
|
6
|
+
* Date Name Description
|
7
|
+
* 10/20/99 alan Creation.
|
8
|
+
***************************************************************************
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef UNICODESET_H
|
12
|
+
#define UNICODESET_H
|
13
|
+
|
14
|
+
#include "unicode/unifilt.h"
|
15
|
+
#include "unicode/unistr.h"
|
16
|
+
#include "unicode/uset.h"
|
17
|
+
|
18
|
+
/**
|
19
|
+
* \file
|
20
|
+
* \brief C++ API: Unicode Set
|
21
|
+
*/
|
22
|
+
|
23
|
+
U_NAMESPACE_BEGIN
|
24
|
+
|
25
|
+
// Forward Declarations.
|
26
|
+
void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */
|
27
|
+
|
28
|
+
class BMPSet;
|
29
|
+
class ParsePosition;
|
30
|
+
class RBBIRuleScanner;
|
31
|
+
class SymbolTable;
|
32
|
+
class UnicodeSetStringSpan;
|
33
|
+
class UVector;
|
34
|
+
class RuleCharacterIterator;
|
35
|
+
|
36
|
+
/**
|
37
|
+
* A mutable set of Unicode characters and multicharacter strings. Objects of this class
|
38
|
+
* represent <em>character classes</em> used in regular expressions.
|
39
|
+
* A character class specifies a subset of Unicode code points. Legal
|
40
|
+
* code points are U+0000 to U+10FFFF, inclusive.
|
41
|
+
*
|
42
|
+
* <p>The UnicodeSet class is not designed to be subclassed.
|
43
|
+
*
|
44
|
+
* <p><code>UnicodeSet</code> supports two APIs. The first is the
|
45
|
+
* <em>operand</em> API that allows the caller to modify the value of
|
46
|
+
* a <code>UnicodeSet</code> object. It conforms to Java 2's
|
47
|
+
* <code>java.util.Set</code> interface, although
|
48
|
+
* <code>UnicodeSet</code> does not actually implement that
|
49
|
+
* interface. All methods of <code>Set</code> are supported, with the
|
50
|
+
* modification that they take a character range or single character
|
51
|
+
* instead of an <code>Object</code>, and they take a
|
52
|
+
* <code>UnicodeSet</code> instead of a <code>Collection</code>. The
|
53
|
+
* operand API may be thought of in terms of boolean logic: a boolean
|
54
|
+
* OR is implemented by <code>add</code>, a boolean AND is implemented
|
55
|
+
* by <code>retain</code>, a boolean XOR is implemented by
|
56
|
+
* <code>complement</code> taking an argument, and a boolean NOT is
|
57
|
+
* implemented by <code>complement</code> with no argument. In terms
|
58
|
+
* of traditional set theory function names, <code>add</code> is a
|
59
|
+
* union, <code>retain</code> is an intersection, <code>remove</code>
|
60
|
+
* is an asymmetric difference, and <code>complement</code> with no
|
61
|
+
* argument is a set complement with respect to the superset range
|
62
|
+
* <code>MIN_VALUE-MAX_VALUE</code>.
|
63
|
+
*
|
64
|
+
* <p>The second API is the
|
65
|
+
* <code>applyPattern()</code>/<code>toPattern()</code> API from the
|
66
|
+
* <code>java.text.Format</code>-derived classes. Unlike the
|
67
|
+
* methods that add characters, add categories, and control the logic
|
68
|
+
* of the set, the method <code>applyPattern()</code> sets all
|
69
|
+
* attributes of a <code>UnicodeSet</code> at once, based on a
|
70
|
+
* string pattern.
|
71
|
+
*
|
72
|
+
* <p><b>Pattern syntax</b></p>
|
73
|
+
*
|
74
|
+
* Patterns are accepted by the constructors and the
|
75
|
+
* <code>applyPattern()</code> methods and returned by the
|
76
|
+
* <code>toPattern()</code> method. These patterns follow a syntax
|
77
|
+
* similar to that employed by version 8 regular expression character
|
78
|
+
* classes. Here are some simple examples:
|
79
|
+
*
|
80
|
+
* \htmlonly<blockquote>\endhtmlonly
|
81
|
+
* <table>
|
82
|
+
* <tr valign="top">
|
83
|
+
* <td nowrap valign="top" align="left"><code>[]</code></td>
|
84
|
+
* <td valign="top">No characters</td>
|
85
|
+
* </tr><tr valign="top">
|
86
|
+
* <td nowrap valign="top" align="left"><code>[a]</code></td>
|
87
|
+
* <td valign="top">The character 'a'</td>
|
88
|
+
* </tr><tr valign="top">
|
89
|
+
* <td nowrap valign="top" align="left"><code>[ae]</code></td>
|
90
|
+
* <td valign="top">The characters 'a' and 'e'</td>
|
91
|
+
* </tr>
|
92
|
+
* <tr>
|
93
|
+
* <td nowrap valign="top" align="left"><code>[a-e]</code></td>
|
94
|
+
* <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
|
95
|
+
* point order</td>
|
96
|
+
* </tr>
|
97
|
+
* <tr>
|
98
|
+
* <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
|
99
|
+
* <td valign="top">The character U+4E01</td>
|
100
|
+
* </tr>
|
101
|
+
* <tr>
|
102
|
+
* <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
|
103
|
+
* <td valign="top">The character 'a' and the multicharacter strings "ab" and
|
104
|
+
* "ac"</td>
|
105
|
+
* </tr>
|
106
|
+
* <tr>
|
107
|
+
* <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
|
108
|
+
* <td valign="top">All characters in the general category Uppercase Letter</td>
|
109
|
+
* </tr>
|
110
|
+
* </table>
|
111
|
+
* \htmlonly</blockquote>\endhtmlonly
|
112
|
+
*
|
113
|
+
* Any character may be preceded by a backslash in order to remove any special
|
114
|
+
* meaning. White space characters, as defined by UCharacter.isWhitespace(), are
|
115
|
+
* ignored, unless they are escaped.
|
116
|
+
*
|
117
|
+
* <p>Property patterns specify a set of characters having a certain
|
118
|
+
* property as defined by the Unicode standard. Both the POSIX-like
|
119
|
+
* "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a
|
120
|
+
* complete list of supported property patterns, see the User's Guide
|
121
|
+
* for UnicodeSet at
|
122
|
+
* <a href="http://icu-project.org/userguide/unicodeSet.html">
|
123
|
+
* http://icu-project.org/userguide/unicodeSet.html</a>.
|
124
|
+
* Actual determination of property data is defined by the underlying
|
125
|
+
* Unicode database as implemented by UCharacter.
|
126
|
+
*
|
127
|
+
* <p>Patterns specify individual characters, ranges of characters, and
|
128
|
+
* Unicode property sets. When elements are concatenated, they
|
129
|
+
* specify their union. To complement a set, place a '^' immediately
|
130
|
+
* after the opening '['. Property patterns are inverted by modifying
|
131
|
+
* their delimiters; "[:^foo:]" and "\\P{foo}". In any other location,
|
132
|
+
* '^' has no special meaning.
|
133
|
+
*
|
134
|
+
* <p>Ranges are indicated by placing a '-' between two
|
135
|
+
* characters, as in "a-z". This specifies the range of all
|
136
|
+
* characters from the left to the right, in Unicode order. If the
|
137
|
+
* left character is greater than or equal to the
|
138
|
+
* right character it is a syntax error. If a '-' occurs as the first
|
139
|
+
* character after the opening '[' or '[^', or if it occurs as the
|
140
|
+
* last character before the closing ']', then it is taken as a
|
141
|
+
* literal. Thus "[a\\-b]", "[-ab]", and "[ab-]" all indicate the same
|
142
|
+
* set of three characters, 'a', 'b', and '-'.
|
143
|
+
*
|
144
|
+
* <p>Sets may be intersected using the '&' operator or the asymmetric
|
145
|
+
* set difference may be taken using the '-' operator, for example,
|
146
|
+
* "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
|
147
|
+
* with values less than 4096. Operators ('&' and '-') have equal
|
148
|
+
* precedence and bind left-to-right. Thus
|
149
|
+
* "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
|
150
|
+
* "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for
|
151
|
+
* difference; intersection is commutative.
|
152
|
+
*
|
153
|
+
* <table>
|
154
|
+
* <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
|
155
|
+
* <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
|
156
|
+
* through 'z' and all letters in between, in Unicode order
|
157
|
+
* <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
|
158
|
+
* all characters but 'a' through 'z',
|
159
|
+
* that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
|
160
|
+
* <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
|
161
|
+
* <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
|
162
|
+
* <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
|
163
|
+
* <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
|
164
|
+
* <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
|
165
|
+
* <td>The asymmetric difference of sets specified by <em>pat1</em> and
|
166
|
+
* <em>pat2</em>
|
167
|
+
* <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
|
168
|
+
* <td>The set of characters having the specified
|
169
|
+
* Unicode property; in
|
170
|
+
* this case, Unicode uppercase letters
|
171
|
+
* <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
|
172
|
+
* <td>The set of characters <em>not</em> having the given
|
173
|
+
* Unicode property
|
174
|
+
* </table>
|
175
|
+
*
|
176
|
+
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
177
|
+
*
|
178
|
+
* <p><b>Formal syntax</b></p>
|
179
|
+
*
|
180
|
+
* \htmlonly<blockquote>\endhtmlonly
|
181
|
+
* <table>
|
182
|
+
* <tr align="top">
|
183
|
+
* <td nowrap valign="top" align="right"><code>pattern := </code></td>
|
184
|
+
* <td valign="top"><code>('[' '^'? item* ']') |
|
185
|
+
* property</code></td>
|
186
|
+
* </tr>
|
187
|
+
* <tr align="top">
|
188
|
+
* <td nowrap valign="top" align="right"><code>item := </code></td>
|
189
|
+
* <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
|
190
|
+
* </code></td>
|
191
|
+
* </tr>
|
192
|
+
* <tr align="top">
|
193
|
+
* <td nowrap valign="top" align="right"><code>pattern-expr := </code></td>
|
194
|
+
* <td valign="top"><code>pattern | pattern-expr pattern |
|
195
|
+
* pattern-expr op pattern<br>
|
196
|
+
* </code></td>
|
197
|
+
* </tr>
|
198
|
+
* <tr align="top">
|
199
|
+
* <td nowrap valign="top" align="right"><code>op := </code></td>
|
200
|
+
* <td valign="top"><code>'&' | '-'<br>
|
201
|
+
* </code></td>
|
202
|
+
* </tr>
|
203
|
+
* <tr align="top">
|
204
|
+
* <td nowrap valign="top" align="right"><code>special := </code></td>
|
205
|
+
* <td valign="top"><code>'[' | ']' | '-'<br>
|
206
|
+
* </code></td>
|
207
|
+
* </tr>
|
208
|
+
* <tr align="top">
|
209
|
+
* <td nowrap valign="top" align="right"><code>char := </code></td>
|
210
|
+
* <td valign="top"><em>any character that is not</em><code> special<br>
|
211
|
+
* | ('\' </code><em>any character</em><code>)<br>
|
212
|
+
* | ('\\u' hex hex hex hex)<br>
|
213
|
+
* </code></td>
|
214
|
+
* </tr>
|
215
|
+
* <tr align="top">
|
216
|
+
* <td nowrap valign="top" align="right"><code>hex := </code></td>
|
217
|
+
* <td valign="top"><em>any character for which
|
218
|
+
* </em><code>Character.digit(c, 16)</code><em>
|
219
|
+
* returns a non-negative result</em></td>
|
220
|
+
* </tr>
|
221
|
+
* <tr>
|
222
|
+
* <td nowrap valign="top" align="right"><code>property := </code></td>
|
223
|
+
* <td valign="top"><em>a Unicode property set pattern</em></td>
|
224
|
+
* </tr>
|
225
|
+
* </table>
|
226
|
+
* <br>
|
227
|
+
* <table border="1">
|
228
|
+
* <tr>
|
229
|
+
* <td>Legend: <table>
|
230
|
+
* <tr>
|
231
|
+
* <td nowrap valign="top"><code>a := b</code></td>
|
232
|
+
* <td width="20" valign="top"> </td>
|
233
|
+
* <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
|
234
|
+
* </tr>
|
235
|
+
* <tr>
|
236
|
+
* <td nowrap valign="top"><code>a?</code></td>
|
237
|
+
* <td valign="top"></td>
|
238
|
+
* <td valign="top">zero or one instance of <code>a</code><br>
|
239
|
+
* </td>
|
240
|
+
* </tr>
|
241
|
+
* <tr>
|
242
|
+
* <td nowrap valign="top"><code>a*</code></td>
|
243
|
+
* <td valign="top"></td>
|
244
|
+
* <td valign="top">zero or more instances of <code>a</code><br>
|
245
|
+
* </td>
|
246
|
+
* </tr>
|
247
|
+
* <tr>
|
248
|
+
* <td nowrap valign="top"><code>a | b</code></td>
|
249
|
+
* <td valign="top"></td>
|
250
|
+
* <td valign="top">either <code>a</code> or <code>b</code><br>
|
251
|
+
* </td>
|
252
|
+
* </tr>
|
253
|
+
* <tr>
|
254
|
+
* <td nowrap valign="top"><code>'a'</code></td>
|
255
|
+
* <td valign="top"></td>
|
256
|
+
* <td valign="top">the literal string between the quotes </td>
|
257
|
+
* </tr>
|
258
|
+
* </table>
|
259
|
+
* </td>
|
260
|
+
* </tr>
|
261
|
+
* </table>
|
262
|
+
* \htmlonly</blockquote>\endhtmlonly
|
263
|
+
*
|
264
|
+
* <p>Note:
|
265
|
+
* - Most UnicodeSet methods do not take a UErrorCode parameter because
|
266
|
+
* there are usually very few opportunities for failure other than a shortage
|
267
|
+
* of memory, error codes in low-level C++ string methods would be inconvenient,
|
268
|
+
* and the error code as the last parameter (ICU convention) would prevent
|
269
|
+
* the use of default parameter values.
|
270
|
+
* Instead, such methods set the UnicodeSet into a "bogus" state
|
271
|
+
* (see isBogus()) if an error occurs.
|
272
|
+
*
|
273
|
+
* @author Alan Liu
|
274
|
+
* @stable ICU 2.0
|
275
|
+
*/
|
276
|
+
class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
|
277
|
+
|
278
|
+
int32_t len; // length of list used; 0 <= len <= capacity
|
279
|
+
int32_t capacity; // capacity of list
|
280
|
+
UChar32* list; // MUST be terminated with HIGH
|
281
|
+
BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
|
282
|
+
UChar32* buffer; // internal buffer, may be NULL
|
283
|
+
int32_t bufferCapacity; // capacity of buffer
|
284
|
+
int32_t patLen;
|
285
|
+
|
286
|
+
/**
|
287
|
+
* The pattern representation of this set. This may not be the
|
288
|
+
* most economical pattern. It is the pattern supplied to
|
289
|
+
* applyPattern(), with variables substituted and whitespace
|
290
|
+
* removed. For sets constructed without applyPattern(), or
|
291
|
+
* modified using the non-pattern API, this string will be empty,
|
292
|
+
* indicating that toPattern() must generate a pattern
|
293
|
+
* representation from the inversion list.
|
294
|
+
*/
|
295
|
+
UChar *pat;
|
296
|
+
UVector* strings; // maintained in sorted order
|
297
|
+
UnicodeSetStringSpan *stringSpan;
|
298
|
+
|
299
|
+
private:
|
300
|
+
enum { // constants
|
301
|
+
kIsBogus = 1 // This set is bogus (i.e. not valid)
|
302
|
+
};
|
303
|
+
uint8_t fFlags; // Bit flag (see constants above)
|
304
|
+
public:
|
305
|
+
/**
|
306
|
+
* Determine if this object contains a valid set.
|
307
|
+
* A bogus set has no value. It is different from an empty set.
|
308
|
+
* It can be used to indicate that no set value is available.
|
309
|
+
*
|
310
|
+
* @return TRUE if the set is bogus (invalid), FALSE otherwise
|
311
|
+
* @see setToBogus()
|
312
|
+
* @stable ICU 4.0
|
313
|
+
*/
|
314
|
+
inline UBool isBogus(void) const;
|
315
|
+
|
316
|
+
/**
|
317
|
+
* Make this UnicodeSet object invalid.
|
318
|
+
* The set will test TRUE with isBogus().
|
319
|
+
*
|
320
|
+
* A bogus set has no value. It is different from an empty set.
|
321
|
+
* It can be used to indicate that no set value is available.
|
322
|
+
*
|
323
|
+
* This utility function is used throughout the UnicodeSet
|
324
|
+
* implementation to indicate that a UnicodeSet operation failed,
|
325
|
+
* and may be used in other functions,
|
326
|
+
* especially but not exclusively when such functions do not
|
327
|
+
* take a UErrorCode for simplicity.
|
328
|
+
*
|
329
|
+
* @see isBogus()
|
330
|
+
* @stable ICU 4.0
|
331
|
+
*/
|
332
|
+
void setToBogus();
|
333
|
+
|
334
|
+
public:
|
335
|
+
|
336
|
+
enum {
|
337
|
+
/**
|
338
|
+
* Minimum value that can be stored in a UnicodeSet.
|
339
|
+
* @stable ICU 2.4
|
340
|
+
*/
|
341
|
+
MIN_VALUE = 0,
|
342
|
+
|
343
|
+
/**
|
344
|
+
* Maximum value that can be stored in a UnicodeSet.
|
345
|
+
* @stable ICU 2.4
|
346
|
+
*/
|
347
|
+
MAX_VALUE = 0x10ffff
|
348
|
+
};
|
349
|
+
|
350
|
+
//----------------------------------------------------------------
|
351
|
+
// Constructors &c
|
352
|
+
//----------------------------------------------------------------
|
353
|
+
|
354
|
+
public:
|
355
|
+
|
356
|
+
/**
|
357
|
+
* Constructs an empty set.
|
358
|
+
* @stable ICU 2.0
|
359
|
+
*/
|
360
|
+
UnicodeSet();
|
361
|
+
|
362
|
+
/**
|
363
|
+
* Constructs a set containing the given range. If <code>end &lt;
|
364
|
+
* start</code> then an empty set is created.
|
365
|
+
*
|
366
|
+
* @param start first character, inclusive, of range
|
367
|
+
* @param end last character, inclusive, of range
|
368
|
+
* @stable ICU 2.4
|
369
|
+
*/
|
370
|
+
UnicodeSet(UChar32 start, UChar32 end);
|
371
|
+
|
372
|
+
/**
|
373
|
+
* Constructs a set from the given pattern. See the class
|
374
|
+
* description for the syntax of the pattern language.
|
375
|
+
* @param pattern a string specifying what characters are in the set
|
376
|
+
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
|
377
|
+
* contains a syntax error.
|
378
|
+
* @stable ICU 2.0
|
379
|
+
*/
|
380
|
+
UnicodeSet(const UnicodeString& pattern,
|
381
|
+
UErrorCode& status);
|
382
|
+
|
383
|
+
#ifndef U_HIDE_INTERNAL_API
|
384
|
+
/**
|
385
|
+
* Constructs a set from the given pattern. See the class
|
386
|
+
* description for the syntax of the pattern language.
|
387
|
+
* @param pattern a string specifying what characters are in the set
|
388
|
+
* @param options bitmask for options to apply to the pattern.
|
389
|
+
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
|
390
|
+
* @param symbols a symbol table mapping variable names to values
|
391
|
+
* and stand-in characters to UnicodeSets; may be NULL
|
392
|
+
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
|
393
|
+
* contains a syntax error.
|
394
|
+
* @internal
|
395
|
+
*/
|
396
|
+
UnicodeSet(const UnicodeString& pattern,
|
397
|
+
uint32_t options,
|
398
|
+
const SymbolTable* symbols,
|
399
|
+
UErrorCode& status);
|
400
|
+
#endif /* U_HIDE_INTERNAL_API */
|
401
|
+
|
402
|
+
/**
|
403
|
+
* Constructs a set from the given pattern. See the class description
|
404
|
+
* for the syntax of the pattern language.
|
405
|
+
* @param pattern a string specifying what characters are in the set
|
406
|
+
* @param pos on input, the position in pattern at which to start parsing.
|
407
|
+
* On output, the position after the last character parsed.
|
408
|
+
* @param options bitmask for options to apply to the pattern.
|
409
|
+
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
|
410
|
+
* @param symbols a symbol table mapping variable names to values
|
411
|
+
* and stand-in characters to UnicodeSets; may be NULL
|
412
|
+
* @param status input-output error code
|
413
|
+
* @stable ICU 2.8
|
414
|
+
*/
|
415
|
+
UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
|
416
|
+
uint32_t options,
|
417
|
+
const SymbolTable* symbols,
|
418
|
+
UErrorCode& status);
|
419
|
+
|
420
|
+
/**
|
421
|
+
* Constructs a set that is identical to the given UnicodeSet.
|
422
|
+
* @stable ICU 2.0
|
423
|
+
*/
|
424
|
+
UnicodeSet(const UnicodeSet& o);
|
425
|
+
|
426
|
+
/**
|
427
|
+
* Destructs the set.
|
428
|
+
* @stable ICU 2.0
|
429
|
+
*/
|
430
|
+
virtual ~UnicodeSet();
|
431
|
+
|
432
|
+
/**
|
433
|
+
* Assigns this object to be a copy of another.
|
434
|
+
* A frozen set will not be modified.
|
435
|
+
* @stable ICU 2.0
|
436
|
+
*/
|
437
|
+
UnicodeSet& operator=(const UnicodeSet& o);
|
438
|
+
|
439
|
+
/**
|
440
|
+
* Compares the specified object with this set for equality. Returns
|
441
|
+
* <tt>true</tt> if the two sets
|
442
|
+
* have the same size, and every member of the specified set is
|
443
|
+
* contained in this set (or equivalently, every member of this set is
|
444
|
+
* contained in the specified set).
|
445
|
+
*
|
446
|
+
* @param o set to be compared for equality with this set.
|
447
|
+
* @return <tt>true</tt> if the specified set is equal to this set.
|
448
|
+
* @stable ICU 2.0
|
449
|
+
*/
|
450
|
+
virtual UBool operator==(const UnicodeSet& o) const;
|
451
|
+
|
452
|
+
/**
|
453
|
+
* Compares the specified object with this set for inequality. Returns
|
454
|
+
* <tt>true</tt> if the specified set is not equal to this set.
|
455
|
+
* @stable ICU 2.0
|
456
|
+
*/
|
457
|
+
UBool operator!=(const UnicodeSet& o) const;
|
458
|
+
|
459
|
+
/**
|
460
|
+
* Returns a copy of this object. All UnicodeFunctor objects have
|
461
|
+
* to support cloning in order to allow classes using
|
462
|
+
* UnicodeFunctors, such as Transliterator, to implement cloning.
|
463
|
+
* If this set is frozen, then the clone will be frozen as well.
|
464
|
+
* Use cloneAsThawed() for a mutable clone of a frozen set.
|
465
|
+
* @see cloneAsThawed
|
466
|
+
* @stable ICU 2.0
|
467
|
+
*/
|
468
|
+
virtual UnicodeFunctor* clone() const;
|
469
|
+
|
470
|
+
/**
|
471
|
+
* Returns the hash code value for this set.
|
472
|
+
*
|
473
|
+
* @return the hash code value for this set.
|
474
|
+
* @see Object#hashCode()
|
475
|
+
* @stable ICU 2.0
|
476
|
+
*/
|
477
|
+
virtual int32_t hashCode(void) const;
|
478
|
+
|
479
|
+
/**
|
480
|
+
* Get a UnicodeSet pointer from a USet
|
481
|
+
*
|
482
|
+
* @param uset a USet (the ICU plain C type for UnicodeSet)
|
483
|
+
* @return the corresponding UnicodeSet pointer.
|
484
|
+
*
|
485
|
+
* @stable ICU 4.2
|
486
|
+
*/
|
487
|
+
inline static UnicodeSet *fromUSet(USet *uset);
|
488
|
+
|
489
|
+
/**
|
490
|
+
* Get a UnicodeSet pointer from a const USet
|
491
|
+
*
|
492
|
+
* @param uset a const USet (the ICU plain C type for UnicodeSet)
|
493
|
+
* @return the corresponding UnicodeSet pointer.
|
494
|
+
*
|
495
|
+
* @stable ICU 4.2
|
496
|
+
*/
|
497
|
+
inline static const UnicodeSet *fromUSet(const USet *uset);
|
498
|
+
|
499
|
+
/**
|
500
|
+
* Produce a USet * pointer for this UnicodeSet.
|
501
|
+
* USet is the plain C type for UnicodeSet
|
502
|
+
*
|
503
|
+
* @return a USet pointer for this UnicodeSet
|
504
|
+
* @stable ICU 4.2
|
505
|
+
*/
|
506
|
+
inline USet *toUSet();
|
507
|
+
|
508
|
+
|
509
|
+
/**
|
510
|
+
* Produce a const USet * pointer for this UnicodeSet.
|
511
|
+
* USet is the plain C type for UnicodeSet
|
512
|
+
*
|
513
|
+
* @return a const USet pointer for this UnicodeSet
|
514
|
+
* @stable ICU 4.2
|
515
|
+
*/
|
516
|
+
inline const USet * toUSet() const;
|
517
|
+
|
518
|
+
|
519
|
+
//----------------------------------------------------------------
|
520
|
+
// Freezable API
|
521
|
+
//----------------------------------------------------------------
|
522
|
+
|
523
|
+
/**
|
524
|
+
* Determines whether the set has been frozen (made immutable) or not.
|
525
|
+
* See the ICU4J Freezable interface for details.
|
526
|
+
* @return TRUE/FALSE for whether the set has been frozen
|
527
|
+
* @see freeze
|
528
|
+
* @see cloneAsThawed
|
529
|
+
* @stable ICU 3.8
|
530
|
+
*/
|
531
|
+
inline UBool isFrozen() const;
|
532
|
+
|
533
|
+
/**
|
534
|
+
* Freeze the set (make it immutable).
|
535
|
+
* Once frozen, it cannot be unfrozen and is therefore thread-safe
|
536
|
+
* until it is deleted.
|
537
|
+
* See the ICU4J Freezable interface for details.
|
538
|
+
* Freezing the set may also make some operations faster, for example
|
539
|
+
* contains() and span().
|
540
|
+
* A frozen set will not be modified. (It remains frozen.)
|
541
|
+
* @return this set.
|
542
|
+
* @see isFrozen
|
543
|
+
* @see cloneAsThawed
|
544
|
+
* @stable ICU 3.8
|
545
|
+
*/
|
546
|
+
UnicodeFunctor *freeze();
|
547
|
+
|
548
|
+
/**
|
549
|
+
* Clone the set and make the clone mutable.
|
550
|
+
* See the ICU4J Freezable interface for details.
|
551
|
+
* @return the mutable clone
|
552
|
+
* @see freeze
|
553
|
+
* @see isFrozen
|
554
|
+
* @stable ICU 3.8
|
555
|
+
*/
|
556
|
+
UnicodeFunctor *cloneAsThawed() const;
|
557
|
+
|
558
|
+
//----------------------------------------------------------------
|
559
|
+
// Public API
|
560
|
+
//----------------------------------------------------------------
|
561
|
+
|
562
|
+
/**
|
563
|
+
* Make this object represent the range <code>start - end</code>.
|
564
|
+
* If <code>end &lt; start</code> then this object is set to an
|
565
|
+
* empty range.
|
566
|
+
* A frozen set will not be modified.
|
567
|
+
*
|
568
|
+
* @param start first character in the set, inclusive
|
569
|
+
* @param end last character in the set, inclusive
|
570
|
+
* @stable ICU 2.4
|
571
|
+
*/
|
572
|
+
UnicodeSet& set(UChar32 start, UChar32 end);
|
573
|
+
|
574
|
+
/**
|
575
|
+
* Return true if the given position, in the given pattern, appears
|
576
|
+
* to be the start of a UnicodeSet pattern.
|
577
|
+
* @stable ICU 2.4
|
578
|
+
*/
|
579
|
+
static UBool resemblesPattern(const UnicodeString& pattern,
|
580
|
+
int32_t pos);
|
581
|
+
|
582
|
+
/**
|
583
|
+
* Modifies this set to represent the set specified by the given
|
584
|
+
* pattern, ignoring Unicode Pattern_White_Space characters.
|
585
|
+
* See the class description for the syntax of the pattern language.
|
586
|
+
* A frozen set will not be modified.
|
587
|
+
* @param pattern a string specifying what characters are in the set
|
588
|
+
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
|
589
|
+
* contains a syntax error.
|
590
|
+
* <em> Empties the set passed before applying the pattern.</em>
|
591
|
+
* @return a reference to this
|
592
|
+
* @stable ICU 2.0
|
593
|
+
*/
|
594
|
+
UnicodeSet& applyPattern(const UnicodeString& pattern,
|
595
|
+
UErrorCode& status);
|
596
|
+
|
597
|
+
#ifndef U_HIDE_INTERNAL_API
|
598
|
+
/**
|
599
|
+
* Modifies this set to represent the set specified by the given
|
600
|
+
* pattern, optionally ignoring Unicode Pattern_White_Space characters.
|
601
|
+
* See the class description for the syntax of the pattern language.
|
602
|
+
* A frozen set will not be modified.
|
603
|
+
* @param pattern a string specifying what characters are in the set
|
604
|
+
* @param options bitmask for options to apply to the pattern.
|
605
|
+
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
|
606
|
+
* @param symbols a symbol table mapping variable names to
|
607
|
+
* values and stand-ins to UnicodeSets; may be NULL
|
608
|
+
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
|
609
|
+
* contains a syntax error.
|
610
|
+
*<em> Empties the set passed before applying the pattern.</em>
|
611
|
+
* @return a reference to this
|
612
|
+
* @internal
|
613
|
+
*/
|
614
|
+
UnicodeSet& applyPattern(const UnicodeString& pattern,
|
615
|
+
uint32_t options,
|
616
|
+
const SymbolTable* symbols,
|
617
|
+
UErrorCode& status);
|
618
|
+
#endif /* U_HIDE_INTERNAL_API */
|
619
|
+
|
620
|
+
/**
|
621
|
+
* Parses the given pattern, starting at the given position. The
|
622
|
+
* character at pattern.charAt(pos.getIndex()) must be '[', or the
|
623
|
+
* parse fails. Parsing continues until the corresponding closing
|
624
|
+
* ']'. If a syntax error is encountered between the opening and
|
625
|
+
* closing brace, the parse fails. Upon return from a successful
|
626
|
+
* parse, the ParsePosition is updated to point to the character
|
627
|
+
* following the closing ']', and a StringBuffer containing a
|
628
|
+
* pairs list for the parsed pattern is returned. This method calls
|
629
|
+
* itself recursively to parse embedded subpatterns.
|
630
|
+
*<em> Empties the set passed before applying the pattern.</em>
|
631
|
+
* A frozen set will not be modified.
|
632
|
+
*
|
633
|
+
* @param pattern the string containing the pattern to be parsed.
|
634
|
+
* The portion of the string from pos.getIndex(), which must be a
|
635
|
+
* '[', to the corresponding closing ']', is parsed.
|
636
|
+
* @param pos upon entry, the position at which to begin parsing.
|
637
|
+
* The character at pattern.charAt(pos.getIndex()) must be a '['.
|
638
|
+
* Upon return from a successful parse, pos.getIndex() is either
|
639
|
+
* the character after the closing ']' of the parsed pattern, or
|
640
|
+
* pattern.length() if the closing ']' is the last character of
|
641
|
+
* the pattern string.
|
642
|
+
* @param options bitmask for options to apply to the pattern.
|
643
|
+
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
|
644
|
+
* @param symbols a symbol table mapping variable names to
|
645
|
+
* values and stand-ins to UnicodeSets; may be NULL
|
646
|
+
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
|
647
|
+
* contains a syntax error.
|
648
|
+
* @return a reference to this
|
649
|
+
* @stable ICU 2.8
|
650
|
+
*/
|
651
|
+
UnicodeSet& applyPattern(const UnicodeString& pattern,
|
652
|
+
ParsePosition& pos,
|
653
|
+
uint32_t options,
|
654
|
+
const SymbolTable* symbols,
|
655
|
+
UErrorCode& status);
|
656
|
+
|
657
|
+
/**
|
658
|
+
* Returns a string representation of this set. If the result of
|
659
|
+
* calling this function is passed to a UnicodeSet constructor, it
|
660
|
+
* will produce another set that is equal to this one.
|
661
|
+
* A frozen set will not be modified.
|
662
|
+
* @param result the string to receive the rules. Previous
|
663
|
+
* contents will be deleted.
|
664
|
+
* @param escapeUnprintable if TRUE then convert unprintable
|
665
|
+
* characters to their hex escape representations, \\uxxxx or
|
666
|
+
* \\Uxxxxxxxx. Unprintable characters are those other than
|
667
|
+
* U+000A, U+0020..U+007E.
|
668
|
+
* @stable ICU 2.0
|
669
|
+
*/
|
670
|
+
virtual UnicodeString& toPattern(UnicodeString& result,
|
671
|
+
UBool escapeUnprintable = FALSE) const;
|
672
|
+
|
673
|
+
/**
|
674
|
+
* Modifies this set to contain those code points which have the given value
|
675
|
+
* for the given binary or enumerated property, as returned by
|
676
|
+
* u_getIntPropertyValue. Prior contents of this set are lost.
|
677
|
+
* A frozen set will not be modified.
|
678
|
+
*
|
679
|
+
* @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
|
680
|
+
* or UCHAR_INT_START..UCHAR_INT_LIMIT-1
|
681
|
+
* or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
|
682
|
+
*
|
683
|
+
* @param value a value in the range u_getIntPropertyMinValue(prop)..
|
684
|
+
* u_getIntPropertyMaxValue(prop), with one exception. If prop is
|
685
|
+
* UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
|
686
|
+
* rather a mask value produced by U_GET_GC_MASK(). This allows grouped
|
687
|
+
* categories such as [:L:] to be represented.
|
688
|
+
*
|
689
|
+
* @param ec error code input/output parameter
|
690
|
+
*
|
691
|
+
* @return a reference to this set
|
692
|
+
*
|
693
|
+
* @stable ICU 2.4
|
694
|
+
*/
|
695
|
+
UnicodeSet& applyIntPropertyValue(UProperty prop,
|
696
|
+
int32_t value,
|
697
|
+
UErrorCode& ec);
|
698
|
+
|
699
|
+
/**
|
700
|
+
* Modifies this set to contain those code points which have the
|
701
|
+
* given value for the given property. Prior contents of this
|
702
|
+
* set are lost.
|
703
|
+
* A frozen set will not be modified.
|
704
|
+
*
|
705
|
+
* @param prop a property alias, either short or long. The name is matched
|
706
|
+
* loosely. See PropertyAliases.txt for names and a description of loose
|
707
|
+
* matching. If the value string is empty, then this string is interpreted
|
708
|
+
* as either a General_Category value alias, a Script value alias, a binary
|
709
|
+
* property alias, or a special ID. Special IDs are matched loosely and
|
710
|
+
* correspond to the following sets:
|
711
|
+
*
|
712
|
+
* "ANY" = [\\u0000-\\U0010FFFF],
|
713
|
+
* "ASCII" = [\\u0000-\\u007F],
|
714
|
+
* "Assigned" = [:^Cn:].
|
715
|
+
*
|
716
|
+
* @param value a value alias, either short or long. The name is matched
|
717
|
+
* loosely. See PropertyValueAliases.txt for names and a description of
|
718
|
+
* loose matching. In addition to aliases listed, numeric values and
|
719
|
+
* canonical combining classes may be expressed numerically, e.g., ("nv",
|
720
|
+
* "0.5") or ("ccc", "220"). The value string may also be empty.
|
721
|
+
*
|
722
|
+
* @param ec error code input/output parameter
|
723
|
+
*
|
724
|
+
* @return a reference to this set
|
725
|
+
*
|
726
|
+
* @stable ICU 2.4
|
727
|
+
*/
|
728
|
+
UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
|
729
|
+
const UnicodeString& value,
|
730
|
+
UErrorCode& ec);
|
731
|
+
|
732
|
+
/**
|
733
|
+
* Returns the number of elements in this set (its cardinality).
|
734
|
+
* Note that the elements of a set may include both individual
|
735
|
+
* codepoints and strings.
|
736
|
+
*
|
737
|
+
* @return the number of elements in this set (its cardinality).
|
738
|
+
* @stable ICU 2.0
|
739
|
+
*/
|
740
|
+
virtual int32_t size(void) const;
|
741
|
+
|
742
|
+
/**
|
743
|
+
* Returns <tt>true</tt> if this set contains no elements.
|
744
|
+
*
|
745
|
+
* @return <tt>true</tt> if this set contains no elements.
|
746
|
+
* @stable ICU 2.0
|
747
|
+
*/
|
748
|
+
virtual UBool isEmpty(void) const;
|
749
|
+
|
750
|
+
/**
|
751
|
+
* Returns true if this set contains the given character.
|
752
|
+
* This function works faster with a frozen set.
|
753
|
+
* @param c character to be checked for containment
|
754
|
+
* @return true if the test condition is met
|
755
|
+
* @stable ICU 2.0
|
756
|
+
*/
|
757
|
+
virtual UBool contains(UChar32 c) const;
|
758
|
+
|
759
|
+
/**
|
760
|
+
* Returns true if this set contains every character
|
761
|
+
* of the given range.
|
762
|
+
* @param start first character, inclusive, of the range
|
763
|
+
* @param end last character, inclusive, of the range
|
764
|
+
* @return true if the test condition is met
|
765
|
+
* @stable ICU 2.0
|
766
|
+
*/
|
767
|
+
virtual UBool contains(UChar32 start, UChar32 end) const;
|
768
|
+
|
769
|
+
/**
|
770
|
+
* Returns <tt>true</tt> if this set contains the given
|
771
|
+
* multicharacter string.
|
772
|
+
* @param s string to be checked for containment
|
773
|
+
* @return <tt>true</tt> if this set contains the specified string
|
774
|
+
* @stable ICU 2.4
|
775
|
+
*/
|
776
|
+
UBool contains(const UnicodeString& s) const;
|
777
|
+
|
778
|
+
/**
|
779
|
+
* Returns true if this set contains all the characters and strings
|
780
|
+
* of the given set.
|
781
|
+
* @param c set to be checked for containment
|
782
|
+
* @return true if the test condition is met
|
783
|
+
* @stable ICU 2.4
|
784
|
+
*/
|
785
|
+
virtual UBool containsAll(const UnicodeSet& c) const;
|
786
|
+
|
787
|
+
/**
|
788
|
+
* Returns true if this set contains all the characters
|
789
|
+
* of the given string.
|
790
|
+
* @param s string containing characters to be checked for containment
|
791
|
+
* @return true if the test condition is met
|
792
|
+
* @stable ICU 2.4
|
793
|
+
*/
|
794
|
+
UBool containsAll(const UnicodeString& s) const;
|
795
|
+
|
796
|
+
/**
|
797
|
+
* Returns true if this set contains none of the characters
|
798
|
+
* of the given range.
|
799
|
+
* @param start first character, inclusive, of the range
|
800
|
+
* @param end last character, inclusive, of the range
|
801
|
+
* @return true if the test condition is met
|
802
|
+
* @stable ICU 2.4
|
803
|
+
*/
|
804
|
+
UBool containsNone(UChar32 start, UChar32 end) const;
|
805
|
+
|
806
|
+
/**
|
807
|
+
* Returns true if this set contains none of the characters and strings
|
808
|
+
* of the given set.
|
809
|
+
* @param c set to be checked for containment
|
810
|
+
* @return true if the test condition is met
|
811
|
+
* @stable ICU 2.4
|
812
|
+
*/
|
813
|
+
UBool containsNone(const UnicodeSet& c) const;
|
814
|
+
|
815
|
+
/**
|
816
|
+
* Returns true if this set contains none of the characters
|
817
|
+
* of the given string.
|
818
|
+
* @param s string containing characters to be checked for containment
|
819
|
+
* @return true if the test condition is met
|
820
|
+
* @stable ICU 2.4
|
821
|
+
*/
|
822
|
+
UBool containsNone(const UnicodeString& s) const;
|
823
|
+
|
824
|
+
/**
|
825
|
+
* Returns true if this set contains one or more of the characters
|
826
|
+
* in the given range.
|
827
|
+
* @param start first character, inclusive, of the range
|
828
|
+
* @param end last character, inclusive, of the range
|
829
|
+
* @return true if the condition is met
|
830
|
+
* @stable ICU 2.4
|
831
|
+
*/
|
832
|
+
inline UBool containsSome(UChar32 start, UChar32 end) const;
|
833
|
+
|
834
|
+
/**
|
835
|
+
* Returns true if this set contains one or more of the characters
|
836
|
+
* and strings of the given set.
|
837
|
+
* @param s The set to be checked for containment
|
838
|
+
* @return true if the condition is met
|
839
|
+
* @stable ICU 2.4
|
840
|
+
*/
|
841
|
+
inline UBool containsSome(const UnicodeSet& s) const;
|
842
|
+
|
843
|
+
/**
|
844
|
+
* Returns true if this set contains one or more of the characters
|
845
|
+
* of the given string.
|
846
|
+
* @param s string containing characters to be checked for containment
|
847
|
+
* @return true if the condition is met
|
848
|
+
* @stable ICU 2.4
|
849
|
+
*/
|
850
|
+
inline UBool containsSome(const UnicodeString& s) const;
|
851
|
+
|
852
|
+
/**
|
853
|
+
* Returns the length of the initial substring of the input string which
|
854
|
+
* consists only of characters and strings that are contained in this set
|
855
|
+
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
|
856
|
+
* or only of characters and strings that are not contained
|
857
|
+
* in this set (USET_SPAN_NOT_CONTAINED).
|
858
|
+
* See USetSpanCondition for details.
|
859
|
+
* Similar to the strspn() C library function.
|
860
|
+
* Unpaired surrogates are treated according to contains() of their surrogate code points.
|
861
|
+
* This function works faster with a frozen set and with a non-negative string length argument.
|
862
|
+
* @param s start of the string
|
863
|
+
* @param length of the string; can be -1 for NUL-terminated
|
864
|
+
* @param spanCondition specifies the containment condition
|
865
|
+
* @return the length of the initial substring according to the spanCondition;
|
866
|
+
* 0 if the start of the string does not fit the spanCondition
|
867
|
+
* @stable ICU 3.8
|
868
|
+
* @see USetSpanCondition
|
869
|
+
*/
|
870
|
+
int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
|
871
|
+
|
872
|
+
/**
|
873
|
+
* Returns the end of the substring of the input string according to the USetSpanCondition.
|
874
|
+
* Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
|
875
|
+
* after pinning start to 0<=start<=s.length().
|
876
|
+
* @param s the string
|
877
|
+
* @param start the start index in the string for the span operation
|
878
|
+
* @param spanCondition specifies the containment condition
|
879
|
+
* @return the exclusive end of the substring according to the spanCondition;
|
880
|
+
* the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
|
881
|
+
* @stable ICU 4.4
|
882
|
+
* @see USetSpanCondition
|
883
|
+
*/
|
884
|
+
inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
|
885
|
+
|
886
|
+
/**
|
887
|
+
* Returns the start of the trailing substring of the input string which
|
888
|
+
* consists only of characters and strings that are contained in this set
|
889
|
+
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
|
890
|
+
* or only of characters and strings that are not contained
|
891
|
+
* in this set (USET_SPAN_NOT_CONTAINED).
|
892
|
+
* See USetSpanCondition for details.
|
893
|
+
* Unpaired surrogates are treated according to contains() of their surrogate code points.
|
894
|
+
* This function works faster with a frozen set and with a non-negative string length argument.
|
895
|
+
* @param s start of the string
|
896
|
+
* @param length of the string; can be -1 for NUL-terminated
|
897
|
+
* @param spanCondition specifies the containment condition
|
898
|
+
* @return the start of the trailing substring according to the spanCondition;
|
899
|
+
* the string length if the end of the string does not fit the spanCondition
|
900
|
+
* @stable ICU 3.8
|
901
|
+
* @see USetSpanCondition
|
902
|
+
*/
|
903
|
+
int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
|
904
|
+
|
905
|
+
/**
|
906
|
+
* Returns the start of the substring of the input string according to the USetSpanCondition.
|
907
|
+
* Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
|
908
|
+
* after pinning limit to 0<=limit<=s.length().
|
909
|
+
* @param s the string
|
910
|
+
* @param limit the exclusive-end index in the string for the span operation
|
911
|
+
* (use s.length() or INT32_MAX for spanning back from the end of the string)
|
912
|
+
* @param spanCondition specifies the containment condition
|
913
|
+
* @return the start of the substring according to the spanCondition;
|
914
|
+
* the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
|
915
|
+
* @stable ICU 4.4
|
916
|
+
* @see USetSpanCondition
|
917
|
+
*/
|
918
|
+
inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
|
919
|
+
|
920
|
+
/**
|
921
|
+
* Returns the length of the initial substring of the input string which
|
922
|
+
* consists only of characters and strings that are contained in this set
|
923
|
+
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
|
924
|
+
* or only of characters and strings that are not contained
|
925
|
+
* in this set (USET_SPAN_NOT_CONTAINED).
|
926
|
+
* See USetSpanCondition for details.
|
927
|
+
* Similar to the strspn() C library function.
|
928
|
+
* Malformed byte sequences are treated according to contains(0xfffd).
|
929
|
+
* This function works faster with a frozen set and with a non-negative string length argument.
|
930
|
+
* @param s start of the string (UTF-8)
|
931
|
+
* @param length of the string; can be -1 for NUL-terminated
|
932
|
+
* @param spanCondition specifies the containment condition
|
933
|
+
* @return the length of the initial substring according to the spanCondition;
|
934
|
+
* 0 if the start of the string does not fit the spanCondition
|
935
|
+
* @stable ICU 3.8
|
936
|
+
* @see USetSpanCondition
|
937
|
+
*/
|
938
|
+
int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
|
939
|
+
|
940
|
+
/**
|
941
|
+
* Returns the start of the trailing substring of the input string which
|
942
|
+
* consists only of characters and strings that are contained in this set
|
943
|
+
* (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
|
944
|
+
* or only of characters and strings that are not contained
|
945
|
+
* in this set (USET_SPAN_NOT_CONTAINED).
|
946
|
+
* See USetSpanCondition for details.
|
947
|
+
* Malformed byte sequences are treated according to contains(0xfffd).
|
948
|
+
* This function works faster with a frozen set and with a non-negative string length argument.
|
949
|
+
* @param s start of the string (UTF-8)
|
950
|
+
* @param length of the string; can be -1 for NUL-terminated
|
951
|
+
* @param spanCondition specifies the containment condition
|
952
|
+
* @return the start of the trailing substring according to the spanCondition;
|
953
|
+
* the string length if the end of the string does not fit the spanCondition
|
954
|
+
* @stable ICU 3.8
|
955
|
+
* @see USetSpanCondition
|
956
|
+
*/
|
957
|
+
int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
|
958
|
+
|
959
|
+
/**
|
960
|
+
* Implement UnicodeMatcher::matches()
|
961
|
+
* @stable ICU 2.4
|
962
|
+
*/
|
963
|
+
virtual UMatchDegree matches(const Replaceable& text,
|
964
|
+
int32_t& offset,
|
965
|
+
int32_t limit,
|
966
|
+
UBool incremental);
|
967
|
+
|
968
|
+
private:
|
969
|
+
/**
|
970
|
+
* Returns the longest match for s in text at the given position.
|
971
|
+
* If limit > start then match forward from start+1 to limit
|
972
|
+
* matching all characters except s.charAt(0). If limit < start,
|
973
|
+
* go backward starting from start-1 matching all characters
|
974
|
+
* except s.charAt(s.length()-1). This method assumes that the
|
975
|
+
* first character, text.charAt(start), matches s, so it does not
|
976
|
+
* check it.
|
977
|
+
* @param text the text to match
|
978
|
+
* @param start the first character to match. In the forward
|
979
|
+
* direction, text.charAt(start) is matched against s.charAt(0).
|
980
|
+
* In the reverse direction, it is matched against
|
981
|
+
* s.charAt(s.length()-1).
|
982
|
+
* @param limit the limit offset for matching, either last+1 in
|
983
|
+
* the forward direction, or last-1 in the reverse direction,
|
984
|
+
* where last is the index of the last character to match.
|
985
|
+
* @param s the string to match against text
|
986
|
+
* @return If part of s matches up to the limit, return |limit -
|
987
|
+
* start|. If all of s matches before reaching the limit, return
|
988
|
+
* s.length(). If there is a mismatch between s and text, return
|
989
|
+
* 0
|
990
|
+
*/
|
991
|
+
static int32_t matchRest(const Replaceable& text,
|
992
|
+
int32_t start, int32_t limit,
|
993
|
+
const UnicodeString& s);
|
994
|
+
|
995
|
+
/**
|
996
|
+
* Returns the smallest value i such that c < list[i]. Caller
|
997
|
+
* must ensure that c is a legal value or this method will enter
|
998
|
+
* an infinite loop. This method performs a binary search.
|
999
|
+
* @param c a character in the range MIN_VALUE..MAX_VALUE
|
1000
|
+
* inclusive
|
1001
|
+
* @return the smallest integer i in the range 0..len-1,
|
1002
|
+
* inclusive, such that c < list[i]
|
1003
|
+
*/
|
1004
|
+
int32_t findCodePoint(UChar32 c) const;
|
1005
|
+
|
1006
|
+
public:
|
1007
|
+
|
1008
|
+
/**
|
1009
|
+
* Implementation of UnicodeMatcher API. Union the set of all
|
1010
|
+
* characters that may be matched by this object into the given
|
1011
|
+
* set.
|
1012
|
+
* @param toUnionTo the set into which to union the source characters
|
1013
|
+
* @stable ICU 2.4
|
1014
|
+
*/
|
1015
|
+
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
1016
|
+
|
1017
|
+
/**
|
1018
|
+
* Returns the index of the given character within this set, where
|
1019
|
+
* the set is ordered by ascending code point. If the character
|
1020
|
+
* is not in this set, return -1. The inverse of this method is
|
1021
|
+
* <code>charAt()</code>.
|
1022
|
+
* @return an index from 0..size()-1, or -1
|
1023
|
+
* @stable ICU 2.4
|
1024
|
+
*/
|
1025
|
+
int32_t indexOf(UChar32 c) const;
|
1026
|
+
|
1027
|
+
/**
|
1028
|
+
* Returns the character at the given index within this set, where
|
1029
|
+
* the set is ordered by ascending code point. If the index is
|
1030
|
+
* out of range, return (UChar32)-1. The inverse of this method is
|
1031
|
+
* <code>indexOf()</code>.
|
1032
|
+
* @param index an index from 0..size()-1
|
1033
|
+
* @return the character at the given index, or (UChar32)-1.
|
1034
|
+
* @stable ICU 2.4
|
1035
|
+
*/
|
1036
|
+
UChar32 charAt(int32_t index) const;
|
1037
|
+
|
1038
|
+
/**
|
1039
|
+
* Adds the specified range to this set if it is not already
|
1040
|
+
* present. If this set already contains the specified range,
|
1041
|
+
* the call leaves this set unchanged. If <code>start > end</code>
|
1042
|
+
* then an empty range is added, leaving the set unchanged.
|
1043
|
+
* This is equivalent to a boolean logic OR, or a set UNION.
|
1044
|
+
* A frozen set will not be modified.
|
1045
|
+
*
|
1046
|
+
* @param start first character, inclusive, of range to be added
|
1047
|
+
* to this set.
|
1048
|
+
* @param end last character, inclusive, of range to be added
|
1049
|
+
* to this set.
|
1050
|
+
* @stable ICU 2.0
|
1051
|
+
*/
|
1052
|
+
virtual UnicodeSet& add(UChar32 start, UChar32 end);
|
1053
|
+
|
1054
|
+
/**
|
1055
|
+
* Adds the specified character to this set if it is not already
|
1056
|
+
* present. If this set already contains the specified character,
|
1057
|
+
* the call leaves this set unchanged.
|
1058
|
+
* A frozen set will not be modified.
|
1059
|
+
* @stable ICU 2.0
|
1060
|
+
*/
|
1061
|
+
UnicodeSet& add(UChar32 c);
|
1062
|
+
|
1063
|
+
/**
|
1064
|
+
* Adds the specified multicharacter to this set if it is not already
|
1065
|
+
* present. If this set already contains the multicharacter,
|
1066
|
+
* the call leaves this set unchanged.
|
1067
|
+
* Thus "ch" => {"ch"}
|
1068
|
+
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
1069
|
+
* A frozen set will not be modified.
|
1070
|
+
* @param s the source string
|
1071
|
+
* @return this object, for chaining
|
1072
|
+
* @stable ICU 2.4
|
1073
|
+
*/
|
1074
|
+
UnicodeSet& add(const UnicodeString& s);
|
1075
|
+
|
1076
|
+
private:
|
1077
|
+
/**
|
1078
|
+
* @return a code point IF the string consists of a single one.
|
1079
|
+
* otherwise returns -1.
|
1080
|
+
* @param s string to test
|
1081
|
+
*/
|
1082
|
+
static int32_t getSingleCP(const UnicodeString& s);
|
1083
|
+
|
1084
|
+
void _add(const UnicodeString& s);
|
1085
|
+
|
1086
|
+
public:
|
1087
|
+
/**
|
1088
|
+
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
|
1089
|
+
* If this set already contains any particular character, it has no effect on that character.
|
1090
|
+
* A frozen set will not be modified.
|
1091
|
+
* @param s the source string
|
1092
|
+
* @return this object, for chaining
|
1093
|
+
* @stable ICU 2.4
|
1094
|
+
*/
|
1095
|
+
UnicodeSet& addAll(const UnicodeString& s);
|
1096
|
+
|
1097
|
+
/**
|
1098
|
+
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
1099
|
+
* If this set already contains any particular character, it has no effect on that character.
|
1100
|
+
* A frozen set will not be modified.
|
1101
|
+
* @param s the source string
|
1102
|
+
* @return this object, for chaining
|
1103
|
+
* @stable ICU 2.4
|
1104
|
+
*/
|
1105
|
+
UnicodeSet& retainAll(const UnicodeString& s);
|
1106
|
+
|
1107
|
+
/**
|
1108
|
+
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
1109
|
+
* If this set already contains any particular character, it has no effect on that character.
|
1110
|
+
* A frozen set will not be modified.
|
1111
|
+
* @param s the source string
|
1112
|
+
* @return this object, for chaining
|
1113
|
+
* @stable ICU 2.4
|
1114
|
+
*/
|
1115
|
+
UnicodeSet& complementAll(const UnicodeString& s);
|
1116
|
+
|
1117
|
+
/**
|
1118
|
+
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
1119
|
+
* If this set already contains any particular character, it has no effect on that character.
|
1120
|
+
* A frozen set will not be modified.
|
1121
|
+
* @param s the source string
|
1122
|
+
* @return this object, for chaining
|
1123
|
+
* @stable ICU 2.4
|
1124
|
+
*/
|
1125
|
+
UnicodeSet& removeAll(const UnicodeString& s);
|
1126
|
+
|
1127
|
+
/**
|
1128
|
+
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
|
1129
|
+
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
1130
|
+
* @param s the source string
|
1131
|
+
* @return a newly created set containing the given string.
|
1132
|
+
* The caller owns the return object and is responsible for deleting it.
|
1133
|
+
* @stable ICU 2.4
|
1134
|
+
*/
|
1135
|
+
static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
|
1136
|
+
|
1137
|
+
|
1138
|
+
/**
|
1139
|
+
* Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
|
1140
|
+
* @param s the source string
|
1141
|
+
* @return a newly created set containing the given characters
|
1142
|
+
* The caller owns the return object and is responsible for deleting it.
|
1143
|
+
* @stable ICU 2.4
|
1144
|
+
*/
|
1145
|
+
static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
|
1146
|
+
|
1147
|
+
/**
|
1148
|
+
* Retain only the elements in this set that are contained in the
|
1149
|
+
* specified range. If <code>start > end</code> then an empty range is
|
1150
|
+
* retained, leaving the set empty. This is equivalent to
|
1151
|
+
* a boolean logic AND, or a set INTERSECTION.
|
1152
|
+
* A frozen set will not be modified.
|
1153
|
+
*
|
1154
|
+
* @param start first character, inclusive, of range to be retained
|
1155
|
+
* in this set.
|
1156
|
+
* @param end last character, inclusive, of range to be retained
|
1157
|
+
* in this set.
|
1158
|
+
* @stable ICU 2.0
|
1159
|
+
*/
|
1160
|
+
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
|
1161
|
+
|
1162
|
+
|
1163
|
+
/**
|
1164
|
+
* Retain the specified character from this set if it is present.
|
1165
|
+
* A frozen set will not be modified.
|
1166
|
+
* @stable ICU 2.0
|
1167
|
+
*/
|
1168
|
+
UnicodeSet& retain(UChar32 c);
|
1169
|
+
|
1170
|
+
/**
|
1171
|
+
* Removes the specified range from this set if it is present.
|
1172
|
+
* The set will not contain the specified range once the call
|
1173
|
+
* returns. If <code>start > end</code> then an empty range is
|
1174
|
+
* removed, leaving the set unchanged.
|
1175
|
+
* A frozen set will not be modified.
|
1176
|
+
*
|
1177
|
+
* @param start first character, inclusive, of range to be removed
|
1178
|
+
* from this set.
|
1179
|
+
* @param end last character, inclusive, of range to be removed
|
1180
|
+
* from this set.
|
1181
|
+
* @stable ICU 2.0
|
1182
|
+
*/
|
1183
|
+
virtual UnicodeSet& remove(UChar32 start, UChar32 end);
|
1184
|
+
|
1185
|
+
/**
|
1186
|
+
* Removes the specified character from this set if it is present.
|
1187
|
+
* The set will not contain the specified character once the call
|
1188
|
+
* returns.
|
1189
|
+
* A frozen set will not be modified.
|
1190
|
+
* @stable ICU 2.0
|
1191
|
+
*/
|
1192
|
+
UnicodeSet& remove(UChar32 c);
|
1193
|
+
|
1194
|
+
/**
|
1195
|
+
* Removes the specified string from this set if it is present.
|
1196
|
+
* The set will not contain the specified string once the call
|
1197
|
+
* returns.
|
1198
|
+
* A frozen set will not be modified.
|
1199
|
+
* @param s the source string
|
1200
|
+
* @return this object, for chaining
|
1201
|
+
* @stable ICU 2.4
|
1202
|
+
*/
|
1203
|
+
UnicodeSet& remove(const UnicodeString& s);
|
1204
|
+
|
1205
|
+
/**
|
1206
|
+
* Inverts this set. This operation modifies this set so that
|
1207
|
+
* its value is its complement. This is equivalent to
|
1208
|
+
* <code>complement(MIN_VALUE, MAX_VALUE)</code>.
|
1209
|
+
* A frozen set will not be modified.
|
1210
|
+
* @stable ICU 2.0
|
1211
|
+
*/
|
1212
|
+
virtual UnicodeSet& complement(void);
|
1213
|
+
|
1214
|
+
/**
|
1215
|
+
* Complements the specified range in this set. Any character in
|
1216
|
+
* the range will be removed if it is in this set, or will be
|
1217
|
+
* added if it is not in this set. If <code>start > end</code>
|
1218
|
+
* then an empty range is complemented, leaving the set unchanged.
|
1219
|
+
* This is equivalent to a boolean logic XOR.
|
1220
|
+
* A frozen set will not be modified.
|
1221
|
+
*
|
1222
|
+
* @param start first character, inclusive, of range to be complemented
|
1223
|
+
* in this set.
|
1224
|
+
* @param end last character, inclusive, of range to be complemented
|
1225
|
+
* in this set.
|
1226
|
+
* @stable ICU 2.0
|
1227
|
+
*/
|
1228
|
+
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
|
1229
|
+
|
1230
|
+
/**
|
1231
|
+
* Complements the specified character in this set. The character
|
1232
|
+
* will be removed if it is in this set, or will be added if it is
|
1233
|
+
* not in this set.
|
1234
|
+
* A frozen set will not be modified.
|
1235
|
+
* @stable ICU 2.0
|
1236
|
+
*/
|
1237
|
+
UnicodeSet& complement(UChar32 c);
|
1238
|
+
|
1239
|
+
/**
|
1240
|
+
* Complement the specified string in this set.
|
1241
|
+
* The string will be removed if it is in this set, or will be
|
1242
|
+
* added if it is not in this set.
|
1243
|
+
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
1244
|
+
* A frozen set will not be modified.
|
1245
|
+
* @param s the string to complement
|
1246
|
+
* @return this object, for chaining
|
1247
|
+
* @stable ICU 2.4
|
1248
|
+
*/
|
1249
|
+
UnicodeSet& complement(const UnicodeString& s);
|
1250
|
+
|
1251
|
+
/**
|
1252
|
+
* Adds all of the elements in the specified set to this set if
|
1253
|
+
* they're not already present. This operation effectively
|
1254
|
+
* modifies this set so that its value is the <i>union</i> of the two
|
1255
|
+
* sets. The behavior of this operation is unspecified if the specified
|
1256
|
+
* collection is modified while the operation is in progress.
|
1257
|
+
* A frozen set will not be modified.
|
1258
|
+
*
|
1259
|
+
* @param c set whose elements are to be added to this set.
|
1260
|
+
* @see #add(UChar32, UChar32)
|
1261
|
+
* @stable ICU 2.0
|
1262
|
+
*/
|
1263
|
+
virtual UnicodeSet& addAll(const UnicodeSet& c);
|
1264
|
+
|
1265
|
+
/**
|
1266
|
+
* Retains only the elements in this set that are contained in the
|
1267
|
+
* specified set. In other words, removes from this set all of
|
1268
|
+
* its elements that are not contained in the specified set. This
|
1269
|
+
* operation effectively modifies this set so that its value is
|
1270
|
+
* the <i>intersection</i> of the two sets.
|
1271
|
+
* A frozen set will not be modified.
|
1272
|
+
*
|
1273
|
+
* @param c set that defines which elements this set will retain.
|
1274
|
+
* @stable ICU 2.0
|
1275
|
+
*/
|
1276
|
+
virtual UnicodeSet& retainAll(const UnicodeSet& c);
|
1277
|
+
|
1278
|
+
/**
|
1279
|
+
* Removes from this set all of its elements that are contained in the
|
1280
|
+
* specified set. This operation effectively modifies this
|
1281
|
+
* set so that its value is the <i>asymmetric set difference</i> of
|
1282
|
+
* the two sets.
|
1283
|
+
* A frozen set will not be modified.
|
1284
|
+
*
|
1285
|
+
* @param c set that defines which elements will be removed from
|
1286
|
+
* this set.
|
1287
|
+
* @stable ICU 2.0
|
1288
|
+
*/
|
1289
|
+
virtual UnicodeSet& removeAll(const UnicodeSet& c);
|
1290
|
+
|
1291
|
+
/**
|
1292
|
+
* Complements in this set all elements contained in the specified
|
1293
|
+
* set. Any character in the other set will be removed if it is
|
1294
|
+
* in this set, or will be added if it is not in this set.
|
1295
|
+
* A frozen set will not be modified.
|
1296
|
+
*
|
1297
|
+
* @param c set that defines which elements will be xor'ed from
|
1298
|
+
* this set.
|
1299
|
+
* @stable ICU 2.4
|
1300
|
+
*/
|
1301
|
+
virtual UnicodeSet& complementAll(const UnicodeSet& c);
|
1302
|
+
|
1303
|
+
/**
|
1304
|
+
* Removes all of the elements from this set. This set will be
|
1305
|
+
* empty after this call returns.
|
1306
|
+
* A frozen set will not be modified.
|
1307
|
+
* @stable ICU 2.0
|
1308
|
+
*/
|
1309
|
+
virtual UnicodeSet& clear(void);
|
1310
|
+
|
1311
|
+
/**
|
1312
|
+
* Close this set over the given attribute. For the attribute
|
1313
|
+
* USET_CASE, the result is to modify this set so that:
|
1314
|
+
*
|
1315
|
+
* 1. For each character or string 'a' in this set, all strings or
|
1316
|
+
* characters 'b' such that foldCase(a) == foldCase(b) are added
|
1317
|
+
* to this set.
|
1318
|
+
*
|
1319
|
+
* 2. For each string 'e' in the resulting set, if e !=
|
1320
|
+
* foldCase(e), 'e' will be removed.
|
1321
|
+
*
|
1322
|
+
* Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
|
1323
|
+
*
|
1324
|
+
* (Here foldCase(x) refers to the operation u_strFoldCase, and a
|
1325
|
+
* == b denotes that the contents are the same, not pointer
|
1326
|
+
* comparison.)
|
1327
|
+
*
|
1328
|
+
* A frozen set will not be modified.
|
1329
|
+
*
|
1330
|
+
* @param attribute bitmask for attributes to close over.
|
1331
|
+
* Currently only the USET_CASE bit is supported. Any undefined bits
|
1332
|
+
* are ignored.
|
1333
|
+
* @return a reference to this set.
|
1334
|
+
* @stable ICU 4.2
|
1335
|
+
*/
|
1336
|
+
UnicodeSet& closeOver(int32_t attribute);
|
1337
|
+
|
1338
|
+
/**
|
1339
|
+
* Remove all strings from this set.
|
1340
|
+
*
|
1341
|
+
* @return a reference to this set.
|
1342
|
+
* @stable ICU 4.2
|
1343
|
+
*/
|
1344
|
+
virtual UnicodeSet &removeAllStrings();
|
1345
|
+
|
1346
|
+
/**
|
1347
|
+
* Iteration method that returns the number of ranges contained in
|
1348
|
+
* this set.
|
1349
|
+
* @see #getRangeStart
|
1350
|
+
* @see #getRangeEnd
|
1351
|
+
* @stable ICU 2.4
|
1352
|
+
*/
|
1353
|
+
virtual int32_t getRangeCount(void) const;
|
1354
|
+
|
1355
|
+
/**
|
1356
|
+
* Iteration method that returns the first character in the
|
1357
|
+
* specified range of this set.
|
1358
|
+
* @see #getRangeCount
|
1359
|
+
* @see #getRangeEnd
|
1360
|
+
* @stable ICU 2.4
|
1361
|
+
*/
|
1362
|
+
virtual UChar32 getRangeStart(int32_t index) const;
|
1363
|
+
|
1364
|
+
/**
|
1365
|
+
* Iteration method that returns the last character in the
|
1366
|
+
* specified range of this set.
|
1367
|
+
* @see #getRangeStart
|
1368
|
+
* @see #getRangeCount
|
1369
|
+
* @stable ICU 2.4
|
1370
|
+
*/
|
1371
|
+
virtual UChar32 getRangeEnd(int32_t index) const;
|
1372
|
+
|
1373
|
+
/**
|
1374
|
+
* Serializes this set into an array of 16-bit integers. Serialization
|
1375
|
+
* (currently) only records the characters in the set; multicharacter
|
1376
|
+
* strings are ignored.
|
1377
|
+
*
|
1378
|
+
* The array has following format (each line is one 16-bit
|
1379
|
+
* integer):
|
1380
|
+
*
|
1381
|
+
* length = (n+2*m) | (m!=0?0x8000:0)
|
1382
|
+
* bmpLength = n; present if m!=0
|
1383
|
+
* bmp[0]
|
1384
|
+
* bmp[1]
|
1385
|
+
* ...
|
1386
|
+
* bmp[n-1]
|
1387
|
+
* supp-high[0]
|
1388
|
+
* supp-low[0]
|
1389
|
+
* supp-high[1]
|
1390
|
+
* supp-low[1]
|
1391
|
+
* ...
|
1392
|
+
* supp-high[m-1]
|
1393
|
+
* supp-low[m-1]
|
1394
|
+
*
|
1395
|
+
* The array starts with a header. After the header are n bmp
|
1396
|
+
* code points, then m supplementary code points. Either n or m
|
1397
|
+
* or both may be zero. n+2*m is always <= 0x7FFF.
|
1398
|
+
*
|
1399
|
+
* If there are no supplementary characters (if m==0) then the
|
1400
|
+
* header is one 16-bit integer, 'length', with value n.
|
1401
|
+
*
|
1402
|
+
* If there are supplementary characters (if m!=0) then the header
|
1403
|
+
* is two 16-bit integers. The first, 'length', has value
|
1404
|
+
* (n+2*m)|0x8000. The second, 'bmpLength', has value n.
|
1405
|
+
*
|
1406
|
+
* After the header the code points are stored in ascending order.
|
1407
|
+
* Supplementary code points are stored as most significant 16
|
1408
|
+
* bits followed by least significant 16 bits.
|
1409
|
+
*
|
1410
|
+
* @param dest pointer to buffer of destCapacity 16-bit integers.
|
1411
|
+
* May be NULL only if destCapacity is zero.
|
1412
|
+
* @param destCapacity size of dest, or zero. Must not be negative.
|
1413
|
+
* @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR
|
1414
|
+
* if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if
|
1415
|
+
* n+2*m+(m!=0?2:1) > destCapacity.
|
1416
|
+
* @return the total length of the serialized format, including
|
1417
|
+
* the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
|
1418
|
+
* than U_BUFFER_OVERFLOW_ERROR.
|
1419
|
+
* @stable ICU 2.4
|
1420
|
+
*/
|
1421
|
+
int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
|
1422
|
+
|
1423
|
+
/**
|
1424
|
+
* Reallocate this object's internal structures to take up the least
|
1425
|
+
* possible space, without changing this object's value.
|
1426
|
+
* A frozen set will not be modified.
|
1427
|
+
* @stable ICU 2.4
|
1428
|
+
*/
|
1429
|
+
virtual UnicodeSet& compact();
|
1430
|
+
|
1431
|
+
/**
|
1432
|
+
* Return the class ID for this class. This is useful only for
|
1433
|
+
* comparing to a return value from getDynamicClassID(). For example:
|
1434
|
+
* <pre>
|
1435
|
+
* . Base* polymorphic_pointer = createPolymorphicObject();
|
1436
|
+
* . if (polymorphic_pointer->getDynamicClassID() ==
|
1437
|
+
* . Derived::getStaticClassID()) ...
|
1438
|
+
* </pre>
|
1439
|
+
* @return The class ID for all objects of this class.
|
1440
|
+
* @stable ICU 2.0
|
1441
|
+
*/
|
1442
|
+
static UClassID U_EXPORT2 getStaticClassID(void);
|
1443
|
+
|
1444
|
+
/**
|
1445
|
+
* Implement UnicodeFunctor API.
|
1446
|
+
*
|
1447
|
+
* @return The class ID for this object. All objects of a given
|
1448
|
+
* class have the same class ID. Objects of other classes have
|
1449
|
+
* different class IDs.
|
1450
|
+
* @stable ICU 2.4
|
1451
|
+
*/
|
1452
|
+
virtual UClassID getDynamicClassID(void) const;
|
1453
|
+
|
1454
|
+
private:
|
1455
|
+
|
1456
|
+
// Private API for the USet API
|
1457
|
+
|
1458
|
+
friend class USetAccess;
|
1459
|
+
|
1460
|
+
int32_t getStringCount() const;
|
1461
|
+
|
1462
|
+
const UnicodeString* getString(int32_t index) const;
|
1463
|
+
|
1464
|
+
//----------------------------------------------------------------
|
1465
|
+
// RuleBasedTransliterator support
|
1466
|
+
//----------------------------------------------------------------
|
1467
|
+
|
1468
|
+
private:
|
1469
|
+
|
1470
|
+
/**
|
1471
|
+
* Returns <tt>true</tt> if this set contains any character whose low byte
|
1472
|
+
* is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
|
1473
|
+
* indexing.
|
1474
|
+
*/
|
1475
|
+
virtual UBool matchesIndexValue(uint8_t v) const;
|
1476
|
+
|
1477
|
+
private:
|
1478
|
+
friend class RBBIRuleScanner;
|
1479
|
+
|
1480
|
+
//----------------------------------------------------------------
|
1481
|
+
// Implementation: Clone as thawed (see ICU4J Freezable)
|
1482
|
+
//----------------------------------------------------------------
|
1483
|
+
|
1484
|
+
UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
|
1485
|
+
|
1486
|
+
//----------------------------------------------------------------
|
1487
|
+
// Implementation: Pattern parsing
|
1488
|
+
//----------------------------------------------------------------
|
1489
|
+
|
1490
|
+
void applyPatternIgnoreSpace(const UnicodeString& pattern,
|
1491
|
+
ParsePosition& pos,
|
1492
|
+
const SymbolTable* symbols,
|
1493
|
+
UErrorCode& status);
|
1494
|
+
|
1495
|
+
void applyPattern(RuleCharacterIterator& chars,
|
1496
|
+
const SymbolTable* symbols,
|
1497
|
+
UnicodeString& rebuiltPat,
|
1498
|
+
uint32_t options,
|
1499
|
+
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
|
1500
|
+
UErrorCode& ec);
|
1501
|
+
|
1502
|
+
//----------------------------------------------------------------
|
1503
|
+
// Implementation: Utility methods
|
1504
|
+
//----------------------------------------------------------------
|
1505
|
+
|
1506
|
+
void ensureCapacity(int32_t newLen, UErrorCode& ec);
|
1507
|
+
|
1508
|
+
void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
|
1509
|
+
|
1510
|
+
void swapBuffers(void);
|
1511
|
+
|
1512
|
+
UBool allocateStrings(UErrorCode &status);
|
1513
|
+
|
1514
|
+
UnicodeString& _toPattern(UnicodeString& result,
|
1515
|
+
UBool escapeUnprintable) const;
|
1516
|
+
|
1517
|
+
UnicodeString& _generatePattern(UnicodeString& result,
|
1518
|
+
UBool escapeUnprintable) const;
|
1519
|
+
|
1520
|
+
static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
|
1521
|
+
|
1522
|
+
static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
|
1523
|
+
|
1524
|
+
//----------------------------------------------------------------
|
1525
|
+
// Implementation: Fundamental operators
|
1526
|
+
//----------------------------------------------------------------
|
1527
|
+
|
1528
|
+
void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
|
1529
|
+
|
1530
|
+
void add(const UChar32* other, int32_t otherLen, int8_t polarity);
|
1531
|
+
|
1532
|
+
void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
|
1533
|
+
|
1534
|
+
/**
|
1535
|
+
* Return true if the given position, in the given pattern, appears
|
1536
|
+
* to be the start of a property set pattern [:foo:], \\p{foo}, or
|
1537
|
+
* \\P{foo}, or \\N{name}.
|
1538
|
+
*/
|
1539
|
+
static UBool resemblesPropertyPattern(const UnicodeString& pattern,
|
1540
|
+
int32_t pos);
|
1541
|
+
|
1542
|
+
static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
|
1543
|
+
int32_t iterOpts);
|
1544
|
+
|
1545
|
+
/**
|
1546
|
+
* Parse the given property pattern at the given parse position
|
1547
|
+
* and set this UnicodeSet to the result.
|
1548
|
+
*
|
1549
|
+
* The original design document is out of date, but still useful.
|
1550
|
+
* Ignore the property and value names:
|
1551
|
+
* http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
|
1552
|
+
*
|
1553
|
+
* Recognized syntax:
|
1554
|
+
*
|
1555
|
+
* [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
|
1556
|
+
* \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
|
1557
|
+
* \\N{name} - white space not allowed within "\\N"
|
1558
|
+
*
|
1559
|
+
* Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
|
1560
|
+
* Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
|
1561
|
+
* and trailing space is deleted, and internal runs of whitespace
|
1562
|
+
* are collapsed to a single space.
|
1563
|
+
*
|
1564
|
+
* We support binary properties, enumerated properties, and the
|
1565
|
+
* following non-enumerated properties:
|
1566
|
+
*
|
1567
|
+
* Numeric_Value
|
1568
|
+
* Name
|
1569
|
+
* Unicode_1_Name
|
1570
|
+
*
|
1571
|
+
* @param pattern the pattern string
|
1572
|
+
* @param ppos on entry, the position at which to begin parsing.
|
1573
|
+
* This should be one of the locations marked '^':
|
1574
|
+
*
|
1575
|
+
* [:blah:] \\p{blah} \\P{blah} \\N{name}
|
1576
|
+
* ^ % ^ % ^ % ^ %
|
1577
|
+
*
|
1578
|
+
* On return, the position after the last character parsed, that is,
|
1579
|
+
* the locations marked '%'. If the parse fails, ppos is returned
|
1580
|
+
* unchanged.
|
1581
|
+
* @param ec status
|
1582
|
+
* @return a reference to this.
|
1583
|
+
*/
|
1584
|
+
UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
|
1585
|
+
ParsePosition& ppos,
|
1586
|
+
UErrorCode &ec);
|
1587
|
+
|
1588
|
+
void applyPropertyPattern(RuleCharacterIterator& chars,
|
1589
|
+
UnicodeString& rebuiltPat,
|
1590
|
+
UErrorCode& ec);
|
1591
|
+
|
1592
|
+
friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
|
1593
|
+
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
|
1594
|
+
|
1595
|
+
/**
|
1596
|
+
* A filter that returns TRUE if the given code point should be
|
1597
|
+
* included in the UnicodeSet being constructed.
|
1598
|
+
*/
|
1599
|
+
typedef UBool (*Filter)(UChar32 codePoint, void* context);
|
1600
|
+
|
1601
|
+
/**
|
1602
|
+
* Given a filter, set this UnicodeSet to the code points
|
1603
|
+
* contained by that filter. The filter MUST be
|
1604
|
+
* property-conformant. That is, if it returns value v for one
|
1605
|
+
* code point, then it must return v for all affiliated code
|
1606
|
+
* points, as defined by the inclusions list. See
|
1607
|
+
* getInclusions().
|
1608
|
+
* src is a UPropertySource value.
|
1609
|
+
*/
|
1610
|
+
void applyFilter(Filter filter,
|
1611
|
+
void* context,
|
1612
|
+
int32_t src,
|
1613
|
+
UErrorCode &status);
|
1614
|
+
|
1615
|
+
/**
|
1616
|
+
* Set the new pattern to cache.
|
1617
|
+
*/
|
1618
|
+
void setPattern(const UnicodeString& newPat);
|
1619
|
+
/**
 * Discards the currently cached pattern, if any.
 */
void releasePattern();
|
1623
|
+
|
1624
|
+
// UnicodeSetIterator is given access to this class's non-public members.
friend class UnicodeSetIterator;
|
1625
|
+
};
|
1626
|
+
|
1627
|
+
|
1628
|
+
|
1629
|
+
/**
 * Inequality test, defined as the logical negation of operator==.
 * @param o the set to compare against
 * @return the negated result of operator==(o)
 */
inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
    if (this->operator==(o)) {
        return false;
    }
    return true;
}
|
1632
|
+
|
1633
|
+
/**
 * Reports whether this set is frozen.
 * @return true when bmpSet or stringSpan is non-NULL, false otherwise
 */
inline UBool UnicodeSet::isFrozen() const {
    // Either member being set is enough to count as frozen.
    if (bmpSet != NULL) {
        return static_cast<UBool>(true);
    }
    return static_cast<UBool>(stringSpan != NULL);
}
|
1636
|
+
|
1637
|
+
/**
 * Range overload: the negation of containsNone(start, end).
 * @param start first code point of the range
 * @param end   last code point of the range
 * @return false when containsNone(start, end) holds, true otherwise
 */
inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
    if (containsNone(start, end)) {
        return false;
    }
    return true;
}
|
1640
|
+
|
1641
|
+
/**
 * Set overload: the negation of containsNone(s).
 * @param s the set to test against
 * @return false when containsNone(s) holds, true otherwise
 */
inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
    if (containsNone(s)) {
        return false;
    }
    return true;
}
|
1644
|
+
|
1645
|
+
/**
 * String overload: the negation of containsNone(s).
 * @param s the string to test against
 * @return false when containsNone(s) holds, true otherwise
 */
inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
    if (containsNone(s)) {
        return false;
    }
    return true;
}
|
1648
|
+
|
1649
|
+
/**
 * Reports whether the kIsBogus flag is set in fFlags.
 * @return fFlags masked with kIsBogus, converted to UBool; the masked
 *         value is returned as-is rather than normalized to 0/1
 */
inline UBool UnicodeSet::isBogus() const {
    return static_cast<UBool>(fFlags & kIsBogus);
}
|
1652
|
+
|
1653
|
+
/**
 * Reinterprets a USet pointer as a UnicodeSet pointer.
 * The pointer value is reinterpreted only; nothing is copied.
 * @param uset the pointer to reinterpret
 * @return the same address typed as UnicodeSet *
 */
inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
    UnicodeSet *const asUnicodeSet = reinterpret_cast<UnicodeSet *>(uset);
    return asUnicodeSet;
}
|
1656
|
+
|
1657
|
+
/**
 * Const overload: reinterprets a const USet pointer as a const
 * UnicodeSet pointer. Nothing is copied.
 * @param uset the pointer to reinterpret
 * @return the same address typed as const UnicodeSet *
 */
inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
    const UnicodeSet *const asUnicodeSet = reinterpret_cast<const UnicodeSet *>(uset);
    return asUnicodeSet;
}
|
1660
|
+
|
1661
|
+
/**
 * Exposes this object through the USet pointer type.
 * @return this, reinterpreted as USet *
 */
inline USet *UnicodeSet::toUSet() {
    USet *const asUSet = reinterpret_cast<USet *>(this);
    return asUSet;
}
|
1664
|
+
|
1665
|
+
/**
 * Const overload: exposes this object through the const USet pointer type.
 * @return this, reinterpreted as const USet *
 */
inline const USet *UnicodeSet::toUSet() const {
    const USet *const asUSet = reinterpret_cast<const USet *>(this);
    return asUSet;
}
|
1668
|
+
|
1669
|
+
inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
|
1670
|
+
int32_t sLength=s.length();
|
1671
|
+
if(start<0) {
|
1672
|
+
start=0;
|
1673
|
+
} else if(start>sLength) {
|
1674
|
+
start=sLength;
|
1675
|
+
}
|
1676
|
+
return start+span(s.getBuffer()+start, sLength-start, spanCondition);
|
1677
|
+
}
|
1678
|
+
|
1679
|
+
inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
|
1680
|
+
int32_t sLength=s.length();
|
1681
|
+
if(limit<0) {
|
1682
|
+
limit=0;
|
1683
|
+
} else if(limit>sLength) {
|
1684
|
+
limit=sLength;
|
1685
|
+
}
|
1686
|
+
return spanBack(s.getBuffer(), limit, spanCondition);
|
1687
|
+
}
|
1688
|
+
|
1689
|
+
U_NAMESPACE_END
|
1690
|
+
|
1691
|
+
#endif
|