pango 2.2.4-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. checksums.yaml +7 -0
  2. data/README +33 -0
  3. data/Rakefile +65 -0
  4. data/ext/pango/depend +12 -0
  5. data/ext/pango/extconf.rb +90 -0
  6. data/ext/pango/pango.def +18 -0
  7. data/ext/pango/rbpango.c +318 -0
  8. data/ext/pango/rbpango.h +96 -0
  9. data/ext/pango/rbpangoanalysis.c +218 -0
  10. data/ext/pango/rbpangoattribute.c +499 -0
  11. data/ext/pango/rbpangoattriterator.c +141 -0
  12. data/ext/pango/rbpangoattrlist.c +101 -0
  13. data/ext/pango/rbpangocairo.c +122 -0
  14. data/ext/pango/rbpangocairocontext.c +131 -0
  15. data/ext/pango/rbpangocolor.c +120 -0
  16. data/ext/pango/rbpangocontext.c +309 -0
  17. data/ext/pango/rbpangoconversions.h +111 -0
  18. data/ext/pango/rbpangocoverage.c +106 -0
  19. data/ext/pango/rbpangoengine.c +73 -0
  20. data/ext/pango/rbpangofont.c +110 -0
  21. data/ext/pango/rbpangofontdescription.c +282 -0
  22. data/ext/pango/rbpangofontface.c +73 -0
  23. data/ext/pango/rbpangofontfamily.c +79 -0
  24. data/ext/pango/rbpangofontmap.c +102 -0
  25. data/ext/pango/rbpangofontmetrics.c +85 -0
  26. data/ext/pango/rbpangofontset.c +69 -0
  27. data/ext/pango/rbpangofontsetsimple.c +60 -0
  28. data/ext/pango/rbpangoglyphinfo.c +123 -0
  29. data/ext/pango/rbpangoglyphitem.c +125 -0
  30. data/ext/pango/rbpangoglyphstring.c +151 -0
  31. data/ext/pango/rbpangogravity.c +54 -0
  32. data/ext/pango/rbpangoitem.c +95 -0
  33. data/ext/pango/rbpangolanguage.c +86 -0
  34. data/ext/pango/rbpangolayout.c +572 -0
  35. data/ext/pango/rbpangolayoutiter.c +189 -0
  36. data/ext/pango/rbpangolayoutline.c +239 -0
  37. data/ext/pango/rbpangologattr.c +109 -0
  38. data/ext/pango/rbpangomatrix.c +143 -0
  39. data/ext/pango/rbpangoprivate.h +49 -0
  40. data/ext/pango/rbpangorectangle.c +170 -0
  41. data/ext/pango/rbpangorenderer.c +193 -0
  42. data/ext/pango/rbpangoscript.c +84 -0
  43. data/ext/pango/rbpangoscriptiter.c +92 -0
  44. data/ext/pango/rbpangotabarray.c +128 -0
  45. data/extconf.rb +49 -0
  46. data/lib/2.0/pango.so +0 -0
  47. data/lib/2.1/pango.so +0 -0
  48. data/lib/2.2/pango.so +0 -0
  49. data/lib/pango.rb +79 -0
  50. data/sample/attribute.rb +82 -0
  51. data/sample/break.rb +28 -0
  52. data/sample/gdk_layout.rb +27 -0
  53. data/sample/glyphstring.rb +61 -0
  54. data/sample/item.rb +37 -0
  55. data/sample/label.rb +23 -0
  56. data/sample/layout.rb +102 -0
  57. data/sample/pango_cairo.rb +66 -0
  58. data/sample/parse.rb +33 -0
  59. data/sample/sample.txt +10 -0
  60. data/sample/script.rb +23 -0
  61. data/test/pango-test-utils.rb +9 -0
  62. data/test/run-test.rb +28 -0
  63. data/test/test-analysis.rb +26 -0
  64. data/test/test-attribute.rb +19 -0
  65. data/test/test-color.rb +47 -0
  66. data/test/test-language.rb +7 -0
  67. data/test/test-layout.rb +34 -0
  68. data/test/test-log-attr.rb +26 -0
  69. data/test/test-matrix.rb +26 -0
  70. data/test/test-rectangle.rb +26 -0
  71. data/vendor/local/bin/derb.exe +0 -0
  72. data/vendor/local/bin/genbrk.exe +0 -0
  73. data/vendor/local/bin/genccode.exe +0 -0
  74. data/vendor/local/bin/gencfu.exe +0 -0
  75. data/vendor/local/bin/gencmn.exe +0 -0
  76. data/vendor/local/bin/gencnval.exe +0 -0
  77. data/vendor/local/bin/gendict.exe +0 -0
  78. data/vendor/local/bin/gennorm2.exe +0 -0
  79. data/vendor/local/bin/genrb.exe +0 -0
  80. data/vendor/local/bin/gensprep.exe +0 -0
  81. data/vendor/local/bin/hb-ot-shape-closure.exe +0 -0
  82. data/vendor/local/bin/hb-shape.exe +0 -0
  83. data/vendor/local/bin/hb-view.exe +0 -0
  84. data/vendor/local/bin/icu-config +820 -0
  85. data/vendor/local/bin/icuinfo.exe +0 -0
  86. data/vendor/local/bin/icupkg.exe +0 -0
  87. data/vendor/local/bin/libharfbuzz-0.dll +0 -0
  88. data/vendor/local/bin/libpango-1.0-0.dll +0 -0
  89. data/vendor/local/bin/libpangocairo-1.0-0.dll +0 -0
  90. data/vendor/local/bin/libpangoft2-1.0-0.dll +0 -0
  91. data/vendor/local/bin/libpangowin32-1.0-0.dll +0 -0
  92. data/vendor/local/bin/makeconv.exe +0 -0
  93. data/vendor/local/bin/pango-querymodules.exe +0 -0
  94. data/vendor/local/bin/pango-view.exe +0 -0
  95. data/vendor/local/bin/pkgdata.exe +0 -0
  96. data/vendor/local/bin/uconv.exe +0 -0
  97. data/vendor/local/etc/pango/pango.modules +19 -0
  98. data/vendor/local/include/harfbuzz/hb-blob.h +126 -0
  99. data/vendor/local/include/harfbuzz/hb-buffer.h +344 -0
  100. data/vendor/local/include/harfbuzz/hb-common.h +344 -0
  101. data/vendor/local/include/harfbuzz/hb-deprecated.h +51 -0
  102. data/vendor/local/include/harfbuzz/hb-face.h +117 -0
  103. data/vendor/local/include/harfbuzz/hb-font.h +507 -0
  104. data/vendor/local/include/harfbuzz/hb-ft.h +62 -0
  105. data/vendor/local/include/harfbuzz/hb-glib.h +52 -0
  106. data/vendor/local/include/harfbuzz/hb-icu.h +52 -0
  107. data/vendor/local/include/harfbuzz/hb-ot-font.h +41 -0
  108. data/vendor/local/include/harfbuzz/hb-ot-layout.h +302 -0
  109. data/vendor/local/include/harfbuzz/hb-ot-shape.h +53 -0
  110. data/vendor/local/include/harfbuzz/hb-ot-tag.h +59 -0
  111. data/vendor/local/include/harfbuzz/hb-ot.h +43 -0
  112. data/vendor/local/include/harfbuzz/hb-set.h +154 -0
  113. data/vendor/local/include/harfbuzz/hb-shape-plan.h +89 -0
  114. data/vendor/local/include/harfbuzz/hb-shape.h +81 -0
  115. data/vendor/local/include/harfbuzz/hb-unicode.h +445 -0
  116. data/vendor/local/include/harfbuzz/hb-version.h +66 -0
  117. data/vendor/local/include/harfbuzz/hb.h +47 -0
  118. data/vendor/local/include/layout/LEFontInstance.h +550 -0
  119. data/vendor/local/include/layout/LEGlyphFilter.h +45 -0
  120. data/vendor/local/include/layout/LEGlyphStorage.h +546 -0
  121. data/vendor/local/include/layout/LEInsertionList.h +177 -0
  122. data/vendor/local/include/layout/LELanguages.h +112 -0
  123. data/vendor/local/include/layout/LEScripts.h +263 -0
  124. data/vendor/local/include/layout/LESwaps.h +100 -0
  125. data/vendor/local/include/layout/LETableReference.h +418 -0
  126. data/vendor/local/include/layout/LETypes.h +728 -0
  127. data/vendor/local/include/layout/LayoutEngine.h +518 -0
  128. data/vendor/local/include/layout/ParagraphLayout.h +747 -0
  129. data/vendor/local/include/layout/RunArrays.h +676 -0
  130. data/vendor/local/include/layout/loengine.h +225 -0
  131. data/vendor/local/include/layout/playout.h +466 -0
  132. data/vendor/local/include/layout/plruns.h +441 -0
  133. data/vendor/local/include/pango-1.0/pango/pango-attributes.h +514 -0
  134. data/vendor/local/include/pango-1.0/pango/pango-bidi-type.h +136 -0
  135. data/vendor/local/include/pango-1.0/pango/pango-break.h +173 -0
  136. data/vendor/local/include/pango-1.0/pango/pango-context.h +112 -0
  137. data/vendor/local/include/pango-1.0/pango/pango-coverage.h +79 -0
  138. data/vendor/local/include/pango-1.0/pango/pango-engine.h +429 -0
  139. data/vendor/local/include/pango-1.0/pango/pango-enum-types.h +69 -0
  140. data/vendor/local/include/pango-1.0/pango/pango-features.h +12 -0
  141. data/vendor/local/include/pango-1.0/pango/pango-font.h +622 -0
  142. data/vendor/local/include/pango-1.0/pango/pango-fontmap.h +167 -0
  143. data/vendor/local/include/pango-1.0/pango/pango-fontset.h +169 -0
  144. data/vendor/local/include/pango-1.0/pango/pango-glyph-item.h +158 -0
  145. data/vendor/local/include/pango-1.0/pango/pango-glyph.h +197 -0
  146. data/vendor/local/include/pango-1.0/pango/pango-gravity.h +128 -0
  147. data/vendor/local/include/pango-1.0/pango/pango-item.h +109 -0
  148. data/vendor/local/include/pango-1.0/pango/pango-language.h +61 -0
  149. data/vendor/local/include/pango-1.0/pango/pango-layout.h +340 -0
  150. data/vendor/local/include/pango-1.0/pango/pango-matrix.h +117 -0
  151. data/vendor/local/include/pango-1.0/pango/pango-modules.h +71 -0
  152. data/vendor/local/include/pango-1.0/pango/pango-ot.h +391 -0
  153. data/vendor/local/include/pango-1.0/pango/pango-renderer.h +260 -0
  154. data/vendor/local/include/pango-1.0/pango/pango-script.h +260 -0
  155. data/vendor/local/include/pango-1.0/pango/pango-tabs.h +87 -0
  156. data/vendor/local/include/pango-1.0/pango/pango-types.h +200 -0
  157. data/vendor/local/include/pango-1.0/pango/pango-utils.h +189 -0
  158. data/vendor/local/include/pango-1.0/pango/pango.h +48 -0
  159. data/vendor/local/include/pango-1.0/pango/pangocairo.h +169 -0
  160. data/vendor/local/include/pango-1.0/pango/pangofc-decoder.h +110 -0
  161. data/vendor/local/include/pango-1.0/pango/pangofc-font.h +153 -0
  162. data/vendor/local/include/pango-1.0/pango/pangofc-fontmap.h +289 -0
  163. data/vendor/local/include/pango-1.0/pango/pangoft2.h +133 -0
  164. data/vendor/local/include/pango-1.0/pango/pangowin32.h +128 -0
  165. data/vendor/local/include/unicode/alphaindex.h +752 -0
  166. data/vendor/local/include/unicode/appendable.h +232 -0
  167. data/vendor/local/include/unicode/basictz.h +214 -0
  168. data/vendor/local/include/unicode/brkiter.h +655 -0
  169. data/vendor/local/include/unicode/bytestream.h +257 -0
  170. data/vendor/local/include/unicode/bytestrie.h +519 -0
  171. data/vendor/local/include/unicode/bytestriebuilder.h +181 -0
  172. data/vendor/local/include/unicode/calendar.h +2519 -0
  173. data/vendor/local/include/unicode/caniter.h +208 -0
  174. data/vendor/local/include/unicode/chariter.h +722 -0
  175. data/vendor/local/include/unicode/choicfmt.h +594 -0
  176. data/vendor/local/include/unicode/coleitr.h +404 -0
  177. data/vendor/local/include/unicode/coll.h +1267 -0
  178. data/vendor/local/include/unicode/compactdecimalformat.h +330 -0
  179. data/vendor/local/include/unicode/curramt.h +130 -0
  180. data/vendor/local/include/unicode/currpinf.h +258 -0
  181. data/vendor/local/include/unicode/currunit.h +110 -0
  182. data/vendor/local/include/unicode/datefmt.h +883 -0
  183. data/vendor/local/include/unicode/dbbi.h +40 -0
  184. data/vendor/local/include/unicode/dcfmtsym.h +482 -0
  185. data/vendor/local/include/unicode/decimfmt.h +2479 -0
  186. data/vendor/local/include/unicode/docmain.h +215 -0
  187. data/vendor/local/include/unicode/dtfmtsym.h +912 -0
  188. data/vendor/local/include/unicode/dtintrv.h +158 -0
  189. data/vendor/local/include/unicode/dtitvfmt.h +985 -0
  190. data/vendor/local/include/unicode/dtitvinf.h +514 -0
  191. data/vendor/local/include/unicode/dtptngen.h +498 -0
  192. data/vendor/local/include/unicode/dtrule.h +250 -0
  193. data/vendor/local/include/unicode/enumset.h +64 -0
  194. data/vendor/local/include/unicode/errorcode.h +137 -0
  195. data/vendor/local/include/unicode/fieldpos.h +291 -0
  196. data/vendor/local/include/unicode/filteredbrk.h +131 -0
  197. data/vendor/local/include/unicode/fmtable.h +760 -0
  198. data/vendor/local/include/unicode/format.h +305 -0
  199. data/vendor/local/include/unicode/fpositer.h +117 -0
  200. data/vendor/local/include/unicode/gender.h +111 -0
  201. data/vendor/local/include/unicode/gregocal.h +777 -0
  202. data/vendor/local/include/unicode/icudataver.h +41 -0
  203. data/vendor/local/include/unicode/icuplug.h +371 -0
  204. data/vendor/local/include/unicode/idna.h +323 -0
  205. data/vendor/local/include/unicode/listformatter.h +167 -0
  206. data/vendor/local/include/unicode/localpointer.h +304 -0
  207. data/vendor/local/include/unicode/locdspnm.h +204 -0
  208. data/vendor/local/include/unicode/locid.h +815 -0
  209. data/vendor/local/include/unicode/measfmt.h +389 -0
  210. data/vendor/local/include/unicode/measunit.h +1443 -0
  211. data/vendor/local/include/unicode/measure.h +159 -0
  212. data/vendor/local/include/unicode/messagepattern.h +943 -0
  213. data/vendor/local/include/unicode/msgfmt.h +1093 -0
  214. data/vendor/local/include/unicode/normalizer2.h +658 -0
  215. data/vendor/local/include/unicode/normlzr.h +797 -0
  216. data/vendor/local/include/unicode/numfmt.h +1187 -0
  217. data/vendor/local/include/unicode/numsys.h +208 -0
  218. data/vendor/local/include/unicode/parseerr.h +92 -0
  219. data/vendor/local/include/unicode/parsepos.h +230 -0
  220. data/vendor/local/include/unicode/platform.h +751 -0
  221. data/vendor/local/include/unicode/plurfmt.h +615 -0
  222. data/vendor/local/include/unicode/plurrule.h +501 -0
  223. data/vendor/local/include/unicode/ptypes.h +126 -0
  224. data/vendor/local/include/unicode/putil.h +181 -0
  225. data/vendor/local/include/unicode/rbbi.h +782 -0
  226. data/vendor/local/include/unicode/rbnf.h +1032 -0
  227. data/vendor/local/include/unicode/rbtz.h +362 -0
  228. data/vendor/local/include/unicode/regex.h +1857 -0
  229. data/vendor/local/include/unicode/region.h +228 -0
  230. data/vendor/local/include/unicode/reldatefmt.h +498 -0
  231. data/vendor/local/include/unicode/rep.h +261 -0
  232. data/vendor/local/include/unicode/resbund.h +490 -0
  233. data/vendor/local/include/unicode/schriter.h +187 -0
  234. data/vendor/local/include/unicode/scientificformathelper.h +139 -0
  235. data/vendor/local/include/unicode/search.h +575 -0
  236. data/vendor/local/include/unicode/selfmt.h +367 -0
  237. data/vendor/local/include/unicode/simpletz.h +928 -0
  238. data/vendor/local/include/unicode/smpdtfmt.h +1592 -0
  239. data/vendor/local/include/unicode/sortkey.h +338 -0
  240. data/vendor/local/include/unicode/std_string.h +37 -0
  241. data/vendor/local/include/unicode/strenum.h +276 -0
  242. data/vendor/local/include/unicode/stringpiece.h +224 -0
  243. data/vendor/local/include/unicode/stringtriebuilder.h +402 -0
  244. data/vendor/local/include/unicode/stsearch.h +504 -0
  245. data/vendor/local/include/unicode/symtable.h +112 -0
  246. data/vendor/local/include/unicode/tblcoll.h +873 -0
  247. data/vendor/local/include/unicode/timezone.h +948 -0
  248. data/vendor/local/include/unicode/tmunit.h +129 -0
  249. data/vendor/local/include/unicode/tmutamt.h +168 -0
  250. data/vendor/local/include/unicode/tmutfmt.h +243 -0
  251. data/vendor/local/include/unicode/translit.h +1342 -0
  252. data/vendor/local/include/unicode/tzfmt.h +1098 -0
  253. data/vendor/local/include/unicode/tznames.h +404 -0
  254. data/vendor/local/include/unicode/tzrule.h +828 -0
  255. data/vendor/local/include/unicode/tztrans.h +195 -0
  256. data/vendor/local/include/unicode/ubidi.h +2186 -0
  257. data/vendor/local/include/unicode/ubrk.h +540 -0
  258. data/vendor/local/include/unicode/ucal.h +1560 -0
  259. data/vendor/local/include/unicode/ucasemap.h +423 -0
  260. data/vendor/local/include/unicode/ucat.h +158 -0
  261. data/vendor/local/include/unicode/uchar.h +3426 -0
  262. data/vendor/local/include/unicode/ucharstrie.h +576 -0
  263. data/vendor/local/include/unicode/ucharstriebuilder.h +185 -0
  264. data/vendor/local/include/unicode/uchriter.h +381 -0
  265. data/vendor/local/include/unicode/uclean.h +258 -0
  266. data/vendor/local/include/unicode/ucnv.h +2036 -0
  267. data/vendor/local/include/unicode/ucnv_cb.h +162 -0
  268. data/vendor/local/include/unicode/ucnv_err.h +463 -0
  269. data/vendor/local/include/unicode/ucnvsel.h +187 -0
  270. data/vendor/local/include/unicode/ucol.h +1474 -0
  271. data/vendor/local/include/unicode/ucoleitr.h +266 -0
  272. data/vendor/local/include/unicode/uconfig.h +430 -0
  273. data/vendor/local/include/unicode/ucsdet.h +413 -0
  274. data/vendor/local/include/unicode/ucurr.h +424 -0
  275. data/vendor/local/include/unicode/udat.h +1536 -0
  276. data/vendor/local/include/unicode/udata.h +430 -0
  277. data/vendor/local/include/unicode/udateintervalformat.h +181 -0
  278. data/vendor/local/include/unicode/udatpg.h +588 -0
  279. data/vendor/local/include/unicode/udisplaycontext.h +150 -0
  280. data/vendor/local/include/unicode/uenum.h +206 -0
  281. data/vendor/local/include/unicode/uformattable.h +280 -0
  282. data/vendor/local/include/unicode/ugender.h +82 -0
  283. data/vendor/local/include/unicode/uidna.h +762 -0
  284. data/vendor/local/include/unicode/uiter.h +707 -0
  285. data/vendor/local/include/unicode/uldnames.h +302 -0
  286. data/vendor/local/include/unicode/uloc.h +1256 -0
  287. data/vendor/local/include/unicode/ulocdata.h +277 -0
  288. data/vendor/local/include/unicode/umachine.h +356 -0
  289. data/vendor/local/include/unicode/umisc.h +60 -0
  290. data/vendor/local/include/unicode/umsg.h +623 -0
  291. data/vendor/local/include/unicode/unifilt.h +120 -0
  292. data/vendor/local/include/unicode/unifunct.h +125 -0
  293. data/vendor/local/include/unicode/unimatch.h +163 -0
  294. data/vendor/local/include/unicode/unirepl.h +97 -0
  295. data/vendor/local/include/unicode/uniset.h +1691 -0
  296. data/vendor/local/include/unicode/unistr.h +4470 -0
  297. data/vendor/local/include/unicode/unorm.h +561 -0
  298. data/vendor/local/include/unicode/unorm2.h +528 -0
  299. data/vendor/local/include/unicode/unum.h +1328 -0
  300. data/vendor/local/include/unicode/unumsys.h +170 -0
  301. data/vendor/local/include/unicode/uobject.h +320 -0
  302. data/vendor/local/include/unicode/upluralrules.h +145 -0
  303. data/vendor/local/include/unicode/uregex.h +1591 -0
  304. data/vendor/local/include/unicode/uregion.h +248 -0
  305. data/vendor/local/include/unicode/urename.h +1784 -0
  306. data/vendor/local/include/unicode/urep.h +155 -0
  307. data/vendor/local/include/unicode/ures.h +887 -0
  308. data/vendor/local/include/unicode/uscript.h +642 -0
  309. data/vendor/local/include/unicode/usearch.h +885 -0
  310. data/vendor/local/include/unicode/uset.h +1126 -0
  311. data/vendor/local/include/unicode/usetiter.h +318 -0
  312. data/vendor/local/include/unicode/ushape.h +474 -0
  313. data/vendor/local/include/unicode/uspoof.h +1064 -0
  314. data/vendor/local/include/unicode/usprep.h +269 -0
  315. data/vendor/local/include/unicode/ustdio.h +1018 -0
  316. data/vendor/local/include/unicode/ustream.h +68 -0
  317. data/vendor/local/include/unicode/ustring.h +1700 -0
  318. data/vendor/local/include/unicode/ustringtrie.h +95 -0
  319. data/vendor/local/include/unicode/utext.h +1600 -0
  320. data/vendor/local/include/unicode/utf.h +223 -0
  321. data/vendor/local/include/unicode/utf16.h +623 -0
  322. data/vendor/local/include/unicode/utf32.h +23 -0
  323. data/vendor/local/include/unicode/utf8.h +824 -0
  324. data/vendor/local/include/unicode/utf_old.h +1169 -0
  325. data/vendor/local/include/unicode/utmscale.h +481 -0
  326. data/vendor/local/include/unicode/utrace.h +359 -0
  327. data/vendor/local/include/unicode/utrans.h +658 -0
  328. data/vendor/local/include/unicode/utypes.h +723 -0
  329. data/vendor/local/include/unicode/uvernum.h +170 -0
  330. data/vendor/local/include/unicode/uversion.h +193 -0
  331. data/vendor/local/include/unicode/vtzone.h +455 -0
  332. data/vendor/local/lib/girepository-1.0/Pango-1.0.typelib +0 -0
  333. data/vendor/local/lib/girepository-1.0/PangoCairo-1.0.typelib +0 -0
  334. data/vendor/local/lib/girepository-1.0/PangoFT2-1.0.typelib +0 -0
  335. data/vendor/local/lib/icu/54.1/Makefile.inc +293 -0
  336. data/vendor/local/lib/icu/54.1/pkgdata.inc +17 -0
  337. data/vendor/local/lib/icu/Makefile.inc +293 -0
  338. data/vendor/local/lib/icu/pkgdata.inc +17 -0
  339. data/vendor/local/lib/icudt.dll +0 -0
  340. data/vendor/local/lib/icudt54.dll +0 -0
  341. data/vendor/local/lib/icuin.dll +0 -0
  342. data/vendor/local/lib/icuin54.dll +0 -0
  343. data/vendor/local/lib/icuio.dll +0 -0
  344. data/vendor/local/lib/icuio54.dll +0 -0
  345. data/vendor/local/lib/icule.dll +0 -0
  346. data/vendor/local/lib/icule54.dll +0 -0
  347. data/vendor/local/lib/iculx.dll +0 -0
  348. data/vendor/local/lib/iculx54.dll +0 -0
  349. data/vendor/local/lib/icutest.dll +0 -0
  350. data/vendor/local/lib/icutest54.dll +0 -0
  351. data/vendor/local/lib/icutu.dll +0 -0
  352. data/vendor/local/lib/icutu54.dll +0 -0
  353. data/vendor/local/lib/icuuc.dll +0 -0
  354. data/vendor/local/lib/icuuc54.dll +0 -0
  355. data/vendor/local/lib/libharfbuzz-icu.a +0 -0
  356. data/vendor/local/lib/libharfbuzz-icu.la +41 -0
  357. data/vendor/local/lib/libharfbuzz.dll.a +0 -0
  358. data/vendor/local/lib/libharfbuzz.la +41 -0
  359. data/vendor/local/lib/libicudt.dll.a +0 -0
  360. data/vendor/local/lib/libicuin.dll.a +0 -0
  361. data/vendor/local/lib/libicuio.dll.a +0 -0
  362. data/vendor/local/lib/libicule.dll.a +0 -0
  363. data/vendor/local/lib/libiculx.dll.a +0 -0
  364. data/vendor/local/lib/libicutest.dll.a +0 -0
  365. data/vendor/local/lib/libicutu.dll.a +0 -0
  366. data/vendor/local/lib/libicuuc.dll.a +0 -0
  367. data/vendor/local/lib/libpango-1.0.dll.a +0 -0
  368. data/vendor/local/lib/libpango-1.0.la +41 -0
  369. data/vendor/local/lib/libpangocairo-1.0.dll.a +0 -0
  370. data/vendor/local/lib/libpangocairo-1.0.la +41 -0
  371. data/vendor/local/lib/libpangoft2-1.0.dll.a +0 -0
  372. data/vendor/local/lib/libpangoft2-1.0.la +41 -0
  373. data/vendor/local/lib/libpangowin32-1.0.dll.a +0 -0
  374. data/vendor/local/lib/libpangowin32-1.0.la +41 -0
  375. data/vendor/local/lib/pango/1.8.0/modules/pango-arabic-lang.dll +0 -0
  376. data/vendor/local/lib/pango/1.8.0/modules/pango-arabic-lang.dll.a +0 -0
  377. data/vendor/local/lib/pango/1.8.0/modules/pango-arabic-lang.la +41 -0
  378. data/vendor/local/lib/pango/1.8.0/modules/pango-basic-fc.dll +0 -0
  379. data/vendor/local/lib/pango/1.8.0/modules/pango-basic-fc.dll.a +0 -0
  380. data/vendor/local/lib/pango/1.8.0/modules/pango-basic-fc.la +41 -0
  381. data/vendor/local/lib/pango/1.8.0/modules/pango-basic-win32.dll +0 -0
  382. data/vendor/local/lib/pango/1.8.0/modules/pango-basic-win32.dll.a +0 -0
  383. data/vendor/local/lib/pango/1.8.0/modules/pango-basic-win32.la +41 -0
  384. data/vendor/local/lib/pango/1.8.0/modules/pango-indic-lang.dll +0 -0
  385. data/vendor/local/lib/pango/1.8.0/modules/pango-indic-lang.dll.a +0 -0
  386. data/vendor/local/lib/pango/1.8.0/modules/pango-indic-lang.la +41 -0
  387. data/vendor/local/lib/pkgconfig/harfbuzz-icu.pc +13 -0
  388. data/vendor/local/lib/pkgconfig/harfbuzz.pc +11 -0
  389. data/vendor/local/lib/pkgconfig/icu-i18n.pc +38 -0
  390. data/vendor/local/lib/pkgconfig/icu-io.pc +38 -0
  391. data/vendor/local/lib/pkgconfig/icu-le.pc +38 -0
  392. data/vendor/local/lib/pkgconfig/icu-lx.pc +38 -0
  393. data/vendor/local/lib/pkgconfig/icu-uc.pc +38 -0
  394. data/vendor/local/lib/pkgconfig/pango.pc +14 -0
  395. data/vendor/local/lib/pkgconfig/pangocairo.pc +11 -0
  396. data/vendor/local/lib/pkgconfig/pangoft2.pc +12 -0
  397. data/vendor/local/lib/pkgconfig/pangowin32.pc +11 -0
  398. data/vendor/local/share/gir-1.0/Pango-1.0.gir +13329 -0
  399. data/vendor/local/share/gir-1.0/PangoCairo-1.0.gir +833 -0
  400. data/vendor/local/share/gir-1.0/PangoFT2-1.0.gir +354 -0
  401. data/vendor/local/share/gtk-doc/html/harfbuzz/annotation-glossary.html +75 -0
  402. data/vendor/local/share/gtk-doc/html/harfbuzz/api-index-full.html +1449 -0
  403. data/vendor/local/share/gtk-doc/html/harfbuzz/ch01.html +98 -0
  404. data/vendor/local/share/gtk-doc/html/harfbuzz/deprecated-api-index.html +46 -0
  405. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-blob.html +610 -0
  406. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-buffer.html +1780 -0
  407. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-common.html +1337 -0
  408. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-coretext.html +144 -0
  409. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-deprecated.html +99 -0
  410. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-face.html +630 -0
  411. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-font.html +2529 -0
  412. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ft.html +188 -0
  413. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-glib.html +120 -0
  414. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-gobject.html +50 -0
  415. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-graphite2.html +120 -0
  416. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-icu.html +120 -0
  417. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ot-layout.html +771 -0
  418. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ot-tag.html +172 -0
  419. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-ot.html +50 -0
  420. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-set.html +966 -0
  421. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-shape-plan.html +405 -0
  422. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-shape.html +286 -0
  423. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-unicode.html +1523 -0
  424. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-uniscribe.html +100 -0
  425. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-version.html +182 -0
  426. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb.html +50 -0
  427. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz.devhelp2 +378 -0
  428. data/vendor/local/share/gtk-doc/html/harfbuzz/home.png +0 -0
  429. data/vendor/local/share/gtk-doc/html/harfbuzz/index.html +105 -0
  430. data/vendor/local/share/gtk-doc/html/harfbuzz/index.sgml +496 -0
  431. data/vendor/local/share/gtk-doc/html/harfbuzz/left-insensitive.png +0 -0
  432. data/vendor/local/share/gtk-doc/html/harfbuzz/left.png +0 -0
  433. data/vendor/local/share/gtk-doc/html/harfbuzz/object-tree.html +59 -0
  434. data/vendor/local/share/gtk-doc/html/harfbuzz/right-insensitive.png +0 -0
  435. data/vendor/local/share/gtk-doc/html/harfbuzz/right.png +0 -0
  436. data/vendor/local/share/gtk-doc/html/harfbuzz/style.css +476 -0
  437. data/vendor/local/share/gtk-doc/html/harfbuzz/up-insensitive.png +0 -0
  438. data/vendor/local/share/gtk-doc/html/harfbuzz/up.png +0 -0
  439. data/vendor/local/share/gtk-doc/html/pango/PangoEngineLang.html +224 -0
  440. data/vendor/local/share/gtk-doc/html/pango/PangoEngineShape.html +255 -0
  441. data/vendor/local/share/gtk-doc/html/pango/PangoFcDecoder.html +246 -0
  442. data/vendor/local/share/gtk-doc/html/pango/PangoFcFont.html +500 -0
  443. data/vendor/local/share/gtk-doc/html/pango/PangoFcFontMap.html +1001 -0
  444. data/vendor/local/share/gtk-doc/html/pango/PangoMarkupFormat.html +301 -0
  445. data/vendor/local/share/gtk-doc/html/pango/PangoRenderer.html +1219 -0
  446. data/vendor/local/share/gtk-doc/html/pango/annotation-glossary.html +89 -0
  447. data/vendor/local/share/gtk-doc/html/pango/api-index-1-10.html +135 -0
  448. data/vendor/local/share/gtk-doc/html/pango/api-index-1-12.html +49 -0
  449. data/vendor/local/share/gtk-doc/html/pango/api-index-1-14.html +64 -0
  450. data/vendor/local/share/gtk-doc/html/pango/api-index-1-16.html +228 -0
  451. data/vendor/local/share/gtk-doc/html/pango/api-index-1-18.html +152 -0
  452. data/vendor/local/share/gtk-doc/html/pango/api-index-1-2.html +122 -0
  453. data/vendor/local/share/gtk-doc/html/pango/api-index-1-20.html +87 -0
  454. data/vendor/local/share/gtk-doc/html/pango/api-index-1-22.html +124 -0
  455. data/vendor/local/share/gtk-doc/html/pango/api-index-1-24.html +97 -0
  456. data/vendor/local/share/gtk-doc/html/pango/api-index-1-26.html +46 -0
  457. data/vendor/local/share/gtk-doc/html/pango/api-index-1-30.html +39 -0
  458. data/vendor/local/share/gtk-doc/html/pango/api-index-1-31-0.html +39 -0
  459. data/vendor/local/share/gtk-doc/html/pango/api-index-1-32-4.html +53 -0
  460. data/vendor/local/share/gtk-doc/html/pango/api-index-1-32.html +42 -0
  461. data/vendor/local/share/gtk-doc/html/pango/api-index-1-34.html +38 -0
  462. data/vendor/local/share/gtk-doc/html/pango/api-index-1-4.html +201 -0
  463. data/vendor/local/share/gtk-doc/html/pango/api-index-1-6.html +165 -0
  464. data/vendor/local/share/gtk-doc/html/pango/api-index-1-8.html +171 -0
  465. data/vendor/local/share/gtk-doc/html/pango/api-index-deprecated.html +284 -0
  466. data/vendor/local/share/gtk-doc/html/pango/api-index-full.html +2968 -0
  467. data/vendor/local/share/gtk-doc/html/pango/home.png +0 -0
  468. data/vendor/local/share/gtk-doc/html/pango/index.html +146 -0
  469. data/vendor/local/share/gtk-doc/html/pango/index.sgml +957 -0
  470. data/vendor/local/share/gtk-doc/html/pango/layout.gif +0 -0
  471. data/vendor/local/share/gtk-doc/html/pango/left-insensitive.png +0 -0
  472. data/vendor/local/share/gtk-doc/html/pango/left.png +0 -0
  473. data/vendor/local/share/gtk-doc/html/pango/lowlevel.html +68 -0
  474. data/vendor/local/share/gtk-doc/html/pango/pango-Bidirectional-Text.html +514 -0
  475. data/vendor/local/share/gtk-doc/html/pango/pango-Cairo-Rendering.html +1632 -0
  476. data/vendor/local/share/gtk-doc/html/pango/pango-CoreText-Fonts.html +115 -0
  477. data/vendor/local/share/gtk-doc/html/pango/pango-Coverage-Maps.html +508 -0
  478. data/vendor/local/share/gtk-doc/html/pango/pango-Engines.html +358 -0
  479. data/vendor/local/share/gtk-doc/html/pango/pango-Fonts.html +4154 -0
  480. data/vendor/local/share/gtk-doc/html/pango/pango-FreeType-Fonts-and-Rendering.html +956 -0
  481. data/vendor/local/share/gtk-doc/html/pango/pango-Glyph-Storage.html +2627 -0
  482. data/vendor/local/share/gtk-doc/html/pango/pango-Layout-Objects.html +4153 -0
  483. data/vendor/local/share/gtk-doc/html/pango/pango-Miscellaneous-Utilities.html +965 -0
  484. data/vendor/local/share/gtk-doc/html/pango/pango-Modules.html +331 -0
  485. data/vendor/local/share/gtk-doc/html/pango/pango-OpenType-Font-Handling.html +2202 -0
  486. data/vendor/local/share/gtk-doc/html/pango/pango-Scripts-and-Languages.html +1430 -0
  487. data/vendor/local/share/gtk-doc/html/pango/pango-Tab-Stops.html +582 -0
  488. data/vendor/local/share/gtk-doc/html/pango/pango-Text-Attributes.html +3356 -0
  489. data/vendor/local/share/gtk-doc/html/pango/pango-Text-Processing.html +2079 -0
  490. data/vendor/local/share/gtk-doc/html/pango/pango-Version-Checking.html +325 -0
  491. data/vendor/local/share/gtk-doc/html/pango/pango-Vertical-Text.html +523 -0
  492. data/vendor/local/share/gtk-doc/html/pango/pango-Win32-Fonts-and-Rendering.html +1028 -0
  493. data/vendor/local/share/gtk-doc/html/pango/pango-Xft-Fonts-and-Rendering.html +1275 -0
  494. data/vendor/local/share/gtk-doc/html/pango/pango-hierarchy.html +91 -0
  495. data/vendor/local/share/gtk-doc/html/pango/pango-querymodules.html +131 -0
  496. data/vendor/local/share/gtk-doc/html/pango/pango.devhelp2 +786 -0
  497. data/vendor/local/share/gtk-doc/html/pango/pango.html +63 -0
  498. data/vendor/local/share/gtk-doc/html/pango/rendering.html +47 -0
  499. data/vendor/local/share/gtk-doc/html/pango/right-insensitive.png +0 -0
  500. data/vendor/local/share/gtk-doc/html/pango/right.png +0 -0
  501. data/vendor/local/share/gtk-doc/html/pango/rotated-text.png +0 -0
  502. data/vendor/local/share/gtk-doc/html/pango/style.css +476 -0
  503. data/vendor/local/share/gtk-doc/html/pango/tools.html +33 -0
  504. data/vendor/local/share/gtk-doc/html/pango/up-insensitive.png +0 -0
  505. data/vendor/local/share/gtk-doc/html/pango/up.png +0 -0
  506. data/vendor/local/share/icu/54.1/config/mh-mingw64 +158 -0
  507. data/vendor/local/share/icu/54.1/install-sh +251 -0
  508. data/vendor/local/share/icu/54.1/license.html +385 -0
  509. data/vendor/local/share/icu/54.1/mkinstalldirs +43 -0
  510. data/vendor/local/share/license/harfbuzz/AUTHORS +9 -0
  511. data/vendor/local/share/license/harfbuzz/COPYING +36 -0
  512. data/vendor/local/share/license/pango/AUTHORS +2 -0
  513. data/vendor/local/share/license/pango/COPYING +482 -0
  514. data/vendor/local/share/man/man1/derb.1 +196 -0
  515. data/vendor/local/share/man/man1/genbrk.1 +112 -0
  516. data/vendor/local/share/man/man1/gencfu.1 +91 -0
  517. data/vendor/local/share/man/man1/gencnval.1 +91 -0
  518. data/vendor/local/share/man/man1/gendict.1 +131 -0
  519. data/vendor/local/share/man/man1/genrb.1 +146 -0
  520. data/vendor/local/share/man/man1/icu-config.1 +288 -0
  521. data/vendor/local/share/man/man1/makeconv.1 +112 -0
  522. data/vendor/local/share/man/man1/pango-querymodules.1 +106 -0
  523. data/vendor/local/share/man/man1/pango-view.1 +1 -0
  524. data/vendor/local/share/man/man1/pkgdata.1 +258 -0
  525. data/vendor/local/share/man/man1/uconv.1 +443 -0
  526. data/vendor/local/share/man/man8/genccode.8 +106 -0
  527. data/vendor/local/share/man/man8/gencmn.8 +129 -0
  528. data/vendor/local/share/man/man8/gensprep.8 +102 -0
  529. data/vendor/local/share/man/man8/icupkg.8 +204 -0
  530. metadata +599 -0
@@ -0,0 +1,1691 @@
1
+ /*
2
+ ***************************************************************************
3
+ * Copyright (C) 1999-2014, International Business Machines Corporation
4
+ * and others. All Rights Reserved.
5
+ ***************************************************************************
6
+ * Date Name Description
7
+ * 10/20/99 alan Creation.
8
+ ***************************************************************************
9
+ */
10
+
11
+ #ifndef UNICODESET_H
12
+ #define UNICODESET_H
13
+
14
+ #include "unicode/unifilt.h"
15
+ #include "unicode/unistr.h"
16
+ #include "unicode/uset.h"
17
+
18
+ /**
19
+ * \file
20
+ * \brief C++ API: Unicode Set
21
+ */
22
+
23
+ U_NAMESPACE_BEGIN
24
+
25
+ // Forward Declarations.
26
+ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */
27
+
28
+ class BMPSet;
29
+ class ParsePosition;
30
+ class RBBIRuleScanner;
31
+ class SymbolTable;
32
+ class UnicodeSetStringSpan;
33
+ class UVector;
34
+ class RuleCharacterIterator;
35
+
36
+ /**
37
+ * A mutable set of Unicode characters and multicharacter strings. Objects of this class
38
+ * represent <em>character classes</em> used in regular expressions.
39
+ * A character class specifies a subset of Unicode code points. Legal
40
+ * code points are U+0000 to U+10FFFF, inclusive.
41
+ *
42
+ * <p>The UnicodeSet class is not designed to be subclassed.
43
+ *
44
+ * <p><code>UnicodeSet</code> supports two APIs. The first is the
45
+ * <em>operand</em> API that allows the caller to modify the value of
46
+ * a <code>UnicodeSet</code> object. It conforms to Java 2's
47
+ * <code>java.util.Set</code> interface, although
48
+ * <code>UnicodeSet</code> does not actually implement that
49
+ * interface. All methods of <code>Set</code> are supported, with the
50
+ * modification that they take a character range or single character
51
+ * instead of an <code>Object</code>, and they take a
52
+ * <code>UnicodeSet</code> instead of a <code>Collection</code>. The
53
+ * operand API may be thought of in terms of boolean logic: a boolean
54
+ * OR is implemented by <code>add</code>, a boolean AND is implemented
55
+ * by <code>retain</code>, a boolean XOR is implemented by
56
+ * <code>complement</code> taking an argument, and a boolean NOT is
57
+ * implemented by <code>complement</code> with no argument. In terms
58
+ * of traditional set theory function names, <code>add</code> is a
59
+ * union, <code>retain</code> is an intersection, <code>remove</code>
60
+ * is an asymmetric difference, and <code>complement</code> with no
61
+ * argument is a set complement with respect to the superset range
62
+ * <code>MIN_VALUE-MAX_VALUE</code>
63
+ *
64
+ * <p>The second API is the
65
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
66
+ * <code>java.text.Format</code>-derived classes. Unlike the
67
+ * methods that add characters, add categories, and control the logic
68
+ * of the set, the method <code>applyPattern()</code> sets all
69
+ * attributes of a <code>UnicodeSet</code> at once, based on a
70
+ * string pattern.
71
+ *
72
+ * <p><b>Pattern syntax</b></p>
73
+ *
74
+ * Patterns are accepted by the constructors and the
75
+ * <code>applyPattern()</code> methods and returned by the
76
+ * <code>toPattern()</code> method. These patterns follow a syntax
77
+ * similar to that employed by version 8 regular expression character
78
+ * classes. Here are some simple examples:
79
+ *
80
+ * \htmlonly<blockquote>\endhtmlonly
81
+ * <table>
82
+ * <tr align="top">
83
+ * <td nowrap valign="top" align="left"><code>[]</code></td>
84
+ * <td valign="top">No characters</td>
85
+ * </tr><tr align="top">
86
+ * <td nowrap valign="top" align="left"><code>[a]</code></td>
87
+ * <td valign="top">The character 'a'</td>
88
+ * </tr><tr align="top">
89
+ * <td nowrap valign="top" align="left"><code>[ae]</code></td>
90
+ * <td valign="top">The characters 'a' and 'e'</td>
91
+ * </tr>
92
+ * <tr>
93
+ * <td nowrap valign="top" align="left"><code>[a-e]</code></td>
94
+ * <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
95
+ * point order</td>
96
+ * </tr>
97
+ * <tr>
98
+ * <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
99
+ * <td valign="top">The character U+4E01</td>
100
+ * </tr>
101
+ * <tr>
102
+ * <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
103
+ * <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
104
+ * &quot;ac&quot;</td>
105
+ * </tr>
106
+ * <tr>
107
+ * <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
108
+ * <td valign="top">All characters in the general category Uppercase Letter</td>
109
+ * </tr>
110
+ * </table>
111
+ * \htmlonly</blockquote>\endhtmlonly
112
+ *
113
+ * Any character may be preceded by a backslash in order to remove any special
114
+ * meaning. White space characters, as defined by UCharacter.isWhitespace(), are
115
+ * ignored, unless they are escaped.
116
+ *
117
+ * <p>Property patterns specify a set of characters having a certain
118
+ * property as defined by the Unicode standard. Both the POSIX-like
119
+ * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a
120
+ * complete list of supported property patterns, see the User's Guide
121
+ * for UnicodeSet at
122
+ * <a href="http://icu-project.org/userguide/unicodeSet.html">
123
+ * http://icu-project.org/userguide/unicodeSet.html</a>.
124
+ * Actual determination of property data is defined by the underlying
125
+ * Unicode database as implemented by UCharacter.
126
+ *
127
+ * <p>Patterns specify individual characters, ranges of characters, and
128
+ * Unicode property sets. When elements are concatenated, they
129
+ * specify their union. To complement a set, place a '^' immediately
130
+ * after the opening '['. Property patterns are inverted by modifying
131
+ * their delimiters; "[:^foo:]" and "\\P{foo}". In any other location,
132
+ * '^' has no special meaning.
133
+ *
134
+ * <p>Ranges are indicated by placing a '-' between two
135
+ * characters, as in "a-z". This specifies the range of all
136
+ * characters from the left to the right, in Unicode order. If the
137
+ * left character is greater than or equal to the
138
+ * right character it is a syntax error. If a '-' occurs as the first
139
+ * character after the opening '[' or '[^', or if it occurs as the
140
+ * last character before the closing ']', then it is taken as a
141
+ * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
142
+ * set of three characters, 'a', 'b', and '-'.
143
+ *
144
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
145
+ * set difference may be taken using the '-' operator, for example,
146
+ * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
147
+ * with values less than 4096. Operators ('&' and '-') have equal
148
+ * precedence and bind left-to-right. Thus
149
+ * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
150
+ * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for
151
+ * difference; intersection is commutative.
152
+ *
153
+ * <table>
154
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
155
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
156
+ * through 'z' and all letters in between, in Unicode order
157
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
158
+ * all characters but 'a' through 'z',
159
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
160
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
161
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
162
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
163
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
164
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
165
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
166
+ * <em>pat2</em>
167
+ * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
168
+ * <td>The set of characters having the specified
169
+ * Unicode property; in
170
+ * this case, Unicode uppercase letters
171
+ * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
172
+ * <td>The set of characters <em>not</em> having the given
173
+ * Unicode property
174
+ * </table>
175
+ *
176
+ * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
177
+ *
178
+ * <p><b>Formal syntax</b></p>
179
+ *
180
+ * \htmlonly<blockquote>\endhtmlonly
181
+ * <table>
182
+ * <tr align="top">
183
+ * <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
184
+ * <td valign="top"><code>('[' '^'? item* ']') |
185
+ * property</code></td>
186
+ * </tr>
187
+ * <tr align="top">
188
+ * <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
189
+ * <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
190
+ * </code></td>
191
+ * </tr>
192
+ * <tr align="top">
193
+ * <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
194
+ * <td valign="top"><code>pattern | pattern-expr pattern |
195
+ * pattern-expr op pattern<br>
196
+ * </code></td>
197
+ * </tr>
198
+ * <tr align="top">
199
+ * <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
200
+ * <td valign="top"><code>'&amp;' | '-'<br>
201
+ * </code></td>
202
+ * </tr>
203
+ * <tr align="top">
204
+ * <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
205
+ * <td valign="top"><code>'[' | ']' | '-'<br>
206
+ * </code></td>
207
+ * </tr>
208
+ * <tr align="top">
209
+ * <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
210
+ * <td valign="top"><em>any character that is not</em><code> special<br>
211
+ * | ('\' </code><em>any character</em><code>)<br>
212
+ * | ('\\u' hex hex hex hex)<br>
213
+ * </code></td>
214
+ * </tr>
215
+ * <tr align="top">
216
+ * <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
217
+ * <td valign="top"><em>any character for which
218
+ * </em><code>Character.digit(c, 16)</code><em>
219
+ * returns a non-negative result</em></td>
220
+ * </tr>
221
+ * <tr>
222
+ * <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
223
+ * <td valign="top"><em>a Unicode property set pattern</em></td>
224
+ * </tr>
225
+ * </table>
226
+ * <br>
227
+ * <table border="1">
228
+ * <tr>
229
+ * <td>Legend: <table>
230
+ * <tr>
231
+ * <td nowrap valign="top"><code>a := b</code></td>
232
+ * <td width="20" valign="top">&nbsp; </td>
233
+ * <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
234
+ * </tr>
235
+ * <tr>
236
+ * <td nowrap valign="top"><code>a?</code></td>
237
+ * <td valign="top"></td>
238
+ * <td valign="top">zero or one instance of <code>a</code><br>
239
+ * </td>
240
+ * </tr>
241
+ * <tr>
242
+ * <td nowrap valign="top"><code>a*</code></td>
243
+ * <td valign="top"></td>
244
+ * <td valign="top">zero or more instances of <code>a</code><br>
245
+ * </td>
246
+ * </tr>
247
+ * <tr>
248
+ * <td nowrap valign="top"><code>a | b</code></td>
249
+ * <td valign="top"></td>
250
+ * <td valign="top">either <code>a</code> or <code>b</code><br>
251
+ * </td>
252
+ * </tr>
253
+ * <tr>
254
+ * <td nowrap valign="top"><code>'a'</code></td>
255
+ * <td valign="top"></td>
256
+ * <td valign="top">the literal string between the quotes </td>
257
+ * </tr>
258
+ * </table>
259
+ * </td>
260
+ * </tr>
261
+ * </table>
262
+ * \htmlonly</blockquote>\endhtmlonly
263
+ *
264
+ * <p>Note:
265
+ * - Most UnicodeSet methods do not take a UErrorCode parameter because
266
+ * there are usually very few opportunities for failure other than a shortage
267
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
268
+ * and the error code as the last parameter (ICU convention) would prevent
269
+ * the use of default parameter values.
270
+ * Instead, such methods set the UnicodeSet into a "bogus" state
271
+ * (see isBogus()) if an error occurs.
272
+ *
273
+ * @author Alan Liu
274
+ * @stable ICU 2.0
275
+ */
276
+ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
277
+
278
+ int32_t len; // length of list used; 0 <= len <= capacity
279
+ int32_t capacity; // capacity of list
280
+ UChar32* list; // MUST be terminated with HIGH
281
+ BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
282
+ UChar32* buffer; // internal buffer, may be NULL
283
+ int32_t bufferCapacity; // capacity of buffer
284
+ int32_t patLen;
285
+
286
+ /**
287
+ * The pattern representation of this set. This may not be the
288
+ * most economical pattern. It is the pattern supplied to
289
+ * applyPattern(), with variables substituted and whitespace
290
+ * removed. For sets constructed without applyPattern(), or
291
+ * modified using the non-pattern API, this string will be empty,
292
+ * indicating that toPattern() must generate a pattern
293
+ * representation from the inversion list.
294
+ */
295
+ UChar *pat;
296
+ UVector* strings; // maintained in sorted order
297
+ UnicodeSetStringSpan *stringSpan;
298
+
299
+ private:
300
+ enum { // constants
301
+ kIsBogus = 1 // This set is bogus (i.e. not valid)
302
+ };
303
+ uint8_t fFlags; // Bit flag (see constants above)
304
+ public:
305
+ /**
306
+ * Determine if this object contains a valid set.
307
+ * A bogus set has no value. It is different from an empty set.
308
+ * It can be used to indicate that no set value is available.
309
+ *
310
+ * @return TRUE if the set is bogus (invalid), FALSE otherwise
311
+ * @see setToBogus()
312
+ * @stable ICU 4.0
313
+ */
314
+ inline UBool isBogus(void) const;
315
+
316
+ /**
317
+ * Make this UnicodeSet object invalid.
318
+ * The set will test TRUE with isBogus().
319
+ *
320
+ * A bogus set has no value. It is different from an empty set.
321
+ * It can be used to indicate that no set value is available.
322
+ *
323
+ * This utility function is used throughout the UnicodeSet
324
+ * implementation to indicate that a UnicodeSet operation failed,
325
+ * and may be used in other functions,
326
+ * especially but not exclusively when such functions do not
327
+ * take a UErrorCode for simplicity.
328
+ *
329
+ * @see isBogus()
330
+ * @stable ICU 4.0
331
+ */
332
+ void setToBogus();
333
+
334
+ public:
335
+
336
+ enum {
337
+ /**
338
+ * Minimum value that can be stored in a UnicodeSet.
339
+ * @stable ICU 2.4
340
+ */
341
+ MIN_VALUE = 0,
342
+
343
+ /**
344
+ * Maximum value that can be stored in a UnicodeSet.
345
+ * @stable ICU 2.4
346
+ */
347
+ MAX_VALUE = 0x10ffff
348
+ };
349
+
350
+ //----------------------------------------------------------------
351
+ // Constructors &c
352
+ //----------------------------------------------------------------
353
+
354
+ public:
355
+
356
+ /**
357
+ * Constructs an empty set.
358
+ * @stable ICU 2.0
359
+ */
360
+ UnicodeSet();
361
+
362
+ /**
363
+ * Constructs a set containing the given range. If <code>start >
363
+ * end</code> then an empty set is created.
365
+ *
366
+ * @param start first character, inclusive, of range
367
+ * @param end last character, inclusive, of range
368
+ * @stable ICU 2.4
369
+ */
370
+ UnicodeSet(UChar32 start, UChar32 end);
371
+
372
+ /**
373
+ * Constructs a set from the given pattern. See the class
374
+ * description for the syntax of the pattern language.
375
+ * @param pattern a string specifying what characters are in the set
376
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
377
+ * contains a syntax error.
378
+ * @stable ICU 2.0
379
+ */
380
+ UnicodeSet(const UnicodeString& pattern,
381
+ UErrorCode& status);
382
+
383
+ #ifndef U_HIDE_INTERNAL_API
384
+ /**
385
+ * Constructs a set from the given pattern. See the class
386
+ * description for the syntax of the pattern language.
387
+ * @param pattern a string specifying what characters are in the set
388
+ * @param options bitmask for options to apply to the pattern.
389
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
390
+ * @param symbols a symbol table mapping variable names to values
391
+ * and stand-in characters to UnicodeSets; may be NULL
392
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
393
+ * contains a syntax error.
394
+ * @internal
395
+ */
396
+ UnicodeSet(const UnicodeString& pattern,
397
+ uint32_t options,
398
+ const SymbolTable* symbols,
399
+ UErrorCode& status);
400
+ #endif /* U_HIDE_INTERNAL_API */
401
+
402
+ /**
403
+ * Constructs a set from the given pattern. See the class description
404
+ * for the syntax of the pattern language.
405
+ * @param pattern a string specifying what characters are in the set
406
+ * @param pos on input, the position in pattern at which to start parsing.
407
+ * On output, the position after the last character parsed.
408
+ * @param options bitmask for options to apply to the pattern.
409
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
410
+ * @param symbols a symbol table mapping variable names to values
411
+ * and stand-in characters to UnicodeSets; may be NULL
412
+ * @param status input-output error code
413
+ * @stable ICU 2.8
414
+ */
415
+ UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
416
+ uint32_t options,
417
+ const SymbolTable* symbols,
418
+ UErrorCode& status);
419
+
420
+ /**
421
+ * Constructs a set that is identical to the given UnicodeSet.
422
+ * @stable ICU 2.0
423
+ */
424
+ UnicodeSet(const UnicodeSet& o);
425
+
426
+ /**
427
+ * Destructs the set.
428
+ * @stable ICU 2.0
429
+ */
430
+ virtual ~UnicodeSet();
431
+
432
+ /**
433
+ * Assigns this object to be a copy of another.
434
+ * A frozen set will not be modified.
435
+ * @stable ICU 2.0
436
+ */
437
+ UnicodeSet& operator=(const UnicodeSet& o);
438
+
439
+ /**
440
+ * Compares the specified object with this set for equality. Returns
441
+ * <tt>true</tt> if the two sets
442
+ * have the same size, and every member of the specified set is
443
+ * contained in this set (or equivalently, every member of this set is
444
+ * contained in the specified set).
445
+ *
446
+ * @param o set to be compared for equality with this set.
447
+ * @return <tt>true</tt> if the specified set is equal to this set.
448
+ * @stable ICU 2.0
449
+ */
450
+ virtual UBool operator==(const UnicodeSet& o) const;
451
+
452
+ /**
453
+ * Compares the specified object with this set for inequality. Returns
454
+ * <tt>true</tt> if the specified set is not equal to this set.
455
+ * @stable ICU 2.0
456
+ */
457
+ UBool operator!=(const UnicodeSet& o) const;
458
+
459
+ /**
460
+ * Returns a copy of this object. All UnicodeFunctor objects have
461
+ * to support cloning in order to allow classes using
462
+ * UnicodeFunctors, such as Transliterator, to implement cloning.
463
+ * If this set is frozen, then the clone will be frozen as well.
464
+ * Use cloneAsThawed() for a mutable clone of a frozen set.
465
+ * @see cloneAsThawed
466
+ * @stable ICU 2.0
467
+ */
468
+ virtual UnicodeFunctor* clone() const;
469
+
470
+ /**
471
+ * Returns the hash code value for this set.
472
+ *
473
+ * @return the hash code value for this set.
474
+ * @see Object#hashCode()
475
+ * @stable ICU 2.0
476
+ */
477
+ virtual int32_t hashCode(void) const;
478
+
479
+ /**
480
+ * Get a UnicodeSet pointer from a USet
481
+ *
482
+ * @param uset a USet (the ICU plain C type for UnicodeSet)
483
+ * @return the corresponding UnicodeSet pointer.
484
+ *
485
+ * @stable ICU 4.2
486
+ */
487
+ inline static UnicodeSet *fromUSet(USet *uset);
488
+
489
+ /**
490
+ * Get a UnicodeSet pointer from a const USet
491
+ *
492
+ * @param uset a const USet (the ICU plain C type for UnicodeSet)
493
+ * @return the corresponding UnicodeSet pointer.
494
+ *
495
+ * @stable ICU 4.2
496
+ */
497
+ inline static const UnicodeSet *fromUSet(const USet *uset);
498
+
499
+ /**
500
+ * Produce a USet * pointer for this UnicodeSet.
501
+ * USet is the plain C type for UnicodeSet
502
+ *
503
+ * @return a USet pointer for this UnicodeSet
504
+ * @stable ICU 4.2
505
+ */
506
+ inline USet *toUSet();
507
+
508
+
509
+ /**
510
+ * Produce a const USet * pointer for this UnicodeSet.
511
+ * USet is the plain C type for UnicodeSet
512
+ *
513
+ * @return a const USet pointer for this UnicodeSet
514
+ * @stable ICU 4.2
515
+ */
516
+ inline const USet * toUSet() const;
517
+
518
+
519
+ //----------------------------------------------------------------
520
+ // Freezable API
521
+ //----------------------------------------------------------------
522
+
523
+ /**
524
+ * Determines whether the set has been frozen (made immutable) or not.
525
+ * See the ICU4J Freezable interface for details.
526
+ * @return TRUE/FALSE for whether the set has been frozen
527
+ * @see freeze
528
+ * @see cloneAsThawed
529
+ * @stable ICU 3.8
530
+ */
531
+ inline UBool isFrozen() const;
532
+
533
+ /**
534
+ * Freeze the set (make it immutable).
535
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
536
+ * until it is deleted.
537
+ * See the ICU4J Freezable interface for details.
538
+ * Freezing the set may also make some operations faster, for example
539
+ * contains() and span().
540
+ * A frozen set will not be modified. (It remains frozen.)
541
+ * @return this set.
542
+ * @see isFrozen
543
+ * @see cloneAsThawed
544
+ * @stable ICU 3.8
545
+ */
546
+ UnicodeFunctor *freeze();
547
+
548
+ /**
549
+ * Clone the set and make the clone mutable.
550
+ * See the ICU4J Freezable interface for details.
551
+ * @return the mutable clone
552
+ * @see freeze
553
+ * @see isFrozen
554
+ * @stable ICU 3.8
555
+ */
556
+ UnicodeFunctor *cloneAsThawed() const;
557
+
558
+ //----------------------------------------------------------------
559
+ // Public API
560
+ //----------------------------------------------------------------
561
+
562
+ /**
563
+ * Make this object represent the range <code>start - end</code>.
564
+ * If <code>start > end</code> then this object is set to an
565
+ * empty range.
566
+ * A frozen set will not be modified.
567
+ *
568
+ * @param start first character in the set, inclusive
569
+ * @param end last character in the set, inclusive
570
+ * @stable ICU 2.4
571
+ */
572
+ UnicodeSet& set(UChar32 start, UChar32 end);
573
+
574
+ /**
575
+ * Return true if the given position, in the given pattern, appears
576
+ * to be the start of a UnicodeSet pattern.
577
+ * @stable ICU 2.4
578
+ */
579
+ static UBool resemblesPattern(const UnicodeString& pattern,
580
+ int32_t pos);
581
+
582
+ /**
583
+ * Modifies this set to represent the set specified by the given
584
+ * pattern, ignoring Unicode Pattern_White_Space characters.
585
+ * See the class description for the syntax of the pattern language.
586
+ * A frozen set will not be modified.
587
+ * @param pattern a string specifying what characters are in the set
588
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
589
+ * contains a syntax error.
590
+ * <em> Empties the set passed before applying the pattern.</em>
591
+ * @return a reference to this
592
+ * @stable ICU 2.0
593
+ */
594
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
595
+ UErrorCode& status);
596
+
597
+ #ifndef U_HIDE_INTERNAL_API
598
+ /**
599
+ * Modifies this set to represent the set specified by the given
600
+ * pattern, optionally ignoring Unicode Pattern_White_Space characters.
601
+ * See the class description for the syntax of the pattern language.
602
+ * A frozen set will not be modified.
603
+ * @param pattern a string specifying what characters are in the set
604
+ * @param options bitmask for options to apply to the pattern.
605
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
606
+ * @param symbols a symbol table mapping variable names to
607
+ * values and stand-ins to UnicodeSets; may be NULL
608
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
609
+ * contains a syntax error.
610
+ *<em> Empties the set passed before applying the pattern.</em>
611
+ * @return a reference to this
612
+ * @internal
613
+ */
614
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
615
+ uint32_t options,
616
+ const SymbolTable* symbols,
617
+ UErrorCode& status);
618
+ #endif /* U_HIDE_INTERNAL_API */
619
+
620
+ /**
621
+ * Parses the given pattern, starting at the given position. The
622
+ * character at pattern.charAt(pos.getIndex()) must be '[', or the
623
+ * parse fails. Parsing continues until the corresponding closing
624
+ * ']'. If a syntax error is encountered between the opening and
625
+ * closing bracket, the parse fails. Upon return from a successful
626
+ * parse, the ParsePosition is updated to point to the character
627
+ * following the closing ']', and a reference to this set
628
+ * is returned. This method calls
629
+ * itself recursively to parse embedded subpatterns.
630
+ *<em> Empties the set passed before applying the pattern.</em>
631
+ * A frozen set will not be modified.
632
+ *
633
+ * @param pattern the string containing the pattern to be parsed.
634
+ * The portion of the string from pos.getIndex(), which must be a
635
+ * '[', to the corresponding closing ']', is parsed.
636
+ * @param pos upon entry, the position at which to begin parsing.
637
+ * The character at pattern.charAt(pos.getIndex()) must be a '['.
638
+ * Upon return from a successful parse, pos.getIndex() is either
639
+ * the character after the closing ']' of the parsed pattern, or
640
+ * pattern.length() if the closing ']' is the last character of
641
+ * the pattern string.
642
+ * @param options bitmask for options to apply to the pattern.
643
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
644
+ * @param symbols a symbol table mapping variable names to
645
+ * values and stand-ins to UnicodeSets; may be NULL
646
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
647
+ * contains a syntax error.
648
+ * @return a reference to this
649
+ * @stable ICU 2.8
650
+ */
651
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
652
+ ParsePosition& pos,
653
+ uint32_t options,
654
+ const SymbolTable* symbols,
655
+ UErrorCode& status);
656
+
657
+ /**
658
+ * Returns a string representation of this set. If the result of
659
+ * calling this function is passed to a UnicodeSet constructor, it
660
+ * will produce another set that is equal to this one.
661
+ * A frozen set will not be modified.
662
+ * @param result the string to receive the rules. Previous
663
+ * contents will be deleted.
664
+ * @param escapeUnprintable if TRUE then convert unprintable
665
+ * character to their hex escape representations, \\uxxxx or
666
+ * \\Uxxxxxxxx. Unprintable characters are those other than
667
+ * U+000A, U+0020..U+007E.
668
+ * @stable ICU 2.0
669
+ */
670
+ virtual UnicodeString& toPattern(UnicodeString& result,
671
+ UBool escapeUnprintable = FALSE) const;
672
+
673
+ /**
674
+ * Modifies this set to contain those code points which have the given value
675
+ * for the given binary or enumerated property, as returned by
676
+ * u_getIntPropertyValue. Prior contents of this set are lost.
677
+ * A frozen set will not be modified.
678
+ *
679
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
680
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
681
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
682
+ *
683
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
684
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
685
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
686
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
687
+ * categories such as [:L:] to be represented.
688
+ *
689
+ * @param ec error code input/output parameter
690
+ *
691
+ * @return a reference to this set
692
+ *
693
+ * @stable ICU 2.4
694
+ */
695
+ UnicodeSet& applyIntPropertyValue(UProperty prop,
696
+ int32_t value,
697
+ UErrorCode& ec);
698
+
699
+ /**
700
+ * Modifies this set to contain those code points which have the
701
+ * given value for the given property. Prior contents of this
702
+ * set are lost.
703
+ * A frozen set will not be modified.
704
+ *
705
+ * @param prop a property alias, either short or long. The name is matched
706
+ * loosely. See PropertyAliases.txt for names and a description of loose
707
+ * matching. If the value string is empty, then this string is interpreted
708
+ * as either a General_Category value alias, a Script value alias, a binary
709
+ * property alias, or a special ID. Special IDs are matched loosely and
710
+ * correspond to the following sets:
711
+ *
712
+ * "ANY" = [\\u0000-\\U0010FFFF],
713
+ * "ASCII" = [\\u0000-\\u007F],
714
+ * "Assigned" = [:^Cn:].
715
+ *
716
+ * @param value a value alias, either short or long. The name is matched
717
+ * loosely. See PropertyValueAliases.txt for names and a description of
718
+ * loose matching. In addition to aliases listed, numeric values and
719
+ * canonical combining classes may be expressed numerically, e.g., ("nv",
720
+ * "0.5") or ("ccc", "220"). The value string may also be empty.
721
+ *
722
+ * @param ec error code input/output parameter
723
+ *
724
+ * @return a reference to this set
725
+ *
726
+ * @stable ICU 2.4
727
+ */
728
+ UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
729
+ const UnicodeString& value,
730
+ UErrorCode& ec);
731
+
732
+ /**
733
+ * Returns the number of elements in this set (its cardinality).
734
+ * Note that the elements of a set may include both individual
735
+ * codepoints and strings.
736
+ *
737
+ * @return the number of elements in this set (its cardinality).
738
+ * @stable ICU 2.0
739
+ */
740
+ virtual int32_t size(void) const;
741
+
742
+ /**
743
+ * Returns <tt>true</tt> if this set contains no elements.
744
+ *
745
+ * @return <tt>true</tt> if this set contains no elements.
746
+ * @stable ICU 2.0
747
+ */
748
+ virtual UBool isEmpty(void) const;
749
+
750
+ /**
751
+ * Returns true if this set contains the given character.
752
+ * This function works faster with a frozen set.
753
+ * @param c character to be checked for containment
754
+ * @return true if the test condition is met
755
+ * @stable ICU 2.0
756
+ */
757
+ virtual UBool contains(UChar32 c) const;
758
+
759
+ /**
760
+ * Returns true if this set contains every character
761
+ * of the given range.
762
+ * @param start first character, inclusive, of the range
763
+ * @param end last character, inclusive, of the range
764
+ * @return true if the test condition is met
765
+ * @stable ICU 2.0
766
+ */
767
+ virtual UBool contains(UChar32 start, UChar32 end) const;
768
+
769
+ /**
770
+ * Returns <tt>true</tt> if this set contains the given
771
+ * multicharacter string.
772
+ * @param s string to be checked for containment
773
+ * @return <tt>true</tt> if this set contains the specified string
774
+ * @stable ICU 2.4
775
+ */
776
+ UBool contains(const UnicodeString& s) const;
777
+
778
+ /**
779
+ * Returns true if this set contains all the characters and strings
780
+ * of the given set.
781
+ * @param c set to be checked for containment
782
+ * @return true if the test condition is met
783
+ * @stable ICU 2.4
784
+ */
785
+ virtual UBool containsAll(const UnicodeSet& c) const;
786
+
787
+ /**
788
+ * Returns true if this set contains all the characters
789
+ * of the given string.
790
+ * @param s string containing characters to be checked for containment
791
+ * @return true if the test condition is met
792
+ * @stable ICU 2.4
793
+ */
794
+ UBool containsAll(const UnicodeString& s) const;
795
+
796
+ /**
797
+ * Returns true if this set contains none of the characters
798
+ * of the given range.
799
+ * @param start first character, inclusive, of the range
800
+ * @param end last character, inclusive, of the range
801
+ * @return true if the test condition is met
802
+ * @stable ICU 2.4
803
+ */
804
+ UBool containsNone(UChar32 start, UChar32 end) const;
805
+
806
+ /**
807
+ * Returns true if this set contains none of the characters and strings
808
+ * of the given set.
809
+ * @param c set to be checked for containment
810
+ * @return true if the test condition is met
811
+ * @stable ICU 2.4
812
+ */
813
+ UBool containsNone(const UnicodeSet& c) const;
814
+
815
+ /**
816
+ * Returns true if this set contains none of the characters
817
+ * of the given string.
818
+ * @param s string containing characters to be checked for containment
819
+ * @return true if the test condition is met
820
+ * @stable ICU 2.4
821
+ */
822
+ UBool containsNone(const UnicodeString& s) const;
823
+
824
+ /**
825
+ * Returns true if this set contains one or more of the characters
826
+ * in the given range.
827
+ * @param start first character, inclusive, of the range
828
+ * @param end last character, inclusive, of the range
829
+ * @return true if the condition is met
830
+ * @stable ICU 2.4
831
+ */
832
+ inline UBool containsSome(UChar32 start, UChar32 end) const;
833
+
834
+ /**
835
+ * Returns true if this set contains one or more of the characters
836
+ * and strings of the given set.
837
+ * @param s The set to be checked for containment
838
+ * @return true if the condition is met
839
+ * @stable ICU 2.4
840
+ */
841
+ inline UBool containsSome(const UnicodeSet& s) const;
842
+
843
+ /**
844
+ * Returns true if this set contains one or more of the characters
845
+ * of the given string.
846
+ * @param s string containing characters to be checked for containment
847
+ * @return true if the condition is met
848
+ * @stable ICU 2.4
849
+ */
850
+ inline UBool containsSome(const UnicodeString& s) const;
851
+
852
+ /**
853
+ * Returns the length of the initial substring of the input string which
854
+ * consists only of characters and strings that are contained in this set
855
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
856
+ * or only of characters and strings that are not contained
857
+ * in this set (USET_SPAN_NOT_CONTAINED).
858
+ * See USetSpanCondition for details.
859
+ * Similar to the strspn() C library function.
860
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
861
+ * This function works faster with a frozen set and with a non-negative string length argument.
862
+ * @param s start of the string
863
+ * @param length of the string; can be -1 for NUL-terminated
864
+ * @param spanCondition specifies the containment condition
865
+ * @return the length of the initial substring according to the spanCondition;
866
+ * 0 if the start of the string does not fit the spanCondition
867
+ * @stable ICU 3.8
868
+ * @see USetSpanCondition
869
+ */
870
+ int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
871
+
872
+ /**
873
+ * Returns the end of the substring of the input string according to the USetSpanCondition.
874
+ * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
875
+ * after pinning start to 0<=start<=s.length().
876
+ * @param s the string
877
+ * @param start the start index in the string for the span operation
878
+ * @param spanCondition specifies the containment condition
879
+ * @return the exclusive end of the substring according to the spanCondition;
880
+ * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
881
+ * @stable ICU 4.4
882
+ * @see USetSpanCondition
883
+ */
884
+ inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
885
+
886
+ /**
887
+ * Returns the start of the trailing substring of the input string which
888
+ * consists only of characters and strings that are contained in this set
889
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
890
+ * or only of characters and strings that are not contained
891
+ * in this set (USET_SPAN_NOT_CONTAINED).
892
+ * See USetSpanCondition for details.
893
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
894
+ * This function works faster with a frozen set and with a non-negative string length argument.
895
+ * @param s start of the string
896
+ * @param length of the string; can be -1 for NUL-terminated
897
+ * @param spanCondition specifies the containment condition
898
+ * @return the start of the trailing substring according to the spanCondition;
899
+ * the string length if the end of the string does not fit the spanCondition
900
+ * @stable ICU 3.8
901
+ * @see USetSpanCondition
902
+ */
903
+ int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
904
+
905
+ /**
906
+ * Returns the start of the substring of the input string according to the USetSpanCondition.
907
+ * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
908
+ * after pinning limit to 0<=limit<=s.length().
909
+ * @param s the string
910
+ * @param limit the exclusive-end index in the string for the span operation
911
+ * (use s.length() or INT32_MAX for spanning back from the end of the string)
912
+ * @param spanCondition specifies the containment condition
913
+ * @return the start of the substring according to the spanCondition;
914
+ * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
915
+ * @stable ICU 4.4
916
+ * @see USetSpanCondition
917
+ */
918
+ inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
919
+
920
+ /**
921
+ * Returns the length of the initial substring of the input string which
922
+ * consists only of characters and strings that are contained in this set
923
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
924
+ * or only of characters and strings that are not contained
925
+ * in this set (USET_SPAN_NOT_CONTAINED).
926
+ * See USetSpanCondition for details.
927
+ * Similar to the strspn() C library function.
928
+ * Malformed byte sequences are treated according to contains(0xfffd).
929
+ * This function works faster with a frozen set and with a non-negative string length argument.
930
+ * @param s start of the string (UTF-8)
931
+ * @param length of the string; can be -1 for NUL-terminated
932
+ * @param spanCondition specifies the containment condition
933
+ * @return the length of the initial substring according to the spanCondition;
934
+ * 0 if the start of the string does not fit the spanCondition
935
+ * @stable ICU 3.8
936
+ * @see USetSpanCondition
937
+ */
938
+ int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
939
+
940
+ /**
941
+ * Returns the start of the trailing substring of the input string which
942
+ * consists only of characters and strings that are contained in this set
943
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
944
+ * or only of characters and strings that are not contained
945
+ * in this set (USET_SPAN_NOT_CONTAINED).
946
+ * See USetSpanCondition for details.
947
+ * Malformed byte sequences are treated according to contains(0xfffd).
948
+ * This function works faster with a frozen set and with a non-negative string length argument.
949
+ * @param s start of the string (UTF-8)
950
+ * @param length of the string; can be -1 for NUL-terminated
951
+ * @param spanCondition specifies the containment condition
952
+ * @return the start of the trailing substring according to the spanCondition;
953
+ * the string length if the end of the string does not fit the spanCondition
954
+ * @stable ICU 3.8
955
+ * @see USetSpanCondition
956
+ */
957
+ int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
958
+
959
+ /**
960
+ * Implement UnicodeMatcher::matches()
961
+ * @stable ICU 2.4
962
+ */
963
+ virtual UMatchDegree matches(const Replaceable& text,
964
+ int32_t& offset,
965
+ int32_t limit,
966
+ UBool incremental);
967
+
968
+ private:
969
+ /**
970
+ * Returns the longest match for s in text at the given position.
971
+ * If limit > start then match forward from start+1 to limit
972
+ * matching all characters except s.charAt(0). If limit < start,
973
+ * go backward starting from start-1 matching all characters
974
+ * except s.charAt(s.length()-1). This method assumes that the
975
+ * first character, text.charAt(start), matches s, so it does not
976
+ * check it.
977
+ * @param text the text to match
978
+ * @param start the first character to match. In the forward
979
+ * direction, text.charAt(start) is matched against s.charAt(0).
980
+ * In the reverse direction, it is matched against
981
+ * s.charAt(s.length()-1).
982
+ * @param limit the limit offset for matching, either last+1 in
983
+ * the forward direction, or last-1 in the reverse direction,
984
+ * where last is the index of the last character to match.
985
+ * @param s the string to match against text
986
+ * @return If part of s matches up to the limit, return |limit -
987
+ * start|. If all of s matches before reaching the limit, return
988
+ * s.length(). If there is a mismatch between s and text, return
989
+ * 0
990
+ */
991
+ static int32_t matchRest(const Replaceable& text,
992
+ int32_t start, int32_t limit,
993
+ const UnicodeString& s);
994
+
995
+ /**
996
+ * Returns the smallest value i such that c < list[i]. Caller
997
+ * must ensure that c is a legal value or this method will enter
998
+ * an infinite loop. This method performs a binary search.
999
+ * @param c a character in the range MIN_VALUE..MAX_VALUE
1000
+ * inclusive
1001
+ * @return the smallest integer i in the range 0..len-1,
1002
+ * inclusive, such that c < list[i]
1003
+ */
1004
+ int32_t findCodePoint(UChar32 c) const;
1005
+
1006
+ public:
1007
+
1008
+ /**
1009
+ * Implementation of UnicodeMatcher API. Union the set of all
1010
+ * characters that may be matched by this object into the given
1011
+ * set.
1012
+ * @param toUnionTo the set into which to union the source characters
1013
+ * @stable ICU 2.4
1014
+ */
1015
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
1016
+
1017
+ /**
1018
+ * Returns the index of the given character within this set, where
1019
+ * the set is ordered by ascending code point. If the character
1020
+ * is not in this set, return -1. The inverse of this method is
1021
+ * <code>charAt()</code>.
1022
+ * @return an index from 0..size()-1, or -1
1023
+ * @stable ICU 2.4
1024
+ */
1025
+ int32_t indexOf(UChar32 c) const;
1026
+
1027
+ /**
1028
+ * Returns the character at the given index within this set, where
1029
+ * the set is ordered by ascending code point. If the index is
1030
+ * out of range, return (UChar32)-1. The inverse of this method is
1031
+ * <code>indexOf()</code>.
1032
+ * @param index an index from 0..size()-1
1033
+ * @return the character at the given index, or (UChar32)-1.
1034
+ * @stable ICU 2.4
1035
+ */
1036
+ UChar32 charAt(int32_t index) const;
1037
+
1038
+ /**
1039
+ * Adds the specified range to this set if it is not already
1040
+ * present. If this set already contains the specified range,
1041
+ * the call leaves this set unchanged. If <code>start > end</code>
1042
+ * then an empty range is added, leaving the set unchanged.
1043
+ * This is equivalent to a boolean logic OR, or a set UNION.
1044
+ * A frozen set will not be modified.
1045
+ *
1046
+ * @param start first character, inclusive, of range to be added
1047
+ * to this set.
1048
+ * @param end last character, inclusive, of range to be added
1049
+ * to this set.
1050
+ * @stable ICU 2.0
1051
+ */
1052
+ virtual UnicodeSet& add(UChar32 start, UChar32 end);
1053
+
1054
+ /**
1055
+ * Adds the specified character to this set if it is not already
1056
+ * present. If this set already contains the specified character,
1057
+ * the call leaves this set unchanged.
1058
+ * A frozen set will not be modified.
1059
+ * @stable ICU 2.0
1060
+ */
1061
+ UnicodeSet& add(UChar32 c);
1062
+
1063
+ /**
1064
+ * Adds the specified multicharacter to this set if it is not already
1065
+ * present. If this set already contains the multicharacter,
1066
+ * the call leaves this set unchanged.
1067
+ * Thus "ch" => {"ch"}
1068
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1069
+ * A frozen set will not be modified.
1070
+ * @param s the source string
1071
+ * @return this object, for chaining
1072
+ * @stable ICU 2.4
1073
+ */
1074
+ UnicodeSet& add(const UnicodeString& s);
1075
+
1076
+ private:
1077
+ /**
1078
+ * @return a code point IF the string consists of a single one.
1079
+ * otherwise returns -1.
1080
+ * @param s string to test
1081
+ */
1082
+ static int32_t getSingleCP(const UnicodeString& s);
1083
+
1084
+ void _add(const UnicodeString& s);
1085
+
1086
+ public:
1087
+ /**
1088
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
1089
+ * If this set already contains any particular character, it has no effect on that character.
1090
+ * A frozen set will not be modified.
1091
+ * @param s the source string
1092
+ * @return this object, for chaining
1093
+ * @stable ICU 2.4
1094
+ */
1095
+ UnicodeSet& addAll(const UnicodeString& s);
1096
+
1097
+ /**
1098
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
1099
+ * If this set already contains any particular character, it has no effect on that character.
1100
+ * A frozen set will not be modified.
1101
+ * @param s the source string
1102
+ * @return this object, for chaining
1103
+ * @stable ICU 2.4
1104
+ */
1105
+ UnicodeSet& retainAll(const UnicodeString& s);
1106
+
1107
+ /**
1108
+ * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
1109
+ * Each character is removed if this set already contains it, or added if it does not.
1110
+ * A frozen set will not be modified.
1111
+ * @param s the source string
1112
+ * @return this object, for chaining
1113
+ * @stable ICU 2.4
1114
+ */
1115
+ UnicodeSet& complementAll(const UnicodeString& s);
1116
+
1117
+ /**
1118
+ * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
1119
+ * If this set does not contain any particular character, it has no effect on that character.
1120
+ * A frozen set will not be modified.
1121
+ * @param s the source string
1122
+ * @return this object, for chaining
1123
+ * @stable ICU 2.4
1124
+ */
1125
+ UnicodeSet& removeAll(const UnicodeString& s);
1126
+
1127
+ /**
1128
+ * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
1129
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1130
+ * @param s the source string
1131
+ * @return a newly created set containing the given string.
1132
+ * The caller owns the return object and is responsible for deleting it.
1133
+ * @stable ICU 2.4
1134
+ */
1135
+ static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
1136
+
1137
+
1138
+ /**
1139
+ * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
1140
+ * @param s the source string
1141
+ * @return a newly created set containing the given characters
1142
+ * The caller owns the return object and is responsible for deleting it.
1143
+ * @stable ICU 2.4
1144
+ */
1145
+ static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
1146
+
1147
+ /**
1148
+ * Retain only the elements in this set that are contained in the
1149
+ * specified range. If <code>start > end</code> then an empty range is
1150
+ * retained, leaving the set empty. This is equivalent to
1151
+ * a boolean logic AND, or a set INTERSECTION.
1152
+ * A frozen set will not be modified.
1153
+ *
1154
+ * @param start first character, inclusive, of range to be retained
1155
+ * to this set.
1156
+ * @param end last character, inclusive, of range to be retained
1157
+ * to this set.
1158
+ * @stable ICU 2.0
1159
+ */
1160
+ virtual UnicodeSet& retain(UChar32 start, UChar32 end);
1161
+
1162
+
1163
+ /**
1164
+ * Retain the specified character from this set if it is present.
1165
+ * A frozen set will not be modified.
1166
+ * @stable ICU 2.0
1167
+ */
1168
+ UnicodeSet& retain(UChar32 c);
1169
+
1170
+ /**
1171
+ * Removes the specified range from this set if it is present.
1172
+ * The set will not contain the specified range once the call
1173
+ * returns. If <code>start > end</code> then an empty range is
1174
+ * removed, leaving the set unchanged.
1175
+ * A frozen set will not be modified.
1176
+ *
1177
+ * @param start first character, inclusive, of range to be removed
1178
+ * from this set.
1179
+ * @param end last character, inclusive, of range to be removed
1180
+ * from this set.
1181
+ * @stable ICU 2.0
1182
+ */
1183
+ virtual UnicodeSet& remove(UChar32 start, UChar32 end);
1184
+
1185
+ /**
1186
+ * Removes the specified character from this set if it is present.
1187
+ * The set will not contain the specified character once the call
1188
+ * returns.
1189
+ * A frozen set will not be modified.
1190
+ * @stable ICU 2.0
1191
+ */
1192
+ UnicodeSet& remove(UChar32 c);
1193
+
1194
+ /**
1195
+ * Removes the specified string from this set if it is present.
1196
+ * The set will not contain the specified string once the call
1197
+ * returns.
1198
+ * A frozen set will not be modified.
1199
+ * @param s the source string
1200
+ * @return this object, for chaining
1201
+ * @stable ICU 2.4
1202
+ */
1203
+ UnicodeSet& remove(const UnicodeString& s);
1204
+
1205
+ /**
1206
+ * Inverts this set. This operation modifies this set so that
1207
+ * its value is its complement. This is equivalent to
1208
+ * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
1209
+ * A frozen set will not be modified.
1210
+ * @stable ICU 2.0
1211
+ */
1212
+ virtual UnicodeSet& complement(void);
1213
+
1214
+ /**
1215
+ * Complements the specified range in this set. Any character in
1216
+ * the range will be removed if it is in this set, or will be
1217
+ * added if it is not in this set. If <code>start > end</code>
1218
+ * then an empty range is complemented, leaving the set unchanged.
1219
+ * This is equivalent to a boolean logic XOR.
1220
+ * A frozen set will not be modified.
1221
+ *
1222
+ * @param start first character, inclusive, of range to be complemented
1223
+ * in this set.
1224
+ * @param end last character, inclusive, of range to be complemented
1225
+ * in this set.
1226
+ * @stable ICU 2.0
1227
+ */
1228
+ virtual UnicodeSet& complement(UChar32 start, UChar32 end);
1229
+
1230
+ /**
1231
+ * Complements the specified character in this set. The character
1232
+ * will be removed if it is in this set, or will be added if it is
1233
+ * not in this set.
1234
+ * A frozen set will not be modified.
1235
+ * @stable ICU 2.0
1236
+ */
1237
+ UnicodeSet& complement(UChar32 c);
1238
+
1239
+ /**
1240
+ * Complement the specified string in this set.
1241
+ * The string will be removed if it is in this set, or will be
1242
+ * added if it is not in this set.
1243
+ * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1244
+ * A frozen set will not be modified.
1245
+ * @param s the string to complement
1246
+ * @return this object, for chaining
1247
+ * @stable ICU 2.4
1248
+ */
1249
+ UnicodeSet& complement(const UnicodeString& s);
1250
+
1251
+ /**
1252
+ * Adds all of the elements in the specified set to this set if
1253
+ * they're not already present. This operation effectively
1254
+ * modifies this set so that its value is the <i>union</i> of the two
1255
+ * sets. The behavior of this operation is unspecified if the specified
1256
+ * collection is modified while the operation is in progress.
1257
+ * A frozen set will not be modified.
1258
+ *
1259
+ * @param c set whose elements are to be added to this set.
1260
+ * @see #add(UChar32, UChar32)
1261
+ * @stable ICU 2.0
1262
+ */
1263
+ virtual UnicodeSet& addAll(const UnicodeSet& c);
1264
+
1265
+ /**
1266
+ * Retains only the elements in this set that are contained in the
1267
+ * specified set. In other words, removes from this set all of
1268
+ * its elements that are not contained in the specified set. This
1269
+ * operation effectively modifies this set so that its value is
1270
+ * the <i>intersection</i> of the two sets.
1271
+ * A frozen set will not be modified.
1272
+ *
1273
+ * @param c set that defines which elements this set will retain.
1274
+ * @stable ICU 2.0
1275
+ */
1276
+ virtual UnicodeSet& retainAll(const UnicodeSet& c);
1277
+
1278
+ /**
1279
+ * Removes from this set all of its elements that are contained in the
1280
+ * specified set. This operation effectively modifies this
1281
+ * set so that its value is the <i>asymmetric set difference</i> of
1282
+ * the two sets.
1283
+ * A frozen set will not be modified.
1284
+ *
1285
+ * @param c set that defines which elements will be removed from
1286
+ * this set.
1287
+ * @stable ICU 2.0
1288
+ */
1289
+ virtual UnicodeSet& removeAll(const UnicodeSet& c);
1290
+
1291
+ /**
1292
+ * Complements in this set all elements contained in the specified
1293
+ * set. Any character in the other set will be removed if it is
1294
+ * in this set, or will be added if it is not in this set.
1295
+ * A frozen set will not be modified.
1296
+ *
1297
+ * @param c set that defines which elements will be xor'ed from
1298
+ * this set.
1299
+ * @stable ICU 2.4
1300
+ */
1301
+ virtual UnicodeSet& complementAll(const UnicodeSet& c);
1302
+
1303
+ /**
1304
+ * Removes all of the elements from this set. This set will be
1305
+ * empty after this call returns.
1306
+ * A frozen set will not be modified.
1307
+ * @stable ICU 2.0
1308
+ */
1309
+ virtual UnicodeSet& clear(void);
1310
+
1311
+ /**
1312
+ * Close this set over the given attribute. For the attribute
1313
+ * USET_CASE, the result is to modify this set so that:
1314
+ *
1315
+ * 1. For each character or string 'a' in this set, all strings or
1316
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
1317
+ * to this set.
1318
+ *
1319
+ * 2. For each string 'e' in the resulting set, if e !=
1320
+ * foldCase(e), 'e' will be removed.
1321
+ *
1322
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
1323
+ *
1324
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
1325
+ * == b denotes that the contents are the same, not pointer
1326
+ * comparison.)
1327
+ *
1328
+ * A frozen set will not be modified.
1329
+ *
1330
+ * @param attribute bitmask for attributes to close over.
1331
+ * Currently only the USET_CASE bit is supported. Any undefined bits
1332
+ * are ignored.
1333
+ * @return a reference to this set.
1334
+ * @stable ICU 4.2
1335
+ */
1336
+ UnicodeSet& closeOver(int32_t attribute);
1337
+
1338
+ /**
1339
+ * Remove all strings from this set.
1340
+ *
1341
+ * @return a reference to this set.
1342
+ * @stable ICU 4.2
1343
+ */
1344
+ virtual UnicodeSet &removeAllStrings();
1345
+
1346
+ /**
1347
+ * Iteration method that returns the number of ranges contained in
1348
+ * this set.
1349
+ * @see #getRangeStart
1350
+ * @see #getRangeEnd
1351
+ * @stable ICU 2.4
1352
+ */
1353
+ virtual int32_t getRangeCount(void) const;
1354
+
1355
+ /**
1356
+ * Iteration method that returns the first character in the
1357
+ * specified range of this set.
1358
+ * @see #getRangeCount
1359
+ * @see #getRangeEnd
1360
+ * @stable ICU 2.4
1361
+ */
1362
+ virtual UChar32 getRangeStart(int32_t index) const;
1363
+
1364
+ /**
1365
+ * Iteration method that returns the last character in the
1366
+ * specified range of this set.
1367
+ * @see #getRangeStart
1368
+ * @see #getRangeCount
1369
+ * @stable ICU 2.4
1370
+ */
1371
+ virtual UChar32 getRangeEnd(int32_t index) const;
1372
+
1373
+ /**
1374
+ * Serializes this set into an array of 16-bit integers. Serialization
1375
+ * (currently) only records the characters in the set; multicharacter
1376
+ * strings are ignored.
1377
+ *
1378
+ * The array has following format (each line is one 16-bit
1379
+ * integer):
1380
+ *
1381
+ * length = (n+2*m) | (m!=0?0x8000:0)
1382
+ * bmpLength = n; present if m!=0
1383
+ * bmp[0]
1384
+ * bmp[1]
1385
+ * ...
1386
+ * bmp[n-1]
1387
+ * supp-high[0]
1388
+ * supp-low[0]
1389
+ * supp-high[1]
1390
+ * supp-low[1]
1391
+ * ...
1392
+ * supp-high[m-1]
1393
+ * supp-low[m-1]
1394
+ *
1395
+ * The array starts with a header. After the header are n bmp
1396
+ * code points, then m supplementary code points. Either n or m
1397
+ * or both may be zero. n+2*m is always <= 0x7FFF.
1398
+ *
1399
+ * If there are no supplementary characters (if m==0) then the
1400
+ * header is one 16-bit integer, 'length', with value n.
1401
+ *
1402
+ * If there are supplementary characters (if m!=0) then the header
1403
+ * is two 16-bit integers. The first, 'length', has value
1404
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
1405
+ *
1406
+ * After the header the code points are stored in ascending order.
1407
+ * Supplementary code points are stored as most significant 16
1408
+ * bits followed by least significant 16 bits.
1409
+ *
1410
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
1411
+ * May be NULL only if destCapacity is zero.
1412
+ * @param destCapacity size of dest, or zero. Must not be negative.
1413
+ * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR
1414
+ * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if
1415
+ * n+2*m+(m!=0?2:1) > destCapacity.
1416
+ * @return the total length of the serialized format, including
1417
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
1418
+ * than U_BUFFER_OVERFLOW_ERROR.
1419
+ * @stable ICU 2.4
1420
+ */
1421
+ int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
1422
+
1423
+ /**
1424
+ * Reallocate this object's internal structures to take up the least
1425
+ * possible space, without changing this object's value.
1426
+ * A frozen set will not be modified.
1427
+ * @stable ICU 2.4
1428
+ */
1429
+ virtual UnicodeSet& compact();
1430
+
1431
+ /**
1432
+ * Return the class ID for this class. This is useful only for
1433
+ * comparing to a return value from getDynamicClassID(). For example:
1434
+ * <pre>
1435
+ * . Base* polymorphic_pointer = createPolymorphicObject();
1436
+ * . if (polymorphic_pointer->getDynamicClassID() ==
1437
+ * . Derived::getStaticClassID()) ...
1438
+ * </pre>
1439
+ * @return The class ID for all objects of this class.
1440
+ * @stable ICU 2.0
1441
+ */
1442
+ static UClassID U_EXPORT2 getStaticClassID(void);
1443
+
1444
+ /**
1445
+ * Implement UnicodeFunctor API.
1446
+ *
1447
+ * @return The class ID for this object. All objects of a given
1448
+ * class have the same class ID. Objects of other classes have
1449
+ * different class IDs.
1450
+ * @stable ICU 2.4
1451
+ */
1452
+ virtual UClassID getDynamicClassID(void) const;
1453
+
1454
+ private:
1455
+
1456
+ // Private API for the USet API
1457
+
1458
+ friend class USetAccess;
1459
+
1460
+ int32_t getStringCount() const;
1461
+
1462
+ const UnicodeString* getString(int32_t index) const;
1463
+
1464
+ //----------------------------------------------------------------
1465
+ // RuleBasedTransliterator support
1466
+ //----------------------------------------------------------------
1467
+
1468
+ private:
1469
+
1470
+ /**
1471
+ * Returns <tt>true</tt> if this set contains any character whose low byte
1472
+ * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
1473
+ * indexing.
1474
+ */
1475
+ virtual UBool matchesIndexValue(uint8_t v) const;
1476
+
1477
+ private:
1478
+ friend class RBBIRuleScanner;
1479
+
1480
+ //----------------------------------------------------------------
1481
+ // Implementation: Clone as thawed (see ICU4J Freezable)
1482
+ //----------------------------------------------------------------
1483
+
1484
+ UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
1485
+
1486
+ //----------------------------------------------------------------
1487
+ // Implementation: Pattern parsing
1488
+ //----------------------------------------------------------------
1489
+
1490
+ void applyPatternIgnoreSpace(const UnicodeString& pattern,
1491
+ ParsePosition& pos,
1492
+ const SymbolTable* symbols,
1493
+ UErrorCode& status);
1494
+
1495
+ void applyPattern(RuleCharacterIterator& chars,
1496
+ const SymbolTable* symbols,
1497
+ UnicodeString& rebuiltPat,
1498
+ uint32_t options,
1499
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
1500
+ UErrorCode& ec);
1501
+
1502
+ //----------------------------------------------------------------
1503
+ // Implementation: Utility methods
1504
+ //----------------------------------------------------------------
1505
+
1506
+ void ensureCapacity(int32_t newLen, UErrorCode& ec);
1507
+
1508
+ void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
1509
+
1510
+ void swapBuffers(void);
1511
+
1512
+ UBool allocateStrings(UErrorCode &status);
1513
+
1514
+ UnicodeString& _toPattern(UnicodeString& result,
1515
+ UBool escapeUnprintable) const;
1516
+
1517
+ UnicodeString& _generatePattern(UnicodeString& result,
1518
+ UBool escapeUnprintable) const;
1519
+
1520
+ static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
1521
+
1522
+ static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
1523
+
1524
+ //----------------------------------------------------------------
1525
+ // Implementation: Fundamental operators
1526
+ //----------------------------------------------------------------
1527
+
1528
+ void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
1529
+
1530
+ void add(const UChar32* other, int32_t otherLen, int8_t polarity);
1531
+
1532
+ void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
1533
+
1534
+ /**
1535
+ * Return true if the given position, in the given pattern, appears
1536
+ * to be the start of a property set pattern [:foo:], \\p{foo}, or
1537
+ * \\P{foo}, or \\N{name}.
1538
+ */
1539
+ static UBool resemblesPropertyPattern(const UnicodeString& pattern,
1540
+ int32_t pos);
1541
+
1542
+ static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
1543
+ int32_t iterOpts);
1544
+
1545
+ /**
1546
+ * Parse the given property pattern at the given parse position
1547
+ * and set this UnicodeSet to the result.
1548
+ *
1549
+ * The original design document is out of date, but still useful.
1550
+ * Ignore the property and value names:
1551
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
1552
+ *
1553
+ * Recognized syntax:
1554
+ *
1555
+ * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
1556
+ * \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
1557
+ * \\N{name} - white space not allowed within "\\N"
1558
+ *
1559
+ * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
1560
+ * Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
1561
+ * and trailing space is deleted, and internal runs of whitespace
1562
+ * are collapsed to a single space.
1563
+ *
1564
+ * We support binary properties, enumerated properties, and the
1565
+ * following non-enumerated properties:
1566
+ *
1567
+ * Numeric_Value
1568
+ * Name
1569
+ * Unicode_1_Name
1570
+ *
1571
+ * @param pattern the pattern string
1572
+ * @param ppos on entry, the position at which to begin parsing.
1573
+ * This should be one of the locations marked '^':
1574
+ *
1575
+ * [:blah:] \\p{blah} \\P{blah} \\N{name}
1576
+ * ^ % ^ % ^ % ^ %
1577
+ *
1578
+ * On return, the position after the last character parsed, that is,
1579
+ * the locations marked '%'. If the parse fails, ppos is returned
1580
+ * unchanged.
1581
+ * @param ec status
1582
+ * @return a reference to this.
1583
+ */
1584
+ UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
1585
+ ParsePosition& ppos,
1586
+ UErrorCode &ec);
1587
+
1588
+ void applyPropertyPattern(RuleCharacterIterator& chars,
1589
+ UnicodeString& rebuiltPat,
1590
+ UErrorCode& ec);
1591
+
1592
+ friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
1593
+ static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
1594
+
1595
+ /**
1596
+ * A filter that returns TRUE if the given code point should be
1597
+ * included in the UnicodeSet being constructed.
1598
+ */
1599
+ typedef UBool (*Filter)(UChar32 codePoint, void* context);
1600
+
1601
+ /**
1602
+ * Given a filter, set this UnicodeSet to the code points
1603
+ * contained by that filter. The filter MUST be
1604
+ * property-conformant. That is, if it returns value v for one
1605
+ * code point, then it must return v for all affiliated code
1606
+ * points, as defined by the inclusions list. See
1607
+ * getInclusions().
1608
+ * src is a UPropertySource value.
1609
+ */
1610
+ void applyFilter(Filter filter,
1611
+ void* context,
1612
+ int32_t src,
1613
+ UErrorCode &status);
1614
+
1615
+ /**
1616
+ * Set the new pattern to cache.
1617
+ */
1618
+ void setPattern(const UnicodeString& newPat);
1619
+ /**
1620
+ * Release existing cached pattern.
1621
+ */
1622
+ void releasePattern();
1623
+
1624
+ friend class UnicodeSetIterator;
1625
+ };
1626
+
1627
+
1628
+
1629
+ inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
1630
+ return !operator==(o);
1631
+ }
1632
+
1633
+ inline UBool UnicodeSet::isFrozen() const {
1634
+ return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
1635
+ }
1636
+
1637
+ inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
1638
+ return !containsNone(start, end);
1639
+ }
1640
+
1641
+ inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
1642
+ return !containsNone(s);
1643
+ }
1644
+
1645
+ inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
1646
+ return !containsNone(s);
1647
+ }
1648
+
1649
+ inline UBool UnicodeSet::isBogus() const {
1650
+ return (UBool)(fFlags & kIsBogus);
1651
+ }
1652
+
1653
+ inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
1654
+ return reinterpret_cast<UnicodeSet *>(uset);
1655
+ }
1656
+
1657
+ inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
1658
+ return reinterpret_cast<const UnicodeSet *>(uset);
1659
+ }
1660
+
1661
+ inline USet *UnicodeSet::toUSet() {
1662
+ return reinterpret_cast<USet *>(this);
1663
+ }
1664
+
1665
+ inline const USet *UnicodeSet::toUSet() const {
1666
+ return reinterpret_cast<const USet *>(this);
1667
+ }
1668
+
1669
+ inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
1670
+ int32_t sLength=s.length();
1671
+ if(start<0) {
1672
+ start=0;
1673
+ } else if(start>sLength) {
1674
+ start=sLength;
1675
+ }
1676
+ return start+span(s.getBuffer()+start, sLength-start, spanCondition);
1677
+ }
1678
+
1679
+ inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
1680
+ int32_t sLength=s.length();
1681
+ if(limit<0) {
1682
+ limit=0;
1683
+ } else if(limit>sLength) {
1684
+ limit=sLength;
1685
+ }
1686
+ return spanBack(s.getBuffer(), limit, spanCondition);
1687
+ }
1688
+
1689
+ U_NAMESPACE_END
1690
+
1691
+ #endif