pango 3.1.0-x64-mingw32 → 3.1.1-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291)
  1. checksums.yaml +4 -4
  2. data/Rakefile +9 -4
  3. data/ext/pango/rbpangolayoutline.c +6 -2
  4. data/lib/2.2/pango.so +0 -0
  5. data/lib/2.3/pango.so +0 -0
  6. data/lib/2.4/pango.so +0 -0
  7. data/sample/layout.rb +1 -1
  8. data/vendor/local/bin/derb.exe +0 -0
  9. data/vendor/local/bin/genbrk.exe +0 -0
  10. data/vendor/local/bin/genccode.exe +0 -0
  11. data/vendor/local/bin/gencfu.exe +0 -0
  12. data/vendor/local/bin/gencmn.exe +0 -0
  13. data/vendor/local/bin/gencnval.exe +0 -0
  14. data/vendor/local/bin/gendict.exe +0 -0
  15. data/vendor/local/bin/gennorm2.exe +0 -0
  16. data/vendor/local/bin/genrb.exe +0 -0
  17. data/vendor/local/bin/gensprep.exe +0 -0
  18. data/vendor/local/bin/hb-ot-shape-closure.exe +0 -0
  19. data/vendor/local/bin/hb-shape.exe +0 -0
  20. data/vendor/local/bin/hb-view.exe +0 -0
  21. data/vendor/local/bin/icu-config +16 -11
  22. data/vendor/local/bin/icuinfo.exe +0 -0
  23. data/vendor/local/bin/icupkg.exe +0 -0
  24. data/vendor/local/bin/libharfbuzz-0.dll +0 -0
  25. data/vendor/local/bin/libpango-1.0-0.dll +0 -0
  26. data/vendor/local/bin/libpangocairo-1.0-0.dll +0 -0
  27. data/vendor/local/bin/libpangoft2-1.0-0.dll +0 -0
  28. data/vendor/local/bin/libpangowin32-1.0-0.dll +0 -0
  29. data/vendor/local/bin/makeconv.exe +0 -0
  30. data/vendor/local/bin/pango-view.exe +0 -0
  31. data/vendor/local/bin/pkgdata.exe +0 -0
  32. data/vendor/local/bin/uconv.exe +0 -0
  33. data/vendor/local/include/harfbuzz/hb-font.h +5 -0
  34. data/vendor/local/include/harfbuzz/hb-glib.h +2 -1
  35. data/vendor/local/include/harfbuzz/hb-ot-layout.h +19 -0
  36. data/vendor/local/include/harfbuzz/hb-shape-plan.h +19 -0
  37. data/vendor/local/include/harfbuzz/hb-version.h +3 -3
  38. data/vendor/local/include/unicode/alphaindex.h +2 -0
  39. data/vendor/local/include/unicode/appendable.h +2 -0
  40. data/vendor/local/include/unicode/basictz.h +2 -0
  41. data/vendor/local/include/unicode/brkiter.h +7 -0
  42. data/vendor/local/include/unicode/bytestream.h +2 -0
  43. data/vendor/local/include/unicode/bytestrie.h +3 -2
  44. data/vendor/local/include/unicode/bytestriebuilder.h +4 -4
  45. data/vendor/local/include/unicode/calendar.h +2 -0
  46. data/vendor/local/include/unicode/caniter.h +2 -0
  47. data/vendor/local/include/unicode/chariter.h +2 -0
  48. data/vendor/local/include/unicode/choicfmt.h +2 -0
  49. data/vendor/local/include/unicode/coleitr.h +2 -0
  50. data/vendor/local/include/unicode/coll.h +2 -0
  51. data/vendor/local/include/unicode/compactdecimalformat.h +4 -3
  52. data/vendor/local/include/unicode/curramt.h +2 -0
  53. data/vendor/local/include/unicode/currpinf.h +2 -0
  54. data/vendor/local/include/unicode/currunit.h +2 -0
  55. data/vendor/local/include/unicode/datefmt.h +2 -0
  56. data/vendor/local/include/unicode/dbbi.h +2 -0
  57. data/vendor/local/include/unicode/dcfmtsym.h +2 -0
  58. data/vendor/local/include/unicode/decimfmt.h +9 -5
  59. data/vendor/local/include/unicode/docmain.h +3 -1
  60. data/vendor/local/include/unicode/dtfmtsym.h +26 -14
  61. data/vendor/local/include/unicode/dtintrv.h +2 -0
  62. data/vendor/local/include/unicode/dtitvfmt.h +2 -0
  63. data/vendor/local/include/unicode/dtitvinf.h +8 -3
  64. data/vendor/local/include/unicode/dtptngen.h +13 -9
  65. data/vendor/local/include/unicode/dtrule.h +2 -0
  66. data/vendor/local/include/unicode/enumset.h +2 -0
  67. data/vendor/local/include/unicode/errorcode.h +2 -0
  68. data/vendor/local/include/unicode/fieldpos.h +5 -2
  69. data/vendor/local/include/unicode/filteredbrk.h +10 -12
  70. data/vendor/local/include/unicode/fmtable.h +4 -2
  71. data/vendor/local/include/unicode/format.h +2 -0
  72. data/vendor/local/include/unicode/fpositer.h +2 -0
  73. data/vendor/local/include/unicode/gender.h +2 -0
  74. data/vendor/local/include/unicode/gregocal.h +2 -0
  75. data/vendor/local/include/unicode/icudataver.h +2 -0
  76. data/vendor/local/include/unicode/icuplug.h +12 -2
  77. data/vendor/local/include/unicode/idna.h +6 -4
  78. data/vendor/local/include/unicode/listformatter.h +2 -0
  79. data/vendor/local/include/unicode/localpointer.h +19 -28
  80. data/vendor/local/include/unicode/locdspnm.h +2 -0
  81. data/vendor/local/include/unicode/locid.h +2 -0
  82. data/vendor/local/include/unicode/measfmt.h +21 -2
  83. data/vendor/local/include/unicode/measunit.h +50 -24
  84. data/vendor/local/include/unicode/measure.h +2 -0
  85. data/vendor/local/include/unicode/messagepattern.h +2 -0
  86. data/vendor/local/include/unicode/msgfmt.h +2 -0
  87. data/vendor/local/include/unicode/normalizer2.h +2 -0
  88. data/vendor/local/include/unicode/normlzr.h +4 -4
  89. data/vendor/local/include/unicode/numfmt.h +8 -6
  90. data/vendor/local/include/unicode/numsys.h +2 -0
  91. data/vendor/local/include/unicode/parseerr.h +2 -0
  92. data/vendor/local/include/unicode/parsepos.h +2 -0
  93. data/vendor/local/include/unicode/platform.h +8 -0
  94. data/vendor/local/include/unicode/plurfmt.h +2 -0
  95. data/vendor/local/include/unicode/plurrule.h +2 -0
  96. data/vendor/local/include/unicode/ptypes.h +2 -0
  97. data/vendor/local/include/unicode/putil.h +2 -0
  98. data/vendor/local/include/unicode/rbbi.h +18 -66
  99. data/vendor/local/include/unicode/rbnf.h +8 -0
  100. data/vendor/local/include/unicode/rbtz.h +2 -0
  101. data/vendor/local/include/unicode/regex.h +2 -0
  102. data/vendor/local/include/unicode/region.h +3 -1
  103. data/vendor/local/include/unicode/reldatefmt.h +19 -9
  104. data/vendor/local/include/unicode/rep.h +2 -0
  105. data/vendor/local/include/unicode/resbund.h +2 -0
  106. data/vendor/local/include/unicode/schriter.h +2 -0
  107. data/vendor/local/include/unicode/scientificnumberformatter.h +2 -0
  108. data/vendor/local/include/unicode/search.h +2 -0
  109. data/vendor/local/include/unicode/selfmt.h +2 -0
  110. data/vendor/local/include/unicode/simpleformatter.h +2 -0
  111. data/vendor/local/include/unicode/simpletz.h +2 -0
  112. data/vendor/local/include/unicode/smpdtfmt.h +9 -1
  113. data/vendor/local/include/unicode/sortkey.h +2 -0
  114. data/vendor/local/include/unicode/std_string.h +2 -0
  115. data/vendor/local/include/unicode/strenum.h +2 -0
  116. data/vendor/local/include/unicode/stringpiece.h +5 -3
  117. data/vendor/local/include/unicode/stringtriebuilder.h +13 -1
  118. data/vendor/local/include/unicode/stsearch.h +2 -0
  119. data/vendor/local/include/unicode/symtable.h +2 -0
  120. data/vendor/local/include/unicode/tblcoll.h +3 -1
  121. data/vendor/local/include/unicode/timezone.h +2 -0
  122. data/vendor/local/include/unicode/tmunit.h +8 -0
  123. data/vendor/local/include/unicode/tmutamt.h +2 -0
  124. data/vendor/local/include/unicode/tmutfmt.h +5 -0
  125. data/vendor/local/include/unicode/translit.h +2 -0
  126. data/vendor/local/include/unicode/tzfmt.h +2 -0
  127. data/vendor/local/include/unicode/tznames.h +16 -2
  128. data/vendor/local/include/unicode/tzrule.h +2 -0
  129. data/vendor/local/include/unicode/tztrans.h +2 -0
  130. data/vendor/local/include/unicode/ubidi.h +20 -8
  131. data/vendor/local/include/unicode/ubiditransform.h +321 -0
  132. data/vendor/local/include/unicode/ubrk.h +26 -7
  133. data/vendor/local/include/unicode/ucal.h +8 -4
  134. data/vendor/local/include/unicode/ucasemap.h +2 -0
  135. data/vendor/local/include/unicode/ucat.h +2 -0
  136. data/vendor/local/include/unicode/uchar.h +244 -17
  137. data/vendor/local/include/unicode/ucharstrie.h +2 -0
  138. data/vendor/local/include/unicode/ucharstriebuilder.h +2 -0
  139. data/vendor/local/include/unicode/uchriter.h +2 -0
  140. data/vendor/local/include/unicode/uclean.h +2 -0
  141. data/vendor/local/include/unicode/ucnv.h +8 -1
  142. data/vendor/local/include/unicode/ucnv_cb.h +2 -0
  143. data/vendor/local/include/unicode/ucnv_err.h +2 -0
  144. data/vendor/local/include/unicode/ucnvsel.h +2 -0
  145. data/vendor/local/include/unicode/ucol.h +28 -11
  146. data/vendor/local/include/unicode/ucoleitr.h +2 -0
  147. data/vendor/local/include/unicode/uconfig.h +2 -0
  148. data/vendor/local/include/unicode/ucsdet.h +2 -0
  149. data/vendor/local/include/unicode/ucurr.h +5 -1
  150. data/vendor/local/include/unicode/udat.h +13 -11
  151. data/vendor/local/include/unicode/udata.h +8 -1
  152. data/vendor/local/include/unicode/udateintervalformat.h +2 -0
  153. data/vendor/local/include/unicode/udatpg.h +15 -2
  154. data/vendor/local/include/unicode/udisplaycontext.h +34 -2
  155. data/vendor/local/include/unicode/uenum.h +2 -0
  156. data/vendor/local/include/unicode/ufieldpositer.h +4 -2
  157. data/vendor/local/include/unicode/uformattable.h +9 -1
  158. data/vendor/local/include/unicode/ugender.h +2 -0
  159. data/vendor/local/include/unicode/uidna.h +2 -0
  160. data/vendor/local/include/unicode/uiter.h +2 -0
  161. data/vendor/local/include/unicode/uldnames.h +2 -0
  162. data/vendor/local/include/unicode/ulistformatter.h +2 -0
  163. data/vendor/local/include/unicode/uloc.h +9 -3
  164. data/vendor/local/include/unicode/ulocdata.h +20 -2
  165. data/vendor/local/include/unicode/umachine.h +25 -0
  166. data/vendor/local/include/unicode/umisc.h +2 -0
  167. data/vendor/local/include/unicode/umsg.h +2 -0
  168. data/vendor/local/include/unicode/unifilt.h +2 -0
  169. data/vendor/local/include/unicode/unifunct.h +2 -0
  170. data/vendor/local/include/unicode/unimatch.h +2 -0
  171. data/vendor/local/include/unicode/unirepl.h +2 -0
  172. data/vendor/local/include/unicode/uniset.h +3 -1
  173. data/vendor/local/include/unicode/unistr.h +12 -14
  174. data/vendor/local/include/unicode/unorm.h +6 -2
  175. data/vendor/local/include/unicode/unorm2.h +2 -0
  176. data/vendor/local/include/unicode/unum.h +28 -10
  177. data/vendor/local/include/unicode/unumsys.h +2 -0
  178. data/vendor/local/include/unicode/uobject.h +2 -0
  179. data/vendor/local/include/unicode/upluralrules.h +6 -2
  180. data/vendor/local/include/unicode/uregex.h +2 -0
  181. data/vendor/local/include/unicode/uregion.h +6 -2
  182. data/vendor/local/include/unicode/ureldatefmt.h +13 -7
  183. data/vendor/local/include/unicode/urename.h +16 -4
  184. data/vendor/local/include/unicode/urep.h +2 -0
  185. data/vendor/local/include/unicode/ures.h +7 -1
  186. data/vendor/local/include/unicode/uscript.h +41 -29
  187. data/vendor/local/include/unicode/usearch.h +10 -4
  188. data/vendor/local/include/unicode/uset.h +5 -1
  189. data/vendor/local/include/unicode/usetiter.h +2 -0
  190. data/vendor/local/include/unicode/ushape.h +2 -0
  191. data/vendor/local/include/unicode/uspoof.h +876 -363
  192. data/vendor/local/include/unicode/usprep.h +2 -0
  193. data/vendor/local/include/unicode/ustdio.h +2 -0
  194. data/vendor/local/include/unicode/ustream.h +2 -0
  195. data/vendor/local/include/unicode/ustring.h +2 -0
  196. data/vendor/local/include/unicode/ustringtrie.h +2 -0
  197. data/vendor/local/include/unicode/utext.h +2 -0
  198. data/vendor/local/include/unicode/utf.h +2 -0
  199. data/vendor/local/include/unicode/utf16.h +2 -0
  200. data/vendor/local/include/unicode/utf32.h +2 -0
  201. data/vendor/local/include/unicode/utf8.h +2 -0
  202. data/vendor/local/include/unicode/utf_old.h +2 -0
  203. data/vendor/local/include/unicode/utmscale.h +2 -0
  204. data/vendor/local/include/unicode/utrace.h +20 -0
  205. data/vendor/local/include/unicode/utrans.h +2 -0
  206. data/vendor/local/include/unicode/utypes.h +71 -97
  207. data/vendor/local/include/unicode/uvernum.h +13 -12
  208. data/vendor/local/include/unicode/uversion.h +2 -0
  209. data/vendor/local/include/unicode/vtzone.h +2 -0
  210. data/vendor/local/lib/icu/{57.1 → 58.2}/Makefile.inc +8 -9
  211. data/vendor/local/lib/icu/{57.1 → 58.2}/pkgdata.inc +2 -2
  212. data/vendor/local/lib/icu/Makefile.inc +8 -9
  213. data/vendor/local/lib/icu/pkgdata.inc +2 -2
  214. data/vendor/local/lib/icudt.dll +0 -0
  215. data/vendor/local/lib/{icudt57.dll → icudt58.dll} +0 -0
  216. data/vendor/local/lib/icuin.dll +0 -0
  217. data/vendor/local/lib/icuin58.dll +0 -0
  218. data/vendor/local/lib/icuio.dll +0 -0
  219. data/vendor/local/lib/{icuio57.dll → icuio58.dll} +0 -0
  220. data/vendor/local/lib/icutest.dll +0 -0
  221. data/vendor/local/lib/{icutest57.dll → icutest58.dll} +0 -0
  222. data/vendor/local/lib/icutu.dll +0 -0
  223. data/vendor/local/lib/icutu58.dll +0 -0
  224. data/vendor/local/lib/icuuc.dll +0 -0
  225. data/vendor/local/lib/icuuc58.dll +0 -0
  226. data/vendor/local/lib/libharfbuzz-icu.a +0 -0
  227. data/vendor/local/lib/libharfbuzz-icu.la +4 -4
  228. data/vendor/local/lib/libharfbuzz.dll.a +0 -0
  229. data/vendor/local/lib/libharfbuzz.la +3 -3
  230. data/vendor/local/lib/libicudt.dll.a +0 -0
  231. data/vendor/local/lib/libicuin.dll.a +0 -0
  232. data/vendor/local/lib/libicuio.dll.a +0 -0
  233. data/vendor/local/lib/libicutest.dll.a +0 -0
  234. data/vendor/local/lib/libicutu.dll.a +0 -0
  235. data/vendor/local/lib/libicuuc.dll.a +0 -0
  236. data/vendor/local/lib/libpango-1.0.dll.a +0 -0
  237. data/vendor/local/lib/libpangocairo-1.0.dll.a +0 -0
  238. data/vendor/local/lib/libpangoft2-1.0.dll.a +0 -0
  239. data/vendor/local/lib/libpangowin32-1.0.dll.a +0 -0
  240. data/vendor/local/lib/pkgconfig/harfbuzz-icu.pc +1 -1
  241. data/vendor/local/lib/pkgconfig/harfbuzz.pc +2 -2
  242. data/vendor/local/lib/pkgconfig/icu-i18n.pc +8 -6
  243. data/vendor/local/lib/pkgconfig/icu-io.pc +8 -6
  244. data/vendor/local/lib/pkgconfig/icu-uc.pc +8 -6
  245. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-version.html +3 -3
  246. data/vendor/local/share/gtk-doc/html/harfbuzz/pt02.html +1 -1
  247. data/vendor/local/share/icu/{57.1 → 58.2}/LICENSE +52 -60
  248. data/vendor/local/share/icu/{57.1 → 58.2}/config/mh-mingw64 +2 -0
  249. data/vendor/local/share/icu/{57.1 → 58.2}/install-sh +0 -0
  250. data/vendor/local/share/icu/{57.1 → 58.2}/mkinstalldirs +2 -0
  251. data/vendor/local/share/man/man1/derb.1 +4 -2
  252. data/vendor/local/share/man/man1/genbrk.1 +4 -2
  253. data/vendor/local/share/man/man1/gencfu.1 +3 -1
  254. data/vendor/local/share/man/man1/gencnval.1 +5 -3
  255. data/vendor/local/share/man/man1/gendict.1 +4 -2
  256. data/vendor/local/share/man/man1/genrb.1 +5 -3
  257. data/vendor/local/share/man/man1/icu-config.1 +4 -5
  258. data/vendor/local/share/man/man1/makeconv.1 +5 -3
  259. data/vendor/local/share/man/man1/pkgdata.1 +4 -2
  260. data/vendor/local/share/man/man1/uconv.1 +4 -2
  261. data/vendor/local/share/man/man8/genccode.8 +4 -2
  262. data/vendor/local/share/man/man8/gencmn.8 +5 -3
  263. data/vendor/local/share/man/man8/gensprep.8 +5 -3
  264. data/vendor/local/share/man/man8/icupkg.8 +3 -1
  265. metadata +22 -40
  266. data/vendor/local/include/layout/LEFontInstance.h +0 -524
  267. data/vendor/local/include/layout/LEGlyphFilter.h +0 -45
  268. data/vendor/local/include/layout/LEGlyphStorage.h +0 -546
  269. data/vendor/local/include/layout/LEInsertionList.h +0 -177
  270. data/vendor/local/include/layout/LELanguages.h +0 -109
  271. data/vendor/local/include/layout/LEScripts.h +0 -204
  272. data/vendor/local/include/layout/LESwaps.h +0 -100
  273. data/vendor/local/include/layout/LETableReference.h +0 -435
  274. data/vendor/local/include/layout/LETypes.h +0 -728
  275. data/vendor/local/include/layout/LayoutEngine.h +0 -518
  276. data/vendor/local/include/layout/ParagraphLayout.h +0 -747
  277. data/vendor/local/include/layout/RunArrays.h +0 -676
  278. data/vendor/local/include/layout/loengine.h +0 -225
  279. data/vendor/local/include/layout/playout.h +0 -466
  280. data/vendor/local/include/layout/plruns.h +0 -441
  281. data/vendor/local/lib/icuin57.dll +0 -0
  282. data/vendor/local/lib/icule.dll +0 -0
  283. data/vendor/local/lib/icule57.dll +0 -0
  284. data/vendor/local/lib/iculx.dll +0 -0
  285. data/vendor/local/lib/iculx57.dll +0 -0
  286. data/vendor/local/lib/icutu57.dll +0 -0
  287. data/vendor/local/lib/icuuc57.dll +0 -0
  288. data/vendor/local/lib/libicule.dll.a +0 -0
  289. data/vendor/local/lib/libiculx.dll.a +0 -0
  290. data/vendor/local/lib/pkgconfig/icu-le.pc +0 -38
  291. data/vendor/local/lib/pkgconfig/icu-lx.pc +0 -38
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  *****************************************************************************************
3
5
  * Copyright (C) 2016, International Business Machines
@@ -58,11 +60,13 @@ typedef enum UDateRelativeDateTimeFormatterStyle {
58
60
  */
59
61
  UDAT_STYLE_NARROW,
60
62
 
61
- /**
62
- * The number of styles.
63
- * @stable ICU 54
64
- */
65
- UDAT_STYLE_COUNT
63
+ #ifndef U_HIDE_DEPRECATED_API
64
+ /**
65
+ * One more than the highest normal UDateRelativeDateTimeFormatterStyle value.
66
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
67
+ */
68
+ UDAT_STYLE_COUNT
69
+ #endif // U_HIDE_DEPRECATED_API
66
70
  } UDateRelativeDateTimeFormatterStyle;
67
71
 
68
72
  #ifndef U_HIDE_DRAFT_API
@@ -162,11 +166,13 @@ typedef enum URelativeDateTimeUnit {
162
166
  * @draft ICU 57
163
167
  */
164
168
  UDAT_REL_UNIT_SATURDAY,
169
+ #ifndef U_HIDE_DEPRECATED_API
165
170
  /**
166
- * Count of URelativeDateTimeUnit values
167
- * @draft ICU 57
171
+ * One more than the highest normal URelativeDateTimeUnit value.
172
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
168
173
  */
169
174
  UDAT_REL_UNIT_COUNT
175
+ #endif // U_HIDE_DEPRECATED_API
170
176
  } URelativeDateTimeUnit;
171
177
  #endif /* U_HIDE_DRAFT_API */
172
178
 
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  *******************************************************************************
3
5
  * Copyright (C) 2002-2016, International Business Machines
@@ -474,6 +476,9 @@
474
476
  #define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
475
477
  #define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
476
478
  #define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
479
+ #define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close)
480
+ #define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
481
+ #define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
477
482
  #define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
478
483
  #define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
479
484
  #define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
@@ -1013,6 +1018,7 @@
1013
1018
  #define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
1014
1019
  #define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
1015
1020
  #define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
1021
+ #define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString)
1016
1022
  #define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
1017
1023
  #define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
1018
1024
  #define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close)
@@ -1355,6 +1361,7 @@
1355
1361
  #define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
1356
1362
  #define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
1357
1363
  #define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
1364
+ #define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache)
1358
1365
  #define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
1359
1366
  #define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
1360
1367
  #define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
@@ -1456,10 +1463,7 @@
1456
1463
  #define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
1457
1464
  #define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
1458
1465
  #define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
1459
- #define ures_getAllArrayItems U_ICU_ENTRY_POINT_RENAME(ures_getAllArrayItems)
1460
- #define ures_getAllArrayItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllArrayItemsWithFallback)
1461
- #define ures_getAllTableItems U_ICU_ENTRY_POINT_RENAME(ures_getAllTableItems)
1462
- #define ures_getAllTableItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllTableItemsWithFallback)
1466
+ #define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback)
1463
1467
  #define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
1464
1468
  #define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
1465
1469
  #define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
@@ -1607,13 +1611,20 @@
1607
1611
  #define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
1608
1612
  #define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
1609
1613
  #define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
1614
+ #define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2)
1615
+ #define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8)
1616
+ #define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString)
1610
1617
  #define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
1611
1618
  #define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
1612
1619
  #define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
1613
1620
  #define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
1621
+ #define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult)
1614
1622
  #define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
1615
1623
  #define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
1616
1624
  #define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
1625
+ #define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
1626
+ #define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
1627
+ #define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
1617
1628
  #define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
1618
1629
  #define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
1619
1630
  #define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
@@ -1625,6 +1636,7 @@
1625
1636
  #define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
1626
1637
  #define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics)
1627
1638
  #define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
1639
+ #define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult)
1628
1640
  #define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
1629
1641
  #define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
1630
1642
  #define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  ******************************************************************************
3
5
  * Copyright (C) 1997-2010, International Business Machines
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
5
  * Copyright (C) 1997-2016, International Business Machines
@@ -120,9 +122,13 @@ typedef enum {
120
122
  RES_INT_VECTOR=URES_INT_VECTOR,
121
123
  /** @deprecated ICU 2.6 Not used. */
122
124
  RES_RESERVED=15,
123
- #endif /* U_HIDE_DEPRECATED_API */
124
125
 
126
+ /**
127
+ * One more than the highest normal UResType value.
128
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
129
+ */
125
130
  URES_LIMIT = 16
131
+ #endif // U_HIDE_DEPRECATED_API
126
132
  } UResType;
127
133
 
128
134
  /*
@@ -1,6 +1,8 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
- * Copyright (C) 1997-2015, International Business Machines
5
+ * Copyright (C) 1997-2016, International Business Machines
4
6
  * Corporation and others. All Rights Reserved.
5
7
  **********************************************************************
6
8
  *
@@ -32,13 +34,13 @@
32
34
  * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
33
35
  * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
34
36
  *
35
- * Starting with ICU 3.6, constants for most ISO 15924 script codes
37
+ * In addition, constants for many ISO 15924 script codes
36
38
  * are included, for use with language tags, CLDR data, and similar.
37
39
  * Some of those codes are not used in the Unicode Character Database (UCD).
38
40
  * For example, there are no characters that have a UCD script property value of
39
41
  * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
40
42
  *
41
- * Private-use codes Qaaa..Qabx are not included.
43
+ * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
42
44
  *
43
45
  * Starting with ICU 55, script codes are only added when their scripts
44
46
  * have been or will certainly be encoded in Unicode,
@@ -424,24 +426,39 @@ typedef enum UScriptCode {
424
426
  /** @stable ICU 54 */
425
427
  USCRIPT_SIDDHAM = 166,/* Sidd */
426
428
 
427
- /**
428
- * One higher than the last script code constant.
429
- * This value increases as constants for script codes are added.
430
- *
431
- * There are constants for Unicode 7 script property values.
432
- * There are constants for ISO 15924 script codes assigned on or before 2013-10-12.
433
- * There are no constants for private use codes from Qaaa - Qabx
434
- * except as used in the UCD.
435
- *
436
- * @stable ICU 2.2
437
- */
438
- USCRIPT_CODE_LIMIT = 167
429
+ /** @stable ICU 58 */
430
+ USCRIPT_ADLAM = 167,/* Adlm */
431
+ /** @stable ICU 58 */
432
+ USCRIPT_BHAIKSUKI = 168,/* Bhks */
433
+ /** @stable ICU 58 */
434
+ USCRIPT_MARCHEN = 169,/* Marc */
435
+ /** @stable ICU 58 */
436
+ USCRIPT_NEWA = 170,/* Newa */
437
+ /** @stable ICU 58 */
438
+ USCRIPT_OSAGE = 171,/* Osge */
439
+
440
+ /** @stable ICU 58 */
441
+ USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
442
+ /** @stable ICU 58 */
443
+ USCRIPT_JAMO = 173,/* Jamo */
444
+ /** @stable ICU 58 */
445
+ USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
446
+
447
+ #ifndef U_HIDE_DEPRECATED_API
448
+ /**
449
+ * One more than the highest normal UScriptCode value.
450
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
451
+ *
452
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
453
+ */
454
+ USCRIPT_CODE_LIMIT = 175
455
+ #endif // U_HIDE_DEPRECATED_API
439
456
  } UScriptCode;
440
457
 
441
458
  /**
442
- * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
459
+ * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
443
460
  * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
444
- * Fills in USCRIPT_LATIN given "en" OR "en_US"
461
+ * Fills in USCRIPT_LATIN given "en" OR "en_US"
445
462
  * If the required capacity is greater than the capacity of the destination buffer,
446
463
  * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
447
464
  *
@@ -454,10 +471,10 @@ typedef enum UScriptCode {
454
471
  * @param fillIn the UScriptCode buffer to fill in the script code
455
472
  * @param capacity the capacity (size) of UScriptCode buffer passed in.
456
473
  * @param err the error status code.
457
- * @return The number of script codes filled in the buffer passed in
474
+ * @return The number of script codes filled in the buffer passed in
458
475
  * @stable ICU 2.4
459
476
  */
460
- U_STABLE int32_t U_EXPORT2
477
+ U_STABLE int32_t U_EXPORT2
461
478
  uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
462
479
 
463
480
  /**
@@ -470,7 +487,7 @@ uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capac
470
487
  * or NULL if scriptCode is invalid
471
488
  * @stable ICU 2.4
472
489
  */
473
- U_STABLE const char* U_EXPORT2
490
+ U_STABLE const char* U_EXPORT2
474
491
  uscript_getName(UScriptCode scriptCode);
475
492
 
476
493
  /**
@@ -482,18 +499,18 @@ uscript_getName(UScriptCode scriptCode);
482
499
  * @return short script name (4-letter code), or NULL if scriptCode is invalid
483
500
  * @stable ICU 2.4
484
501
  */
485
- U_STABLE const char* U_EXPORT2
502
+ U_STABLE const char* U_EXPORT2
486
503
  uscript_getShortName(UScriptCode scriptCode);
487
504
 
488
505
  /**
489
506
  * Gets the script code associated with the given codepoint.
490
- * Returns USCRIPT_MALAYALAM given 0x0D02
507
+ * Returns USCRIPT_MALAYALAM given 0x0D02
491
508
  * @param codepoint UChar32 codepoint
492
509
  * @param err the error status code.
493
- * @return The UScriptCode, or 0 if codepoint is invalid
510
+ * @return The UScriptCode, or 0 if codepoint is invalid
494
511
  * @stable ICU 2.4
495
512
  */
496
- U_STABLE UScriptCode U_EXPORT2
513
+ U_STABLE UScriptCode U_EXPORT2
497
514
  uscript_getScript(UChar32 codepoint, UErrorCode *err);
498
515
 
499
516
  /**
@@ -503,9 +520,6 @@ uscript_getScript(UChar32 codepoint, UErrorCode *err);
503
520
  *
504
521
  * Some characters are commonly used in multiple scripts.
505
522
  * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
506
- *
507
- * The Script_Extensions property is provisional. It may be modified or removed
508
- * in future versions of the Unicode Standard, and thus in ICU.
509
523
  * @param c code point
510
524
  * @param sc script code
511
525
  * @return TRUE if sc is in Script_Extensions(c)
@@ -532,8 +546,6 @@ uscript_hasScript(UChar32 c, UScriptCode sc);
532
546
  * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
533
547
  * (Usual ICU buffer handling behavior.)
534
548
  *
535
- * The Script_Extensions property is provisional. It may be modified or removed
536
- * in future versions of the Unicode Standard, and thus in ICU.
537
549
  * @param c code point
538
550
  * @param scripts output script code array
539
551
  * @param capacity capacity of the scripts array
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
5
  * Copyright (C) 2001-2011,2014 IBM and others. All rights reserved.
@@ -188,11 +190,13 @@ typedef enum {
188
190
  */
189
191
  USEARCH_ELEMENT_COMPARISON = 2,
190
192
 
193
+ #ifndef U_HIDE_DEPRECATED_API
191
194
  /**
192
- * Count of attribute types
193
- * @stable ICU 2.4
195
+ * One more than the highest normal USearchAttribute value.
196
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
194
197
  */
195
198
  USEARCH_ATTRIBUTE_COUNT = 3
199
+ #endif // U_HIDE_DEPRECATED_API
196
200
  } USearchAttribute;
197
201
 
198
202
  /**
@@ -262,11 +266,13 @@ typedef enum {
262
266
  */
263
267
  USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD,
264
268
 
269
+ #ifndef U_HIDE_DEPRECATED_API
265
270
  /**
266
- * Count of attribute values
267
- * @stable ICU 2.4
271
+ * One more than the highest normal USearchAttributeValue value.
272
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
268
273
  */
269
274
  USEARCH_ATTRIBUTE_VALUE_COUNT
275
+ #endif // U_HIDE_DEPRECATED_API
270
276
  } USearchAttributeValue;
271
277
 
272
278
  /* open and close ------------------------------------------------------ */
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  *******************************************************************************
3
5
  *
@@ -196,11 +198,13 @@ typedef enum USetSpanCondition {
196
198
  * @stable ICU 3.8
197
199
  */
198
200
  USET_SPAN_SIMPLE = 2,
201
+ #ifndef U_HIDE_DEPRECATED_API
199
202
  /**
200
203
  * One more than the last span condition.
201
- * @stable ICU 3.8
204
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
202
205
  */
203
206
  USET_SPAN_CONDITION_COUNT
207
+ #endif // U_HIDE_DEPRECATED_API
204
208
  } USetSpanCondition;
205
209
 
206
210
  enum {
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
5
  * Copyright (c) 2002-2014, International Business Machines
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  ******************************************************************************
3
5
  *
@@ -1,6 +1,8 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  ***************************************************************************
3
- * Copyright (C) 2008-2015, International Business Machines Corporation
5
+ * Copyright (C) 2008-2016, International Business Machines Corporation
4
6
  * and others. All Rights Reserved.
5
7
  ***************************************************************************
6
8
  * file name: uspoof.h
@@ -35,123 +37,350 @@
35
37
  * \file
36
38
  * \brief Unicode Security and Spoofing Detection, C API.
37
39
  *
38
- * These functions are intended to check strings, typically
39
- * identifiers of some type, such as URLs, for the presence of
40
- * characters that are likely to be visually confusing -
41
- * for cases where the displayed form of an identifier may
42
- * not be what it appears to be.
43
- *
44
- * Unicode Technical Report #36, http://unicode.org/reports/tr36, and
45
- * Unicode Technical Standard #39, http://unicode.org/reports/tr39
46
- * "Unicode security considerations", give more background on
47
- * security an spoofing issues with Unicode identifiers.
48
- * The tests and checks provided by this module implement the recommendations
49
- * from those Unicode documents.
50
- *
51
- * The tests available on identifiers fall into two general categories:
52
- * -# Single identifier tests. Check whether an identifier is
53
- * potentially confusable with any other string, or is suspicious
54
- * for other reasons.
55
- * -# Two identifier tests. Check whether two specific identifiers are confusable.
56
- * This does not consider whether either of strings is potentially
57
- * confusable with any string other than the exact one specified.
58
- *
59
- * The steps to perform confusability testing are
60
- * -# Open a USpoofChecker.
61
- * -# Configure the USPoofChecker for the desired set of tests. The tests that will
62
- * be performed are specified by a set of USpoofChecks flags.
63
- * -# Perform the checks using the pre-configured USpoofChecker. The results indicate
64
- * which (if any) of the selected tests have identified possible problems with the identifier.
65
- * Results are reported as a set of USpoofChecks flags; this mirrors the form in which
66
- * the set of tests to perform was originally specified to the USpoofChecker.
67
- *
68
- * A USpoofChecker may be used repeatedly to perform checks on any number of identifiers.
69
- *
70
- * Thread Safety: The test functions for checking a single identifier, or for testing
71
- * whether two identifiers are possible confusable, are thread safe.
72
- * They may called concurrently, from multiple threads, using the same USpoofChecker instance.
73
- *
74
- * More generally, the standard ICU thread safety rules apply: functions that take a
75
- * const USpoofChecker parameter are thread safe. Those that take a non-const
76
- * USpoofChecier are not thread safe.
77
- *
78
- *
79
- * Descriptions of the available checks.
80
- *
81
- * When testing whether pairs of identifiers are confusable, with the uspoof_areConfusable()
82
- * family of functions, the relevant tests are
83
- *
84
- * -# USPOOF_SINGLE_SCRIPT_CONFUSABLE: All of the characters from the two identifiers are
85
- * from a single script, and the two identifiers are visually confusable.
86
- * -# USPOOF_MIXED_SCRIPT_CONFUSABLE: At least one of the identifiers contains characters
87
- * from more than one script, and the two identifiers are visually confusable.
88
- * -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: Each of the two identifiers is of a single script, but
89
- * the two identifiers are from different scripts, and they are visually confusable.
90
- *
91
- * The safest approach is to enable all three of these checks as a group.
92
- *
93
- * USPOOF_ANY_CASE is a modifier for the above tests. If the identifiers being checked can
94
- * be of mixed case and are used in a case-sensitive manner, this option should be specified.
95
- *
96
- * If the identifiers being checked are used in a case-insensitive manner, and if they are
97
- * displayed to users in lower-case form only, the USPOOF_ANY_CASE option should not be
98
- * specified. Confusabality issues involving upper case letters will not be reported.
99
- *
100
- * When performing tests on a single identifier, with the uspoof_check() family of functions,
101
- * the relevant tests are:
102
- *
103
- * -# USPOOF_MIXED_SCRIPT_CONFUSABLE: the identifier contains characters from multiple
104
- * scripts, and there exists an identifier of a single script that is visually confusable.
105
- * -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: the identifier consists of characters from a single
106
- * script, and there exists a visually confusable identifier.
107
- * The visually confusable identifier also consists of characters from a single script.
108
- * but not the same script as the identifier being checked.
109
- * -# USPOOF_ANY_CASE: modifies the mixed script and whole script confusables tests. If
110
- * specified, the checks will consider confusable characters of any case. If this flag is not
111
- * set, the test is performed assuming case folded identifiers.
112
- * -# USPOOF_SINGLE_SCRIPT: check that the identifier contains only characters from a
113
- * single script. (Characters from the 'common' and 'inherited' scripts are ignored.)
114
- * This is not a test for confusable identifiers
115
- * -# USPOOF_INVISIBLE: check an identifier for the presence of invisible characters,
116
- * such as zero-width spaces, or character sequences that are
117
- * likely not to display, such as multiple occurrences of the same
118
- * non-spacing mark. This check does not test the input string as a whole
119
- * for conformance to any particular syntax for identifiers.
120
- * -# USPOOF_CHAR_LIMIT: check that an identifier contains only characters from a specified set
121
- * of acceptable characters. See uspoof_setAllowedChars() and
122
- * uspoof_setAllowedLocales().
123
- *
124
- * Note on Scripts:
125
- * Characters from the Unicode Scripts "Common" and "Inherited" are ignored when considering
126
- * the script of an identifier. Common characters include digits and symbols that
127
- * are normally used with text from more than one script.
128
- *
129
- * Identifier Skeletons: A skeleton is a transformation of an identifier, such that
130
- * all identifiers that are confusable with each other have the same skeleton.
131
- * Using skeletons, it is possible to build a dictionary data structure for
132
- * a set of identifiers, and then quickly test whether a new identifier is
133
- * confusable with an identifier already in the set. The uspoof_getSkeleton()
134
- * family of functions will produce the skeleton from an identifier.
135
- *
136
- * Note that skeletons are not guaranteed to be stable between versions
137
- * of Unicode or ICU, so an applications should not rely on creating a permanent,
138
- * or difficult to update, database of skeletons. Instabilities result from
139
- * identifying new pairs or sequences of characters that are visually
140
- * confusable, and thus must be mapped to the same skeleton character(s).
141
- *
142
- * Skeletons are computed using the algorithm and data describe in Unicode UAX 39.
143
- * The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
144
- * were still problematic, and discouraged from use in [Uniocde] 7.0.
145
- * They were thus removed from version 8.0"
146
- *
147
- * In light of this, the default mapping data included with ICU 55 uses the
148
- * Unicode 7 MA (Multi script Any case) table data for the other type options
149
- * (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
40
+ * <p>
41
+ * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
42
+ * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
43
+ *
44
+ * <ol>
45
+ * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
46
+ * &quot;&Eta;arvest&quot;, where the second string starts with the Greek capital letter Eta.</li>
47
+ * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
48
+ * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
49
+ * </ol>
50
+ *
51
+ * <p>
52
+ * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
53
+ * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
54
+ * content filters.
55
+ *
56
+ * <p>
57
+ * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
58
+ *
59
+ * <h2>Confusables</h2>
60
+ *
61
+ * <p>
62
+ * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
63
+ *
64
+ * \code{.c}
65
+ * UErrorCode status = U_ZERO_ERROR;
66
+ * UChar* str1 = (UChar*) u"Harvest";
67
+ * UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA
68
+ *
69
+ * USpoofChecker* sc = uspoof_open(&status);
70
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
71
+ *
72
+ * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
73
+ * UBool result = bitmask != 0;
74
+ * // areConfusable: 1 (status: U_ZERO_ERROR)
75
+ * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
76
+ * uspoof_close(sc);
77
+ * \endcode
78
+ *
79
+ * <p>
80
+ * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
81
+ * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
82
+ * confusability test; and the following line extracts the result out of the return value. For best performance,
83
+ * the instance should be created once (e.g., upon application startup), and the efficient
84
+ * {@link uspoof_areConfusable} method can be used at runtime.
85
+ *
86
+ * <p>
87
+ * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call
88
+ * {@link uspoof_close} when the object goes out of scope:
89
+ *
90
+ * \code{.cpp}
91
+ * UErrorCode status = U_ZERO_ERROR;
92
+ * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
93
+ * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
94
+ * // ...
95
+ * \endcode
96
+ *
97
+ * <p>
98
+ * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
99
+ * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
100
+ * the following snippet is equivalent to the example above:
101
+ *
102
+ * \code{.c}
103
+ * UErrorCode status = U_ZERO_ERROR;
104
+ * UChar* str1 = (UChar*) u"Harvest";
105
+ * UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA
106
+ *
107
+ * USpoofChecker* sc = uspoof_open(&status);
108
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
109
+ *
110
+ * // Get skeleton 1
111
+ * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
112
+ * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
113
+ * status = U_ZERO_ERROR;
114
+ * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
115
+ *
116
+ * // Get skeleton 2
117
+ * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
118
+ * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
119
+ * status = U_ZERO_ERROR;
120
+ * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
121
+ *
122
+ * // Are the skeletons the same?
123
+ * UBool result = u_strcmp(skel1, skel2) == 0;
124
+ * // areConfusable: 1 (status: U_ZERO_ERROR)
125
+ * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
126
+ * uspoof_close(sc);
127
+ * free(skel1);
128
+ * free(skel2);
129
+ * \endcode
130
+ *
131
+ * <p>
132
+ * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
133
+ * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
134
+ *
135
+ * \code{.c}
136
+ * UErrorCode status = U_ZERO_ERROR;
137
+ * #define DICTIONARY_LENGTH 2
138
+ * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
139
+ * UChar* skeletons[DICTIONARY_LENGTH];
140
+ * UChar* str = (UChar*) u"1orern";
141
+ *
142
+ * // Setup:
143
+ * USpoofChecker* sc = uspoof_open(&status);
144
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
145
+ * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
146
+ * UChar* word = dictionary[i];
147
+ * int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
148
+ * skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
149
+ * status = U_ZERO_ERROR;
150
+ * uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
151
+ * }
152
+ *
153
+ * // Live Check:
154
+ * {
155
+ * int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
156
+ * UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
157
+ * status = U_ZERO_ERROR;
158
+ * uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
159
+ * UBool result = FALSE;
160
+ * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
161
+ * result = u_strcmp(skel, skeletons[i]) == 0;
162
+ * if (result == TRUE) { break; }
163
+ * }
164
+ * // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
165
+ * printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
166
+ * free(skel);
167
+ * }
168
+ *
169
+ * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
170
+ * free(skeletons[i]);
171
+ * }
172
+ * uspoof_close(sc);
173
+ * \endcode
174
+ *
175
+ * <p>
176
+ * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
177
+ * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
178
+ * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
179
+ *
180
+ * <h2>Spoof Detection</h2>
181
+ *
182
+ * <p>
183
+ * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
184
+ * string:
185
+ *
186
+ * \code{.c}
187
+ * UErrorCode status = U_ZERO_ERROR;
188
+ * UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A
189
+ *
190
+ * // Get the default set of allowable characters:
191
+ * USet* allowed = uset_openEmpty();
192
+ * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
193
+ * uset_addAll(allowed, uspoof_getInclusionSet(&status));
194
+ *
195
+ * USpoofChecker* sc = uspoof_open(&status);
196
+ * uspoof_setAllowedChars(sc, allowed, &status);
197
+ * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
198
+ *
199
+ * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
200
+ * UBool result = bitmask != 0;
201
+ * // fails checks: 1 (status: U_ZERO_ERROR)
202
+ * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
203
+ * uspoof_close(sc);
204
+ * uset_close(allowed);
205
+ * \endcode
206
+ *
207
+ * <p>
208
+ * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
209
+ * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
210
+ * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
211
+ *
212
+ * <p>
213
+ * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
214
+ * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
215
+ *
216
+ * <p>
217
+ * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
218
+ * is available in the returned bitmask. For complete information, use the {@link uspoof_check2} class of functions
219
+ * with a {@link USpoofCheckResult} parameter:
220
+ *
221
+ * \code{.c}
222
+ * UErrorCode status = U_ZERO_ERROR;
223
+ * UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A
224
+ *
225
+ * // Get the default set of allowable characters:
226
+ * USet* allowed = uset_openEmpty();
227
+ * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
228
+ * uset_addAll(allowed, uspoof_getInclusionSet(&status));
229
+ *
230
+ * USpoofChecker* sc = uspoof_open(&status);
231
+ * uspoof_setAllowedChars(sc, allowed, &status);
232
+ * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
233
+ *
234
+ * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
235
+ * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
236
+ *
237
+ * int32_t failures1 = bitmask;
238
+ * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
239
+ * assert(failures1 == failures2);
240
+ * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
241
+ * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
242
+ *
243
+ * // Cleanup:
244
+ * uspoof_close(sc);
245
+ * uset_close(allowed);
246
+ * uspoof_closeCheckResult(checkResult);
247
+ * \endcode
248
+ *
249
+ * C++ users can take advantage of a few syntactical conveniences. The following snippet is functionally
250
+ * equivalent to the one above:
251
+ *
252
+ * \code{.cpp}
253
+ * UErrorCode status = U_ZERO_ERROR;
254
+ * UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A
255
+ *
256
+ * // Get the default set of allowable characters:
257
+ * UnicodeSet allowed;
258
+ * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
259
+ * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
260
+ *
261
+ * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
262
+ * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
263
+ * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
264
+ *
265
+ * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
266
+ * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
267
+ *
268
+ * int32_t failures1 = bitmask;
269
+ * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
270
+ * assert(failures1 == failures2);
271
+ * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
272
+ * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
273
+ *
274
+ * // Explicit cleanup not necessary.
275
+ * \endcode
276
+ *
277
+ * <p>
278
+ * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
279
+ * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
280
+ *
281
+ * <ul>
282
+ * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
283
+ * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
284
+ * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
285
+ * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
286
+ * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
287
+ * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
288
+ * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
289
+ * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
290
+ * </ul>
291
+ *
292
+ * <p>
293
+ * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
294
+ * INVISIBLE and MIXED_NUMBERS conditions, you could do:
295
+ *
296
+ * \code{.c}
297
+ * UErrorCode status = U_ZERO_ERROR;
298
+ * UChar* str = (UChar*) u"8\u09EA"; // 8 mixed with U+09EA BENGALI DIGIT FOUR
299
+ *
300
+ * USpoofChecker* sc = uspoof_open(&status);
301
+ * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
302
+ *
303
+ * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
304
+ * UBool result = bitmask != 0;
305
+ * // fails checks: 1 (status: U_ZERO_ERROR)
306
+ * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
307
+ * uspoof_close(sc);
308
+ * \endcode
309
+ *
310
+ * <p>
311
+ * Here is an example in C++ showing how to compute the restriction level of a string:
312
+ *
313
+ * \code{.cpp}
314
+ * UErrorCode status = U_ZERO_ERROR;
315
+ * UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A
316
+ *
317
+ * // Get the default set of allowable characters:
318
+ * UnicodeSet allowed;
319
+ * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
320
+ * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
321
+ *
322
+ * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
323
+ * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
324
+ * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
325
+ * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
326
+ *
327
+ * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
328
+ * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
329
+ *
330
+ * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
331
+ * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
332
+ * assert((restrictionLevel & bitmask) == restrictionLevel);
333
+ * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
334
+ * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
335
+ * \endcode
336
+ *
337
+ * <p>
338
+ * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE. Since
339
+ * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
340
+ *
341
+ * <p>
342
+ * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
343
+ * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
344
+ * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
345
+ * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
346
+ * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
347
+ * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
348
+ * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
349
+ * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
350
+ * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
351
+ * scripts.
352
+ *
353
+ * <h2>Additional Information</h2>
354
+ *
355
+ * <p>
356
+ * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
357
+ *
358
+ * <p>
359
+ * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
360
+ * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
361
+ * using the same USpoofChecker instance.
362
+ *
363
+ * <p>
364
+ * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
365
+ * thread safe. Those that take a non-const USpoofChecker are not thread safe.
366
+ *
367
+ * @stable ICU 4.6
150
368
  */
151
369
 
152
370
  struct USpoofChecker;
153
371
  typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */
154
372
 
373
+ #ifndef U_HIDE_DRAFT_API
374
+ /**
375
+ * @see uspoof_openCheckResult
376
+ */
377
+ struct USpoofCheckResult;
378
+ /**
379
+ * @see uspoof_openCheckResult
380
+ */
381
+ typedef struct USpoofCheckResult USpoofCheckResult;
382
+ #endif /* U_HIDE_DRAFT_API */
383
+
155
384
  /**
156
385
  * Enum for the kinds of checks that USpoofChecker can perform.
157
386
  * These enum values are used both to select the set of checks that
@@ -160,45 +389,61 @@ typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker
160
389
  * @stable ICU 4.2
161
390
  */
162
391
  typedef enum USpoofChecks {
163
- /** Single script confusable test.
164
- * When testing whether two identifiers are confusable, report that they are if
165
- * both are from the same script and they are visually confusable.
166
- * Note: this test is not applicable to a check of a single identifier.
167
- */
392
+ /**
393
+ * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
394
+ * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
395
+ * 4.
396
+ *
397
+ * @see uspoof_areConfusable
398
+ * @stable ICU 4.2
399
+ */
168
400
  USPOOF_SINGLE_SCRIPT_CONFUSABLE = 1,
169
401
 
170
- /** Mixed script confusable test.
171
- * When checking a single identifier, report a problem if
172
- * the identifier contains multiple scripts, and
173
- * is confusable with some other identifier in a single script
174
- * When testing whether two identifiers are confusable, report that they are if
175
- * the two IDs are visually confusable,
176
- * and at least one contains characters from more than one script.
402
+ /**
403
+ * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
404
+ * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
405
+ * 39 section 4.
406
+ *
407
+ * @see uspoof_areConfusable
408
+ * @stable ICU 4.2
177
409
  */
178
410
  USPOOF_MIXED_SCRIPT_CONFUSABLE = 2,
179
411
 
180
- /** Whole script confusable test.
181
- * When checking a single identifier, report a problem if
182
- * The identifier is of a single script, and
183
- * there exists a confusable identifier in another script.
184
- * When testing whether two identifiers are confusable, report that they are if
185
- * each is of a single script,
186
- * the scripts of the two identifiers are different, and
187
- * the identifiers are visually confusable.
412
+ /**
413
+ * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
414
+ * that the two strings are visually confusable and that they are not from the same script but both of them are
415
+ * single-script strings, according to UTS 39 section 4.
416
+ *
417
+ * @see uspoof_areConfusable
418
+ * @stable ICU 4.2
188
419
  */
189
420
  USPOOF_WHOLE_SCRIPT_CONFUSABLE = 4,
190
-
191
- /** Any Case Modifier for confusable identifier tests.
192
- If specified, consider all characters, of any case, when looking for confusables.
193
- If USPOOF_ANY_CASE is not specified, identifiers being checked are assumed to have been
194
- case folded. Upper case confusable characters will not be checked.
195
- Selects between Lower Case Confusable and
196
- Any Case Confusable. */
421
+
422
+ #ifndef U_HIDE_DRAFT_API
423
+ /**
424
+ * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables. You may set
425
+ * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
426
+ * make {@link uspoof_areConfusable} return only those types of confusables.
427
+ *
428
+ * @see uspoof_areConfusable
429
+ * @see uspoof_getSkeleton
430
+ * @draft ICU 58
431
+ */
432
+ USPOOF_CONFUSABLE = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
433
+ #endif /* U_HIDE_DRAFT_API */
434
+
435
+ #ifndef U_HIDE_DEPRECATED_API
436
+ /**
437
+ * This flag is deprecated and no longer affects the behavior of SpoofChecker.
438
+ *
439
+ * @deprecated ICU 58 Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
440
+ */
197
441
  USPOOF_ANY_CASE = 8,
442
+ #endif /* U_HIDE_DEPRECATED_API */
198
443
 
199
444
  /**
200
445
  * Check that an identifier is no looser than the specified RestrictionLevel.
201
- * The default if uspoof_setRestrctionLevel() is not called is HIGHLY_RESTRICTIVE.
446
+ * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
202
447
  *
203
448
  * If USPOOF_AUX_INFO is enabled the actual restriction level of the
204
449
  * identifier being tested will also be returned by uspoof_check().
@@ -211,7 +456,7 @@ typedef enum USpoofChecks {
211
456
  */
212
457
  USPOOF_RESTRICTION_LEVEL = 16,
213
458
 
214
- #ifndef U_HIDE_DEPRECATED_API
459
+ #ifndef U_HIDE_DEPRECATED_API
215
460
  /** Check that an identifier contains only characters from a
216
461
  * single script (plus chars from the common and inherited scripts.)
217
462
  * Applies to checks of a single identifier check only.
@@ -219,7 +464,7 @@ typedef enum USpoofChecks {
219
464
  */
220
465
  USPOOF_SINGLE_SCRIPT = USPOOF_RESTRICTION_LEVEL,
221
466
  #endif /* U_HIDE_DEPRECATED_API */
222
-
467
+
223
468
  /** Check an identifier for the presence of invisible characters,
224
469
  * such as zero-width spaces, or character sequences that are
225
470
  * likely not to display, such as multiple occurrences of the same
@@ -229,97 +474,119 @@ typedef enum USpoofChecks {
229
474
  USPOOF_INVISIBLE = 32,
230
475
 
231
476
  /** Check that an identifier contains only characters from a specified set
232
- * of acceptable characters. See uspoof_setAllowedChars() and
233
- * uspoof_setAllowedLocales().
477
+ * of acceptable characters. See {@link uspoof_setAllowedChars} and
478
+ * {@link uspoof_setAllowedLocales}. Note that a string that fails this check
479
+ * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
234
480
  */
235
481
  USPOOF_CHAR_LIMIT = 64,
236
482
 
237
483
  /**
238
- * Check that an identifier does not include decimal digits from
239
- * more than one numbering system.
240
- *
484
+ * Check that an identifier does not mix numbers from different numbering systems.
485
+ * For more information, see UTS 39 section 5.3.
486
+ *
241
487
  * @stable ICU 51
242
488
  */
243
489
  USPOOF_MIXED_NUMBERS = 128,
244
490
 
245
491
  /**
246
492
  * Enable all spoof checks.
247
- *
493
+ *
248
494
  * @stable ICU 4.6
249
495
  */
250
496
  USPOOF_ALL_CHECKS = 0xFFFF,
251
497
 
252
498
  /**
253
499
  * Enable the return of auxiliary (non-error) information in the
254
- * upper bits of the check results value.
500
+ * upper bits of the check results value.
255
501
  *
256
- * If this "check" is not enabled, the results of uspoof_check() will be zero when an
257
- * identifier passes all of the enabled checks.
502
+ * If this "check" is not enabled, the results of {@link uspoof_check} will be
503
+ * zero when an identifier passes all of the enabled checks.
258
504
  *
259
- * If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will be zero
260
- * when an identifier passes all checks.
505
+ * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
506
+ * be zero when an identifier passes all checks.
261
507
  *
262
508
  * @stable ICU 51
263
509
  */
264
510
  USPOOF_AUX_INFO = 0x40000000
265
511
 
266
512
  } USpoofChecks;
267
-
268
-
513
+
514
+
269
515
  /**
270
- * Constants from UAX #39 for use in setRestrictionLevel(), and
516
+ * Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and
271
517
  * for returned identifier restriction levels in check results.
518
+ *
272
519
  * @stable ICU 51
520
+ *
521
+ * @see uspoof_setRestrictionLevel
522
+ * @see uspoof_check
273
523
  */
274
524
  typedef enum URestrictionLevel {
275
525
  /**
276
- * Only ASCII characters: U+0000..U+007F
277
- *
526
+ * All characters in the string are in the identifier profile and all characters in the string are in the
527
+ * ASCII range.
528
+ *
278
529
  * @stable ICU 51
279
530
  */
280
531
  USPOOF_ASCII = 0x10000000,
281
532
  /**
282
- * All characters in each identifier must be from a single script.
283
- *
284
- * @stable ICU 53
285
- */
533
+ * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
534
+ * the string is single-script, according to the definition in UTS 39 section 5.1.
535
+ *
536
+ * @stable ICU 53
537
+ */
286
538
  USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
287
539
  /**
288
- * All characters in each identifier must be from a single script, or from the combinations: Latin + Han +
289
- * Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the
290
- * vast majority of Latin-script users; also that TR36 has ASCII instead of Latin.
291
- *
540
+ * The string classifies as Single Script, or all characters in the string are in the identifier profile and
541
+ * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
542
+ * section 5.1:
543
+ * <ul>
544
+ * <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
545
+ * <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
546
+ * <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
547
+ * </ul>
548
+ * This is the default restriction in ICU.
549
+ *
292
550
  * @stable ICU 51
293
551
  */
294
552
  USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
295
553
  /**
296
- * Allow Latin with other scripts except Cyrillic, Greek, Cherokee Otherwise, the same as Highly Restrictive
297
- *
554
+ * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
555
+ * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
556
+ * Greek, and Cherokee.
557
+ *
298
558
  * @stable ICU 51
299
559
  */
300
560
  USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
301
561
  /**
302
- * Allow arbitrary mixtures of scripts. Otherwise, the same as Moderately Restrictive.
303
- *
562
+ * All characters in the string are in the identifier profile. Allow arbitrary mixtures of scripts.
563
+ *
304
564
  * @stable ICU 51
305
565
  */
306
566
  USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
307
567
  /**
308
568
  * Any valid identifiers, including characters outside of the Identifier Profile.
309
- *
569
+ *
310
570
  * @stable ICU 51
311
571
  */
312
572
  USPOOF_UNRESTRICTIVE = 0x60000000,
313
573
  /**
314
- * Mask for selecting the Restriction Level bits from the return value of uspoof_check().
315
- *
316
- * @stable ICU 53
317
- */
318
- USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000
574
+ * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
575
+ *
576
+ * @stable ICU 53
577
+ */
578
+ USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
579
+ #ifndef U_HIDE_INTERNAL_API
580
+ /**
581
+ * An undefined restriction level.
582
+ * @internal
583
+ */
584
+ USPOOF_UNDEFINED_RESTRICTIVE = -1
585
+ #endif /* U_HIDE_INTERNAL_API */
319
586
  } URestrictionLevel;
320
587
 
321
588
  /**
322
- * Create a Unicode Spoof Checker, configured to perform all
589
+ * Create a Unicode Spoof Checker, configured to perform all
323
590
  * checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
324
591
  * Note that additional checks may be added in the future,
325
592
  * resulting in the changes to the default checking behavior.
@@ -359,10 +626,10 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
359
626
 
360
627
  /**
361
628
  * Open a Spoof Checker from the source form of the spoof data.
362
- * The two inputs correspond to the Unicode data files confusables.txt
363
- * and confusablesWholeScript.txt as described in Unicode UAX #39.
364
- * The syntax of the source data is as described in UAX #39 for
365
- * these files, and the content of these files is acceptable input.
629
+ * The input corresponds to the Unicode data file confusables.txt
630
+ * as described in Unicode UAX #39. The syntax of the source data
631
+ * is as described in UAX #39 for this file, and the content of
632
+ * this file is acceptable input.
366
633
  *
367
634
  * The character encoding of the (char *) input text is UTF-8.
368
635
  *
@@ -371,10 +638,9 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
371
638
  * @param confusablesLen The length of the confusables text, or -1 if the
372
639
  * input string is zero terminated.
373
640
  * @param confusablesWholeScript
374
- * a pointer to the whole script confusables definitions,
375
- * as found in the file confusablesWholeScript.txt from unicode.org.
376
- * @param confusablesWholeScriptLen The length of the whole script confusables text, or
377
- * -1 if the input string is zero terminated.
641
+ * Deprecated in ICU 58. No longer used.
642
+ * @param confusablesWholeScriptLen
643
+ * Deprecated in ICU 58. No longer used.
378
644
  * @param errType In the event of an error in the input, indicates
379
645
  * which of the input files contains the error.
380
646
  * The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
@@ -435,8 +701,33 @@ uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
435
701
 
436
702
 
437
703
  /**
438
- * Specify the set of checks that will be performed by the check
439
- * functions of this Spoof Checker.
704
+ * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
705
+ * overwrites any checks that may have already been enabled. By default, all checks are enabled.
706
+ *
707
+ * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
708
+ * example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and
709
+ * also strings that contain digits from mixed numbering systems:
710
+ *
711
+ * <pre>
712
+ * {@code
713
+ * uspoof_setChecks(USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS);
714
+ * }
715
+ * </pre>
716
+ *
717
+ * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
718
+ * ALL_CHECKS. For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
719
+ * it is good practice to disable the CONFUSABLE check:
720
+ *
721
+ * <pre>
722
+ * {@code
723
+ * uspoof_setChecks(USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE);
724
+ * }
725
+ * </pre>
726
+ *
727
+ * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
728
+ * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
729
+ * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
730
+ * methods.
440
731
  *
441
732
  * @param sc The USpoofChecker
442
733
  * @param checks The set of checks that this spoof checker will perform.
@@ -451,7 +742,7 @@ uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
451
742
 
452
743
  /**
453
744
  * Get the set of checks that this Spoof Checker has been configured to perform.
454
- *
745
+ *
455
746
  * @param sc The USpoofChecker
456
747
  * @param status The error code, set if this function encounters a problem.
457
748
  * @return The set of checks that this spoof checker will perform.
@@ -464,19 +755,22 @@ U_STABLE int32_t U_EXPORT2
464
755
  uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
465
756
 
466
757
  /**
467
- * Set the loosest restriction level allowed. The default if this function
468
- * is not called is HIGHLY_RESTRICTIVE.
469
- * Calling this function also enables the RESTRICTION_LEVEL check.
470
- * @param restrictionLevel The loosest restriction level allowed.
471
- * @see URestrictionLevel
472
- * @stable ICU 51
473
- */
758
+ * Set the loosest restriction level allowed for strings. The default if this is not called is
759
+ * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
760
+ * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. To customize which checks are
761
+ * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
762
+ *
763
+ * @param sc The USpoofChecker
764
+ * @param restrictionLevel The loosest restriction level allowed.
765
+ * @see URestrictionLevel
766
+ * @stable ICU 51
767
+ */
474
768
  U_STABLE void U_EXPORT2
475
769
  uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
476
770
 
477
771
 
478
772
  /**
479
- * Get the Restriction Level that will be tested if the checks include RESTRICTION_LEVEL.
773
+ * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
480
774
  *
481
775
  * @return The restriction level
482
776
  * @see URestrictionLevel
@@ -486,7 +780,7 @@ U_STABLE URestrictionLevel U_EXPORT2
486
780
  uspoof_getRestrictionLevel(const USpoofChecker *sc);
487
781
 
488
782
  /**
489
- * Limit characters that are acceptable in identifiers being checked to those
783
+ * Limit characters that are acceptable in identifiers being checked to those
490
784
  * normally used with the languages associated with the specified locales.
491
785
  * Any previously specified list of locales is replaced by the new settings.
492
786
  *
@@ -499,7 +793,7 @@ uspoof_getRestrictionLevel(const USpoofChecker *sc);
499
793
  * Supplying an empty string removes all restrictions;
500
794
  * characters from any script will be allowed.
501
795
  *
502
- * The USPOOF_CHAR_LIMIT test is automatically enabled for this
796
+ * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
503
797
  * USpoofChecker when calling this function with a non-empty list
504
798
  * of locales.
505
799
  *
@@ -511,9 +805,9 @@ uspoof_getRestrictionLevel(const USpoofChecker *sc);
511
805
  * can be made to the result of uspoof_setAllowedLocales() by
512
806
  * fetching the resulting set with uspoof_getAllowedChars(),
513
807
  * manipulating it with the Unicode Set API, then resetting the
514
- * spoof detectors limits with uspoof_setAllowedChars()
808
+ * spoof detectors limits with uspoof_setAllowedChars().
515
809
  *
516
- * @param sc The USpoofChecker
810
+ * @param sc The USpoofChecker
517
811
  * @param localesList A list of locales, from which the language
518
812
  * and associated script are extracted. The locales
519
813
  * are comma-separated if there is more than one.
@@ -537,18 +831,18 @@ uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode
537
831
  *
538
832
  * uspoof_setAllowedChars() will reset the list of allowed to be empty.
539
833
  *
540
- * The format of the returned list is the same as that supplied to
541
- * uspoof_setAllowedLocales(), but returned list may not be identical
542
- * to the originally specified string; the string may be reformatted,
834
+ * The format of the returned list is the same as that supplied to
835
+ * uspoof_setAllowedLocales(), but returned list may not be identical
836
+ * to the originally specified string; the string may be reformatted,
543
837
  * and information other than languages from
544
838
  * the originally specified locales may be omitted.
545
839
  *
546
- * @param sc The USpoofChecker
840
+ * @param sc The USpoofChecker
547
841
  * @param status The error code, set if this function encounters a problem.
548
842
  * @return A string containing a list of locales corresponding
549
843
  * to the acceptable scripts, formatted like an
550
844
  * HTTP Accept Language value.
551
- *
845
+ *
552
846
  * @stable ICU 4.2
553
847
  */
554
848
  U_STABLE const char * U_EXPORT2
@@ -564,7 +858,7 @@ uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
564
858
  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
565
859
  * USpoofChecker by this function.
566
860
  *
567
- * @param sc The USpoofChecker
861
+ * @param sc The USpoofChecker
568
862
  * @param chars A Unicode Set containing the list of
569
863
  * characters that are permitted. Ownership of the set
570
864
  * remains with the caller. The incoming set is cloned by
@@ -591,7 +885,7 @@ uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
591
885
  * or if a new set of allowed characters is specified.
592
886
  *
593
887
  *
594
- * @param sc The USpoofChecker
888
+ * @param sc The USpoofChecker
595
889
  * @param status The error code, set if this function encounters a problem.
596
890
  * @return A USet containing the characters that are permitted by
597
891
  * the USPOOF_CHAR_LIMIT test.
@@ -611,7 +905,7 @@ uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
611
905
  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
612
906
  * USpoofChecker by this function.
613
907
  *
614
- * @param sc The USpoofChecker
908
+ * @param sc The USpoofChecker
615
909
  * @param chars A Unicode Set containing the list of
616
910
  * characters that are permitted. Ownership of the set
617
911
  * remains with the caller. The incoming set is cloned by
@@ -626,7 +920,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UEr
626
920
 
627
921
  /**
628
922
  * Get a UnicodeSet for the characters permitted in an identifier.
629
- * This corresponds to the limits imposed by the Set Allowed Characters /
923
+ * This corresponds to the limits imposed by the Set Allowed Characters /
630
924
  * UnicodeSet functions. Limitations imposed by other checks will not be
631
925
  * reflected in the set returned by this function.
632
926
  *
@@ -638,7 +932,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UEr
638
932
  * or if a new set of allowed characters is specified.
639
933
  *
640
934
  *
641
- * @param sc The USpoofChecker
935
+ * @param sc The USpoofChecker
642
936
  * @param status The error code, set if this function encounters a problem.
643
937
  * @return A UnicodeSet containing the characters that are permitted by
644
938
  * the USPOOF_CHAR_LIMIT test.
@@ -653,17 +947,22 @@ uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
653
947
  * Check the specified string for possible security issues.
654
948
  * The text to be checked will typically be an identifier of some sort.
655
949
  * The set of checks to be performed is specified with uspoof_setChecks().
656
- *
657
- * @param sc The USpoofChecker
950
+ *
951
+ * \note
952
+ * Consider using the newer API, {@link uspoof_check2}, instead.
953
+ * The newer API exposes additional information from the check procedure
954
+ * and is otherwise identical to this method.
955
+ *
956
+ * @param sc The USpoofChecker
658
957
  * @param id The identifier to be checked for possible security issues,
659
958
  * in UTF-16 format.
660
959
  * @param length the length of the string to be checked, expressed in
661
- * 16 bit UTF-16 code units, or -1 if the string is
960
+ * 16 bit UTF-16 code units, or -1 if the string is
662
961
  * zero terminated.
663
- * @param position An out parameter.
664
- * Originally, the index of the first string position that failed a check.
665
- * Now, always returns zero.
666
- * This parameter may be null.
962
+ * @param position Deprecated in ICU 51. Always returns zero.
963
+ * Originally, an out parameter for the index of the first
964
+ * string position that failed a check.
965
+ * This parameter may be NULL.
667
966
  * @param status The error code, set if an error occurred while attempting to
668
967
  * perform the check.
669
968
  * Spoofing or security issues detected with the input string are
@@ -673,11 +972,12 @@ uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
673
972
  * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
674
973
  * will be zero if the input string passes all of the
675
974
  * enabled checks.
975
+ * @see uspoof_check2
676
976
  * @stable ICU 4.2
677
977
  */
678
978
  U_STABLE int32_t U_EXPORT2
679
979
  uspoof_check(const USpoofChecker *sc,
680
- const UChar *id, int32_t length,
980
+ const UChar *id, int32_t length,
681
981
  int32_t *position,
682
982
  UErrorCode *status);
683
983
 
@@ -686,16 +986,20 @@ uspoof_check(const USpoofChecker *sc,
686
986
  * Check the specified string for possible security issues.
687
987
  * The text to be checked will typically be an identifier of some sort.
688
988
  * The set of checks to be performed is specified with uspoof_setChecks().
689
- *
690
- * @param sc The USpoofChecker
989
+ *
990
+ * \note
991
+ * Consider using the newer API, {@link uspoof_check2UTF8}, instead.
992
+ * The newer API exposes additional information from the check procedure
993
+ * and is otherwise identical to this method.
994
+ *
995
+ * @param sc The USpoofChecker
691
996
  * @param id An identifier to be checked for possible security issues, in UTF8 format.
692
- * @param length the length of the string to be checked, or -1 if the string is
997
+ * @param length the length of the string to be checked, or -1 if the string is
693
998
  * zero terminated.
694
- * @param position An out parameter.
695
- * Originally, the index of the first string position that failed a check.
696
- * Now, always returns zero.
697
- * This parameter may be null.
698
- * @deprecated ICU 51
999
+ * @param position Deprecated in ICU 51. Always returns zero.
1000
+ * Originally, an out parameter for the index of the first
1001
+ * string position that failed a check.
1002
+ * This parameter may be NULL.
699
1003
  * @param status The error code, set if an error occurred while attempting to
700
1004
  * perform the check.
701
1005
  * Spoofing or security issues detected with the input string are
@@ -707,6 +1011,7 @@ uspoof_check(const USpoofChecker *sc,
707
1011
  * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
708
1012
  * will be zero if the input string passes all of the
709
1013
  * enabled checks.
1014
+ * @see uspoof_check2UTF8
710
1015
  * @stable ICU 4.2
711
1016
  */
712
1017
  U_STABLE int32_t U_EXPORT2
@@ -721,14 +1026,18 @@ uspoof_checkUTF8(const USpoofChecker *sc,
721
1026
  * Check the specified string for possible security issues.
722
1027
  * The text to be checked will typically be an identifier of some sort.
723
1028
  * The set of checks to be performed is specified with uspoof_setChecks().
724
- *
725
- * @param sc The USpoofChecker
1029
+ *
1030
+ * \note
1031
+ * Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
1032
+ * The newer API exposes additional information from the check procedure
1033
+ * and is otherwise identical to this method.
1034
+ *
1035
+ * @param sc The USpoofChecker
726
1036
  * @param id An identifier to be checked for possible security issues.
727
- * @param position An out parameter.
728
- * Originally, the index of the first string position that failed a check.
729
- * Now, always returns zero.
730
- * This parameter may be null.
731
- * @deprecated ICU 51
1037
+ * @param position Deprecated in ICU 51. Always returns zero.
1038
+ * Originally, an out parameter for the index of the first
1039
+ * string position that failed a check.
1040
+ * This parameter may be NULL.
732
1041
  * @param status The error code, set if an error occurred while attempting to
733
1042
  * perform the check.
734
1043
  * Spoofing or security issues detected with the input string are
@@ -738,45 +1047,249 @@ uspoof_checkUTF8(const USpoofChecker *sc,
738
1047
  * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
739
1048
  * will be zero if the input string passes all of the
740
1049
  * enabled checks.
1050
+ * @see uspoof_check2UnicodeString
741
1051
  * @stable ICU 4.2
742
1052
  */
743
1053
  U_STABLE int32_t U_EXPORT2
744
1054
  uspoof_checkUnicodeString(const USpoofChecker *sc,
745
- const icu::UnicodeString &id,
1055
+ const icu::UnicodeString &id,
746
1056
  int32_t *position,
747
1057
  UErrorCode *status);
1058
+ #endif
1059
+
1060
+
1061
+ #ifndef U_HIDE_DRAFT_API
1062
+ /**
1063
+ * Check the specified string for possible security issues.
1064
+ * The text to be checked will typically be an identifier of some sort.
1065
+ * The set of checks to be performed is specified with uspoof_setChecks().
1066
+ *
1067
+ * @param sc The USpoofChecker
1068
+ * @param id The identifier to be checked for possible security issues,
1069
+ * in UTF-16 format.
1070
+ * @param length the length of the string to be checked, or -1 if the string is
1071
+ * zero terminated.
1072
+ * @param checkResult An instance of USpoofCheckResult to be filled with
1073
+ * details about the identifier. Can be NULL.
1074
+ * @param status The error code, set if an error occurred while attempting to
1075
+ * perform the check.
1076
+ * Spoofing or security issues detected with the input string are
1077
+ * not reported here, but through the function's return value.
1078
+ * @return An integer value with bits set for any potential security
1079
+ * or spoofing issues detected. The bits are defined by
1080
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1081
+ * will be zero if the input string passes all of the
1082
+ * enabled checks. Any information in this bitmask will be
1083
+ * consistent with the information saved in the optional
1084
+ * checkResult parameter.
1085
+ * @see uspoof_openCheckResult
1086
+ * @see uspoof_check2UTF8
1087
+ * @see uspoof_check2UnicodeString
1088
+ * @draft ICU 58
1089
+ */
1090
+ U_DRAFT int32_t U_EXPORT2
1091
+ uspoof_check2(const USpoofChecker *sc,
1092
+ const UChar* id, int32_t length,
1093
+ USpoofCheckResult* checkResult,
1094
+ UErrorCode *status);
748
1095
 
1096
+ /**
1097
+ * Check the specified string for possible security issues.
1098
+ * The text to be checked will typically be an identifier of some sort.
1099
+ * The set of checks to be performed is specified with uspoof_setChecks().
1100
+ *
1101
+ * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
1102
+ * returns additional information about the identifier. For more
1103
+ * information, see {@link uspoof_openCheckResult}.
1104
+ *
1105
+ * @param sc The USpoofChecker
1106
+ * @param id A identifier to be checked for possible security issues, in UTF8 format.
1107
+ * @param length the length of the string to be checked, or -1 if the string is
1108
+ * zero terminated.
1109
+ * @param checkResult An instance of USpoofCheckResult to be filled with
1110
+ * details about the identifier. Can be NULL.
1111
+ * @param status The error code, set if an error occurred while attempting to
1112
+ * perform the check.
1113
+ * Spoofing or security issues detected with the input string are
1114
+ * not reported here, but through the function's return value.
1115
+ * @return An integer value with bits set for any potential security
1116
+ * or spoofing issues detected. The bits are defined by
1117
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1118
+ * will be zero if the input string passes all of the
1119
+ * enabled checks. Any information in this bitmask will be
1120
+ * consistent with the information saved in the optional
1121
+ * checkResult parameter.
1122
+ * @see uspoof_openCheckResult
1123
+ * @see uspoof_check2
1124
+ * @see uspoof_check2UnicodeString
1125
+ * @draft ICU 58
1126
+ */
1127
+ U_DRAFT int32_t U_EXPORT2
1128
+ uspoof_check2UTF8(const USpoofChecker *sc,
1129
+ const char *id, int32_t length,
1130
+ USpoofCheckResult* checkResult,
1131
+ UErrorCode *status);
1132
+
1133
+ #if U_SHOW_CPLUSPLUS_API
1134
+ /**
1135
+ * Check the specified string for possible security issues.
1136
+ * The text to be checked will typically be an identifier of some sort.
1137
+ * The set of checks to be performed is specified with uspoof_setChecks().
1138
+ *
1139
+ * @param sc The USpoofChecker
1140
+ * @param id A identifier to be checked for possible security issues.
1141
+ * @param checkResult An instance of USpoofCheckResult to be filled with
1142
+ * details about the identifier. Can be NULL.
1143
+ * @param status The error code, set if an error occurred while attempting to
1144
+ * perform the check.
1145
+ * Spoofing or security issues detected with the input string are
1146
+ * not reported here, but through the function's return value.
1147
+ * @return An integer value with bits set for any potential security
1148
+ * or spoofing issues detected. The bits are defined by
1149
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1150
+ * will be zero if the input string passes all of the
1151
+ * enabled checks. Any information in this bitmask will be
1152
+ * consistent with the information saved in the optional
1153
+ * checkResult parameter.
1154
+ * @see uspoof_openCheckResult
1155
+ * @see uspoof_check2
1156
+ * @see uspoof_check2UTF8
1157
+ * @draft ICU 58
1158
+ */
1159
+ U_DRAFT int32_t U_EXPORT2
1160
+ uspoof_check2UnicodeString(const USpoofChecker *sc,
1161
+ const icu::UnicodeString &id,
1162
+ USpoofCheckResult* checkResult,
1163
+ UErrorCode *status);
749
1164
  #endif
750
1165
 
1166
+ /**
1167
+ * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
1168
+ * information about the identifier. Information includes:
1169
+ * <ul>
1170
+ * <li>A bitmask of the checks that failed</li>
1171
+ * <li>The identifier's restriction level (UTS 39 section 5.2)</li>
1172
+ * <li>The set of numerics in the string (UTS 39 section 5.3)</li>
1173
+ * </ul>
1174
+ * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
1175
+ * of {@link uspoof_check2}.
1176
+ *
1177
+ * @param status The error code, set if this function encounters a problem.
1178
+ * @return the newly created USpoofCheckResult
1179
+ * @see uspoof_check2
1180
+ * @see uspoof_check2UTF8
1181
+ * @see uspoof_check2UnicodeString
1182
+ * @draft ICU 58
1183
+ */
1184
+ U_DRAFT USpoofCheckResult* U_EXPORT2
1185
+ uspoof_openCheckResult(UErrorCode *status);
1186
+
1187
+ /**
1188
+ * Close a USpoofCheckResult, freeing any memory that was being held by
1189
+ * its implementation.
1190
+ *
1191
+ * @param checkResult The instance of USpoofCheckResult to close
1192
+ * @draft ICU 58
1193
+ */
1194
+ U_DRAFT void U_EXPORT2
1195
+ uspoof_closeCheckResult(USpoofCheckResult *checkResult);
1196
+
1197
+ #if U_SHOW_CPLUSPLUS_API
1198
+
1199
+ U_NAMESPACE_BEGIN
1200
+
1201
+ /**
1202
+ * \class LocalUSpoofCheckResultPointer
1203
+ * "Smart pointer" class, closes a USpoofCheckResult via {@link uspoof_closeCheckResult}.
1204
+ * For most methods see the LocalPointerBase base class.
1205
+ *
1206
+ * @see LocalPointerBase
1207
+ * @see LocalPointer
1208
+ * @draft ICU 58
1209
+ */
1210
+ U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
1211
+
1212
+ U_NAMESPACE_END
1213
+
1214
+ #endif
1215
+
1216
+ /**
1217
+ * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
1218
+ * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
1219
+ *
1220
+ * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1221
+ * @param status The error code, set if an error occurred.
1222
+ * @return An integer value with bits set for any potential security
1223
+ * or spoofing issues detected. The bits are defined by
1224
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1225
+ * will be zero if the input string passes all of the
1226
+ * enabled checks.
1227
+ * @see uspoof_setChecks
1228
+ * @draft ICU 58
1229
+ */
1230
+ U_DRAFT int32_t U_EXPORT2
1231
+ uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
1232
+
1233
+ /**
1234
+ * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
1235
+ * was enabled; otherwise, undefined.
1236
+ *
1237
+ * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1238
+ * @param status The error code, set if an error occurred.
1239
+ * @return The restriction level contained in the USpoofCheckResult
1240
+ * @see uspoof_setRestrictionLevel
1241
+ * @draft ICU 58
1242
+ */
1243
+ U_DRAFT URestrictionLevel U_EXPORT2
1244
+ uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
1245
+
1246
+ /**
1247
+ * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
1248
+ * otherwise, undefined. The set will contain the zero digit from each decimal number system found
1249
+ * in the input string. Ownership of the returned USet remains with the USpoofCheckResult.
1250
+ * The USet will be freed when {@link uspoof_closeCheckResult} is called.
1251
+ *
1252
+ * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1253
+ * @return The set of numerics contained in the USpoofCheckResult
1254
+ * @param status The error code, set if an error occurred.
1255
+ * @draft ICU 58
1256
+ */
1257
+ U_DRAFT const USet* U_EXPORT2
1258
+ uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
1259
+ #endif /* U_HIDE_DRAFT_API */
1260
+
751
1261
 
752
1262
  /**
753
1263
  * Check whether two specified strings are visually confusable.
754
- * The types of confusability to be tested - single script, mixed script,
755
- * or whole script - are determined by the check options set for the
756
- * USpoofChecker.
757
- *
758
- * The tests to be performed are controlled by the flags
759
- * USPOOF_SINGLE_SCRIPT_CONFUSABLE
760
- * USPOOF_MIXED_SCRIPT_CONFUSABLE
761
- * USPOOF_WHOLE_SCRIPT_CONFUSABLE
762
- * At least one of these tests must be selected.
763
- *
764
- * USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers
765
- * may be of mixed case.
766
- * If identifiers are case folded for comparison and
767
- * display to the user, do not select the USPOOF_ANY_CASE option.
1264
+ *
1265
+ * If the strings are confusable, the return value will be nonzero, as long as
1266
+ * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
1267
+ *
1268
+ * The bits in the return value correspond to flags for each of the classes of
1269
+ * confusables applicable to the two input strings. According to UTS 39
1270
+ * section 4, the possible flags are:
1271
+ *
1272
+ * <ul>
1273
+ * <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
1274
+ * <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
1275
+ * <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
1276
+ * </ul>
1277
+ *
1278
+ * If one or more of the above flags were not listed in uspoof_setChecks(), this
1279
+ * function will never report that class of confusable. The check
1280
+ * {@link USPOOF_CONFUSABLE} enables all three flags.
768
1281
  *
769
1282
  *
770
1283
  * @param sc The USpoofChecker
771
- * @param id1 The first of the two identifiers to be compared for
1284
+ * @param id1 The first of the two identifiers to be compared for
772
1285
  * confusability. The strings are in UTF-16 format.
773
1286
  * @param length1 the length of the first identifier, expressed in
774
- * 16 bit UTF-16 code units, or -1 if the string is
1287
+ * 16 bit UTF-16 code units, or -1 if the string is
775
1288
  * nul terminated.
776
- * @param id2 The second of the two identifiers to be compared for
1289
+ * @param id2 The second of the two identifiers to be compared for
777
1290
  * confusability. The identifiers are in UTF-16 format.
778
1291
  * @param length2 The length of the second identifier, expressed in
779
- * 16 bit UTF-16 code units, or -1 if the string is
1292
+ * 16 bit UTF-16 code units, or -1 if the string is
780
1293
  * nul terminated.
781
1294
  * @param status The error code, set if an error occurred while attempting to
782
1295
  * perform the check.
@@ -786,6 +1299,7 @@ uspoof_checkUnicodeString(const USpoofChecker *sc,
786
1299
  * the type of confusability found, as defined by
787
1300
  * enum USpoofChecks. Zero is returned if the identifiers
788
1301
  * are not confusable.
1302
+ *
789
1303
  * @stable ICU 4.2
790
1304
  */
791
1305
  U_STABLE int32_t U_EXPORT2
@@ -797,19 +1311,16 @@ uspoof_areConfusable(const USpoofChecker *sc,
797
1311
 
798
1312
 
799
1313
  /**
800
- * Check the whether two specified strings are visually confusable.
801
- * The types of confusability to be tested - single script, mixed script,
802
- * or whole script - are determined by the check options set for the
803
- * USpoofChecker.
1314
+ * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
804
1315
  *
805
1316
  * @param sc The USpoofChecker
806
- * @param id1 The first of the two identifiers to be compared for
1317
+ * @param id1 The first of the two identifiers to be compared for
807
1318
  * confusability. The strings are in UTF-8 format.
808
- * @param length1 the length of the first identifiers, in bytes, or -1
1319
+ * @param length1 the length of the first identifier, in bytes, or -1
809
1320
  * if the string is nul terminated.
810
- * @param id2 The second of the two identifiers to be compared for
1321
+ * @param id2 The second of the two identifiers to be compared for
811
1322
  * confusability. The strings are in UTF-8 format.
812
- * @param length2 The length of the second string in bytes, or -1
1323
+ * @param length2 The length of the second string in bytes, or -1
813
1324
  * if the string is nul terminated.
814
1325
  * @param status The error code, set if an error occurred while attempting to
815
1326
  * perform the check.
@@ -819,7 +1330,10 @@ uspoof_areConfusable(const USpoofChecker *sc,
819
1330
  * the type of confusability found, as defined by
820
1331
  * enum USpoofChecks. Zero is returned if the strings
821
1332
  * are not confusable.
1333
+ *
822
1334
  * @stable ICU 4.2
1335
+ *
1336
+ * @see uspoof_areConfusable
823
1337
  */
824
1338
  U_STABLE int32_t U_EXPORT2
825
1339
  uspoof_areConfusableUTF8(const USpoofChecker *sc,
@@ -832,15 +1346,12 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
832
1346
 
833
1347
  #if U_SHOW_CPLUSPLUS_API
834
1348
  /**
835
- * Check the whether two specified strings are visually confusable.
836
- * The types of confusability to be tested - single script, mixed script,
837
- * or whole script - are determined by the check options set for the
838
- * USpoofChecker.
1349
+ * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
839
1350
  *
840
1351
  * @param sc The USpoofChecker
841
- * @param s1 The first of the two identifiers to be compared for
1352
+ * @param s1 The first of the two identifiers to be compared for
842
1353
  * confusability. The strings are UnicodeStrings.
843
- * @param s2 The second of the two identifiers to be compared for
1354
+ * @param s2 The second of the two identifiers to be compared for
844
1355
  * confusability. The strings are UnicodeStrings.
845
1356
  * @param status The error code, set if an error occurred while attempting to
846
1357
  * perform the check.
@@ -850,7 +1361,10 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
850
1361
  * the type of confusability found, as defined by
851
1362
  * enum USpoofChecks. Zero is returned if the identifiers
852
1363
  * are not confusable.
1364
+ *
853
1365
  * @stable ICU 4.2
1366
+ *
1367
+ * @see uspoof_areConfusable
854
1368
  */
855
1369
  U_STABLE int32_t U_EXPORT2
856
1370
  uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
@@ -861,112 +1375,107 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
861
1375
 
862
1376
 
863
1377
  /**
864
- * Get the "skeleton" for an identifier.
865
- * Skeletons are a transformation of the input identifier;
866
- * Two identifiers are confusable if their skeletons are identical.
867
- * See Unicode UAX #39 for additional information.
868
- *
869
- * Using skeletons directly makes it possible to quickly check
870
- * whether an identifier is confusable with any of some large
871
- * set of existing identifiers, by creating an efficiently
872
- * searchable collection of the skeletons.
873
- *
874
- * @param sc The USpoofChecker
875
- * @param type The type of skeleton, corresponding to which
876
- * of the Unicode confusable data tables to use.
877
- * The default is Mixed-Script, Lowercase.
878
- * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
879
- * USPOOF_ANY_CASE. The two flags may be ORed.
880
- * @param id The input identifier whose skeleton will be computed.
881
- * @param length The length of the input identifier, expressed in 16 bit
882
- * UTF-16 code units, or -1 if the string is zero terminated.
883
- * @param dest The output buffer, to receive the skeleton string.
884
- * @param destCapacity The length of the output buffer, in 16 bit units.
885
- * The destCapacity may be zero, in which case the function will
886
- * return the actual length of the skeleton.
887
- * @param status The error code, set if an error occurred while attempting to
888
- * perform the check.
889
- * @return The length of the skeleton string. The returned length
890
- * is always that of the complete skeleton, even when the
891
- * supplied buffer is too small (or of zero length)
892
- *
893
- * @stable ICU 4.2
894
- */
1378
+ * Get the "skeleton" for an identifier.
1379
+ * Skeletons are a transformation of the input identifier;
1380
+ * Two identifiers are confusable if their skeletons are identical.
1381
+ * See Unicode UTS #39 for additional information.
1382
+ *
1383
+ * Using skeletons directly makes it possible to quickly check
1384
+ * whether an identifier is confusable with any of some large
1385
+ * set of existing identifiers, by creating an efficiently
1386
+ * searchable collection of the skeletons.
1387
+ *
1388
+ * @param sc The USpoofChecker
1389
+ * @param type Deprecated in ICU 58. You may pass any number.
1390
+ * Originally, controlled which of the Unicode confusable data
1391
+ * tables to use.
1392
+ * @param id The input identifier whose skeleton will be computed.
1393
+ * @param length The length of the input identifier, expressed in 16 bit
1394
+ * UTF-16 code units, or -1 if the string is zero terminated.
1395
+ * @param dest The output buffer, to receive the skeleton string.
1396
+ * @param destCapacity The length of the output buffer, in 16 bit units.
1397
+ * The destCapacity may be zero, in which case the function will
1398
+ * return the actual length of the skeleton.
1399
+ * @param status The error code, set if an error occurred while attempting to
1400
+ * perform the check.
1401
+ * @return The length of the skeleton string. The returned length
1402
+ * is always that of the complete skeleton, even when the
1403
+ * supplied buffer is too small (or of zero length)
1404
+ *
1405
+ * @stable ICU 4.2
1406
+ * @see uspoof_areConfusable
1407
+ */
895
1408
  U_STABLE int32_t U_EXPORT2
896
1409
  uspoof_getSkeleton(const USpoofChecker *sc,
897
1410
  uint32_t type,
898
1411
  const UChar *id, int32_t length,
899
1412
  UChar *dest, int32_t destCapacity,
900
1413
  UErrorCode *status);
901
-
1414
+
902
1415
  /**
903
- * Get the "skeleton" for an identifier.
904
- * Skeletons are a transformation of the input identifier;
905
- * Two identifiers are confusable if their skeletons are identical.
906
- * See Unicode UAX #39 for additional information.
907
- *
908
- * Using skeletons directly makes it possible to quickly check
909
- * whether an identifier is confusable with any of some large
910
- * set of existing identifiers, by creating an efficiently
911
- * searchable collection of the skeletons.
912
- *
913
- * @param sc The USpoofChecker
914
- * @param type The type of skeleton, corresponding to which
915
- * of the Unicode confusable data tables to use.
916
- * The default is Mixed-Script, Lowercase.
917
- * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
918
- * USPOOF_ANY_CASE. The two flags may be ORed.
919
- * @param id The UTF-8 format identifier whose skeleton will be computed.
920
- * @param length The length of the input string, in bytes,
921
- * or -1 if the string is zero terminated.
922
- * @param dest The output buffer, to receive the skeleton string.
923
- * @param destCapacity The length of the output buffer, in bytes.
924
- * The destCapacity may be zero, in which case the function will
925
- * return the actual length of the skeleton.
926
- * @param status The error code, set if an error occurred while attempting to
927
- * perform the check. Possible Errors include U_INVALID_CHAR_FOUND
928
- * for invalid UTF-8 sequences, and
929
- * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
930
- * to hold the complete skeleton.
931
- * @return The length of the skeleton string, in bytes. The returned length
932
- * is always that of the complete skeleton, even when the
933
- * supplied buffer is too small (or of zero length)
934
- *
935
- * @stable ICU 4.2
936
- */
1416
+ * Get the "skeleton" for an identifier.
1417
+ * Skeletons are a transformation of the input identifier;
1418
+ * Two identifiers are confusable if their skeletons are identical.
1419
+ * See Unicode UTS #39 for additional information.
1420
+ *
1421
+ * Using skeletons directly makes it possible to quickly check
1422
+ * whether an identifier is confusable with any of some large
1423
+ * set of existing identifiers, by creating an efficiently
1424
+ * searchable collection of the skeletons.
1425
+ *
1426
+ * @param sc The USpoofChecker
1427
+ * @param type Deprecated in ICU 58. You may pass any number.
1428
+ * Originally, controlled which of the Unicode confusable data
1429
+ * tables to use.
1430
+ * @param id The UTF-8 format identifier whose skeleton will be computed.
1431
+ * @param length The length of the input string, in bytes,
1432
+ * or -1 if the string is zero terminated.
1433
+ * @param dest The output buffer, to receive the skeleton string.
1434
+ * @param destCapacity The length of the output buffer, in bytes.
1435
+ * The destCapacity may be zero, in which case the function will
1436
+ * return the actual length of the skeleton.
1437
+ * @param status The error code, set if an error occurred while attempting to
1438
+ * perform the check. Possible Errors include U_INVALID_CHAR_FOUND
1439
+ * for invalid UTF-8 sequences, and
1440
+ * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
1441
+ * to hold the complete skeleton.
1442
+ * @return The length of the skeleton string, in bytes. The returned length
1443
+ * is always that of the complete skeleton, even when the
1444
+ * supplied buffer is too small (or of zero length)
1445
+ *
1446
+ * @stable ICU 4.2
1447
+ */
937
1448
  U_STABLE int32_t U_EXPORT2
938
1449
  uspoof_getSkeletonUTF8(const USpoofChecker *sc,
939
1450
  uint32_t type,
940
1451
  const char *id, int32_t length,
941
1452
  char *dest, int32_t destCapacity,
942
1453
  UErrorCode *status);
943
-
1454
+
944
1455
  #if U_SHOW_CPLUSPLUS_API
945
1456
  /**
946
- * Get the "skeleton" for an identifier.
947
- * Skeletons are a transformation of the input identifier;
948
- * Two identifiers are confusable if their skeletons are identical.
949
- * See Unicode UAX #39 for additional information.
950
- *
951
- * Using skeletons directly makes it possible to quickly check
952
- * whether an identifier is confusable with any of some large
953
- * set of existing identifiers, by creating an efficiently
954
- * searchable collection of the skeletons.
955
- *
956
- * @param sc The USpoofChecker.
957
- * @param type The type of skeleton, corresponding to which
958
- * of the Unicode confusable data tables to use.
959
- * The default is Mixed-Script, Lowercase.
960
- * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
961
- * USPOOF_ANY_CASE. The two flags may be ORed.
962
- * @param id The input identifier whose skeleton will be computed.
963
- * @param dest The output identifier, to receive the skeleton string.
964
- * @param status The error code, set if an error occurred while attempting to
965
- * perform the check.
966
- * @return A reference to the destination (skeleton) string.
967
- *
968
- * @stable ICU 4.2
969
- */
1457
+ * Get the "skeleton" for an identifier.
1458
+ * Skeletons are a transformation of the input identifier;
1459
+ * Two identifiers are confusable if their skeletons are identical.
1460
+ * See Unicode UTS #39 for additional information.
1461
+ *
1462
+ * Using skeletons directly makes it possible to quickly check
1463
+ * whether an identifier is confusable with any of some large
1464
+ * set of existing identifiers, by creating an efficiently
1465
+ * searchable collection of the skeletons.
1466
+ *
1467
+ * @param sc The USpoofChecker.
1468
+ * @param type Deprecated in ICU 58. You may pass any number.
1469
+ * Originally, controlled which of the Unicode confusable data
1470
+ * tables to use.
1471
+ * @param id The input identifier whose skeleton will be computed.
1472
+ * @param dest The output identifier, to receive the skeleton string.
1473
+ * @param status The error code, set if an error occurred while attempting to
1474
+ * perform the check.
1475
+ * @return A reference to the destination (skeleton) string.
1476
+ *
1477
+ * @stable ICU 4.2
1478
+ */
970
1479
  U_I18N_API icu::UnicodeString & U_EXPORT2
971
1480
  uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
972
1481
  uint32_t type,
@@ -977,7 +1486,8 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
977
1486
 
978
1487
  /**
979
1488
  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
980
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
1489
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1490
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
981
1491
  *
982
1492
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
983
1493
  * be deleted by the caller.
@@ -991,7 +1501,8 @@ uspoof_getInclusionSet(UErrorCode *status);
991
1501
 
992
1502
  /**
993
1503
  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
994
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
1504
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1505
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
995
1506
  *
996
1507
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
997
1508
  * be deleted by the caller.
@@ -1007,7 +1518,8 @@ uspoof_getRecommendedSet(UErrorCode *status);
1007
1518
 
1008
1519
  /**
1009
1520
  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
1010
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
1521
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1522
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1011
1523
  *
1012
1524
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1013
1525
  * be deleted by the caller.
@@ -1021,7 +1533,8 @@ uspoof_getInclusionUnicodeSet(UErrorCode *status);
1021
1533
 
1022
1534
  /**
1023
1535
  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
1024
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
1536
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1537
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1025
1538
  *
1026
1539
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1027
1540
  * be deleted by the caller.
@@ -1041,7 +1554,7 @@ uspoof_getRecommendedUnicodeSet(UErrorCode *status);
1041
1554
  * instantiate a new Spoof Detector.
1042
1555
  *
1043
1556
  * The serialized spoof checker includes only the data compiled from the
1044
- * Unicode data tables by uspoof_openFromSource(); it does not include
1557
+ * Unicode data tables by uspoof_openFromSource(); it does not include
1045
1558
  * any other state or configuration that may have been set.
1046
1559
  *
1047
1560
  * @param sc the Spoof Detector whose data is to be serialized.