pango 3.1.0-x64-mingw32 → 3.1.1-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +9 -4
  3. data/ext/pango/rbpangolayoutline.c +6 -2
  4. data/lib/2.2/pango.so +0 -0
  5. data/lib/2.3/pango.so +0 -0
  6. data/lib/2.4/pango.so +0 -0
  7. data/sample/layout.rb +1 -1
  8. data/vendor/local/bin/derb.exe +0 -0
  9. data/vendor/local/bin/genbrk.exe +0 -0
  10. data/vendor/local/bin/genccode.exe +0 -0
  11. data/vendor/local/bin/gencfu.exe +0 -0
  12. data/vendor/local/bin/gencmn.exe +0 -0
  13. data/vendor/local/bin/gencnval.exe +0 -0
  14. data/vendor/local/bin/gendict.exe +0 -0
  15. data/vendor/local/bin/gennorm2.exe +0 -0
  16. data/vendor/local/bin/genrb.exe +0 -0
  17. data/vendor/local/bin/gensprep.exe +0 -0
  18. data/vendor/local/bin/hb-ot-shape-closure.exe +0 -0
  19. data/vendor/local/bin/hb-shape.exe +0 -0
  20. data/vendor/local/bin/hb-view.exe +0 -0
  21. data/vendor/local/bin/icu-config +16 -11
  22. data/vendor/local/bin/icuinfo.exe +0 -0
  23. data/vendor/local/bin/icupkg.exe +0 -0
  24. data/vendor/local/bin/libharfbuzz-0.dll +0 -0
  25. data/vendor/local/bin/libpango-1.0-0.dll +0 -0
  26. data/vendor/local/bin/libpangocairo-1.0-0.dll +0 -0
  27. data/vendor/local/bin/libpangoft2-1.0-0.dll +0 -0
  28. data/vendor/local/bin/libpangowin32-1.0-0.dll +0 -0
  29. data/vendor/local/bin/makeconv.exe +0 -0
  30. data/vendor/local/bin/pango-view.exe +0 -0
  31. data/vendor/local/bin/pkgdata.exe +0 -0
  32. data/vendor/local/bin/uconv.exe +0 -0
  33. data/vendor/local/include/harfbuzz/hb-font.h +5 -0
  34. data/vendor/local/include/harfbuzz/hb-glib.h +2 -1
  35. data/vendor/local/include/harfbuzz/hb-ot-layout.h +19 -0
  36. data/vendor/local/include/harfbuzz/hb-shape-plan.h +19 -0
  37. data/vendor/local/include/harfbuzz/hb-version.h +3 -3
  38. data/vendor/local/include/unicode/alphaindex.h +2 -0
  39. data/vendor/local/include/unicode/appendable.h +2 -0
  40. data/vendor/local/include/unicode/basictz.h +2 -0
  41. data/vendor/local/include/unicode/brkiter.h +7 -0
  42. data/vendor/local/include/unicode/bytestream.h +2 -0
  43. data/vendor/local/include/unicode/bytestrie.h +3 -2
  44. data/vendor/local/include/unicode/bytestriebuilder.h +4 -4
  45. data/vendor/local/include/unicode/calendar.h +2 -0
  46. data/vendor/local/include/unicode/caniter.h +2 -0
  47. data/vendor/local/include/unicode/chariter.h +2 -0
  48. data/vendor/local/include/unicode/choicfmt.h +2 -0
  49. data/vendor/local/include/unicode/coleitr.h +2 -0
  50. data/vendor/local/include/unicode/coll.h +2 -0
  51. data/vendor/local/include/unicode/compactdecimalformat.h +4 -3
  52. data/vendor/local/include/unicode/curramt.h +2 -0
  53. data/vendor/local/include/unicode/currpinf.h +2 -0
  54. data/vendor/local/include/unicode/currunit.h +2 -0
  55. data/vendor/local/include/unicode/datefmt.h +2 -0
  56. data/vendor/local/include/unicode/dbbi.h +2 -0
  57. data/vendor/local/include/unicode/dcfmtsym.h +2 -0
  58. data/vendor/local/include/unicode/decimfmt.h +9 -5
  59. data/vendor/local/include/unicode/docmain.h +3 -1
  60. data/vendor/local/include/unicode/dtfmtsym.h +26 -14
  61. data/vendor/local/include/unicode/dtintrv.h +2 -0
  62. data/vendor/local/include/unicode/dtitvfmt.h +2 -0
  63. data/vendor/local/include/unicode/dtitvinf.h +8 -3
  64. data/vendor/local/include/unicode/dtptngen.h +13 -9
  65. data/vendor/local/include/unicode/dtrule.h +2 -0
  66. data/vendor/local/include/unicode/enumset.h +2 -0
  67. data/vendor/local/include/unicode/errorcode.h +2 -0
  68. data/vendor/local/include/unicode/fieldpos.h +5 -2
  69. data/vendor/local/include/unicode/filteredbrk.h +10 -12
  70. data/vendor/local/include/unicode/fmtable.h +4 -2
  71. data/vendor/local/include/unicode/format.h +2 -0
  72. data/vendor/local/include/unicode/fpositer.h +2 -0
  73. data/vendor/local/include/unicode/gender.h +2 -0
  74. data/vendor/local/include/unicode/gregocal.h +2 -0
  75. data/vendor/local/include/unicode/icudataver.h +2 -0
  76. data/vendor/local/include/unicode/icuplug.h +12 -2
  77. data/vendor/local/include/unicode/idna.h +6 -4
  78. data/vendor/local/include/unicode/listformatter.h +2 -0
  79. data/vendor/local/include/unicode/localpointer.h +19 -28
  80. data/vendor/local/include/unicode/locdspnm.h +2 -0
  81. data/vendor/local/include/unicode/locid.h +2 -0
  82. data/vendor/local/include/unicode/measfmt.h +21 -2
  83. data/vendor/local/include/unicode/measunit.h +50 -24
  84. data/vendor/local/include/unicode/measure.h +2 -0
  85. data/vendor/local/include/unicode/messagepattern.h +2 -0
  86. data/vendor/local/include/unicode/msgfmt.h +2 -0
  87. data/vendor/local/include/unicode/normalizer2.h +2 -0
  88. data/vendor/local/include/unicode/normlzr.h +4 -4
  89. data/vendor/local/include/unicode/numfmt.h +8 -6
  90. data/vendor/local/include/unicode/numsys.h +2 -0
  91. data/vendor/local/include/unicode/parseerr.h +2 -0
  92. data/vendor/local/include/unicode/parsepos.h +2 -0
  93. data/vendor/local/include/unicode/platform.h +8 -0
  94. data/vendor/local/include/unicode/plurfmt.h +2 -0
  95. data/vendor/local/include/unicode/plurrule.h +2 -0
  96. data/vendor/local/include/unicode/ptypes.h +2 -0
  97. data/vendor/local/include/unicode/putil.h +2 -0
  98. data/vendor/local/include/unicode/rbbi.h +18 -66
  99. data/vendor/local/include/unicode/rbnf.h +8 -0
  100. data/vendor/local/include/unicode/rbtz.h +2 -0
  101. data/vendor/local/include/unicode/regex.h +2 -0
  102. data/vendor/local/include/unicode/region.h +3 -1
  103. data/vendor/local/include/unicode/reldatefmt.h +19 -9
  104. data/vendor/local/include/unicode/rep.h +2 -0
  105. data/vendor/local/include/unicode/resbund.h +2 -0
  106. data/vendor/local/include/unicode/schriter.h +2 -0
  107. data/vendor/local/include/unicode/scientificnumberformatter.h +2 -0
  108. data/vendor/local/include/unicode/search.h +2 -0
  109. data/vendor/local/include/unicode/selfmt.h +2 -0
  110. data/vendor/local/include/unicode/simpleformatter.h +2 -0
  111. data/vendor/local/include/unicode/simpletz.h +2 -0
  112. data/vendor/local/include/unicode/smpdtfmt.h +9 -1
  113. data/vendor/local/include/unicode/sortkey.h +2 -0
  114. data/vendor/local/include/unicode/std_string.h +2 -0
  115. data/vendor/local/include/unicode/strenum.h +2 -0
  116. data/vendor/local/include/unicode/stringpiece.h +5 -3
  117. data/vendor/local/include/unicode/stringtriebuilder.h +13 -1
  118. data/vendor/local/include/unicode/stsearch.h +2 -0
  119. data/vendor/local/include/unicode/symtable.h +2 -0
  120. data/vendor/local/include/unicode/tblcoll.h +3 -1
  121. data/vendor/local/include/unicode/timezone.h +2 -0
  122. data/vendor/local/include/unicode/tmunit.h +8 -0
  123. data/vendor/local/include/unicode/tmutamt.h +2 -0
  124. data/vendor/local/include/unicode/tmutfmt.h +5 -0
  125. data/vendor/local/include/unicode/translit.h +2 -0
  126. data/vendor/local/include/unicode/tzfmt.h +2 -0
  127. data/vendor/local/include/unicode/tznames.h +16 -2
  128. data/vendor/local/include/unicode/tzrule.h +2 -0
  129. data/vendor/local/include/unicode/tztrans.h +2 -0
  130. data/vendor/local/include/unicode/ubidi.h +20 -8
  131. data/vendor/local/include/unicode/ubiditransform.h +321 -0
  132. data/vendor/local/include/unicode/ubrk.h +26 -7
  133. data/vendor/local/include/unicode/ucal.h +8 -4
  134. data/vendor/local/include/unicode/ucasemap.h +2 -0
  135. data/vendor/local/include/unicode/ucat.h +2 -0
  136. data/vendor/local/include/unicode/uchar.h +244 -17
  137. data/vendor/local/include/unicode/ucharstrie.h +2 -0
  138. data/vendor/local/include/unicode/ucharstriebuilder.h +2 -0
  139. data/vendor/local/include/unicode/uchriter.h +2 -0
  140. data/vendor/local/include/unicode/uclean.h +2 -0
  141. data/vendor/local/include/unicode/ucnv.h +8 -1
  142. data/vendor/local/include/unicode/ucnv_cb.h +2 -0
  143. data/vendor/local/include/unicode/ucnv_err.h +2 -0
  144. data/vendor/local/include/unicode/ucnvsel.h +2 -0
  145. data/vendor/local/include/unicode/ucol.h +28 -11
  146. data/vendor/local/include/unicode/ucoleitr.h +2 -0
  147. data/vendor/local/include/unicode/uconfig.h +2 -0
  148. data/vendor/local/include/unicode/ucsdet.h +2 -0
  149. data/vendor/local/include/unicode/ucurr.h +5 -1
  150. data/vendor/local/include/unicode/udat.h +13 -11
  151. data/vendor/local/include/unicode/udata.h +8 -1
  152. data/vendor/local/include/unicode/udateintervalformat.h +2 -0
  153. data/vendor/local/include/unicode/udatpg.h +15 -2
  154. data/vendor/local/include/unicode/udisplaycontext.h +34 -2
  155. data/vendor/local/include/unicode/uenum.h +2 -0
  156. data/vendor/local/include/unicode/ufieldpositer.h +4 -2
  157. data/vendor/local/include/unicode/uformattable.h +9 -1
  158. data/vendor/local/include/unicode/ugender.h +2 -0
  159. data/vendor/local/include/unicode/uidna.h +2 -0
  160. data/vendor/local/include/unicode/uiter.h +2 -0
  161. data/vendor/local/include/unicode/uldnames.h +2 -0
  162. data/vendor/local/include/unicode/ulistformatter.h +2 -0
  163. data/vendor/local/include/unicode/uloc.h +9 -3
  164. data/vendor/local/include/unicode/ulocdata.h +20 -2
  165. data/vendor/local/include/unicode/umachine.h +25 -0
  166. data/vendor/local/include/unicode/umisc.h +2 -0
  167. data/vendor/local/include/unicode/umsg.h +2 -0
  168. data/vendor/local/include/unicode/unifilt.h +2 -0
  169. data/vendor/local/include/unicode/unifunct.h +2 -0
  170. data/vendor/local/include/unicode/unimatch.h +2 -0
  171. data/vendor/local/include/unicode/unirepl.h +2 -0
  172. data/vendor/local/include/unicode/uniset.h +3 -1
  173. data/vendor/local/include/unicode/unistr.h +12 -14
  174. data/vendor/local/include/unicode/unorm.h +6 -2
  175. data/vendor/local/include/unicode/unorm2.h +2 -0
  176. data/vendor/local/include/unicode/unum.h +28 -10
  177. data/vendor/local/include/unicode/unumsys.h +2 -0
  178. data/vendor/local/include/unicode/uobject.h +2 -0
  179. data/vendor/local/include/unicode/upluralrules.h +6 -2
  180. data/vendor/local/include/unicode/uregex.h +2 -0
  181. data/vendor/local/include/unicode/uregion.h +6 -2
  182. data/vendor/local/include/unicode/ureldatefmt.h +13 -7
  183. data/vendor/local/include/unicode/urename.h +16 -4
  184. data/vendor/local/include/unicode/urep.h +2 -0
  185. data/vendor/local/include/unicode/ures.h +7 -1
  186. data/vendor/local/include/unicode/uscript.h +41 -29
  187. data/vendor/local/include/unicode/usearch.h +10 -4
  188. data/vendor/local/include/unicode/uset.h +5 -1
  189. data/vendor/local/include/unicode/usetiter.h +2 -0
  190. data/vendor/local/include/unicode/ushape.h +2 -0
  191. data/vendor/local/include/unicode/uspoof.h +876 -363
  192. data/vendor/local/include/unicode/usprep.h +2 -0
  193. data/vendor/local/include/unicode/ustdio.h +2 -0
  194. data/vendor/local/include/unicode/ustream.h +2 -0
  195. data/vendor/local/include/unicode/ustring.h +2 -0
  196. data/vendor/local/include/unicode/ustringtrie.h +2 -0
  197. data/vendor/local/include/unicode/utext.h +2 -0
  198. data/vendor/local/include/unicode/utf.h +2 -0
  199. data/vendor/local/include/unicode/utf16.h +2 -0
  200. data/vendor/local/include/unicode/utf32.h +2 -0
  201. data/vendor/local/include/unicode/utf8.h +2 -0
  202. data/vendor/local/include/unicode/utf_old.h +2 -0
  203. data/vendor/local/include/unicode/utmscale.h +2 -0
  204. data/vendor/local/include/unicode/utrace.h +20 -0
  205. data/vendor/local/include/unicode/utrans.h +2 -0
  206. data/vendor/local/include/unicode/utypes.h +71 -97
  207. data/vendor/local/include/unicode/uvernum.h +13 -12
  208. data/vendor/local/include/unicode/uversion.h +2 -0
  209. data/vendor/local/include/unicode/vtzone.h +2 -0
  210. data/vendor/local/lib/icu/{57.1 → 58.2}/Makefile.inc +8 -9
  211. data/vendor/local/lib/icu/{57.1 → 58.2}/pkgdata.inc +2 -2
  212. data/vendor/local/lib/icu/Makefile.inc +8 -9
  213. data/vendor/local/lib/icu/pkgdata.inc +2 -2
  214. data/vendor/local/lib/icudt.dll +0 -0
  215. data/vendor/local/lib/{icudt57.dll → icudt58.dll} +0 -0
  216. data/vendor/local/lib/icuin.dll +0 -0
  217. data/vendor/local/lib/icuin58.dll +0 -0
  218. data/vendor/local/lib/icuio.dll +0 -0
  219. data/vendor/local/lib/{icuio57.dll → icuio58.dll} +0 -0
  220. data/vendor/local/lib/icutest.dll +0 -0
  221. data/vendor/local/lib/{icutest57.dll → icutest58.dll} +0 -0
  222. data/vendor/local/lib/icutu.dll +0 -0
  223. data/vendor/local/lib/icutu58.dll +0 -0
  224. data/vendor/local/lib/icuuc.dll +0 -0
  225. data/vendor/local/lib/icuuc58.dll +0 -0
  226. data/vendor/local/lib/libharfbuzz-icu.a +0 -0
  227. data/vendor/local/lib/libharfbuzz-icu.la +4 -4
  228. data/vendor/local/lib/libharfbuzz.dll.a +0 -0
  229. data/vendor/local/lib/libharfbuzz.la +3 -3
  230. data/vendor/local/lib/libicudt.dll.a +0 -0
  231. data/vendor/local/lib/libicuin.dll.a +0 -0
  232. data/vendor/local/lib/libicuio.dll.a +0 -0
  233. data/vendor/local/lib/libicutest.dll.a +0 -0
  234. data/vendor/local/lib/libicutu.dll.a +0 -0
  235. data/vendor/local/lib/libicuuc.dll.a +0 -0
  236. data/vendor/local/lib/libpango-1.0.dll.a +0 -0
  237. data/vendor/local/lib/libpangocairo-1.0.dll.a +0 -0
  238. data/vendor/local/lib/libpangoft2-1.0.dll.a +0 -0
  239. data/vendor/local/lib/libpangowin32-1.0.dll.a +0 -0
  240. data/vendor/local/lib/pkgconfig/harfbuzz-icu.pc +1 -1
  241. data/vendor/local/lib/pkgconfig/harfbuzz.pc +2 -2
  242. data/vendor/local/lib/pkgconfig/icu-i18n.pc +8 -6
  243. data/vendor/local/lib/pkgconfig/icu-io.pc +8 -6
  244. data/vendor/local/lib/pkgconfig/icu-uc.pc +8 -6
  245. data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-version.html +3 -3
  246. data/vendor/local/share/gtk-doc/html/harfbuzz/pt02.html +1 -1
  247. data/vendor/local/share/icu/{57.1 → 58.2}/LICENSE +52 -60
  248. data/vendor/local/share/icu/{57.1 → 58.2}/config/mh-mingw64 +2 -0
  249. data/vendor/local/share/icu/{57.1 → 58.2}/install-sh +0 -0
  250. data/vendor/local/share/icu/{57.1 → 58.2}/mkinstalldirs +2 -0
  251. data/vendor/local/share/man/man1/derb.1 +4 -2
  252. data/vendor/local/share/man/man1/genbrk.1 +4 -2
  253. data/vendor/local/share/man/man1/gencfu.1 +3 -1
  254. data/vendor/local/share/man/man1/gencnval.1 +5 -3
  255. data/vendor/local/share/man/man1/gendict.1 +4 -2
  256. data/vendor/local/share/man/man1/genrb.1 +5 -3
  257. data/vendor/local/share/man/man1/icu-config.1 +4 -5
  258. data/vendor/local/share/man/man1/makeconv.1 +5 -3
  259. data/vendor/local/share/man/man1/pkgdata.1 +4 -2
  260. data/vendor/local/share/man/man1/uconv.1 +4 -2
  261. data/vendor/local/share/man/man8/genccode.8 +4 -2
  262. data/vendor/local/share/man/man8/gencmn.8 +5 -3
  263. data/vendor/local/share/man/man8/gensprep.8 +5 -3
  264. data/vendor/local/share/man/man8/icupkg.8 +3 -1
  265. metadata +22 -40
  266. data/vendor/local/include/layout/LEFontInstance.h +0 -524
  267. data/vendor/local/include/layout/LEGlyphFilter.h +0 -45
  268. data/vendor/local/include/layout/LEGlyphStorage.h +0 -546
  269. data/vendor/local/include/layout/LEInsertionList.h +0 -177
  270. data/vendor/local/include/layout/LELanguages.h +0 -109
  271. data/vendor/local/include/layout/LEScripts.h +0 -204
  272. data/vendor/local/include/layout/LESwaps.h +0 -100
  273. data/vendor/local/include/layout/LETableReference.h +0 -435
  274. data/vendor/local/include/layout/LETypes.h +0 -728
  275. data/vendor/local/include/layout/LayoutEngine.h +0 -518
  276. data/vendor/local/include/layout/ParagraphLayout.h +0 -747
  277. data/vendor/local/include/layout/RunArrays.h +0 -676
  278. data/vendor/local/include/layout/loengine.h +0 -225
  279. data/vendor/local/include/layout/playout.h +0 -466
  280. data/vendor/local/include/layout/plruns.h +0 -441
  281. data/vendor/local/lib/icuin57.dll +0 -0
  282. data/vendor/local/lib/icule.dll +0 -0
  283. data/vendor/local/lib/icule57.dll +0 -0
  284. data/vendor/local/lib/iculx.dll +0 -0
  285. data/vendor/local/lib/iculx57.dll +0 -0
  286. data/vendor/local/lib/icutu57.dll +0 -0
  287. data/vendor/local/lib/icuuc57.dll +0 -0
  288. data/vendor/local/lib/libicule.dll.a +0 -0
  289. data/vendor/local/lib/libiculx.dll.a +0 -0
  290. data/vendor/local/lib/pkgconfig/icu-le.pc +0 -38
  291. data/vendor/local/lib/pkgconfig/icu-lx.pc +0 -38
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  *****************************************************************************************
3
5
  * Copyright (C) 2016, International Business Machines
@@ -58,11 +60,13 @@ typedef enum UDateRelativeDateTimeFormatterStyle {
58
60
  */
59
61
  UDAT_STYLE_NARROW,
60
62
 
61
- /**
62
- * The number of styles.
63
- * @stable ICU 54
64
- */
65
- UDAT_STYLE_COUNT
63
+ #ifndef U_HIDE_DEPRECATED_API
64
+ /**
65
+ * One more than the highest normal UDateRelativeDateTimeFormatterStyle value.
66
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
67
+ */
68
+ UDAT_STYLE_COUNT
69
+ #endif // U_HIDE_DEPRECATED_API
66
70
  } UDateRelativeDateTimeFormatterStyle;
67
71
 
68
72
  #ifndef U_HIDE_DRAFT_API
@@ -162,11 +166,13 @@ typedef enum URelativeDateTimeUnit {
162
166
  * @draft ICU 57
163
167
  */
164
168
  UDAT_REL_UNIT_SATURDAY,
169
+ #ifndef U_HIDE_DEPRECATED_API
165
170
  /**
166
- * Count of URelativeDateTimeUnit values
167
- * @draft ICU 57
171
+ * One more than the highest normal URelativeDateTimeUnit value.
172
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
168
173
  */
169
174
  UDAT_REL_UNIT_COUNT
175
+ #endif // U_HIDE_DEPRECATED_API
170
176
  } URelativeDateTimeUnit;
171
177
  #endif /* U_HIDE_DRAFT_API */
172
178
 
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  *******************************************************************************
3
5
  * Copyright (C) 2002-2016, International Business Machines
@@ -474,6 +476,9 @@
474
476
  #define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
475
477
  #define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
476
478
  #define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
479
+ #define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close)
480
+ #define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
481
+ #define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
477
482
  #define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
478
483
  #define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
479
484
  #define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
@@ -1013,6 +1018,7 @@
1013
1018
  #define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
1014
1019
  #define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
1015
1020
  #define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
1021
+ #define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString)
1016
1022
  #define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
1017
1023
  #define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
1018
1024
  #define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close)
@@ -1355,6 +1361,7 @@
1355
1361
  #define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
1356
1362
  #define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
1357
1363
  #define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
1364
+ #define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache)
1358
1365
  #define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
1359
1366
  #define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
1360
1367
  #define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
@@ -1456,10 +1463,7 @@
1456
1463
  #define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
1457
1464
  #define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
1458
1465
  #define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
1459
- #define ures_getAllArrayItems U_ICU_ENTRY_POINT_RENAME(ures_getAllArrayItems)
1460
- #define ures_getAllArrayItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllArrayItemsWithFallback)
1461
- #define ures_getAllTableItems U_ICU_ENTRY_POINT_RENAME(ures_getAllTableItems)
1462
- #define ures_getAllTableItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllTableItemsWithFallback)
1466
+ #define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback)
1463
1467
  #define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
1464
1468
  #define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
1465
1469
  #define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
@@ -1607,13 +1611,20 @@
1607
1611
  #define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
1608
1612
  #define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
1609
1613
  #define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
1614
+ #define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2)
1615
+ #define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8)
1616
+ #define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString)
1610
1617
  #define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
1611
1618
  #define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
1612
1619
  #define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
1613
1620
  #define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
1621
+ #define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult)
1614
1622
  #define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
1615
1623
  #define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
1616
1624
  #define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
1625
+ #define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
1626
+ #define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
1627
+ #define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
1617
1628
  #define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
1618
1629
  #define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
1619
1630
  #define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
@@ -1625,6 +1636,7 @@
1625
1636
  #define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
1626
1637
  #define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics)
1627
1638
  #define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
1639
+ #define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult)
1628
1640
  #define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
1629
1641
  #define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
1630
1642
  #define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  ******************************************************************************
3
5
  * Copyright (C) 1997-2010, International Business Machines
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
5
  * Copyright (C) 1997-2016, International Business Machines
@@ -120,9 +122,13 @@ typedef enum {
120
122
  RES_INT_VECTOR=URES_INT_VECTOR,
121
123
  /** @deprecated ICU 2.6 Not used. */
122
124
  RES_RESERVED=15,
123
- #endif /* U_HIDE_DEPRECATED_API */
124
125
 
126
+ /**
127
+ * One more than the highest normal UResType value.
128
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
129
+ */
125
130
  URES_LIMIT = 16
131
+ #endif // U_HIDE_DEPRECATED_API
126
132
  } UResType;
127
133
 
128
134
  /*
@@ -1,6 +1,8 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
- * Copyright (C) 1997-2015, International Business Machines
5
+ * Copyright (C) 1997-2016, International Business Machines
4
6
  * Corporation and others. All Rights Reserved.
5
7
  **********************************************************************
6
8
  *
@@ -32,13 +34,13 @@
32
34
  * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
33
35
  * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
34
36
  *
35
- * Starting with ICU 3.6, constants for most ISO 15924 script codes
37
+ * In addition, constants for many ISO 15924 script codes
36
38
  * are included, for use with language tags, CLDR data, and similar.
37
39
  * Some of those codes are not used in the Unicode Character Database (UCD).
38
40
  * For example, there are no characters that have a UCD script property value of
39
41
  * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
40
42
  *
41
- * Private-use codes Qaaa..Qabx are not included.
43
+ * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
42
44
  *
43
45
  * Starting with ICU 55, script codes are only added when their scripts
44
46
  * have been or will certainly be encoded in Unicode,
@@ -424,24 +426,39 @@ typedef enum UScriptCode {
424
426
  /** @stable ICU 54 */
425
427
  USCRIPT_SIDDHAM = 166,/* Sidd */
426
428
 
427
- /**
428
- * One higher than the last script code constant.
429
- * This value increases as constants for script codes are added.
430
- *
431
- * There are constants for Unicode 7 script property values.
432
- * There are constants for ISO 15924 script codes assigned on or before 2013-10-12.
433
- * There are no constants for private use codes from Qaaa - Qabx
434
- * except as used in the UCD.
435
- *
436
- * @stable ICU 2.2
437
- */
438
- USCRIPT_CODE_LIMIT = 167
429
+ /** @stable ICU 58 */
430
+ USCRIPT_ADLAM = 167,/* Adlm */
431
+ /** @stable ICU 58 */
432
+ USCRIPT_BHAIKSUKI = 168,/* Bhks */
433
+ /** @stable ICU 58 */
434
+ USCRIPT_MARCHEN = 169,/* Marc */
435
+ /** @stable ICU 58 */
436
+ USCRIPT_NEWA = 170,/* Newa */
437
+ /** @stable ICU 58 */
438
+ USCRIPT_OSAGE = 171,/* Osge */
439
+
440
+ /** @stable ICU 58 */
441
+ USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
442
+ /** @stable ICU 58 */
443
+ USCRIPT_JAMO = 173,/* Jamo */
444
+ /** @stable ICU 58 */
445
+ USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
446
+
447
+ #ifndef U_HIDE_DEPRECATED_API
448
+ /**
449
+ * One more than the highest normal UScriptCode value.
450
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
451
+ *
452
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
453
+ */
454
+ USCRIPT_CODE_LIMIT = 175
455
+ #endif // U_HIDE_DEPRECATED_API
439
456
  } UScriptCode;
440
457
 
441
458
  /**
442
- * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
459
+ * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
443
460
  * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
444
- * Fills in USCRIPT_LATIN given "en" OR "en_US"
461
+ * Fills in USCRIPT_LATIN given "en" OR "en_US"
445
462
  * If the required capacity is greater than the capacity of the destination buffer,
446
463
  * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
447
464
  *
@@ -454,10 +471,10 @@ typedef enum UScriptCode {
454
471
  * @param fillIn the UScriptCode buffer to fill in the script code
455
472
  * @param capacity the capacity (size) of the UScriptCode buffer passed in.
456
473
  * @param err the error status code.
457
- * @return The number of script codes filled in the buffer passed in
474
+ * @return The number of script codes filled in the buffer passed in
458
475
  * @stable ICU 2.4
459
476
  */
460
- U_STABLE int32_t U_EXPORT2
477
+ U_STABLE int32_t U_EXPORT2
461
478
  uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
462
479
 
463
480
  /**
@@ -470,7 +487,7 @@ uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capac
470
487
  * or NULL if scriptCode is invalid
471
488
  * @stable ICU 2.4
472
489
  */
473
- U_STABLE const char* U_EXPORT2
490
+ U_STABLE const char* U_EXPORT2
474
491
  uscript_getName(UScriptCode scriptCode);
475
492
 
476
493
  /**
@@ -482,18 +499,18 @@ uscript_getName(UScriptCode scriptCode);
482
499
  * @return short script name (4-letter code), or NULL if scriptCode is invalid
483
500
  * @stable ICU 2.4
484
501
  */
485
- U_STABLE const char* U_EXPORT2
502
+ U_STABLE const char* U_EXPORT2
486
503
  uscript_getShortName(UScriptCode scriptCode);
487
504
 
488
505
  /**
489
506
  * Gets the script code associated with the given codepoint.
490
- * Returns USCRIPT_MALAYALAM given 0x0D02
507
+ * Returns USCRIPT_MALAYALAM given 0x0D02
491
508
  * @param codepoint UChar32 codepoint
492
509
  * @param err the error status code.
493
- * @return The UScriptCode, or 0 if codepoint is invalid
510
+ * @return The UScriptCode, or 0 if codepoint is invalid
494
511
  * @stable ICU 2.4
495
512
  */
496
- U_STABLE UScriptCode U_EXPORT2
513
+ U_STABLE UScriptCode U_EXPORT2
497
514
  uscript_getScript(UChar32 codepoint, UErrorCode *err);
498
515
 
499
516
  /**
@@ -503,9 +520,6 @@ uscript_getScript(UChar32 codepoint, UErrorCode *err);
503
520
  *
504
521
  * Some characters are commonly used in multiple scripts.
505
522
  * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
506
- *
507
- * The Script_Extensions property is provisional. It may be modified or removed
508
- * in future versions of the Unicode Standard, and thus in ICU.
509
523
  * @param c code point
510
524
  * @param sc script code
511
525
  * @return TRUE if sc is in Script_Extensions(c)
@@ -532,8 +546,6 @@ uscript_hasScript(UChar32 c, UScriptCode sc);
532
546
  * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
533
547
  * (Usual ICU buffer handling behavior.)
534
548
  *
535
- * The Script_Extensions property is provisional. It may be modified or removed
536
- * in future versions of the Unicode Standard, and thus in ICU.
537
549
  * @param c code point
538
550
  * @param scripts output script code array
539
551
  * @param capacity capacity of the scripts array
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
5
  * Copyright (C) 2001-2011,2014 IBM and others. All rights reserved.
@@ -188,11 +190,13 @@ typedef enum {
188
190
  */
189
191
  USEARCH_ELEMENT_COMPARISON = 2,
190
192
 
193
+ #ifndef U_HIDE_DEPRECATED_API
191
194
  /**
192
- * Count of attribute types
193
- * @stable ICU 2.4
195
+ * One more than the highest normal USearchAttribute value.
196
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
194
197
  */
195
198
  USEARCH_ATTRIBUTE_COUNT = 3
199
+ #endif // U_HIDE_DEPRECATED_API
196
200
  } USearchAttribute;
197
201
 
198
202
  /**
@@ -262,11 +266,13 @@ typedef enum {
262
266
  */
263
267
  USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD,
264
268
 
269
+ #ifndef U_HIDE_DEPRECATED_API
265
270
  /**
266
- * Count of attribute values
267
- * @stable ICU 2.4
271
+ * One more than the highest normal USearchAttributeValue value.
272
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
268
273
  */
269
274
  USEARCH_ATTRIBUTE_VALUE_COUNT
275
+ #endif // U_HIDE_DEPRECATED_API
270
276
  } USearchAttributeValue;
271
277
 
272
278
  /* open and close ------------------------------------------------------ */
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  *******************************************************************************
3
5
  *
@@ -196,11 +198,13 @@ typedef enum USetSpanCondition {
196
198
  * @stable ICU 3.8
197
199
  */
198
200
  USET_SPAN_SIMPLE = 2,
201
+ #ifndef U_HIDE_DEPRECATED_API
199
202
  /**
200
203
  * One more than the last span condition.
201
- * @stable ICU 3.8
204
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
202
205
  */
203
206
  USET_SPAN_CONDITION_COUNT
207
+ #endif // U_HIDE_DEPRECATED_API
204
208
  } USetSpanCondition;
205
209
 
206
210
  enum {
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  **********************************************************************
3
5
  * Copyright (c) 2002-2014, International Business Machines
@@ -1,3 +1,5 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  ******************************************************************************
3
5
  *
@@ -1,6 +1,8 @@
1
+ // Copyright (C) 2016 and later: Unicode, Inc. and others.
2
+ // License & terms of use: http://www.unicode.org/copyright.html
1
3
  /*
2
4
  ***************************************************************************
3
- * Copyright (C) 2008-2015, International Business Machines Corporation
5
+ * Copyright (C) 2008-2016, International Business Machines Corporation
4
6
  * and others. All Rights Reserved.
5
7
  ***************************************************************************
6
8
  * file name: uspoof.h
@@ -35,123 +37,350 @@
35
37
  * \file
36
38
  * \brief Unicode Security and Spoofing Detection, C API.
37
39
  *
38
- * These functions are intended to check strings, typically
39
- * identifiers of some type, such as URLs, for the presence of
40
- * characters that are likely to be visually confusing -
41
- * for cases where the displayed form of an identifier may
42
- * not be what it appears to be.
43
- *
44
- * Unicode Technical Report #36, http://unicode.org/reports/tr36, and
45
- * Unicode Technical Standard #39, http://unicode.org/reports/tr39
46
- * "Unicode security considerations", give more background on
47
- * security an spoofing issues with Unicode identifiers.
48
- * The tests and checks provided by this module implement the recommendations
49
- * from those Unicode documents.
50
- *
51
- * The tests available on identifiers fall into two general categories:
52
- * -# Single identifier tests. Check whether an identifier is
53
- * potentially confusable with any other string, or is suspicious
54
- * for other reasons.
55
- * -# Two identifier tests. Check whether two specific identifiers are confusable.
56
- * This does not consider whether either of strings is potentially
57
- * confusable with any string other than the exact one specified.
58
- *
59
- * The steps to perform confusability testing are
60
- * -# Open a USpoofChecker.
61
- * -# Configure the USPoofChecker for the desired set of tests. The tests that will
62
- * be performed are specified by a set of USpoofChecks flags.
63
- * -# Perform the checks using the pre-configured USpoofChecker. The results indicate
64
- * which (if any) of the selected tests have identified possible problems with the identifier.
65
- * Results are reported as a set of USpoofChecks flags; this mirrors the form in which
66
- * the set of tests to perform was originally specified to the USpoofChecker.
67
- *
68
- * A USpoofChecker may be used repeatedly to perform checks on any number of identifiers.
69
- *
70
- * Thread Safety: The test functions for checking a single identifier, or for testing
71
- * whether two identifiers are possible confusable, are thread safe.
72
- * They may called concurrently, from multiple threads, using the same USpoofChecker instance.
73
- *
74
- * More generally, the standard ICU thread safety rules apply: functions that take a
75
- * const USpoofChecker parameter are thread safe. Those that take a non-const
76
- * USpoofChecier are not thread safe.
77
- *
78
- *
79
- * Descriptions of the available checks.
80
- *
81
- * When testing whether pairs of identifiers are confusable, with the uspoof_areConfusable()
82
- * family of functions, the relevant tests are
83
- *
84
- * -# USPOOF_SINGLE_SCRIPT_CONFUSABLE: All of the characters from the two identifiers are
85
- * from a single script, and the two identifiers are visually confusable.
86
- * -# USPOOF_MIXED_SCRIPT_CONFUSABLE: At least one of the identifiers contains characters
87
- * from more than one script, and the two identifiers are visually confusable.
88
- * -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: Each of the two identifiers is of a single script, but
89
- * the two identifiers are from different scripts, and they are visually confusable.
90
- *
91
- * The safest approach is to enable all three of these checks as a group.
92
- *
93
- * USPOOF_ANY_CASE is a modifier for the above tests. If the identifiers being checked can
94
- * be of mixed case and are used in a case-sensitive manner, this option should be specified.
95
- *
96
- * If the identifiers being checked are used in a case-insensitive manner, and if they are
97
- * displayed to users in lower-case form only, the USPOOF_ANY_CASE option should not be
98
- * specified. Confusabality issues involving upper case letters will not be reported.
99
- *
100
- * When performing tests on a single identifier, with the uspoof_check() family of functions,
101
- * the relevant tests are:
102
- *
103
- * -# USPOOF_MIXED_SCRIPT_CONFUSABLE: the identifier contains characters from multiple
104
- * scripts, and there exists an identifier of a single script that is visually confusable.
105
- * -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: the identifier consists of characters from a single
106
- * script, and there exists a visually confusable identifier.
107
- * The visually confusable identifier also consists of characters from a single script.
108
- * but not the same script as the identifier being checked.
109
- * -# USPOOF_ANY_CASE: modifies the mixed script and whole script confusables tests. If
110
- * specified, the checks will consider confusable characters of any case. If this flag is not
111
- * set, the test is performed assuming case folded identifiers.
112
- * -# USPOOF_SINGLE_SCRIPT: check that the identifier contains only characters from a
113
- * single script. (Characters from the 'common' and 'inherited' scripts are ignored.)
114
- * This is not a test for confusable identifiers
115
- * -# USPOOF_INVISIBLE: check an identifier for the presence of invisible characters,
116
- * such as zero-width spaces, or character sequences that are
117
- * likely not to display, such as multiple occurrences of the same
118
- * non-spacing mark. This check does not test the input string as a whole
119
- * for conformance to any particular syntax for identifiers.
120
- * -# USPOOF_CHAR_LIMIT: check that an identifier contains only characters from a specified set
121
- * of acceptable characters. See uspoof_setAllowedChars() and
122
- * uspoof_setAllowedLocales().
123
- *
124
- * Note on Scripts:
125
- * Characters from the Unicode Scripts "Common" and "Inherited" are ignored when considering
126
- * the script of an identifier. Common characters include digits and symbols that
127
- * are normally used with text from more than one script.
128
- *
129
- * Identifier Skeletons: A skeleton is a transformation of an identifier, such that
130
- * all identifiers that are confusable with each other have the same skeleton.
131
- * Using skeletons, it is possible to build a dictionary data structure for
132
- * a set of identifiers, and then quickly test whether a new identifier is
133
- * confusable with an identifier already in the set. The uspoof_getSkeleton()
134
- * family of functions will produce the skeleton from an identifier.
135
- *
136
- * Note that skeletons are not guaranteed to be stable between versions
137
- * of Unicode or ICU, so an applications should not rely on creating a permanent,
138
- * or difficult to update, database of skeletons. Instabilities result from
139
- * identifying new pairs or sequences of characters that are visually
140
- * confusable, and thus must be mapped to the same skeleton character(s).
141
- *
142
- * Skeletons are computed using the algorithm and data describe in Unicode UAX 39.
143
- * The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
144
- * were still problematic, and discouraged from use in [Uniocde] 7.0.
145
- * They were thus removed from version 8.0"
146
- *
147
- * In light of this, the default mapping data included with ICU 55 uses the
148
- * Unicode 7 MA (Multi script Any case) table data for the other type options
149
- * (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
40
+ * <p>
41
+ * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
42
+ * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
43
+ *
44
+ * <ol>
45
+ * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
46
+ * &quot;&Eta;arvest&quot;, where the second string starts with the Greek capital letter Eta.</li>
47
+ * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
48
+ * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
49
+ * </ol>
50
+ *
51
+ * <p>
52
+ * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
53
+ * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
54
+ * content filters.
55
+ *
56
+ * <p>
57
+ * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
58
+ *
59
+ * <h2>Confusables</h2>
60
+ *
61
+ * <p>
62
+ * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
63
+ *
64
+ * \code{.c}
65
+ * UErrorCode status = U_ZERO_ERROR;
66
+ * UChar* str1 = (UChar*) u"Harvest";
67
+ * UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA
68
+ *
69
+ * USpoofChecker* sc = uspoof_open(&status);
70
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
71
+ *
72
+ * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
73
+ * UBool result = bitmask != 0;
74
+ * // areConfusable: 1 (status: U_ZERO_ERROR)
75
+ * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
76
+ * uspoof_close(sc);
77
+ * \endcode
78
+ *
79
+ * <p>
80
+ * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
81
+ * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
82
+ * confusability test; and the following line extracts the result out of the return value. For best performance,
83
+ * the instance should be created once (e.g., upon application startup), and the efficient
84
+ * {@link uspoof_areConfusable} method can be used at runtime.
85
+ *
86
+ * <p>
87
+ * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call
88
+ * {@link uspoof_close} when the object goes out of scope:
89
+ *
90
+ * \code{.cpp}
91
+ * UErrorCode status = U_ZERO_ERROR;
92
+ * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
93
+ * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
94
+ * // ...
95
+ * \endcode
96
+ *
97
+ * <p>
98
+ * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
99
+ * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
100
+ * the following snippet is equivalent to the example above:
101
+ *
102
+ * \code{.c}
103
+ * UErrorCode status = U_ZERO_ERROR;
104
+ * UChar* str1 = (UChar*) u"Harvest";
105
+ * UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA
106
+ *
107
+ * USpoofChecker* sc = uspoof_open(&status);
108
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
109
+ *
110
+ * // Get skeleton 1
111
+ * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
112
+ * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
113
+ * status = U_ZERO_ERROR;
114
+ * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
115
+ *
116
+ * // Get skeleton 2
117
+ * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
118
+ * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
119
+ * status = U_ZERO_ERROR;
120
+ * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
121
+ *
122
+ * // Are the skeletons the same?
123
+ * UBool result = u_strcmp(skel1, skel2) == 0;
124
+ * // areConfusable: 1 (status: U_ZERO_ERROR)
125
+ * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
126
+ * uspoof_close(sc);
127
+ * free(skel1);
128
+ * free(skel2);
129
+ * \endcode
130
+ *
131
+ * <p>
132
+ * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
133
+ * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
134
+ *
135
+ * \code{.c}
136
+ * UErrorCode status = U_ZERO_ERROR;
137
+ * #define DICTIONARY_LENGTH 2
138
+ * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
139
+ * UChar* skeletons[DICTIONARY_LENGTH];
140
+ * UChar* str = (UChar*) u"1orern";
141
+ *
142
+ * // Setup:
143
+ * USpoofChecker* sc = uspoof_open(&status);
144
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
145
+ * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
146
+ * UChar* word = dictionary[i];
147
+ * int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
148
+ * skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
149
+ * status = U_ZERO_ERROR;
150
+ * uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
151
+ * }
152
+ *
153
+ * // Live Check:
154
+ * {
155
+ * int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
156
+ * UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
157
+ * status = U_ZERO_ERROR;
158
+ * uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
159
+ * UBool result = FALSE;
160
+ * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
161
+ * result = u_strcmp(skel, skeletons[i]) == 0;
162
+ * if (result == TRUE) { break; }
163
+ * }
164
+ * // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
165
+ * printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
166
+ * free(skel);
167
+ * }
168
+ *
169
+ * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
170
+ * free(skeletons[i]);
171
+ * }
172
+ * uspoof_close(sc);
173
+ * \endcode
174
+ *
175
+ * <p>
176
+ * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
177
+ * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
178
+ * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
179
+ *
180
+ * <h2>Spoof Detection</h2>
181
+ *
182
+ * <p>
183
+ * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
184
+ * string:
185
+ *
186
+ * \code{.c}
187
+ * UErrorCode status = U_ZERO_ERROR;
188
+ * UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A
189
+ *
190
+ * // Get the default set of allowable characters:
191
+ * USet* allowed = uset_openEmpty();
192
+ * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
193
+ * uset_addAll(allowed, uspoof_getInclusionSet(&status));
194
+ *
195
+ * USpoofChecker* sc = uspoof_open(&status);
196
+ * uspoof_setAllowedChars(sc, allowed, &status);
197
+ * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
198
+ *
199
+ * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
200
+ * UBool result = bitmask != 0;
201
+ * // fails checks: 1 (status: U_ZERO_ERROR)
202
+ * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
203
+ * uspoof_close(sc);
204
+ * uset_close(allowed);
205
+ * \endcode
206
+ *
207
+ * <p>
208
+ * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
209
+ * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
210
+ * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
211
+ *
212
+ * <p>
213
+ * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
214
+ * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
215
+ *
216
+ * <p>
217
+ * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
218
+ * is available in the returned bitmask. For complete information, use the {@link uspoof_check2} class of functions
219
+ * with a {@link USpoofCheckResult} parameter:
220
+ *
221
+ * \code{.c}
222
+ * UErrorCode status = U_ZERO_ERROR;
223
+ * UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A
224
+ *
225
+ * // Get the default set of allowable characters:
226
+ * USet* allowed = uset_openEmpty();
227
+ * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
228
+ * uset_addAll(allowed, uspoof_getInclusionSet(&status));
229
+ *
230
+ * USpoofChecker* sc = uspoof_open(&status);
231
+ * uspoof_setAllowedChars(sc, allowed, &status);
232
+ * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
233
+ *
234
+ * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
235
+ * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
236
+ *
237
+ * int32_t failures1 = bitmask;
238
+ * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
239
+ * assert(failures1 == failures2);
240
+ * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
241
+ * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
242
+ *
243
+ * // Cleanup:
244
+ * uspoof_close(sc);
245
+ * uset_close(allowed);
246
+ * uspoof_closeCheckResult(checkResult);
247
+ * \endcode
248
+ *
249
+ * C++ users can take advantage of a few syntactical conveniences. The following snippet is functionally
250
+ * equivalent to the one above:
251
+ *
252
+ * \code{.cpp}
253
+ * UErrorCode status = U_ZERO_ERROR;
254
+ * UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A
255
+ *
256
+ * // Get the default set of allowable characters:
257
+ * UnicodeSet allowed;
258
+ * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
259
+ * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
260
+ *
261
+ * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
262
+ * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
263
+ * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
264
+ *
265
+ * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
266
+ * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
267
+ *
268
+ * int32_t failures1 = bitmask;
269
+ * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
270
+ * assert(failures1 == failures2);
271
+ * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
272
+ * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
273
+ *
274
+ * // Explicit cleanup not necessary.
275
+ * \endcode
276
+ *
277
+ * <p>
278
+ * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
279
+ * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
280
+ *
281
+ * <ul>
282
+ * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
283
+ * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
284
+ * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
285
+ * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
286
+ * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
287
+ * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
288
+ * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
289
+ * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
290
+ * </ul>
291
+ *
292
+ * <p>
293
+ * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
294
+ * INVISIBLE and MIXED_NUMBERS conditions, you could do:
295
+ *
296
+ * \code{.c}
297
+ * UErrorCode status = U_ZERO_ERROR;
298
+ * UChar* str = (UChar*) u"8\u09EA"; // 8 mixed with U+09EA BENGALI DIGIT FOUR
299
+ *
300
+ * USpoofChecker* sc = uspoof_open(&status);
301
+ * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
302
+ *
303
+ * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
304
+ * UBool result = bitmask != 0;
305
+ * // fails checks: 1 (status: U_ZERO_ERROR)
306
+ * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
307
+ * uspoof_close(sc);
308
+ * \endcode
309
+ *
310
+ * <p>
311
+ * Here is an example in C++ showing how to compute the restriction level of a string:
312
+ *
313
+ * \code{.cpp}
314
+ * UErrorCode status = U_ZERO_ERROR;
315
+ * UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A
316
+ *
317
+ * // Get the default set of allowable characters:
318
+ * UnicodeSet allowed;
319
+ * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
320
+ * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
321
+ *
322
+ * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
323
+ * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
324
+ * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
325
+ * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
326
+ *
327
+ * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
328
+ * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
329
+ *
330
+ * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
331
+ * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
332
+ * assert((restrictionLevel & bitmask) == restrictionLevel);
333
+ * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
334
+ * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
335
+ * \endcode
336
+ *
337
+ * <p>
338
+ * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE. Since
339
+ * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
340
+ *
341
+ * <p>
342
+ * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
343
+ * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
344
+ * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
345
+ * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
346
+ * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
347
+ * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
348
+ * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
349
+ * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
350
+ * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
351
+ * scripts.
352
+ *
353
+ * <h2>Additional Information</h2>
354
+ *
355
+ * <p>
356
+ * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
357
+ *
358
+ * <p>
359
+ * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
360
+ * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
361
+ * using the same USpoofChecker instance.
362
+ *
363
+ * <p>
364
+ * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
365
+ * thread safe. Those that take a non-const USpoofChecker are not thread safe.
366
+ *
367
+ * @stable ICU 4.6
150
368
  */
151
369
 
152
370
  struct USpoofChecker;
153
371
  typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */
154
372
 
373
+ #ifndef U_HIDE_DRAFT_API
374
+ /**
375
+ * @see uspoof_openCheckResult
376
+ */
377
+ struct USpoofCheckResult;
378
+ /**
379
+ * @see uspoof_openCheckResult
380
+ */
381
+ typedef struct USpoofCheckResult USpoofCheckResult;
382
+ #endif /* U_HIDE_DRAFT_API */
383
+
155
384
  /**
156
385
  * Enum for the kinds of checks that USpoofChecker can perform.
157
386
  * These enum values are used both to select the set of checks that
@@ -160,45 +389,61 @@ typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker
160
389
  * @stable ICU 4.2
161
390
  */
162
391
  typedef enum USpoofChecks {
163
- /** Single script confusable test.
164
- * When testing whether two identifiers are confusable, report that they are if
165
- * both are from the same script and they are visually confusable.
166
- * Note: this test is not applicable to a check of a single identifier.
167
- */
392
+ /**
393
+ * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
394
+ * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
395
+ * 4.
396
+ *
397
+ * @see uspoof_areConfusable
398
+ * @stable ICU 4.2
399
+ */
168
400
  USPOOF_SINGLE_SCRIPT_CONFUSABLE = 1,
169
401
 
170
- /** Mixed script confusable test.
171
- * When checking a single identifier, report a problem if
172
- * the identifier contains multiple scripts, and
173
- * is confusable with some other identifier in a single script
174
- * When testing whether two identifiers are confusable, report that they are if
175
- * the two IDs are visually confusable,
176
- * and at least one contains characters from more than one script.
402
+ /**
403
+ * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
404
+ * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
405
+ * 39 section 4.
406
+ *
407
+ * @see uspoof_areConfusable
408
+ * @stable ICU 4.2
177
409
  */
178
410
  USPOOF_MIXED_SCRIPT_CONFUSABLE = 2,
179
411
 
180
- /** Whole script confusable test.
181
- * When checking a single identifier, report a problem if
182
- * The identifier is of a single script, and
183
- * there exists a confusable identifier in another script.
184
- * When testing whether two identifiers are confusable, report that they are if
185
- * each is of a single script,
186
- * the scripts of the two identifiers are different, and
187
- * the identifiers are visually confusable.
412
+ /**
413
+ * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
414
+ * that the two strings are visually confusable and that they are not from the same script but both of them are
415
+ * single-script strings, according to UTS 39 section 4.
416
+ *
417
+ * @see uspoof_areConfusable
418
+ * @stable ICU 4.2
188
419
  */
189
420
  USPOOF_WHOLE_SCRIPT_CONFUSABLE = 4,
190
-
191
- /** Any Case Modifier for confusable identifier tests.
192
- If specified, consider all characters, of any case, when looking for confusables.
193
- If USPOOF_ANY_CASE is not specified, identifiers being checked are assumed to have been
194
- case folded. Upper case confusable characters will not be checked.
195
- Selects between Lower Case Confusable and
196
- Any Case Confusable. */
421
+
422
+ #ifndef U_HIDE_DRAFT_API
423
+ /**
424
+ * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables. You may set
425
+ * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
426
+ * make {@link uspoof_areConfusable} return only those types of confusables.
427
+ *
428
+ * @see uspoof_areConfusable
429
+ * @see uspoof_getSkeleton
430
+ * @draft ICU 58
431
+ */
432
+ USPOOF_CONFUSABLE = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
433
+ #endif /* U_HIDE_DRAFT_API */
434
+
435
+ #ifndef U_HIDE_DEPRECATED_API
436
+ /**
437
+ * This flag is deprecated and no longer affects the behavior of SpoofChecker.
438
+ *
439
+ * @deprecated ICU 58 Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
440
+ */
197
441
  USPOOF_ANY_CASE = 8,
442
+ #endif /* U_HIDE_DEPRECATED_API */
198
443
 
199
444
  /**
200
445
  * Check that an identifier is no looser than the specified RestrictionLevel.
201
- * The default if uspoof_setRestrctionLevel() is not called is HIGHLY_RESTRICTIVE.
446
+ * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
202
447
  *
203
448
  * If USPOOF_AUX_INFO is enabled the actual restriction level of the
204
449
  * identifier being tested will also be returned by uspoof_check().
@@ -211,7 +456,7 @@ typedef enum USpoofChecks {
211
456
  */
212
457
  USPOOF_RESTRICTION_LEVEL = 16,
213
458
 
214
- #ifndef U_HIDE_DEPRECATED_API
459
+ #ifndef U_HIDE_DEPRECATED_API
215
460
  /** Check that an identifier contains only characters from a
216
461
  * single script (plus chars from the common and inherited scripts.)
217
462
  * Applies to checks of a single identifier only.
@@ -219,7 +464,7 @@ typedef enum USpoofChecks {
219
464
  */
220
465
  USPOOF_SINGLE_SCRIPT = USPOOF_RESTRICTION_LEVEL,
221
466
  #endif /* U_HIDE_DEPRECATED_API */
222
-
467
+
223
468
  /** Check an identifier for the presence of invisible characters,
224
469
  * such as zero-width spaces, or character sequences that are
225
470
  * likely not to display, such as multiple occurrences of the same
@@ -229,97 +474,119 @@ typedef enum USpoofChecks {
229
474
  USPOOF_INVISIBLE = 32,
230
475
 
231
476
  /** Check that an identifier contains only characters from a specified set
232
- * of acceptable characters. See uspoof_setAllowedChars() and
233
- * uspoof_setAllowedLocales().
477
+ * of acceptable characters. See {@link uspoof_setAllowedChars} and
478
+ * {@link uspoof_setAllowedLocales}. Note that a string that fails this check
479
+ * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
234
480
  */
235
481
  USPOOF_CHAR_LIMIT = 64,
236
482
 
237
483
  /**
238
- * Check that an identifier does not include decimal digits from
239
- * more than one numbering system.
240
- *
484
+ * Check that an identifier does not mix numbers from different numbering systems.
485
+ * For more information, see UTS 39 section 5.3.
486
+ *
241
487
  * @stable ICU 51
242
488
  */
243
489
  USPOOF_MIXED_NUMBERS = 128,
244
490
 
245
491
  /**
246
492
  * Enable all spoof checks.
247
- *
493
+ *
248
494
  * @stable ICU 4.6
249
495
  */
250
496
  USPOOF_ALL_CHECKS = 0xFFFF,
251
497
 
252
498
  /**
253
499
  * Enable the return of auxiliary (non-error) information in the
254
- * upper bits of the check results value.
500
+ * upper bits of the check results value.
255
501
  *
256
- * If this "check" is not enabled, the results of uspoof_check() will be zero when an
257
- * identifier passes all of the enabled checks.
502
+ * If this "check" is not enabled, the results of {@link uspoof_check} will be
503
+ * zero when an identifier passes all of the enabled checks.
258
504
  *
259
- * If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will be zero
260
- * when an identifier passes all checks.
505
+ * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
506
+ * be zero when an identifier passes all checks.
261
507
  *
262
508
  * @stable ICU 51
263
509
  */
264
510
  USPOOF_AUX_INFO = 0x40000000
265
511
 
266
512
  } USpoofChecks;
267
-
268
-
513
+
514
+
269
515
  /**
270
- * Constants from UAX #39 for use in setRestrictionLevel(), and
516
+ * Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and
271
517
  * for returned identifier restriction levels in check results.
518
+ *
272
519
  * @stable ICU 51
520
+ *
521
+ * @see uspoof_setRestrictionLevel
522
+ * @see uspoof_check
273
523
  */
274
524
  typedef enum URestrictionLevel {
275
525
  /**
276
- * Only ASCII characters: U+0000..U+007F
277
- *
526
+ * All characters in the string are in the identifier profile and all characters in the string are in the
527
+ * ASCII range.
528
+ *
278
529
  * @stable ICU 51
279
530
  */
280
531
  USPOOF_ASCII = 0x10000000,
281
532
  /**
282
- * All characters in each identifier must be from a single script.
283
- *
284
- * @stable ICU 53
285
- */
533
+ * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
534
+ * the string is single-script, according to the definition in UTS 39 section 5.1.
535
+ *
536
+ * @stable ICU 53
537
+ */
286
538
  USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
287
539
  /**
288
- * All characters in each identifier must be from a single script, or from the combinations: Latin + Han +
289
- * Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the
290
- * vast majority of Latin-script users; also that TR36 has ASCII instead of Latin.
291
- *
540
+ * The string classifies as Single Script, or all characters in the string are in the identifier profile and
541
+ * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
542
+ * section 5.1:
543
+ * <ul>
544
+ * <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
545
+ * <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
546
+ * <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
547
+ * </ul>
548
+ * This is the default restriction in ICU.
549
+ *
292
550
  * @stable ICU 51
293
551
  */
294
552
  USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
295
553
  /**
296
- * Allow Latin with other scripts except Cyrillic, Greek, Cherokee Otherwise, the same as Highly Restrictive
297
- *
554
+ * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
555
+ * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
556
+ * Greek, and Cherokee.
557
+ *
298
558
  * @stable ICU 51
299
559
  */
300
560
  USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
301
561
  /**
302
- * Allow arbitrary mixtures of scripts. Otherwise, the same as Moderately Restrictive.
303
- *
562
+ * All characters in the string are in the identifier profile. Allow arbitrary mixtures of scripts.
563
+ *
304
564
  * @stable ICU 51
305
565
  */
306
566
  USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
307
567
  /**
308
568
  * Any valid identifiers, including characters outside of the Identifier Profile.
309
- *
569
+ *
310
570
  * @stable ICU 51
311
571
  */
312
572
  USPOOF_UNRESTRICTIVE = 0x60000000,
313
573
  /**
314
- * Mask for selecting the Restriction Level bits from the return value of uspoof_check().
315
- *
316
- * @stable ICU 53
317
- */
318
- USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000
574
+ * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
575
+ *
576
+ * @stable ICU 53
577
+ */
578
+ USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
579
+ #ifndef U_HIDE_INTERNAL_API
580
+ /**
581
+ * An undefined restriction level.
582
+ * @internal
583
+ */
584
+ USPOOF_UNDEFINED_RESTRICTIVE = -1
585
+ #endif /* U_HIDE_INTERNAL_API */
319
586
  } URestrictionLevel;
320
587
 
321
588
  /**
322
- * Create a Unicode Spoof Checker, configured to perform all
589
+ * Create a Unicode Spoof Checker, configured to perform all
323
590
  * checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
324
591
  * Note that additional checks may be added in the future,
325
592
  * resulting in the changes to the default checking behavior.
@@ -359,10 +626,10 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
359
626
 
360
627
  /**
361
628
  * Open a Spoof Checker from the source form of the spoof data.
362
- * The two inputs correspond to the Unicode data files confusables.txt
363
- * and confusablesWholeScript.txt as described in Unicode UAX #39.
364
- * The syntax of the source data is as described in UAX #39 for
365
- * these files, and the content of these files is acceptable input.
629
+ * The input corresponds to the Unicode data file confusables.txt
630
+ * as described in Unicode UAX #39. The syntax of the source data
631
+ * is as described in UAX #39 for this file, and the content of
632
+ * this file is acceptable input.
366
633
  *
367
634
  * The character encoding of the (char *) input text is UTF-8.
368
635
  *
@@ -371,10 +638,9 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
371
638
  * @param confusablesLen The length of the confusables text, or -1 if the
372
639
  * input string is zero terminated.
373
640
  * @param confusablesWholeScript
374
- * a pointer to the whole script confusables definitions,
375
- * as found in the file confusablesWholeScript.txt from unicode.org.
376
- * @param confusablesWholeScriptLen The length of the whole script confusables text, or
377
- * -1 if the input string is zero terminated.
641
+ * Deprecated in ICU 58. No longer used.
642
+ * @param confusablesWholeScriptLen
643
+ * Deprecated in ICU 58. No longer used.
378
644
  * @param errType In the event of an error in the input, indicates
379
645
  * which of the input files contains the error.
380
646
  * The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
@@ -435,8 +701,33 @@ uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
435
701
 
436
702
 
437
703
  /**
438
- * Specify the set of checks that will be performed by the check
439
- * functions of this Spoof Checker.
704
+ * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
705
+ * overwrites any checks that may have already been enabled. By default, all checks are enabled.
706
+ *
707
+ * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
708
+ * example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and
709
+ * also strings that contain digits from mixed numbering systems:
710
+ *
711
+ * <pre>
712
+ * {@code
713
+ * uspoof_setChecks(USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS);
714
+ * }
715
+ * </pre>
716
+ *
717
+ * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
718
+ * ALL_CHECKS. For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
719
+ * it is good practice to disable the CONFUSABLE check:
720
+ *
721
+ * <pre>
722
+ * {@code
723
+ * uspoof_setChecks(USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE);
724
+ * }
725
+ * </pre>
726
+ *
727
+ * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
728
+ * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
729
+ * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
730
+ * methods.
440
731
  *
441
732
  * @param sc The USpoofChecker
442
733
  * @param checks The set of checks that this spoof checker will perform.
@@ -451,7 +742,7 @@ uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
451
742
 
452
743
  /**
453
744
  * Get the set of checks that this Spoof Checker has been configured to perform.
454
- *
745
+ *
455
746
  * @param sc The USpoofChecker
456
747
  * @param status The error code, set if this function encounters a problem.
457
748
  * @return The set of checks that this spoof checker will perform.
@@ -464,19 +755,22 @@ U_STABLE int32_t U_EXPORT2
464
755
  uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
465
756
 
466
757
  /**
467
- * Set the loosest restriction level allowed. The default if this function
468
- * is not called is HIGHLY_RESTRICTIVE.
469
- * Calling this function also enables the RESTRICTION_LEVEL check.
470
- * @param restrictionLevel The loosest restriction level allowed.
471
- * @see URestrictionLevel
472
- * @stable ICU 51
473
- */
758
+ * Set the loosest restriction level allowed for strings. The default if this is not called is
759
+ * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
760
+ * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. To customize which checks are
761
+ * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
762
+ *
763
+ * @param sc The USpoofChecker
764
+ * @param restrictionLevel The loosest restriction level allowed.
765
+ * @see URestrictionLevel
766
+ * @stable ICU 51
767
+ */
474
768
  U_STABLE void U_EXPORT2
475
769
  uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
476
770
 
477
771
 
478
772
  /**
479
- * Get the Restriction Level that will be tested if the checks include RESTRICTION_LEVEL.
773
+ * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
480
774
  *
481
775
  * @return The restriction level
482
776
  * @see URestrictionLevel
@@ -486,7 +780,7 @@ U_STABLE URestrictionLevel U_EXPORT2
486
780
  uspoof_getRestrictionLevel(const USpoofChecker *sc);
487
781
 
488
782
  /**
489
- * Limit characters that are acceptable in identifiers being checked to those
783
+ * Limit characters that are acceptable in identifiers being checked to those
490
784
  * normally used with the languages associated with the specified locales.
491
785
  * Any previously specified list of locales is replaced by the new settings.
492
786
  *
@@ -499,7 +793,7 @@ uspoof_getRestrictionLevel(const USpoofChecker *sc);
499
793
  * Supplying an empty string removes all restrictions;
500
794
  * characters from any script will be allowed.
501
795
  *
502
- * The USPOOF_CHAR_LIMIT test is automatically enabled for this
796
+ * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
503
797
  * USpoofChecker when calling this function with a non-empty list
504
798
  * of locales.
505
799
  *
@@ -511,9 +805,9 @@ uspoof_getRestrictionLevel(const USpoofChecker *sc);
511
805
  * can be made to the result of uspoof_setAllowedLocales() by
512
806
  * fetching the resulting set with uspoof_getAllowedChars(),
513
807
  * manipulating it with the Unicode Set API, then resetting the
514
- * spoof detectors limits with uspoof_setAllowedChars()
808
+ * spoof detector's limits with uspoof_setAllowedChars().
515
809
  *
516
- * @param sc The USpoofChecker
810
+ * @param sc The USpoofChecker
517
811
  * @param localesList A list of locales, from which the language
518
812
  * and associated script are extracted. The locales
519
813
  * are comma-separated if there is more than one.
@@ -537,18 +831,18 @@ uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode
537
831
  *
538
832
  * uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
539
833
  *
540
- * The format of the returned list is the same as that supplied to
541
- * uspoof_setAllowedLocales(), but returned list may not be identical
542
- * to the originally specified string; the string may be reformatted,
834
+ * The format of the returned list is the same as that supplied to
835
+ * uspoof_setAllowedLocales(), but returned list may not be identical
836
+ * to the originally specified string; the string may be reformatted,
543
837
  * and information other than languages from
544
838
  * the originally specified locales may be omitted.
545
839
  *
546
- * @param sc The USpoofChecker
840
+ * @param sc The USpoofChecker
547
841
  * @param status The error code, set if this function encounters a problem.
548
842
  * @return A string containing a list of locales corresponding
549
843
  * to the acceptable scripts, formatted like an
550
844
  * HTTP Accept Language value.
551
- *
845
+ *
552
846
  * @stable ICU 4.2
553
847
  */
554
848
  U_STABLE const char * U_EXPORT2
@@ -564,7 +858,7 @@ uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
564
858
  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
565
859
  * USpoofChecker by this function.
566
860
  *
567
- * @param sc The USpoofChecker
861
+ * @param sc The USpoofChecker
568
862
  * @param chars A Unicode Set containing the list of
569
863
  * characters that are permitted. Ownership of the set
570
864
  * remains with the caller. The incoming set is cloned by
@@ -591,7 +885,7 @@ uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
591
885
  * or if a new set of allowed characters is specified.
592
886
  *
593
887
  *
594
- * @param sc The USpoofChecker
888
+ * @param sc The USpoofChecker
595
889
  * @param status The error code, set if this function encounters a problem.
596
890
  * @return A USet containing the characters that are permitted by
597
891
  * the USPOOF_CHAR_LIMIT test.
@@ -611,7 +905,7 @@ uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
611
905
  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
612
906
  * USpoofChecker by this function.
613
907
  *
614
- * @param sc The USpoofChecker
908
+ * @param sc The USpoofChecker
615
909
  * @param chars A Unicode Set containing the list of
616
910
  * characters that are permitted. Ownership of the set
617
911
  * remains with the caller. The incoming set is cloned by
@@ -626,7 +920,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UEr
626
920
 
627
921
  /**
628
922
  * Get a UnicodeSet for the characters permitted in an identifier.
629
- * This corresponds to the limits imposed by the Set Allowed Characters /
923
+ * This corresponds to the limits imposed by the Set Allowed Characters /
630
924
  * UnicodeSet functions. Limitations imposed by other checks will not be
631
925
  * reflected in the set returned by this function.
632
926
  *
@@ -638,7 +932,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UEr
638
932
  * or if a new set of allowed characters is specified.
639
933
  *
640
934
  *
641
- * @param sc The USpoofChecker
935
+ * @param sc The USpoofChecker
642
936
  * @param status The error code, set if this function encounters a problem.
643
937
  * @return A UnicodeSet containing the characters that are permitted by
644
938
  * the USPOOF_CHAR_LIMIT test.
@@ -653,17 +947,22 @@ uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
653
947
  * Check the specified string for possible security issues.
654
948
  * The text to be checked will typically be an identifier of some sort.
655
949
  * The set of checks to be performed is specified with uspoof_setChecks().
656
- *
657
- * @param sc The USpoofChecker
950
+ *
951
+ * \note
952
+ * Consider using the newer API, {@link uspoof_check2}, instead.
953
+ * The newer API exposes additional information from the check procedure
954
+ * and is otherwise identical to this method.
955
+ *
956
+ * @param sc The USpoofChecker
658
957
  * @param id The identifier to be checked for possible security issues,
659
958
  * in UTF-16 format.
660
959
  * @param length the length of the string to be checked, expressed in
661
- * 16 bit UTF-16 code units, or -1 if the string is
960
+ * 16 bit UTF-16 code units, or -1 if the string is
662
961
  * zero terminated.
663
- * @param position An out parameter.
664
- * Originally, the index of the first string position that failed a check.
665
- * Now, always returns zero.
666
- * This parameter may be null.
962
+ * @param position Deprecated in ICU 51. Always returns zero.
963
+ * Originally, an out parameter for the index of the first
964
+ * string position that failed a check.
965
+ * This parameter may be NULL.
667
966
  * @param status The error code, set if an error occurred while attempting to
668
967
  * perform the check.
669
968
  * Spoofing or security issues detected with the input string are
@@ -673,11 +972,12 @@ uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
673
972
  * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
674
973
  * will be zero if the input string passes all of the
675
974
  * enabled checks.
975
+ * @see uspoof_check2
676
976
  * @stable ICU 4.2
677
977
  */
678
978
  U_STABLE int32_t U_EXPORT2
679
979
  uspoof_check(const USpoofChecker *sc,
680
- const UChar *id, int32_t length,
980
+ const UChar *id, int32_t length,
681
981
  int32_t *position,
682
982
  UErrorCode *status);
683
983
 
@@ -686,16 +986,20 @@ uspoof_check(const USpoofChecker *sc,
686
986
  * Check the specified string for possible security issues.
687
987
  * The text to be checked will typically be an identifier of some sort.
688
988
  * The set of checks to be performed is specified with uspoof_setChecks().
689
- *
690
- * @param sc The USpoofChecker
989
+ *
990
+ * \note
991
+ * Consider using the newer API, {@link uspoof_check2UTF8}, instead.
992
+ * The newer API exposes additional information from the check procedure
993
+ * and is otherwise identical to this method.
994
+ *
995
+ * @param sc The USpoofChecker
691
996
  * @param id An identifier to be checked for possible security issues, in UTF8 format.
692
- * @param length the length of the string to be checked, or -1 if the string is
997
+ * @param length the length of the string to be checked, or -1 if the string is
693
998
  * zero terminated.
694
- * @param position An out parameter.
695
- * Originally, the index of the first string position that failed a check.
696
- * Now, always returns zero.
697
- * This parameter may be null.
698
- * @deprecated ICU 51
999
+ * @param position Deprecated in ICU 51. Always returns zero.
1000
+ * Originally, an out parameter for the index of the first
1001
+ * string position that failed a check.
1002
+ * This parameter may be NULL.
699
1003
  * @param status The error code, set if an error occurred while attempting to
700
1004
  * perform the check.
701
1005
  * Spoofing or security issues detected with the input string are
@@ -707,6 +1011,7 @@ uspoof_check(const USpoofChecker *sc,
707
1011
  * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
708
1012
  * will be zero if the input string passes all of the
709
1013
  * enabled checks.
1014
+ * @see uspoof_check2UTF8
710
1015
  * @stable ICU 4.2
711
1016
  */
712
1017
  U_STABLE int32_t U_EXPORT2
@@ -721,14 +1026,18 @@ uspoof_checkUTF8(const USpoofChecker *sc,
721
1026
  * Check the specified string for possible security issues.
722
1027
  * The text to be checked will typically be an identifier of some sort.
723
1028
  * The set of checks to be performed is specified with uspoof_setChecks().
724
- *
725
- * @param sc The USpoofChecker
1029
+ *
1030
+ * \note
1031
+ * Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
1032
+ * The newer API exposes additional information from the check procedure
1033
+ * and is otherwise identical to this method.
1034
+ *
1035
+ * @param sc The USpoofChecker
726
1036
  * @param id An identifier to be checked for possible security issues.
727
- * @param position An out parameter.
728
- * Originally, the index of the first string position that failed a check.
729
- * Now, always returns zero.
730
- * This parameter may be null.
731
- * @deprecated ICU 51
1037
+ * @param position Deprecated in ICU 51. Always returns zero.
1038
+ * Originally, an out parameter for the index of the first
1039
+ * string position that failed a check.
1040
+ * This parameter may be NULL.
732
1041
  * @param status The error code, set if an error occurred while attempting to
733
1042
  * perform the check.
734
1043
  * Spoofing or security issues detected with the input string are
@@ -738,45 +1047,249 @@ uspoof_checkUTF8(const USpoofChecker *sc,
738
1047
  * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
739
1048
  * will be zero if the input string passes all of the
740
1049
  * enabled checks.
1050
+ * @see uspoof_check2UnicodeString
741
1051
  * @stable ICU 4.2
742
1052
  */
743
1053
  U_STABLE int32_t U_EXPORT2
744
1054
  uspoof_checkUnicodeString(const USpoofChecker *sc,
745
- const icu::UnicodeString &id,
1055
+ const icu::UnicodeString &id,
746
1056
  int32_t *position,
747
1057
  UErrorCode *status);
1058
+ #endif
1059
+
1060
+
1061
+ #ifndef U_HIDE_DRAFT_API
1062
+ /**
1063
+ * Check the specified string for possible security issues.
1064
+ * The text to be checked will typically be an identifier of some sort.
1065
+ * The set of checks to be performed is specified with uspoof_setChecks().
1066
+ *
1067
+ * @param sc The USpoofChecker
1068
+ * @param id The identifier to be checked for possible security issues,
1069
+ * in UTF-16 format.
1070
+ * @param length the length of the string to be checked, or -1 if the string is
1071
+ * zero terminated.
1072
+ * @param checkResult An instance of USpoofCheckResult to be filled with
1073
+ * details about the identifier. Can be NULL.
1074
+ * @param status The error code, set if an error occurred while attempting to
1075
+ * perform the check.
1076
+ * Spoofing or security issues detected with the input string are
1077
+ * not reported here, but through the function's return value.
1078
+ * @return An integer value with bits set for any potential security
1079
+ * or spoofing issues detected. The bits are defined by
1080
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1081
+ * will be zero if the input string passes all of the
1082
+ * enabled checks. Any information in this bitmask will be
1083
+ * consistent with the information saved in the optional
1084
+ * checkResult parameter.
1085
+ * @see uspoof_openCheckResult
1086
+ * @see uspoof_check2UTF8
1087
+ * @see uspoof_check2UnicodeString
1088
+ * @draft ICU 58
1089
+ */
1090
+ U_DRAFT int32_t U_EXPORT2
1091
+ uspoof_check2(const USpoofChecker *sc,
1092
+ const UChar* id, int32_t length,
1093
+ USpoofCheckResult* checkResult,
1094
+ UErrorCode *status);
748
1095
 
1096
+ /**
1097
+ * Check the specified string for possible security issues.
1098
+ * The text to be checked will typically be an identifier of some sort.
1099
+ * The set of checks to be performed is specified with uspoof_setChecks().
1100
+ *
1101
+ * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
1102
+ * returns additional information about the identifier. For more
1103
+ * information, see {@link uspoof_openCheckResult}.
1104
+ *
1105
+ * @param sc The USpoofChecker
1106
+ * @param id An identifier to be checked for possible security issues, in UTF8 format.
1107
+ * @param length the length of the string to be checked, or -1 if the string is
1108
+ * zero terminated.
1109
+ * @param checkResult An instance of USpoofCheckResult to be filled with
1110
+ * details about the identifier. Can be NULL.
1111
+ * @param status The error code, set if an error occurred while attempting to
1112
+ * perform the check.
1113
+ * Spoofing or security issues detected with the input string are
1114
+ * not reported here, but through the function's return value.
1115
+ * @return An integer value with bits set for any potential security
1116
+ * or spoofing issues detected. The bits are defined by
1117
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1118
+ * will be zero if the input string passes all of the
1119
+ * enabled checks. Any information in this bitmask will be
1120
+ * consistent with the information saved in the optional
1121
+ * checkResult parameter.
1122
+ * @see uspoof_openCheckResult
1123
+ * @see uspoof_check2
1124
+ * @see uspoof_check2UnicodeString
1125
+ * @draft ICU 58
1126
+ */
1127
+ U_DRAFT int32_t U_EXPORT2
1128
+ uspoof_check2UTF8(const USpoofChecker *sc,
1129
+ const char *id, int32_t length,
1130
+ USpoofCheckResult* checkResult,
1131
+ UErrorCode *status);
1132
+
1133
+ #if U_SHOW_CPLUSPLUS_API
1134
+ /**
1135
+ * Check the specified string for possible security issues.
1136
+ * The text to be checked will typically be an identifier of some sort.
1137
+ * The set of checks to be performed is specified with uspoof_setChecks().
1138
+ *
1139
+ * @param sc The USpoofChecker
1140
+ * @param id An identifier to be checked for possible security issues.
1141
+ * @param checkResult An instance of USpoofCheckResult to be filled with
1142
+ * details about the identifier. Can be NULL.
1143
+ * @param status The error code, set if an error occurred while attempting to
1144
+ * perform the check.
1145
+ * Spoofing or security issues detected with the input string are
1146
+ * not reported here, but through the function's return value.
1147
+ * @return An integer value with bits set for any potential security
1148
+ * or spoofing issues detected. The bits are defined by
1149
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1150
+ * will be zero if the input string passes all of the
1151
+ * enabled checks. Any information in this bitmask will be
1152
+ * consistent with the information saved in the optional
1153
+ * checkResult parameter.
1154
+ * @see uspoof_openCheckResult
1155
+ * @see uspoof_check2
1156
+ * @see uspoof_check2UTF8
1157
+ * @draft ICU 58
1158
+ */
1159
+ U_DRAFT int32_t U_EXPORT2
1160
+ uspoof_check2UnicodeString(const USpoofChecker *sc,
1161
+ const icu::UnicodeString &id,
1162
+ USpoofCheckResult* checkResult,
1163
+ UErrorCode *status);
749
1164
  #endif
750
1165
 
1166
+ /**
1167
+ * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
1168
+ * information about the identifier. Information includes:
1169
+ * <ul>
1170
+ * <li>A bitmask of the checks that failed</li>
1171
+ * <li>The identifier's restriction level (UTS 39 section 5.2)</li>
1172
+ * <li>The set of numerics in the string (UTS 39 section 5.3)</li>
1173
+ * </ul>
1174
+ * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
1175
+ * of {@link uspoof_check2}.
1176
+ *
1177
+ * @param status The error code, set if this function encounters a problem.
1178
+ * @return the newly created USpoofCheckResult
1179
+ * @see uspoof_check2
1180
+ * @see uspoof_check2UTF8
1181
+ * @see uspoof_check2UnicodeString
1182
+ * @draft ICU 58
1183
+ */
1184
+ U_DRAFT USpoofCheckResult* U_EXPORT2
1185
+ uspoof_openCheckResult(UErrorCode *status);
1186
+
1187
+ /**
1188
+ * Close a USpoofCheckResult, freeing any memory that was being held by
1189
+ * its implementation.
1190
+ *
1191
+ * @param checkResult The instance of USpoofCheckResult to close
1192
+ * @draft ICU 58
1193
+ */
1194
+ U_DRAFT void U_EXPORT2
1195
+ uspoof_closeCheckResult(USpoofCheckResult *checkResult);
1196
+
1197
+ #if U_SHOW_CPLUSPLUS_API
1198
+
1199
+ U_NAMESPACE_BEGIN
1200
+
1201
+ /**
1202
+ * \class LocalUSpoofCheckResultPointer
1203
+ * "Smart pointer" class, closes a USpoofCheckResult via {@link uspoof_closeCheckResult}.
1204
+ * For most methods see the LocalPointerBase base class.
1205
+ *
1206
+ * @see LocalPointerBase
1207
+ * @see LocalPointer
1208
+ * @draft ICU 58
1209
+ */
1210
+ U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
1211
+
1212
+ U_NAMESPACE_END
1213
+
1214
+ #endif
1215
+
1216
+ /**
1217
+ * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
1218
+ * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
1219
+ *
1220
+ * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1221
+ * @param status The error code, set if an error occurred.
1222
+ * @return An integer value with bits set for any potential security
1223
+ * or spoofing issues detected. The bits are defined by
1224
+ * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
1225
+ * will be zero if the input string passes all of the
1226
+ * enabled checks.
1227
+ * @see uspoof_setChecks
1228
+ * @draft ICU 58
1229
+ */
1230
+ U_DRAFT int32_t U_EXPORT2
1231
+ uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
1232
+
1233
+ /**
1234
+ * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
1235
+ * was enabled; otherwise, undefined.
1236
+ *
1237
+ * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1238
+ * @param status The error code, set if an error occurred.
1239
+ * @return The restriction level contained in the USpoofCheckResult
1240
+ * @see uspoof_setRestrictionLevel
1241
+ * @draft ICU 58
1242
+ */
1243
+ U_DRAFT URestrictionLevel U_EXPORT2
1244
+ uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
1245
+
1246
+ /**
1247
+ * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
1248
+ * otherwise, undefined. The set will contain the zero digit from each decimal number system found
1249
+ * in the input string. Ownership of the returned USet remains with the USpoofCheckResult.
1250
+ * The USet will be freed when {@link uspoof_closeCheckResult} is called.
1251
+ *
1252
+ * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1253
+ * @return The set of numerics contained in the USpoofCheckResult
1254
+ * @param status The error code, set if an error occurred.
1255
+ * @draft ICU 58
1256
+ */
1257
+ U_DRAFT const USet* U_EXPORT2
1258
+ uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
1259
+ #endif /* U_HIDE_DRAFT_API */
1260
+
751
1261
 
752
1262
  /**
753
1263
  * Check whether two specified strings are visually confusable.
754
- * The types of confusability to be tested - single script, mixed script,
755
- * or whole script - are determined by the check options set for the
756
- * USpoofChecker.
757
- *
758
- * The tests to be performed are controlled by the flags
759
- * USPOOF_SINGLE_SCRIPT_CONFUSABLE
760
- * USPOOF_MIXED_SCRIPT_CONFUSABLE
761
- * USPOOF_WHOLE_SCRIPT_CONFUSABLE
762
- * At least one of these tests must be selected.
763
- *
764
- * USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers
765
- * may be of mixed case.
766
- * If identifiers are case folded for comparison and
767
- * display to the user, do not select the USPOOF_ANY_CASE option.
1264
+ *
1265
+ * If the strings are confusable, the return value will be nonzero, as long as
1266
+ * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
1267
+ *
1268
+ * The bits in the return value correspond to flags for each of the classes of
1269
+ * confusables applicable to the two input strings. According to UTS 39
1270
+ * section 4, the possible flags are:
1271
+ *
1272
+ * <ul>
1273
+ * <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
1274
+ * <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
1275
+ * <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
1276
+ * </ul>
1277
+ *
1278
+ * If one or more of the above flags were not listed in uspoof_setChecks(), this
1279
+ * function will never report that class of confusable. The check
1280
+ * {@link USPOOF_CONFUSABLE} enables all three flags.
768
1281
  *
769
1282
  *
770
1283
  * @param sc The USpoofChecker
771
- * @param id1 The first of the two identifiers to be compared for
1284
+ * @param id1 The first of the two identifiers to be compared for
772
1285
  * confusability. The strings are in UTF-16 format.
773
1286
  * @param length1 the length of the first identifier, expressed in
774
- * 16 bit UTF-16 code units, or -1 if the string is
1287
+ * 16 bit UTF-16 code units, or -1 if the string is
775
1288
  * nul terminated.
776
- * @param id2 The second of the two identifiers to be compared for
1289
+ * @param id2 The second of the two identifiers to be compared for
777
1290
  * confusability. The identifiers are in UTF-16 format.
778
1291
  * @param length2 The length of the second identifier, expressed in
779
- * 16 bit UTF-16 code units, or -1 if the string is
1292
+ * 16 bit UTF-16 code units, or -1 if the string is
780
1293
  * nul terminated.
781
1294
  * @param status The error code, set if an error occurred while attempting to
782
1295
  * perform the check.
@@ -786,6 +1299,7 @@ uspoof_checkUnicodeString(const USpoofChecker *sc,
786
1299
  * the type of confusability found, as defined by
787
1300
  * enum USpoofChecks. Zero is returned if the identifiers
788
1301
  * are not confusable.
1302
+ *
789
1303
  * @stable ICU 4.2
790
1304
  */
791
1305
  U_STABLE int32_t U_EXPORT2
@@ -797,19 +1311,16 @@ uspoof_areConfusable(const USpoofChecker *sc,
797
1311
 
798
1312
 
799
1313
  /**
800
- * Check the whether two specified strings are visually confusable.
801
- * The types of confusability to be tested - single script, mixed script,
802
- * or whole script - are determined by the check options set for the
803
- * USpoofChecker.
1314
+ * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
804
1315
  *
805
1316
  * @param sc The USpoofChecker
806
- * @param id1 The first of the two identifiers to be compared for
1317
+ * @param id1 The first of the two identifiers to be compared for
807
1318
  * confusability. The strings are in UTF-8 format.
808
- * @param length1 the length of the first identifiers, in bytes, or -1
1319
+ * @param length1 the length of the first identifier, in bytes, or -1
809
1320
  * if the string is nul terminated.
810
- * @param id2 The second of the two identifiers to be compared for
1321
+ * @param id2 The second of the two identifiers to be compared for
811
1322
  * confusability. The strings are in UTF-8 format.
812
- * @param length2 The length of the second string in bytes, or -1
1323
+ * @param length2 The length of the second string in bytes, or -1
813
1324
  * if the string is nul terminated.
814
1325
  * @param status The error code, set if an error occurred while attempting to
815
1326
  * perform the check.
@@ -819,7 +1330,10 @@ uspoof_areConfusable(const USpoofChecker *sc,
819
1330
  * the type of confusability found, as defined by
820
1331
  * enum USpoofChecks. Zero is returned if the strings
821
1332
  * are not confusable.
1333
+ *
822
1334
  * @stable ICU 4.2
1335
+ *
1336
+ * @see uspoof_areConfusable
823
1337
  */
824
1338
  U_STABLE int32_t U_EXPORT2
825
1339
  uspoof_areConfusableUTF8(const USpoofChecker *sc,
@@ -832,15 +1346,12 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
832
1346
 
833
1347
  #if U_SHOW_CPLUSPLUS_API
834
1348
  /**
835
- * Check the whether two specified strings are visually confusable.
836
- * The types of confusability to be tested - single script, mixed script,
837
- * or whole script - are determined by the check options set for the
838
- * USpoofChecker.
1349
+ * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
839
1350
  *
840
1351
  * @param sc The USpoofChecker
841
- * @param s1 The first of the two identifiers to be compared for
1352
+ * @param s1 The first of the two identifiers to be compared for
842
1353
  * confusability. The strings are passed as UnicodeString objects.
843
- * @param s2 The second of the two identifiers to be compared for
1354
+ * @param s2 The second of the two identifiers to be compared for
844
1355
  * confusability. The strings are passed as UnicodeString objects.
845
1356
  * @param status The error code, set if an error occurred while attempting to
846
1357
  * perform the check.
@@ -850,7 +1361,10 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
850
1361
  * the type of confusability found, as defined by
851
1362
  * enum USpoofChecks. Zero is returned if the identifiers
852
1363
  * are not confusable.
1364
+ *
853
1365
  * @stable ICU 4.2
1366
+ *
1367
+ * @see uspoof_areConfusable
854
1368
  */
855
1369
  U_STABLE int32_t U_EXPORT2
856
1370
  uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
@@ -861,112 +1375,107 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
861
1375
 
862
1376
 
863
1377
  /**
864
- * Get the "skeleton" for an identifier.
865
- * Skeletons are a transformation of the input identifier;
866
- * Two identifiers are confusable if their skeletons are identical.
867
- * See Unicode UAX #39 for additional information.
868
- *
869
- * Using skeletons directly makes it possible to quickly check
870
- * whether an identifier is confusable with any of some large
871
- * set of existing identifiers, by creating an efficiently
872
- * searchable collection of the skeletons.
873
- *
874
- * @param sc The USpoofChecker
875
- * @param type The type of skeleton, corresponding to which
876
- * of the Unicode confusable data tables to use.
877
- * The default is Mixed-Script, Lowercase.
878
- * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
879
- * USPOOF_ANY_CASE. The two flags may be ORed.
880
- * @param id The input identifier whose skeleton will be computed.
881
- * @param length The length of the input identifier, expressed in 16 bit
882
- * UTF-16 code units, or -1 if the string is zero terminated.
883
- * @param dest The output buffer, to receive the skeleton string.
884
- * @param destCapacity The length of the output buffer, in 16 bit units.
885
- * The destCapacity may be zero, in which case the function will
886
- * return the actual length of the skeleton.
887
- * @param status The error code, set if an error occurred while attempting to
888
- * perform the check.
889
- * @return The length of the skeleton string. The returned length
890
- * is always that of the complete skeleton, even when the
891
- * supplied buffer is too small (or of zero length)
892
- *
893
- * @stable ICU 4.2
894
- */
1378
+ * Get the "skeleton" for an identifier.
1379
+ * Skeletons are a transformation of the input identifier;
1380
+ * Two identifiers are confusable if their skeletons are identical.
1381
+ * See Unicode UAX #39 for additional information.
1382
+ *
1383
+ * Using skeletons directly makes it possible to quickly check
1384
+ * whether an identifier is confusable with any of some large
1385
+ * set of existing identifiers, by creating an efficiently
1386
+ * searchable collection of the skeletons.
1387
+ *
1388
+ * @param sc The USpoofChecker
1389
+ * @param type Deprecated in ICU 58. You may pass any number.
1390
+ * Originally, controlled which of the Unicode confusable data
1391
+ * tables to use.
1392
+ * @param id The input identifier whose skeleton will be computed.
1393
+ * @param length The length of the input identifier, expressed in 16 bit
1394
+ * UTF-16 code units, or -1 if the string is zero terminated.
1395
+ * @param dest The output buffer, to receive the skeleton string.
1396
+ * @param destCapacity The length of the output buffer, in 16 bit units.
1397
+ * The destCapacity may be zero, in which case the function will
1398
+ * return the actual length of the skeleton.
1399
+ * @param status The error code, set if an error occurred while attempting to
1400
+ * perform the check.
1401
+ * @return The length of the skeleton string. The returned length
1402
+ * is always that of the complete skeleton, even when the
1403
+ * supplied buffer is too small (or of zero length)
1404
+ *
1405
+ * @stable ICU 4.2
1406
+ * @see uspoof_areConfusable
1407
+ */
895
1408
  U_STABLE int32_t U_EXPORT2
896
1409
  uspoof_getSkeleton(const USpoofChecker *sc,
897
1410
  uint32_t type,
898
1411
  const UChar *id, int32_t length,
899
1412
  UChar *dest, int32_t destCapacity,
900
1413
  UErrorCode *status);
901
-
1414
+
902
1415
  /**
903
- * Get the "skeleton" for an identifier.
904
- * Skeletons are a transformation of the input identifier;
905
- * Two identifiers are confusable if their skeletons are identical.
906
- * See Unicode UAX #39 for additional information.
907
- *
908
- * Using skeletons directly makes it possible to quickly check
909
- * whether an identifier is confusable with any of some large
910
- * set of existing identifiers, by creating an efficiently
911
- * searchable collection of the skeletons.
912
- *
913
- * @param sc The USpoofChecker
914
- * @param type The type of skeleton, corresponding to which
915
- * of the Unicode confusable data tables to use.
916
- * The default is Mixed-Script, Lowercase.
917
- * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
918
- * USPOOF_ANY_CASE. The two flags may be ORed.
919
- * @param id The UTF-8 format identifier whose skeleton will be computed.
920
- * @param length The length of the input string, in bytes,
921
- * or -1 if the string is zero terminated.
922
- * @param dest The output buffer, to receive the skeleton string.
923
- * @param destCapacity The length of the output buffer, in bytes.
924
- * The destCapacity may be zero, in which case the function will
925
- * return the actual length of the skeleton.
926
- * @param status The error code, set if an error occurred while attempting to
927
- * perform the check. Possible Errors include U_INVALID_CHAR_FOUND
928
- * for invalid UTF-8 sequences, and
929
- * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
930
- * to hold the complete skeleton.
931
- * @return The length of the skeleton string, in bytes. The returned length
932
- * is always that of the complete skeleton, even when the
933
- * supplied buffer is too small (or of zero length)
934
- *
935
- * @stable ICU 4.2
936
- */
1416
+ * Get the "skeleton" for an identifier.
1417
+ * Skeletons are a transformation of the input identifier;
1418
+ * Two identifiers are confusable if their skeletons are identical.
1419
+ * See Unicode UAX #39 for additional information.
1420
+ *
1421
+ * Using skeletons directly makes it possible to quickly check
1422
+ * whether an identifier is confusable with any of some large
1423
+ * set of existing identifiers, by creating an efficiently
1424
+ * searchable collection of the skeletons.
1425
+ *
1426
+ * @param sc The USpoofChecker
1427
+ * @param type Deprecated in ICU 58. You may pass any number.
1428
+ * Originally, controlled which of the Unicode confusable data
1429
+ * tables to use.
1430
+ * @param id The UTF-8 format identifier whose skeleton will be computed.
1431
+ * @param length The length of the input string, in bytes,
1432
+ * or -1 if the string is zero terminated.
1433
+ * @param dest The output buffer, to receive the skeleton string.
1434
+ * @param destCapacity The length of the output buffer, in bytes.
1435
+ * The destCapacity may be zero, in which case the function will
1436
+ * return the actual length of the skeleton.
1437
+ * @param status The error code, set if an error occurred while attempting to
1438
+ * perform the check. Possible Errors include U_INVALID_CHAR_FOUND
1439
+ * for invalid UTF-8 sequences, and
1440
+ * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
1441
+ * to hold the complete skeleton.
1442
+ * @return The length of the skeleton string, in bytes. The returned length
1443
+ * is always that of the complete skeleton, even when the
1444
+ * supplied buffer is too small (or of zero length)
1445
+ *
1446
+ * @stable ICU 4.2
1447
+ */
937
1448
  U_STABLE int32_t U_EXPORT2
938
1449
  uspoof_getSkeletonUTF8(const USpoofChecker *sc,
939
1450
  uint32_t type,
940
1451
  const char *id, int32_t length,
941
1452
  char *dest, int32_t destCapacity,
942
1453
  UErrorCode *status);
943
-
1454
+
944
1455
  #if U_SHOW_CPLUSPLUS_API
945
1456
  /**
946
- * Get the "skeleton" for an identifier.
947
- * Skeletons are a transformation of the input identifier;
948
- * Two identifiers are confusable if their skeletons are identical.
949
- * See Unicode UAX #39 for additional information.
950
- *
951
- * Using skeletons directly makes it possible to quickly check
952
- * whether an identifier is confusable with any of some large
953
- * set of existing identifiers, by creating an efficiently
954
- * searchable collection of the skeletons.
955
- *
956
- * @param sc The USpoofChecker.
957
- * @param type The type of skeleton, corresponding to which
958
- * of the Unicode confusable data tables to use.
959
- * The default is Mixed-Script, Lowercase.
960
- * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
961
- * USPOOF_ANY_CASE. The two flags may be ORed.
962
- * @param id The input identifier whose skeleton will be computed.
963
- * @param dest The output identifier, to receive the skeleton string.
964
- * @param status The error code, set if an error occurred while attempting to
965
- * perform the check.
966
- * @return A reference to the destination (skeleton) string.
967
- *
968
- * @stable ICU 4.2
969
- */
1457
+ * Get the "skeleton" for an identifier.
1458
+ * Skeletons are a transformation of the input identifier;
1459
+ * Two identifiers are confusable if their skeletons are identical.
1460
+ * See Unicode UAX #39 for additional information.
1461
+ *
1462
+ * Using skeletons directly makes it possible to quickly check
1463
+ * whether an identifier is confusable with any of some large
1464
+ * set of existing identifiers, by creating an efficiently
1465
+ * searchable collection of the skeletons.
1466
+ *
1467
+ * @param sc The USpoofChecker.
1468
+ * @param type Deprecated in ICU 58. You may pass any number.
1469
+ * Originally, controlled which of the Unicode confusable data
1470
+ * tables to use.
1471
+ * @param id The input identifier whose skeleton will be computed.
1472
+ * @param dest The output identifier, to receive the skeleton string.
1473
+ * @param status The error code, set if an error occurred while attempting to
1474
+ * perform the check.
1475
+ * @return A reference to the destination (skeleton) string.
1476
+ *
1477
+ * @stable ICU 4.2
1478
+ */
970
1479
  U_I18N_API icu::UnicodeString & U_EXPORT2
971
1480
  uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
972
1481
  uint32_t type,
@@ -977,7 +1486,8 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
977
1486
 
978
1487
  /**
979
1488
  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
980
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
1489
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1490
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
981
1491
  *
982
1492
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
983
1493
  * be deleted by the caller.
@@ -991,7 +1501,8 @@ uspoof_getInclusionSet(UErrorCode *status);
991
1501
 
992
1502
  /**
993
1503
  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
994
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
1504
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1505
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
995
1506
  *
996
1507
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
997
1508
  * be deleted by the caller.
@@ -1007,7 +1518,8 @@ uspoof_getRecommendedSet(UErrorCode *status);
1007
1518
 
1008
1519
  /**
1009
1520
  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
1010
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
1521
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1522
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1011
1523
  *
1012
1524
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1013
1525
  * be deleted by the caller.
@@ -1021,7 +1533,8 @@ uspoof_getInclusionUnicodeSet(UErrorCode *status);
1021
1533
 
1022
1534
  /**
1023
1535
  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
1024
- * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
1536
+ * in http://unicode.org/Public/security/latest/xidmodifications.txt
1537
+ * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1025
1538
  *
1026
1539
  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1027
1540
  * be deleted by the caller.
@@ -1041,7 +1554,7 @@ uspoof_getRecommendedUnicodeSet(UErrorCode *status);
1041
1554
  * instantiate a new Spoof Detector.
1042
1555
  *
1043
1556
  * The serialized spoof checker includes only the data compiled from the
1044
- * Unicode data tables by uspoof_openFromSource(); it does not include
1557
+ * Unicode data tables by uspoof_openFromSource(); it does not include
1045
1558
  * any other state or configuration that may have been set.
1046
1559
  *
1047
1560
  * @param sc the Spoof Detector whose data is to be serialized.