pango 3.1.0-x86-mingw32 → 3.1.1-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +9 -4
- data/ext/pango/rbpangolayoutline.c +6 -2
- data/lib/2.2/pango.so +0 -0
- data/lib/2.3/pango.so +0 -0
- data/lib/2.4/pango.so +0 -0
- data/sample/layout.rb +1 -1
- data/vendor/local/bin/derb.exe +0 -0
- data/vendor/local/bin/genbrk.exe +0 -0
- data/vendor/local/bin/genccode.exe +0 -0
- data/vendor/local/bin/gencfu.exe +0 -0
- data/vendor/local/bin/gencmn.exe +0 -0
- data/vendor/local/bin/gencnval.exe +0 -0
- data/vendor/local/bin/gendict.exe +0 -0
- data/vendor/local/bin/gennorm2.exe +0 -0
- data/vendor/local/bin/genrb.exe +0 -0
- data/vendor/local/bin/gensprep.exe +0 -0
- data/vendor/local/bin/hb-ot-shape-closure.exe +0 -0
- data/vendor/local/bin/hb-shape.exe +0 -0
- data/vendor/local/bin/hb-view.exe +0 -0
- data/vendor/local/bin/icu-config +18 -13
- data/vendor/local/bin/icuinfo.exe +0 -0
- data/vendor/local/bin/icupkg.exe +0 -0
- data/vendor/local/bin/libharfbuzz-0.dll +0 -0
- data/vendor/local/bin/libpango-1.0-0.dll +0 -0
- data/vendor/local/bin/libpangocairo-1.0-0.dll +0 -0
- data/vendor/local/bin/libpangoft2-1.0-0.dll +0 -0
- data/vendor/local/bin/libpangowin32-1.0-0.dll +0 -0
- data/vendor/local/bin/makeconv.exe +0 -0
- data/vendor/local/bin/pango-view.exe +0 -0
- data/vendor/local/bin/pkgdata.exe +0 -0
- data/vendor/local/bin/uconv.exe +0 -0
- data/vendor/local/include/harfbuzz/hb-font.h +5 -0
- data/vendor/local/include/harfbuzz/hb-glib.h +2 -1
- data/vendor/local/include/harfbuzz/hb-ot-layout.h +19 -0
- data/vendor/local/include/harfbuzz/hb-shape-plan.h +19 -0
- data/vendor/local/include/harfbuzz/hb-version.h +3 -3
- data/vendor/local/include/unicode/alphaindex.h +2 -0
- data/vendor/local/include/unicode/appendable.h +2 -0
- data/vendor/local/include/unicode/basictz.h +2 -0
- data/vendor/local/include/unicode/brkiter.h +7 -0
- data/vendor/local/include/unicode/bytestream.h +2 -0
- data/vendor/local/include/unicode/bytestrie.h +3 -2
- data/vendor/local/include/unicode/bytestriebuilder.h +4 -4
- data/vendor/local/include/unicode/calendar.h +2 -0
- data/vendor/local/include/unicode/caniter.h +2 -0
- data/vendor/local/include/unicode/chariter.h +2 -0
- data/vendor/local/include/unicode/choicfmt.h +2 -0
- data/vendor/local/include/unicode/coleitr.h +2 -0
- data/vendor/local/include/unicode/coll.h +2 -0
- data/vendor/local/include/unicode/compactdecimalformat.h +4 -3
- data/vendor/local/include/unicode/curramt.h +2 -0
- data/vendor/local/include/unicode/currpinf.h +2 -0
- data/vendor/local/include/unicode/currunit.h +2 -0
- data/vendor/local/include/unicode/datefmt.h +2 -0
- data/vendor/local/include/unicode/dbbi.h +2 -0
- data/vendor/local/include/unicode/dcfmtsym.h +2 -0
- data/vendor/local/include/unicode/decimfmt.h +9 -5
- data/vendor/local/include/unicode/docmain.h +3 -1
- data/vendor/local/include/unicode/dtfmtsym.h +26 -14
- data/vendor/local/include/unicode/dtintrv.h +2 -0
- data/vendor/local/include/unicode/dtitvfmt.h +2 -0
- data/vendor/local/include/unicode/dtitvinf.h +8 -3
- data/vendor/local/include/unicode/dtptngen.h +13 -9
- data/vendor/local/include/unicode/dtrule.h +2 -0
- data/vendor/local/include/unicode/enumset.h +2 -0
- data/vendor/local/include/unicode/errorcode.h +2 -0
- data/vendor/local/include/unicode/fieldpos.h +5 -2
- data/vendor/local/include/unicode/filteredbrk.h +10 -12
- data/vendor/local/include/unicode/fmtable.h +4 -2
- data/vendor/local/include/unicode/format.h +2 -0
- data/vendor/local/include/unicode/fpositer.h +2 -0
- data/vendor/local/include/unicode/gender.h +2 -0
- data/vendor/local/include/unicode/gregocal.h +2 -0
- data/vendor/local/include/unicode/icudataver.h +2 -0
- data/vendor/local/include/unicode/icuplug.h +12 -2
- data/vendor/local/include/unicode/idna.h +6 -4
- data/vendor/local/include/unicode/listformatter.h +2 -0
- data/vendor/local/include/unicode/localpointer.h +19 -28
- data/vendor/local/include/unicode/locdspnm.h +2 -0
- data/vendor/local/include/unicode/locid.h +2 -0
- data/vendor/local/include/unicode/measfmt.h +21 -2
- data/vendor/local/include/unicode/measunit.h +50 -24
- data/vendor/local/include/unicode/measure.h +2 -0
- data/vendor/local/include/unicode/messagepattern.h +2 -0
- data/vendor/local/include/unicode/msgfmt.h +2 -0
- data/vendor/local/include/unicode/normalizer2.h +2 -0
- data/vendor/local/include/unicode/normlzr.h +4 -4
- data/vendor/local/include/unicode/numfmt.h +8 -6
- data/vendor/local/include/unicode/numsys.h +2 -0
- data/vendor/local/include/unicode/parseerr.h +2 -0
- data/vendor/local/include/unicode/parsepos.h +2 -0
- data/vendor/local/include/unicode/platform.h +8 -0
- data/vendor/local/include/unicode/plurfmt.h +2 -0
- data/vendor/local/include/unicode/plurrule.h +2 -0
- data/vendor/local/include/unicode/ptypes.h +2 -0
- data/vendor/local/include/unicode/putil.h +2 -0
- data/vendor/local/include/unicode/rbbi.h +18 -66
- data/vendor/local/include/unicode/rbnf.h +8 -0
- data/vendor/local/include/unicode/rbtz.h +2 -0
- data/vendor/local/include/unicode/regex.h +2 -0
- data/vendor/local/include/unicode/region.h +3 -1
- data/vendor/local/include/unicode/reldatefmt.h +19 -9
- data/vendor/local/include/unicode/rep.h +2 -0
- data/vendor/local/include/unicode/resbund.h +2 -0
- data/vendor/local/include/unicode/schriter.h +2 -0
- data/vendor/local/include/unicode/scientificnumberformatter.h +2 -0
- data/vendor/local/include/unicode/search.h +2 -0
- data/vendor/local/include/unicode/selfmt.h +2 -0
- data/vendor/local/include/unicode/simpleformatter.h +2 -0
- data/vendor/local/include/unicode/simpletz.h +2 -0
- data/vendor/local/include/unicode/smpdtfmt.h +9 -1
- data/vendor/local/include/unicode/sortkey.h +2 -0
- data/vendor/local/include/unicode/std_string.h +2 -0
- data/vendor/local/include/unicode/strenum.h +2 -0
- data/vendor/local/include/unicode/stringpiece.h +5 -3
- data/vendor/local/include/unicode/stringtriebuilder.h +13 -1
- data/vendor/local/include/unicode/stsearch.h +2 -0
- data/vendor/local/include/unicode/symtable.h +2 -0
- data/vendor/local/include/unicode/tblcoll.h +3 -1
- data/vendor/local/include/unicode/timezone.h +2 -0
- data/vendor/local/include/unicode/tmunit.h +8 -0
- data/vendor/local/include/unicode/tmutamt.h +2 -0
- data/vendor/local/include/unicode/tmutfmt.h +5 -0
- data/vendor/local/include/unicode/translit.h +2 -0
- data/vendor/local/include/unicode/tzfmt.h +2 -0
- data/vendor/local/include/unicode/tznames.h +16 -2
- data/vendor/local/include/unicode/tzrule.h +2 -0
- data/vendor/local/include/unicode/tztrans.h +2 -0
- data/vendor/local/include/unicode/ubidi.h +20 -8
- data/vendor/local/include/unicode/ubiditransform.h +321 -0
- data/vendor/local/include/unicode/ubrk.h +26 -7
- data/vendor/local/include/unicode/ucal.h +8 -4
- data/vendor/local/include/unicode/ucasemap.h +2 -0
- data/vendor/local/include/unicode/ucat.h +2 -0
- data/vendor/local/include/unicode/uchar.h +244 -17
- data/vendor/local/include/unicode/ucharstrie.h +2 -0
- data/vendor/local/include/unicode/ucharstriebuilder.h +2 -0
- data/vendor/local/include/unicode/uchriter.h +2 -0
- data/vendor/local/include/unicode/uclean.h +2 -0
- data/vendor/local/include/unicode/ucnv.h +8 -1
- data/vendor/local/include/unicode/ucnv_cb.h +2 -0
- data/vendor/local/include/unicode/ucnv_err.h +2 -0
- data/vendor/local/include/unicode/ucnvsel.h +2 -0
- data/vendor/local/include/unicode/ucol.h +28 -11
- data/vendor/local/include/unicode/ucoleitr.h +2 -0
- data/vendor/local/include/unicode/uconfig.h +2 -0
- data/vendor/local/include/unicode/ucsdet.h +2 -0
- data/vendor/local/include/unicode/ucurr.h +5 -1
- data/vendor/local/include/unicode/udat.h +13 -11
- data/vendor/local/include/unicode/udata.h +8 -1
- data/vendor/local/include/unicode/udateintervalformat.h +2 -0
- data/vendor/local/include/unicode/udatpg.h +15 -2
- data/vendor/local/include/unicode/udisplaycontext.h +34 -2
- data/vendor/local/include/unicode/uenum.h +2 -0
- data/vendor/local/include/unicode/ufieldpositer.h +4 -2
- data/vendor/local/include/unicode/uformattable.h +9 -1
- data/vendor/local/include/unicode/ugender.h +2 -0
- data/vendor/local/include/unicode/uidna.h +2 -0
- data/vendor/local/include/unicode/uiter.h +2 -0
- data/vendor/local/include/unicode/uldnames.h +2 -0
- data/vendor/local/include/unicode/ulistformatter.h +2 -0
- data/vendor/local/include/unicode/uloc.h +9 -3
- data/vendor/local/include/unicode/ulocdata.h +20 -2
- data/vendor/local/include/unicode/umachine.h +25 -0
- data/vendor/local/include/unicode/umisc.h +2 -0
- data/vendor/local/include/unicode/umsg.h +2 -0
- data/vendor/local/include/unicode/unifilt.h +2 -0
- data/vendor/local/include/unicode/unifunct.h +2 -0
- data/vendor/local/include/unicode/unimatch.h +2 -0
- data/vendor/local/include/unicode/unirepl.h +2 -0
- data/vendor/local/include/unicode/uniset.h +3 -1
- data/vendor/local/include/unicode/unistr.h +12 -14
- data/vendor/local/include/unicode/unorm.h +6 -2
- data/vendor/local/include/unicode/unorm2.h +2 -0
- data/vendor/local/include/unicode/unum.h +28 -10
- data/vendor/local/include/unicode/unumsys.h +2 -0
- data/vendor/local/include/unicode/uobject.h +2 -0
- data/vendor/local/include/unicode/upluralrules.h +6 -2
- data/vendor/local/include/unicode/uregex.h +2 -0
- data/vendor/local/include/unicode/uregion.h +6 -2
- data/vendor/local/include/unicode/ureldatefmt.h +13 -7
- data/vendor/local/include/unicode/urename.h +16 -4
- data/vendor/local/include/unicode/urep.h +2 -0
- data/vendor/local/include/unicode/ures.h +7 -1
- data/vendor/local/include/unicode/uscript.h +41 -29
- data/vendor/local/include/unicode/usearch.h +10 -4
- data/vendor/local/include/unicode/uset.h +5 -1
- data/vendor/local/include/unicode/usetiter.h +2 -0
- data/vendor/local/include/unicode/ushape.h +2 -0
- data/vendor/local/include/unicode/uspoof.h +876 -363
- data/vendor/local/include/unicode/usprep.h +2 -0
- data/vendor/local/include/unicode/ustdio.h +2 -0
- data/vendor/local/include/unicode/ustream.h +2 -0
- data/vendor/local/include/unicode/ustring.h +2 -0
- data/vendor/local/include/unicode/ustringtrie.h +2 -0
- data/vendor/local/include/unicode/utext.h +2 -0
- data/vendor/local/include/unicode/utf.h +2 -0
- data/vendor/local/include/unicode/utf16.h +2 -0
- data/vendor/local/include/unicode/utf32.h +2 -0
- data/vendor/local/include/unicode/utf8.h +2 -0
- data/vendor/local/include/unicode/utf_old.h +2 -0
- data/vendor/local/include/unicode/utmscale.h +2 -0
- data/vendor/local/include/unicode/utrace.h +20 -0
- data/vendor/local/include/unicode/utrans.h +2 -0
- data/vendor/local/include/unicode/utypes.h +71 -97
- data/vendor/local/include/unicode/uvernum.h +13 -12
- data/vendor/local/include/unicode/uversion.h +2 -0
- data/vendor/local/include/unicode/vtzone.h +2 -0
- data/vendor/local/lib/icu/{57.1 → 58.2}/Makefile.inc +8 -9
- data/vendor/local/lib/icu/{57.1 → 58.2}/pkgdata.inc +3 -3
- data/vendor/local/lib/icu/Makefile.inc +8 -9
- data/vendor/local/lib/icu/pkgdata.inc +3 -3
- data/vendor/local/lib/icudt.dll +0 -0
- data/vendor/local/lib/icudt58.dll +0 -0
- data/vendor/local/lib/icuin.dll +0 -0
- data/vendor/local/lib/icuin58.dll +0 -0
- data/vendor/local/lib/icuio.dll +0 -0
- data/vendor/local/lib/icuio58.dll +0 -0
- data/vendor/local/lib/icutest.dll +0 -0
- data/vendor/local/lib/{icutest57.dll → icutest58.dll} +0 -0
- data/vendor/local/lib/icutu.dll +0 -0
- data/vendor/local/lib/icutu58.dll +0 -0
- data/vendor/local/lib/icuuc.dll +0 -0
- data/vendor/local/lib/icuuc58.dll +0 -0
- data/vendor/local/lib/libharfbuzz-icu.a +0 -0
- data/vendor/local/lib/libharfbuzz-icu.la +4 -4
- data/vendor/local/lib/libharfbuzz.dll.a +0 -0
- data/vendor/local/lib/libharfbuzz.la +3 -3
- data/vendor/local/lib/libicudt.dll.a +0 -0
- data/vendor/local/lib/{icudt57.dll → libicudt58.dll} +0 -0
- data/vendor/local/lib/libicuin.dll.a +0 -0
- data/vendor/local/lib/libicuio.dll.a +0 -0
- data/vendor/local/lib/libicutest.dll.a +0 -0
- data/vendor/local/lib/libicutu.dll.a +0 -0
- data/vendor/local/lib/libicuuc.dll.a +0 -0
- data/vendor/local/lib/libpango-1.0.dll.a +0 -0
- data/vendor/local/lib/libpangocairo-1.0.dll.a +0 -0
- data/vendor/local/lib/libpangoft2-1.0.dll.a +0 -0
- data/vendor/local/lib/libpangowin32-1.0.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/harfbuzz-icu.pc +1 -1
- data/vendor/local/lib/pkgconfig/harfbuzz.pc +2 -2
- data/vendor/local/lib/pkgconfig/icu-i18n.pc +8 -6
- data/vendor/local/lib/pkgconfig/icu-io.pc +8 -6
- data/vendor/local/lib/pkgconfig/icu-uc.pc +8 -6
- data/vendor/local/share/gtk-doc/html/harfbuzz/harfbuzz-hb-version.html +3 -3
- data/vendor/local/share/gtk-doc/html/harfbuzz/pt02.html +1 -1
- data/vendor/local/share/icu/{57.1 → 58.2}/LICENSE +52 -60
- data/vendor/local/share/icu/{57.1 → 58.2}/config/mh-mingw +4 -2
- data/vendor/local/share/icu/{57.1 → 58.2}/install-sh +0 -0
- data/vendor/local/share/icu/{57.1 → 58.2}/mkinstalldirs +2 -0
- data/vendor/local/share/man/man1/derb.1 +4 -2
- data/vendor/local/share/man/man1/genbrk.1 +4 -2
- data/vendor/local/share/man/man1/gencfu.1 +3 -1
- data/vendor/local/share/man/man1/gencnval.1 +5 -3
- data/vendor/local/share/man/man1/gendict.1 +4 -2
- data/vendor/local/share/man/man1/genrb.1 +5 -3
- data/vendor/local/share/man/man1/icu-config.1 +4 -5
- data/vendor/local/share/man/man1/makeconv.1 +5 -3
- data/vendor/local/share/man/man1/pkgdata.1 +4 -2
- data/vendor/local/share/man/man1/uconv.1 +4 -2
- data/vendor/local/share/man/man8/genccode.8 +4 -2
- data/vendor/local/share/man/man8/gencmn.8 +5 -3
- data/vendor/local/share/man/man8/gensprep.8 +5 -3
- data/vendor/local/share/man/man8/icupkg.8 +3 -1
- metadata +23 -40
- data/vendor/local/include/layout/LEFontInstance.h +0 -524
- data/vendor/local/include/layout/LEGlyphFilter.h +0 -45
- data/vendor/local/include/layout/LEGlyphStorage.h +0 -546
- data/vendor/local/include/layout/LEInsertionList.h +0 -177
- data/vendor/local/include/layout/LELanguages.h +0 -109
- data/vendor/local/include/layout/LEScripts.h +0 -204
- data/vendor/local/include/layout/LESwaps.h +0 -100
- data/vendor/local/include/layout/LETableReference.h +0 -435
- data/vendor/local/include/layout/LETypes.h +0 -728
- data/vendor/local/include/layout/LayoutEngine.h +0 -518
- data/vendor/local/include/layout/ParagraphLayout.h +0 -747
- data/vendor/local/include/layout/RunArrays.h +0 -676
- data/vendor/local/include/layout/loengine.h +0 -225
- data/vendor/local/include/layout/playout.h +0 -466
- data/vendor/local/include/layout/plruns.h +0 -441
- data/vendor/local/lib/icuin57.dll +0 -0
- data/vendor/local/lib/icuio57.dll +0 -0
- data/vendor/local/lib/icule.dll +0 -0
- data/vendor/local/lib/icule57.dll +0 -0
- data/vendor/local/lib/iculx.dll +0 -0
- data/vendor/local/lib/iculx57.dll +0 -0
- data/vendor/local/lib/icutu57.dll +0 -0
- data/vendor/local/lib/icuuc57.dll +0 -0
- data/vendor/local/lib/libicule.dll.a +0 -0
- data/vendor/local/lib/libiculx.dll.a +0 -0
- data/vendor/local/lib/pkgconfig/icu-le.pc +0 -38
- data/vendor/local/lib/pkgconfig/icu-lx.pc +0 -38
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
*****************************************************************************************
|
3
5
|
* Copyright (C) 2014, International Business Machines
|
@@ -105,11 +107,13 @@ typedef enum URegionType {
|
|
105
107
|
*/
|
106
108
|
URGN_DEPRECATED,
|
107
109
|
|
110
|
+
#ifndef U_HIDE_DEPRECATED_API
|
108
111
|
/**
|
109
|
-
*
|
110
|
-
* @
|
112
|
+
* One more than the highest normal URegionType value.
|
113
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
111
114
|
*/
|
112
115
|
URGN_LIMIT
|
116
|
+
#endif // U_HIDE_DEPRECATED_API
|
113
117
|
} URegionType;
|
114
118
|
|
115
119
|
#if !UCONFIG_NO_FORMATTING
|
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
*****************************************************************************************
|
3
5
|
* Copyright (C) 2016, International Business Machines
|
@@ -58,11 +60,13 @@ typedef enum UDateRelativeDateTimeFormatterStyle {
|
|
58
60
|
*/
|
59
61
|
UDAT_STYLE_NARROW,
|
60
62
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
#ifndef U_HIDE_DEPRECATED_API
|
64
|
+
/**
|
65
|
+
* One more than the highest normal UDateRelativeDateTimeFormatterStyle value.
|
66
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
67
|
+
*/
|
68
|
+
UDAT_STYLE_COUNT
|
69
|
+
#endif // U_HIDE_DEPRECATED_API
|
66
70
|
} UDateRelativeDateTimeFormatterStyle;
|
67
71
|
|
68
72
|
#ifndef U_HIDE_DRAFT_API
|
@@ -162,11 +166,13 @@ typedef enum URelativeDateTimeUnit {
|
|
162
166
|
* @draft ICU 57
|
163
167
|
*/
|
164
168
|
UDAT_REL_UNIT_SATURDAY,
|
169
|
+
#ifndef U_HIDE_DEPRECATED_API
|
165
170
|
/**
|
166
|
-
*
|
167
|
-
* @
|
171
|
+
* One more than the highest normal URelativeDateTimeUnit value.
|
172
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
168
173
|
*/
|
169
174
|
UDAT_REL_UNIT_COUNT
|
175
|
+
#endif // U_HIDE_DEPRECATED_API
|
170
176
|
} URelativeDateTimeUnit;
|
171
177
|
#endif /* U_HIDE_DRAFT_API */
|
172
178
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
*******************************************************************************
|
3
5
|
* Copyright (C) 2002-2016, International Business Machines
|
@@ -474,6 +476,9 @@
|
|
474
476
|
#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
|
475
477
|
#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
|
476
478
|
#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
|
479
|
+
#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close)
|
480
|
+
#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
|
481
|
+
#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
|
477
482
|
#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
|
478
483
|
#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
|
479
484
|
#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
|
@@ -1013,6 +1018,7 @@
|
|
1013
1018
|
#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
|
1014
1019
|
#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
|
1015
1020
|
#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
|
1021
|
+
#define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString)
|
1016
1022
|
#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
|
1017
1023
|
#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
|
1018
1024
|
#define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close)
|
@@ -1355,6 +1361,7 @@
|
|
1355
1361
|
#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
|
1356
1362
|
#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
|
1357
1363
|
#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
|
1364
|
+
#define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache)
|
1358
1365
|
#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
|
1359
1366
|
#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
|
1360
1367
|
#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
|
@@ -1456,10 +1463,7 @@
|
|
1456
1463
|
#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
|
1457
1464
|
#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
|
1458
1465
|
#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
|
1459
|
-
#define
|
1460
|
-
#define ures_getAllArrayItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllArrayItemsWithFallback)
|
1461
|
-
#define ures_getAllTableItems U_ICU_ENTRY_POINT_RENAME(ures_getAllTableItems)
|
1462
|
-
#define ures_getAllTableItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllTableItemsWithFallback)
|
1466
|
+
#define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback)
|
1463
1467
|
#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
|
1464
1468
|
#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
|
1465
1469
|
#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
|
@@ -1607,13 +1611,20 @@
|
|
1607
1611
|
#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
|
1608
1612
|
#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
|
1609
1613
|
#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
|
1614
|
+
#define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2)
|
1615
|
+
#define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8)
|
1616
|
+
#define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString)
|
1610
1617
|
#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
|
1611
1618
|
#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
|
1612
1619
|
#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
|
1613
1620
|
#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
|
1621
|
+
#define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult)
|
1614
1622
|
#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
|
1615
1623
|
#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
|
1616
1624
|
#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
|
1625
|
+
#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
|
1626
|
+
#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
|
1627
|
+
#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
|
1617
1628
|
#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
|
1618
1629
|
#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
|
1619
1630
|
#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
|
@@ -1625,6 +1636,7 @@
|
|
1625
1636
|
#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
|
1626
1637
|
#define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics)
|
1627
1638
|
#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
|
1639
|
+
#define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult)
|
1628
1640
|
#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
|
1629
1641
|
#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
|
1630
1642
|
#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
******************************************************************************
|
3
5
|
* Copyright (C) 1997-2010, International Business Machines
|
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
**********************************************************************
|
3
5
|
* Copyright (C) 1997-2016, International Business Machines
|
@@ -120,9 +122,13 @@ typedef enum {
|
|
120
122
|
RES_INT_VECTOR=URES_INT_VECTOR,
|
121
123
|
/** @deprecated ICU 2.6 Not used. */
|
122
124
|
RES_RESERVED=15,
|
123
|
-
#endif /* U_HIDE_DEPRECATED_API */
|
124
125
|
|
126
|
+
/**
|
127
|
+
* One more than the highest normal UResType value.
|
128
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
129
|
+
*/
|
125
130
|
URES_LIMIT = 16
|
131
|
+
#endif // U_HIDE_DEPRECATED_API
|
126
132
|
} UResType;
|
127
133
|
|
128
134
|
/*
|
@@ -1,6 +1,8 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
**********************************************************************
|
3
|
-
* Copyright (C) 1997-
|
5
|
+
* Copyright (C) 1997-2016, International Business Machines
|
4
6
|
* Corporation and others. All Rights Reserved.
|
5
7
|
**********************************************************************
|
6
8
|
*
|
@@ -32,13 +34,13 @@
|
|
32
34
|
* See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
|
33
35
|
* and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
|
34
36
|
*
|
35
|
-
*
|
37
|
+
* In addition, constants for many ISO 15924 script codes
|
36
38
|
* are included, for use with language tags, CLDR data, and similar.
|
37
39
|
* Some of those codes are not used in the Unicode Character Database (UCD).
|
38
40
|
* For example, there are no characters that have a UCD script property value of
|
39
41
|
* Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
|
40
42
|
*
|
41
|
-
* Private-use codes Qaaa..Qabx are not included.
|
43
|
+
* Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
|
42
44
|
*
|
43
45
|
* Starting with ICU 55, script codes are only added when their scripts
|
44
46
|
* have been or will certainly be encoded in Unicode,
|
@@ -424,24 +426,39 @@ typedef enum UScriptCode {
|
|
424
426
|
/** @stable ICU 54 */
|
425
427
|
USCRIPT_SIDDHAM = 166,/* Sidd */
|
426
428
|
|
427
|
-
/**
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
429
|
+
/** @stable ICU 58 */
|
430
|
+
USCRIPT_ADLAM = 167,/* Adlm */
|
431
|
+
/** @stable ICU 58 */
|
432
|
+
USCRIPT_BHAIKSUKI = 168,/* Bhks */
|
433
|
+
/** @stable ICU 58 */
|
434
|
+
USCRIPT_MARCHEN = 169,/* Marc */
|
435
|
+
/** @stable ICU 58 */
|
436
|
+
USCRIPT_NEWA = 170,/* Newa */
|
437
|
+
/** @stable ICU 58 */
|
438
|
+
USCRIPT_OSAGE = 171,/* Osge */
|
439
|
+
|
440
|
+
/** @stable ICU 58 */
|
441
|
+
USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
|
442
|
+
/** @stable ICU 58 */
|
443
|
+
USCRIPT_JAMO = 173,/* Jamo */
|
444
|
+
/** @stable ICU 58 */
|
445
|
+
USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
|
446
|
+
|
447
|
+
#ifndef U_HIDE_DEPRECATED_API
|
448
|
+
/**
|
449
|
+
* One more than the highest normal UScriptCode value.
|
450
|
+
* The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
|
451
|
+
*
|
452
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
453
|
+
*/
|
454
|
+
USCRIPT_CODE_LIMIT = 175
|
455
|
+
#endif // U_HIDE_DEPRECATED_API
|
439
456
|
} UScriptCode;
|
440
457
|
|
441
458
|
/**
|
442
|
-
* Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
459
|
+
* Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
443
460
|
* Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
|
444
|
-
* Fills in USCRIPT_LATIN given "en" OR "en_US"
|
461
|
+
* Fills in USCRIPT_LATIN given "en" OR "en_US"
|
445
462
|
* If the required capacity is greater than the capacity of the destination buffer,
|
446
463
|
* then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
|
447
464
|
*
|
@@ -454,10 +471,10 @@ typedef enum UScriptCode {
|
|
454
471
|
* @param fillIn the UScriptCode buffer to fill in the script code
|
455
472
|
* @param capacity the capacity (size) fo UScriptCode buffer passed in.
|
456
473
|
* @param err the error status code.
|
457
|
-
* @return The number of script codes filled in the buffer passed in
|
474
|
+
* @return The number of script codes filled in the buffer passed in
|
458
475
|
* @stable ICU 2.4
|
459
476
|
*/
|
460
|
-
U_STABLE int32_t U_EXPORT2
|
477
|
+
U_STABLE int32_t U_EXPORT2
|
461
478
|
uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
|
462
479
|
|
463
480
|
/**
|
@@ -470,7 +487,7 @@ uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capac
|
|
470
487
|
* or NULL if scriptCode is invalid
|
471
488
|
* @stable ICU 2.4
|
472
489
|
*/
|
473
|
-
U_STABLE const char* U_EXPORT2
|
490
|
+
U_STABLE const char* U_EXPORT2
|
474
491
|
uscript_getName(UScriptCode scriptCode);
|
475
492
|
|
476
493
|
/**
|
@@ -482,18 +499,18 @@ uscript_getName(UScriptCode scriptCode);
|
|
482
499
|
* @return short script name (4-letter code), or NULL if scriptCode is invalid
|
483
500
|
* @stable ICU 2.4
|
484
501
|
*/
|
485
|
-
U_STABLE const char* U_EXPORT2
|
502
|
+
U_STABLE const char* U_EXPORT2
|
486
503
|
uscript_getShortName(UScriptCode scriptCode);
|
487
504
|
|
488
505
|
/**
|
489
506
|
* Gets the script code associated with the given codepoint.
|
490
|
-
* Returns USCRIPT_MALAYALAM given 0x0D02
|
507
|
+
* Returns USCRIPT_MALAYALAM given 0x0D02
|
491
508
|
* @param codepoint UChar32 codepoint
|
492
509
|
* @param err the error status code.
|
493
|
-
* @return The UScriptCode, or 0 if codepoint is invalid
|
510
|
+
* @return The UScriptCode, or 0 if codepoint is invalid
|
494
511
|
* @stable ICU 2.4
|
495
512
|
*/
|
496
|
-
U_STABLE UScriptCode U_EXPORT2
|
513
|
+
U_STABLE UScriptCode U_EXPORT2
|
497
514
|
uscript_getScript(UChar32 codepoint, UErrorCode *err);
|
498
515
|
|
499
516
|
/**
|
@@ -503,9 +520,6 @@ uscript_getScript(UChar32 codepoint, UErrorCode *err);
|
|
503
520
|
*
|
504
521
|
* Some characters are commonly used in multiple scripts.
|
505
522
|
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
506
|
-
*
|
507
|
-
* The Script_Extensions property is provisional. It may be modified or removed
|
508
|
-
* in future versions of the Unicode Standard, and thus in ICU.
|
509
523
|
* @param c code point
|
510
524
|
* @param sc script code
|
511
525
|
* @return TRUE if sc is in Script_Extensions(c)
|
@@ -532,8 +546,6 @@ uscript_hasScript(UChar32 c, UScriptCode sc);
|
|
532
546
|
* U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
|
533
547
|
* (Usual ICU buffer handling behavior.)
|
534
548
|
*
|
535
|
-
* The Script_Extensions property is provisional. It may be modified or removed
|
536
|
-
* in future versions of the Unicode Standard, and thus in ICU.
|
537
549
|
* @param c code point
|
538
550
|
* @param scripts output script code array
|
539
551
|
* @param capacity capacity of the scripts array
|
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
**********************************************************************
|
3
5
|
* Copyright (C) 2001-2011,2014 IBM and others. All rights reserved.
|
@@ -188,11 +190,13 @@ typedef enum {
|
|
188
190
|
*/
|
189
191
|
USEARCH_ELEMENT_COMPARISON = 2,
|
190
192
|
|
193
|
+
#ifndef U_HIDE_DEPRECATED_API
|
191
194
|
/**
|
192
|
-
*
|
193
|
-
* @
|
195
|
+
* One more than the highest normal USearchAttribute value.
|
196
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
194
197
|
*/
|
195
198
|
USEARCH_ATTRIBUTE_COUNT = 3
|
199
|
+
#endif // U_HIDE_DEPRECATED_API
|
196
200
|
} USearchAttribute;
|
197
201
|
|
198
202
|
/**
|
@@ -262,11 +266,13 @@ typedef enum {
|
|
262
266
|
*/
|
263
267
|
USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD,
|
264
268
|
|
269
|
+
#ifndef U_HIDE_DEPRECATED_API
|
265
270
|
/**
|
266
|
-
*
|
267
|
-
* @
|
271
|
+
* One more than the highest normal USearchAttributeValue value.
|
272
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
268
273
|
*/
|
269
274
|
USEARCH_ATTRIBUTE_VALUE_COUNT
|
275
|
+
#endif // U_HIDE_DEPRECATED_API
|
270
276
|
} USearchAttributeValue;
|
271
277
|
|
272
278
|
/* open and close ------------------------------------------------------ */
|
@@ -1,3 +1,5 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
*******************************************************************************
|
3
5
|
*
|
@@ -196,11 +198,13 @@ typedef enum USetSpanCondition {
|
|
196
198
|
* @stable ICU 3.8
|
197
199
|
*/
|
198
200
|
USET_SPAN_SIMPLE = 2,
|
201
|
+
#ifndef U_HIDE_DEPRECATED_API
|
199
202
|
/**
|
200
203
|
* One more than the last span condition.
|
201
|
-
* @
|
204
|
+
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
202
205
|
*/
|
203
206
|
USET_SPAN_CONDITION_COUNT
|
207
|
+
#endif // U_HIDE_DEPRECATED_API
|
204
208
|
} USetSpanCondition;
|
205
209
|
|
206
210
|
enum {
|
@@ -1,6 +1,8 @@
|
|
1
|
+
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
2
|
+
// License & terms of use: http://www.unicode.org/copyright.html
|
1
3
|
/*
|
2
4
|
***************************************************************************
|
3
|
-
* Copyright (C) 2008-
|
5
|
+
* Copyright (C) 2008-2016, International Business Machines Corporation
|
4
6
|
* and others. All Rights Reserved.
|
5
7
|
***************************************************************************
|
6
8
|
* file name: uspoof.h
|
@@ -35,123 +37,350 @@
|
|
35
37
|
* \file
|
36
38
|
* \brief Unicode Security and Spoofing Detection, C API.
|
37
39
|
*
|
38
|
-
*
|
39
|
-
*
|
40
|
-
*
|
41
|
-
*
|
42
|
-
*
|
43
|
-
*
|
44
|
-
*
|
45
|
-
*
|
46
|
-
*
|
47
|
-
*
|
48
|
-
*
|
49
|
-
*
|
50
|
-
*
|
51
|
-
*
|
52
|
-
*
|
53
|
-
*
|
54
|
-
*
|
55
|
-
*
|
56
|
-
*
|
57
|
-
*
|
58
|
-
*
|
59
|
-
*
|
60
|
-
*
|
61
|
-
*
|
62
|
-
*
|
63
|
-
*
|
64
|
-
*
|
65
|
-
*
|
66
|
-
*
|
67
|
-
*
|
68
|
-
*
|
69
|
-
*
|
70
|
-
*
|
71
|
-
*
|
72
|
-
*
|
73
|
-
*
|
74
|
-
*
|
75
|
-
*
|
76
|
-
*
|
77
|
-
*
|
78
|
-
*
|
79
|
-
*
|
80
|
-
*
|
81
|
-
*
|
82
|
-
*
|
83
|
-
*
|
84
|
-
*
|
85
|
-
*
|
86
|
-
*
|
87
|
-
*
|
88
|
-
*
|
89
|
-
*
|
90
|
-
*
|
91
|
-
*
|
92
|
-
*
|
93
|
-
*
|
94
|
-
*
|
95
|
-
*
|
96
|
-
*
|
97
|
-
*
|
98
|
-
*
|
99
|
-
*
|
100
|
-
*
|
101
|
-
*
|
102
|
-
*
|
103
|
-
*
|
104
|
-
*
|
105
|
-
*
|
106
|
-
*
|
107
|
-
*
|
108
|
-
*
|
109
|
-
*
|
110
|
-
*
|
111
|
-
*
|
112
|
-
*
|
113
|
-
*
|
114
|
-
*
|
115
|
-
*
|
116
|
-
*
|
117
|
-
*
|
118
|
-
*
|
119
|
-
*
|
120
|
-
*
|
121
|
-
*
|
122
|
-
*
|
123
|
-
*
|
124
|
-
*
|
125
|
-
*
|
126
|
-
*
|
127
|
-
*
|
128
|
-
*
|
129
|
-
*
|
130
|
-
*
|
131
|
-
*
|
132
|
-
*
|
133
|
-
*
|
134
|
-
*
|
135
|
-
*
|
136
|
-
*
|
137
|
-
*
|
138
|
-
*
|
139
|
-
*
|
140
|
-
*
|
141
|
-
*
|
142
|
-
*
|
143
|
-
*
|
144
|
-
*
|
145
|
-
*
|
146
|
-
*
|
147
|
-
*
|
148
|
-
*
|
149
|
-
*
|
40
|
+
* <p>
|
41
|
+
* This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
|
42
|
+
* <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
|
43
|
+
*
|
44
|
+
* <ol>
|
45
|
+
* <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
|
46
|
+
* "Ηarvest", where the second string starts with the Greek capital letter Eta.</li>
|
47
|
+
* <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
|
48
|
+
* detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
|
49
|
+
* </ol>
|
50
|
+
*
|
51
|
+
* <p>
|
52
|
+
* Although originally designed as a method for flagging suspicious identifier strings such as URLs,
|
53
|
+
* <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
|
54
|
+
* content filters.
|
55
|
+
*
|
56
|
+
* <p>
|
57
|
+
* The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
|
58
|
+
*
|
59
|
+
* <h2>Confusables</h2>
|
60
|
+
*
|
61
|
+
* <p>
|
62
|
+
* The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
|
63
|
+
*
|
64
|
+
* \code{.c}
|
65
|
+
* UErrorCode status = U_ZERO_ERROR;
|
66
|
+
* UChar* str1 = (UChar*) u"Harvest";
|
67
|
+
* UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA
|
68
|
+
*
|
69
|
+
* USpoofChecker* sc = uspoof_open(&status);
|
70
|
+
* uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
|
71
|
+
*
|
72
|
+
* int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
|
73
|
+
* UBool result = bitmask != 0;
|
74
|
+
* // areConfusable: 1 (status: U_ZERO_ERROR)
|
75
|
+
* printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
|
76
|
+
* uspoof_close(sc);
|
77
|
+
* \endcode
|
78
|
+
*
|
79
|
+
* <p>
|
80
|
+
* The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
|
81
|
+
* enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
|
82
|
+
* confusability test; and the following line extracts the result out of the return value. For best performance,
|
83
|
+
* the instance should be created once (e.g., upon application startup), and the efficient
|
84
|
+
* {@link uspoof_areConfusable} method can be used at runtime.
|
85
|
+
*
|
86
|
+
* <p>
|
87
|
+
* The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call
|
88
|
+
* {@link uspoof_close} when the object goes out of scope:
|
89
|
+
*
|
90
|
+
* \code{.cpp}
|
91
|
+
* UErrorCode status = U_ZERO_ERROR;
|
92
|
+
* LocalUSpoofCheckerPointer sc(uspoof_open(&status));
|
93
|
+
* uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
|
94
|
+
* // ...
|
95
|
+
* \endcode
|
96
|
+
*
|
97
|
+
* <p>
|
98
|
+
* UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
|
99
|
+
* be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
|
100
|
+
* the following snippet is equivalent to the example above:
|
101
|
+
*
|
102
|
+
* \code{.c}
|
103
|
+
* UErrorCode status = U_ZERO_ERROR;
|
104
|
+
* UChar* str1 = (UChar*) u"Harvest";
|
105
|
+
* UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA
|
106
|
+
*
|
107
|
+
* USpoofChecker* sc = uspoof_open(&status);
|
108
|
+
* uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
|
109
|
+
*
|
110
|
+
* // Get skeleton 1
|
111
|
+
* int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
|
112
|
+
* UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
|
113
|
+
* status = U_ZERO_ERROR;
|
114
|
+
* uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
|
115
|
+
*
|
116
|
+
* // Get skeleton 2
|
117
|
+
* int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
|
118
|
+
* UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
|
119
|
+
* status = U_ZERO_ERROR;
|
120
|
+
* uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
|
121
|
+
*
|
122
|
+
* // Are the skeletons the same?
|
123
|
+
* UBool result = u_strcmp(skel1, skel2) == 0;
|
124
|
+
* // areConfusable: 1 (status: U_ZERO_ERROR)
|
125
|
+
* printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
|
126
|
+
* uspoof_close(sc);
|
127
|
+
* free(skel1);
|
128
|
+
* free(skel2);
|
129
|
+
* \endcode
|
130
|
+
*
|
131
|
+
* <p>
|
132
|
+
* If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
|
133
|
+
* {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
|
134
|
+
*
|
135
|
+
* \code{.c}
|
136
|
+
* UErrorCode status = U_ZERO_ERROR;
|
137
|
+
* #define DICTIONARY_LENGTH 2
|
138
|
+
* UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
|
139
|
+
* UChar* skeletons[DICTIONARY_LENGTH];
|
140
|
+
* UChar* str = (UChar*) u"1orern";
|
141
|
+
*
|
142
|
+
* // Setup:
|
143
|
+
* USpoofChecker* sc = uspoof_open(&status);
|
144
|
+
* uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
|
145
|
+
* for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
|
146
|
+
* UChar* word = dictionary[i];
|
147
|
+
* int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
|
148
|
+
* skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
|
149
|
+
* status = U_ZERO_ERROR;
|
150
|
+
* uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
|
151
|
+
* }
|
152
|
+
*
|
153
|
+
* // Live Check:
|
154
|
+
* {
|
155
|
+
* int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
|
156
|
+
* UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
|
157
|
+
* status = U_ZERO_ERROR;
|
158
|
+
* uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
|
159
|
+
* UBool result = FALSE;
|
160
|
+
* for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
|
161
|
+
* result = u_strcmp(skel, skeletons[i]) == 0;
|
162
|
+
* if (result == TRUE) { break; }
|
163
|
+
* }
|
164
|
+
* // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
|
165
|
+
* printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
|
166
|
+
* free(skel);
|
167
|
+
* }
|
168
|
+
*
|
169
|
+
* for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
|
170
|
+
* free(skeletons[i]);
|
171
|
+
* }
|
172
|
+
* uspoof_close(sc);
|
173
|
+
* \endcode
|
174
|
+
*
|
175
|
+
* <p>
|
176
|
+
* <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
|
177
|
+
* guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
|
178
|
+
* at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
|
179
|
+
*
|
180
|
+
* <h2>Spoof Detection</h2>
|
181
|
+
*
|
182
|
+
* <p>
|
183
|
+
* The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
|
184
|
+
* string:
|
185
|
+
*
|
186
|
+
* \code{.c}
|
187
|
+
* UErrorCode status = U_ZERO_ERROR;
|
188
|
+
* UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A
|
189
|
+
*
|
190
|
+
* // Get the default set of allowable characters:
|
191
|
+
* USet* allowed = uset_openEmpty();
|
192
|
+
* uset_addAll(allowed, uspoof_getRecommendedSet(&status));
|
193
|
+
* uset_addAll(allowed, uspoof_getInclusionSet(&status));
|
194
|
+
*
|
195
|
+
* USpoofChecker* sc = uspoof_open(&status);
|
196
|
+
* uspoof_setAllowedChars(sc, allowed, &status);
|
197
|
+
* uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
|
198
|
+
*
|
199
|
+
* int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
|
200
|
+
* UBool result = bitmask != 0;
|
201
|
+
* // fails checks: 1 (status: U_ZERO_ERROR)
|
202
|
+
* printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
|
203
|
+
* uspoof_close(sc);
|
204
|
+
* uset_close(allowed);
|
205
|
+
* \endcode
|
206
|
+
*
|
207
|
+
* <p>
|
208
|
+
* As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
|
209
|
+
* startup, and call the cheaper {@link uspoof_check} online. We specify the set of
|
210
|
+
* allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
|
211
|
+
*
|
212
|
+
* <p>
|
213
|
+
* In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
|
214
|
+
* and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
|
215
|
+
*
|
216
|
+
* <p>
|
217
|
+
* If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
|
218
|
+
* is available in the returned bitmask. For complete information, use the {@link uspoof_check2} class of functions
|
219
|
+
* with a {@link USpoofCheckResult} parameter:
|
220
|
+
*
|
221
|
+
* \code{.c}
|
222
|
+
* UErrorCode status = U_ZERO_ERROR;
|
223
|
+
* UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A
|
224
|
+
*
|
225
|
+
* // Get the default set of allowable characters:
|
226
|
+
* USet* allowed = uset_openEmpty();
|
227
|
+
* uset_addAll(allowed, uspoof_getRecommendedSet(&status));
|
228
|
+
* uset_addAll(allowed, uspoof_getInclusionSet(&status));
|
229
|
+
*
|
230
|
+
* USpoofChecker* sc = uspoof_open(&status);
|
231
|
+
* uspoof_setAllowedChars(sc, allowed, &status);
|
232
|
+
* uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
|
233
|
+
*
|
234
|
+
* USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
|
235
|
+
* int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
|
236
|
+
*
|
237
|
+
* int32_t failures1 = bitmask;
|
238
|
+
* int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
|
239
|
+
* assert(failures1 == failures2);
|
240
|
+
* // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
|
241
|
+
* printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
|
242
|
+
*
|
243
|
+
* // Cleanup:
|
244
|
+
* uspoof_close(sc);
|
245
|
+
* uset_close(allowed);
|
246
|
+
* uspoof_closeCheckResult(checkResult);
|
247
|
+
* \endcode
|
248
|
+
*
|
249
|
+
* C++ users can take advantage of a few syntactical conveniences. The following snippet is functionally
|
250
|
+
* equivalent to the one above:
|
251
|
+
*
|
252
|
+
* \code{.cpp}
|
253
|
+
* UErrorCode status = U_ZERO_ERROR;
|
254
|
+
* UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A
|
255
|
+
*
|
256
|
+
* // Get the default set of allowable characters:
|
257
|
+
* UnicodeSet allowed;
|
258
|
+
* allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
|
259
|
+
* allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
|
260
|
+
*
|
261
|
+
* LocalUSpoofCheckerPointer sc(uspoof_open(&status));
|
262
|
+
* uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
|
263
|
+
* uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
|
264
|
+
*
|
265
|
+
* LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
|
266
|
+
* int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
|
267
|
+
*
|
268
|
+
* int32_t failures1 = bitmask;
|
269
|
+
* int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
|
270
|
+
* assert(failures1 == failures2);
|
271
|
+
* // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
|
272
|
+
* printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
|
273
|
+
*
|
274
|
+
* // Explicit cleanup not necessary.
|
275
|
+
* \endcode
|
276
|
+
*
|
277
|
+
* <p>
|
278
|
+
* The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
|
279
|
+
* {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
|
280
|
+
*
|
281
|
+
* <ul>
|
282
|
+
* <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
|
283
|
+
* <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
|
284
|
+
* 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
|
285
|
+
* <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
|
286
|
+
* sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
|
287
|
+
* <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
|
288
|
+
* characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
|
289
|
+
* <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
|
290
|
+
* </ul>
|
291
|
+
*
|
292
|
+
* <p>
|
293
|
+
* These checks can be enabled independently of each other. For example, if you were interested in checking for only the
|
294
|
+
* INVISIBLE and MIXED_NUMBERS conditions, you could do:
|
295
|
+
*
|
296
|
+
* \code{.c}
|
297
|
+
* UErrorCode status = U_ZERO_ERROR;
|
298
|
+
* UChar* str = (UChar*) u"8\u09EA"; // 8 mixed with U+09EA BENGALI DIGIT FOUR
|
299
|
+
*
|
300
|
+
* USpoofChecker* sc = uspoof_open(&status);
|
301
|
+
* uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
|
302
|
+
*
|
303
|
+
* int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
|
304
|
+
* UBool result = bitmask != 0;
|
305
|
+
* // fails checks: 1 (status: U_ZERO_ERROR)
|
306
|
+
* printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
|
307
|
+
* uspoof_close(sc);
|
308
|
+
* \endcode
|
309
|
+
*
|
310
|
+
* <p>
|
311
|
+
* Here is an example in C++ showing how to compute the restriction level of a string:
|
312
|
+
*
|
313
|
+
* \code{.cpp}
|
314
|
+
* UErrorCode status = U_ZERO_ERROR;
|
315
|
+
* UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A
|
316
|
+
*
|
317
|
+
* // Get the default set of allowable characters:
|
318
|
+
* UnicodeSet allowed;
|
319
|
+
* allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
|
320
|
+
* allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
|
321
|
+
*
|
322
|
+
* LocalUSpoofCheckerPointer sc(uspoof_open(&status));
|
323
|
+
* uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
|
324
|
+
* uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
|
325
|
+
* uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
|
326
|
+
*
|
327
|
+
* LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
|
328
|
+
* int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
|
329
|
+
*
|
330
|
+
* URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
|
331
|
+
* // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
|
332
|
+
* assert((restrictionLevel & bitmask) == restrictionLevel);
|
333
|
+
* // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
|
334
|
+
* printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
|
335
|
+
* \endcode
|
336
|
+
*
|
337
|
+
* <p>
|
338
|
+
* The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE. Since
|
339
|
+
* USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
|
340
|
+
*
|
341
|
+
* <p>
|
342
|
+
* <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
|
343
|
+
* <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
|
344
|
+
* are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
|
345
|
+
* Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
|
346
|
+
* recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
|
347
|
+
* with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
|
348
|
+
* the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
|
349
|
+
* allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
|
350
|
+
* COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
|
351
|
+
* scripts.
|
352
|
+
*
|
353
|
+
* <h2>Additional Information</h2>
|
354
|
+
*
|
355
|
+
* <p>
|
356
|
+
* A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
|
357
|
+
*
|
358
|
+
* <p>
|
359
|
+
* <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
|
360
|
+
* two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
|
361
|
+
* using the same USpoofChecker instance.
|
362
|
+
*
|
363
|
+
* <p>
|
364
|
+
* More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
|
365
|
+
* thread safe. Those that take a non-const USpoofChecker are not thread safe.
|
366
|
+
*
|
367
|
+
* @stable ICU 4.6
|
150
368
|
*/
|
151
369
|
|
152
370
|
struct USpoofChecker;
|
153
371
|
typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */
|
154
372
|
|
373
|
+
#ifndef U_HIDE_DRAFT_API
|
374
|
+
/**
|
375
|
+
* @see uspoof_openCheckResult
|
376
|
+
*/
|
377
|
+
struct USpoofCheckResult;
|
378
|
+
/**
|
379
|
+
* @see uspoof_openCheckResult
|
380
|
+
*/
|
381
|
+
typedef struct USpoofCheckResult USpoofCheckResult;
|
382
|
+
#endif /* U_HIDE_DRAFT_API */
|
383
|
+
|
155
384
|
/**
|
156
385
|
* Enum for the kinds of checks that USpoofChecker can perform.
|
157
386
|
* These enum values are used both to select the set of checks that
|
@@ -160,45 +389,61 @@ typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker
|
|
160
389
|
* @stable ICU 4.2
|
161
390
|
*/
|
162
391
|
typedef enum USpoofChecks {
|
163
|
-
/**
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
392
|
+
/**
|
393
|
+
* When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
|
394
|
+
* that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
|
395
|
+
* 4.
|
396
|
+
*
|
397
|
+
* @see uspoof_areConfusable
|
398
|
+
* @stable ICU 4.2
|
399
|
+
*/
|
168
400
|
USPOOF_SINGLE_SCRIPT_CONFUSABLE = 1,
|
169
401
|
|
170
|
-
/**
|
171
|
-
*
|
172
|
-
*
|
173
|
-
*
|
174
|
-
*
|
175
|
-
*
|
176
|
-
*
|
402
|
+
/**
|
403
|
+
* When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
|
404
|
+
* that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
|
405
|
+
* 39 section 4.
|
406
|
+
*
|
407
|
+
* @see uspoof_areConfusable
|
408
|
+
* @stable ICU 4.2
|
177
409
|
*/
|
178
410
|
USPOOF_MIXED_SCRIPT_CONFUSABLE = 2,
|
179
411
|
|
180
|
-
/**
|
181
|
-
*
|
182
|
-
*
|
183
|
-
*
|
184
|
-
*
|
185
|
-
*
|
186
|
-
*
|
187
|
-
* the identifiers are visually confusable.
|
412
|
+
/**
|
413
|
+
* When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
|
414
|
+
* that the two strings are visually confusable and that they are not from the same script but both of them are
|
415
|
+
* single-script strings, according to UTS 39 section 4.
|
416
|
+
*
|
417
|
+
* @see uspoof_areConfusable
|
418
|
+
* @stable ICU 4.2
|
188
419
|
*/
|
189
420
|
USPOOF_WHOLE_SCRIPT_CONFUSABLE = 4,
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
421
|
+
|
422
|
+
#ifndef U_HIDE_DRAFT_API
|
423
|
+
/**
|
424
|
+
* Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables. You may set
|
425
|
+
* the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
|
426
|
+
* make {@link uspoof_areConfusable} return only those types of confusables.
|
427
|
+
*
|
428
|
+
* @see uspoof_areConfusable
|
429
|
+
* @see uspoof_getSkeleton
|
430
|
+
* @draft ICU 58
|
431
|
+
*/
|
432
|
+
USPOOF_CONFUSABLE = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
|
433
|
+
#endif /* U_HIDE_DRAFT_API */
|
434
|
+
|
435
|
+
#ifndef U_HIDE_DEPRECATED_API
|
436
|
+
/**
|
437
|
+
* This flag is deprecated and no longer affects the behavior of SpoofChecker.
|
438
|
+
*
|
439
|
+
* @deprecated ICU 58 Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
|
440
|
+
*/
|
197
441
|
USPOOF_ANY_CASE = 8,
|
442
|
+
#endif /* U_HIDE_DEPRECATED_API */
|
198
443
|
|
199
444
|
/**
|
200
445
|
* Check that an identifier is no looser than the specified RestrictionLevel.
|
201
|
-
* The default if
|
446
|
+
* The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
|
202
447
|
*
|
203
448
|
* If USPOOF_AUX_INFO is enabled the actual restriction level of the
|
204
449
|
* identifier being tested will also be returned by uspoof_check().
|
@@ -211,7 +456,7 @@ typedef enum USpoofChecks {
|
|
211
456
|
*/
|
212
457
|
USPOOF_RESTRICTION_LEVEL = 16,
|
213
458
|
|
214
|
-
#ifndef U_HIDE_DEPRECATED_API
|
459
|
+
#ifndef U_HIDE_DEPRECATED_API
|
215
460
|
/** Check that an identifier contains only characters from a
|
216
461
|
* single script (plus chars from the common and inherited scripts.)
|
217
462
|
* Applies to checks of a single identifier check only.
|
@@ -219,7 +464,7 @@ typedef enum USpoofChecks {
|
|
219
464
|
*/
|
220
465
|
USPOOF_SINGLE_SCRIPT = USPOOF_RESTRICTION_LEVEL,
|
221
466
|
#endif /* U_HIDE_DEPRECATED_API */
|
222
|
-
|
467
|
+
|
223
468
|
/** Check an identifier for the presence of invisible characters,
|
224
469
|
* such as zero-width spaces, or character sequences that are
|
225
470
|
* likely not to display, such as multiple occurrences of the same
|
@@ -229,97 +474,119 @@ typedef enum USpoofChecks {
|
|
229
474
|
USPOOF_INVISIBLE = 32,
|
230
475
|
|
231
476
|
/** Check that an identifier contains only characters from a specified set
|
232
|
-
* of acceptable characters. See uspoof_setAllowedChars
|
233
|
-
* uspoof_setAllowedLocales
|
477
|
+
* of acceptable characters. See {@link uspoof_setAllowedChars} and
|
478
|
+
* {@link uspoof_setAllowedLocales}. Note that a string that fails this check
|
479
|
+
* will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
|
234
480
|
*/
|
235
481
|
USPOOF_CHAR_LIMIT = 64,
|
236
482
|
|
237
483
|
/**
|
238
|
-
* Check that an identifier does not
|
239
|
-
* more
|
240
|
-
*
|
484
|
+
* Check that an identifier does not mix numbers from different numbering systems.
|
485
|
+
* For more information, see UTS 39 section 5.3.
|
486
|
+
*
|
241
487
|
* @stable ICU 51
|
242
488
|
*/
|
243
489
|
USPOOF_MIXED_NUMBERS = 128,
|
244
490
|
|
245
491
|
/**
|
246
492
|
* Enable all spoof checks.
|
247
|
-
*
|
493
|
+
*
|
248
494
|
* @stable ICU 4.6
|
249
495
|
*/
|
250
496
|
USPOOF_ALL_CHECKS = 0xFFFF,
|
251
497
|
|
252
498
|
/**
|
253
499
|
* Enable the return of auxiliary (non-error) information in the
|
254
|
-
* upper bits of the check results value.
|
500
|
+
* upper bits of the check results value.
|
255
501
|
*
|
256
|
-
* If this "check" is not enabled, the results of uspoof_check
|
257
|
-
* identifier passes all of the enabled checks.
|
502
|
+
* If this "check" is not enabled, the results of {@link uspoof_check} will be
|
503
|
+
* zero when an identifier passes all of the enabled checks.
|
258
504
|
*
|
259
|
-
* If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will
|
260
|
-
* when an identifier passes all checks.
|
505
|
+
* If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
|
506
|
+
* be zero when an identifier passes all checks.
|
261
507
|
*
|
262
508
|
* @stable ICU 51
|
263
509
|
*/
|
264
510
|
USPOOF_AUX_INFO = 0x40000000
|
265
511
|
|
266
512
|
} USpoofChecks;
|
267
|
-
|
268
|
-
|
513
|
+
|
514
|
+
|
269
515
|
/**
|
270
|
-
* Constants from UAX #39 for use in
|
516
|
+
* Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and
|
271
517
|
* for returned identifier restriction levels in check results.
|
518
|
+
*
|
272
519
|
* @stable ICU 51
|
520
|
+
*
|
521
|
+
* @see uspoof_setRestrictionLevel
|
522
|
+
* @see uspoof_check
|
273
523
|
*/
|
274
524
|
typedef enum URestrictionLevel {
|
275
525
|
/**
|
276
|
-
*
|
277
|
-
*
|
526
|
+
* All characters in the string are in the identifier profile and all characters in the string are in the
|
527
|
+
* ASCII range.
|
528
|
+
*
|
278
529
|
* @stable ICU 51
|
279
530
|
*/
|
280
531
|
USPOOF_ASCII = 0x10000000,
|
281
532
|
/**
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
533
|
+
* The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
|
534
|
+
* the string is single-script, according to the definition in UTS 39 section 5.1.
|
535
|
+
*
|
536
|
+
* @stable ICU 53
|
537
|
+
*/
|
286
538
|
USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
|
287
539
|
/**
|
288
|
-
*
|
289
|
-
*
|
290
|
-
*
|
291
|
-
*
|
540
|
+
* The string classifies as Single Script, or all characters in the string are in the identifier profile and
|
541
|
+
* the string is covered by any of the following sets of scripts, according to the definition in UTS 39
|
542
|
+
* section 5.1:
|
543
|
+
* <ul>
|
544
|
+
* <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
|
545
|
+
* <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
|
546
|
+
* <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
|
547
|
+
* </ul>
|
548
|
+
* This is the default restriction in ICU.
|
549
|
+
*
|
292
550
|
* @stable ICU 51
|
293
551
|
*/
|
294
552
|
USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
|
295
553
|
/**
|
296
|
-
*
|
297
|
-
*
|
554
|
+
* The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
|
555
|
+
* and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
|
556
|
+
* Greek, and Cherokee.
|
557
|
+
*
|
298
558
|
* @stable ICU 51
|
299
559
|
*/
|
300
560
|
USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
|
301
561
|
/**
|
302
|
-
*
|
303
|
-
*
|
562
|
+
* All characters in the string are in the identifier profile. Allow arbitrary mixtures of scripts.
|
563
|
+
*
|
304
564
|
* @stable ICU 51
|
305
565
|
*/
|
306
566
|
USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
|
307
567
|
/**
|
308
568
|
* Any valid identifiers, including characters outside of the Identifier Profile.
|
309
|
-
*
|
569
|
+
*
|
310
570
|
* @stable ICU 51
|
311
571
|
*/
|
312
572
|
USPOOF_UNRESTRICTIVE = 0x60000000,
|
313
573
|
/**
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
574
|
+
* Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
|
575
|
+
*
|
576
|
+
* @stable ICU 53
|
577
|
+
*/
|
578
|
+
USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
|
579
|
+
#ifndef U_HIDE_INTERNAL_API
|
580
|
+
/**
|
581
|
+
* An undefined restriction level.
|
582
|
+
* @internal
|
583
|
+
*/
|
584
|
+
USPOOF_UNDEFINED_RESTRICTIVE = -1
|
585
|
+
#endif /* U_HIDE_INTERNAL_API */
|
319
586
|
} URestrictionLevel;
|
320
587
|
|
321
588
|
/**
|
322
|
-
* Create a Unicode Spoof Checker, configured to perform all
|
589
|
+
* Create a Unicode Spoof Checker, configured to perform all
|
323
590
|
* checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
|
324
591
|
* Note that additional checks may be added in the future,
|
325
592
|
* resulting in the changes to the default checking behavior.
|
@@ -359,10 +626,10 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
|
|
359
626
|
|
360
627
|
/**
|
361
628
|
* Open a Spoof Checker from the source form of the spoof data.
|
362
|
-
* The
|
363
|
-
*
|
364
|
-
*
|
365
|
-
*
|
629
|
+
* The input corresponds to the Unicode data file confusables.txt
|
630
|
+
* as described in Unicode UAX #39. The syntax of the source data
|
631
|
+
* is as described in UAX #39 for this file, and the content of
|
632
|
+
* this file is acceptable input.
|
366
633
|
*
|
367
634
|
* The character encoding of the (char *) input text is UTF-8.
|
368
635
|
*
|
@@ -371,10 +638,9 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
|
|
371
638
|
* @param confusablesLen The length of the confusables text, or -1 if the
|
372
639
|
* input string is zero terminated.
|
373
640
|
* @param confusablesWholeScript
|
374
|
-
*
|
375
|
-
*
|
376
|
-
*
|
377
|
-
* -1 if the input string is zero terminated.
|
641
|
+
* Deprecated in ICU 58. No longer used.
|
642
|
+
* @param confusablesWholeScriptLen
|
643
|
+
* Deprecated in ICU 58. No longer used.
|
378
644
|
* @param errType In the event of an error in the input, indicates
|
379
645
|
* which of the input files contains the error.
|
380
646
|
* The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
|
@@ -435,8 +701,33 @@ uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
|
|
435
701
|
|
436
702
|
|
437
703
|
/**
|
438
|
-
* Specify the
|
439
|
-
*
|
704
|
+
* Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
|
705
|
+
* overwrites any checks that may have already been enabled. By default, all checks are enabled.
|
706
|
+
*
|
707
|
+
* To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
|
708
|
+
* example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and
|
709
|
+
* also strings that contain digits from mixed numbering systems:
|
710
|
+
*
|
711
|
+
* <pre>
|
712
|
+
* {@code
|
713
|
+
* uspoof_setChecks(USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS);
|
714
|
+
* }
|
715
|
+
* </pre>
|
716
|
+
*
|
717
|
+
* To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
|
718
|
+
* ALL_CHECKS. For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
|
719
|
+
* it is good practice to disable the CONFUSABLE check:
|
720
|
+
*
|
721
|
+
* <pre>
|
722
|
+
* {@code
|
723
|
+
* uspoof_setChecks(USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE);
|
724
|
+
* }
|
725
|
+
* </pre>
|
726
|
+
*
|
727
|
+
* Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
|
728
|
+
* {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
|
729
|
+
* enable onto the existing bitmask specified by this method. For more details, see the documentation of those
|
730
|
+
* methods.
|
440
731
|
*
|
441
732
|
* @param sc The USpoofChecker
|
442
733
|
* @param checks The set of checks that this spoof checker will perform.
|
@@ -451,7 +742,7 @@ uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
|
|
451
742
|
|
452
743
|
/**
|
453
744
|
* Get the set of checks that this Spoof Checker has been configured to perform.
|
454
|
-
*
|
745
|
+
*
|
455
746
|
* @param sc The USpoofChecker
|
456
747
|
* @param status The error code, set if this function encounters a problem.
|
457
748
|
* @return The set of checks that this spoof checker will perform.
|
@@ -464,19 +755,22 @@ U_STABLE int32_t U_EXPORT2
|
|
464
755
|
uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
|
465
756
|
|
466
757
|
/**
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
758
|
+
* Set the loosest restriction level allowed for strings. The default if this is not called is
|
759
|
+
* {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
|
760
|
+
* {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. To customize which checks are
|
761
|
+
* to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
|
762
|
+
*
|
763
|
+
* @param sc The USpoofChecker
|
764
|
+
* @param restrictionLevel The loosest restriction level allowed.
|
765
|
+
* @see URestrictionLevel
|
766
|
+
* @stable ICU 51
|
767
|
+
*/
|
474
768
|
U_STABLE void U_EXPORT2
|
475
769
|
uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
|
476
770
|
|
477
771
|
|
478
772
|
/**
|
479
|
-
* Get the Restriction Level that will be tested if the checks include
|
773
|
+
* Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
|
480
774
|
*
|
481
775
|
* @return The restriction level
|
482
776
|
* @see URestrictionLevel
|
@@ -486,7 +780,7 @@ U_STABLE URestrictionLevel U_EXPORT2
|
|
486
780
|
uspoof_getRestrictionLevel(const USpoofChecker *sc);
|
487
781
|
|
488
782
|
/**
|
489
|
-
* Limit characters that are acceptable in identifiers being checked to those
|
783
|
+
* Limit characters that are acceptable in identifiers being checked to those
|
490
784
|
* normally used with the languages associated with the specified locales.
|
491
785
|
* Any previously specified list of locales is replaced by the new settings.
|
492
786
|
*
|
@@ -499,7 +793,7 @@ uspoof_getRestrictionLevel(const USpoofChecker *sc);
|
|
499
793
|
* Supplying an empty string removes all restrictions;
|
500
794
|
* characters from any script will be allowed.
|
501
795
|
*
|
502
|
-
* The USPOOF_CHAR_LIMIT test is automatically enabled for this
|
796
|
+
* The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
|
503
797
|
* USpoofChecker when calling this function with a non-empty list
|
504
798
|
* of locales.
|
505
799
|
*
|
@@ -511,9 +805,9 @@ uspoof_getRestrictionLevel(const USpoofChecker *sc);
|
|
511
805
|
* can be made to the result of uspoof_setAllowedLocales() by
|
512
806
|
* fetching the resulting set with uspoof_getAllowedChars(),
|
513
807
|
* manipulating it with the Unicode Set API, then resetting the
|
514
|
-
* spoof detectors limits with uspoof_setAllowedChars()
|
808
|
+
* spoof detector's limits with uspoof_setAllowedChars().
|
515
809
|
*
|
516
|
-
* @param sc The USpoofChecker
|
810
|
+
* @param sc The USpoofChecker
|
517
811
|
* @param localesList A list of locales, from which the language
|
518
812
|
* and associated script are extracted. The locales
|
519
813
|
* are comma-separated if there is more than one.
|
@@ -537,18 +831,18 @@ uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode
|
|
537
831
|
*
|
538
832
|
* uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
|
539
833
|
*
|
540
|
-
* The format of the returned list is the same as that supplied to
|
541
|
-
* uspoof_setAllowedLocales(), but returned list may not be identical
|
542
|
-
* to the originally specified string; the string may be reformatted,
|
834
|
+
* The format of the returned list is the same as that supplied to
|
835
|
+
* uspoof_setAllowedLocales(), but returned list may not be identical
|
836
|
+
* to the originally specified string; the string may be reformatted,
|
543
837
|
* and information other than languages from
|
544
838
|
* the originally specified locales may be omitted.
|
545
839
|
*
|
546
|
-
* @param sc The USpoofChecker
|
840
|
+
* @param sc The USpoofChecker
|
547
841
|
* @param status The error code, set if this function encounters a problem.
|
548
842
|
* @return A string containing a list of locales corresponding
|
549
843
|
* to the acceptable scripts, formatted like an
|
550
844
|
* HTTP Accept Language value.
|
551
|
-
*
|
845
|
+
*
|
552
846
|
* @stable ICU 4.2
|
553
847
|
*/
|
554
848
|
U_STABLE const char * U_EXPORT2
|
@@ -564,7 +858,7 @@ uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
|
|
564
858
|
* The USPOOF_CHAR_LIMIT test is automatically enabled for this
|
565
859
|
* USpoofChecker by this function.
|
566
860
|
*
|
567
|
-
* @param sc The USpoofChecker
|
861
|
+
* @param sc The USpoofChecker
|
568
862
|
* @param chars A Unicode Set containing the list of
|
569
863
|
* characters that are permitted. Ownership of the set
|
570
864
|
* remains with the caller. The incoming set is cloned by
|
@@ -591,7 +885,7 @@ uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
|
|
591
885
|
* or if a new set of allowed characters is specified.
|
592
886
|
*
|
593
887
|
*
|
594
|
-
* @param sc The USpoofChecker
|
888
|
+
* @param sc The USpoofChecker
|
595
889
|
* @param status The error code, set if this function encounters a problem.
|
596
890
|
* @return A USet containing the characters that are permitted by
|
597
891
|
* the USPOOF_CHAR_LIMIT test.
|
@@ -611,7 +905,7 @@ uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
|
|
611
905
|
* The USPOOF_CHAR_LIMIT test is automatically enabled for this
|
612
906
|
* USpoofChecker by this function.
|
613
907
|
*
|
614
|
-
* @param sc The USpoofChecker
|
908
|
+
* @param sc The USpoofChecker
|
615
909
|
* @param chars A Unicode Set containing the list of
|
616
910
|
* characters that are permitted. Ownership of the set
|
617
911
|
* remains with the caller. The incoming set is cloned by
|
@@ -626,7 +920,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UEr
|
|
626
920
|
|
627
921
|
/**
|
628
922
|
* Get a UnicodeSet for the characters permitted in an identifier.
|
629
|
-
* This corresponds to the limits imposed by the Set Allowed Characters /
|
923
|
+
* This corresponds to the limits imposed by the Set Allowed Characters /
|
630
924
|
* UnicodeSet functions. Limitations imposed by other checks will not be
|
631
925
|
* reflected in the set returned by this function.
|
632
926
|
*
|
@@ -638,7 +932,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UEr
|
|
638
932
|
* or if a new set of allowed characters is specified.
|
639
933
|
*
|
640
934
|
*
|
641
|
-
* @param sc The USpoofChecker
|
935
|
+
* @param sc The USpoofChecker
|
642
936
|
* @param status The error code, set if this function encounters a problem.
|
643
937
|
* @return A UnicodeSet containing the characters that are permitted by
|
644
938
|
* the USPOOF_CHAR_LIMIT test.
|
@@ -653,17 +947,22 @@ uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
|
|
653
947
|
* Check the specified string for possible security issues.
|
654
948
|
* The text to be checked will typically be an identifier of some sort.
|
655
949
|
* The set of checks to be performed is specified with uspoof_setChecks().
|
656
|
-
*
|
657
|
-
*
|
950
|
+
*
|
951
|
+
* \note
|
952
|
+
* Consider using the newer API, {@link uspoof_check2}, instead.
|
953
|
+
* The newer API exposes additional information from the check procedure
|
954
|
+
* and is otherwise identical to this method.
|
955
|
+
*
|
956
|
+
* @param sc The USpoofChecker
|
658
957
|
* @param id The identifier to be checked for possible security issues,
|
659
958
|
* in UTF-16 format.
|
660
959
|
* @param length the length of the string to be checked, expressed in
|
661
|
-
* 16 bit UTF-16 code units, or -1 if the string is
|
960
|
+
* 16 bit UTF-16 code units, or -1 if the string is
|
662
961
|
* zero terminated.
|
663
|
-
* @param position
|
664
|
-
* Originally, the index of the first
|
665
|
-
*
|
666
|
-
* This parameter may be
|
962
|
+
* @param position Deprecated in ICU 51. Always returns zero.
|
963
|
+
* Originally, an out parameter for the index of the first
|
964
|
+
* string position that failed a check.
|
965
|
+
* This parameter may be NULL.
|
667
966
|
* @param status The error code, set if an error occurred while attempting to
|
668
967
|
* perform the check.
|
669
968
|
* Spoofing or security issues detected with the input string are
|
@@ -673,11 +972,12 @@ uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
|
|
673
972
|
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
674
973
|
* will be zero if the input string passes all of the
|
675
974
|
* enabled checks.
|
975
|
+
* @see uspoof_check2
|
676
976
|
* @stable ICU 4.2
|
677
977
|
*/
|
678
978
|
U_STABLE int32_t U_EXPORT2
|
679
979
|
uspoof_check(const USpoofChecker *sc,
|
680
|
-
const UChar *id, int32_t length,
|
980
|
+
const UChar *id, int32_t length,
|
681
981
|
int32_t *position,
|
682
982
|
UErrorCode *status);
|
683
983
|
|
@@ -686,16 +986,20 @@ uspoof_check(const USpoofChecker *sc,
|
|
686
986
|
* Check the specified string for possible security issues.
|
687
987
|
* The text to be checked will typically be an identifier of some sort.
|
688
988
|
* The set of checks to be performed is specified with uspoof_setChecks().
|
689
|
-
*
|
690
|
-
*
|
989
|
+
*
|
990
|
+
* \note
|
991
|
+
* Consider using the newer API, {@link uspoof_check2UTF8}, instead.
|
992
|
+
* The newer API exposes additional information from the check procedure
|
993
|
+
* and is otherwise identical to this method.
|
994
|
+
*
|
995
|
+
* @param sc The USpoofChecker
|
691
996
|
* @param id An identifier to be checked for possible security issues, in UTF8 format.
|
692
|
-
* @param length the length of the string to be checked, or -1 if the string is
|
997
|
+
* @param length the length of the string to be checked, or -1 if the string is
|
693
998
|
* zero terminated.
|
694
|
-
* @param position
|
695
|
-
* Originally, the index of the first
|
696
|
-
*
|
697
|
-
* This parameter may be
|
698
|
-
* @deprecated ICU 51
|
999
|
+
* @param position Deprecated in ICU 51. Always returns zero.
|
1000
|
+
* Originally, an out parameter for the index of the first
|
1001
|
+
* string position that failed a check.
|
1002
|
+
* This parameter may be NULL.
|
699
1003
|
* @param status The error code, set if an error occurred while attempting to
|
700
1004
|
* perform the check.
|
701
1005
|
* Spoofing or security issues detected with the input string are
|
@@ -707,6 +1011,7 @@ uspoof_check(const USpoofChecker *sc,
|
|
707
1011
|
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
708
1012
|
* will be zero if the input string passes all of the
|
709
1013
|
* enabled checks.
|
1014
|
+
* @see uspoof_check2UTF8
|
710
1015
|
* @stable ICU 4.2
|
711
1016
|
*/
|
712
1017
|
U_STABLE int32_t U_EXPORT2
|
@@ -721,14 +1026,18 @@ uspoof_checkUTF8(const USpoofChecker *sc,
|
|
721
1026
|
* Check the specified string for possible security issues.
|
722
1027
|
* The text to be checked will typically be an identifier of some sort.
|
723
1028
|
* The set of checks to be performed is specified with uspoof_setChecks().
|
724
|
-
*
|
725
|
-
*
|
1029
|
+
*
|
1030
|
+
* \note
|
1031
|
+
* Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
|
1032
|
+
* The newer API exposes additional information from the check procedure
|
1033
|
+
* and is otherwise identical to this method.
|
1034
|
+
*
|
1035
|
+
* @param sc The USpoofChecker
|
726
1036
|
* @param id An identifier to be checked for possible security issues.
|
727
|
-
* @param position
|
728
|
-
* Originally, the index of the first
|
729
|
-
*
|
730
|
-
* This parameter may be
|
731
|
-
* @deprecated ICU 51
|
1037
|
+
* @param position Deprecated in ICU 51. Always returns zero.
|
1038
|
+
* Originally, an out parameter for the index of the first
|
1039
|
+
* string position that failed a check.
|
1040
|
+
* This parameter may be NULL.
|
732
1041
|
* @param status The error code, set if an error occurred while attempting to
|
733
1042
|
* perform the check.
|
734
1043
|
* Spoofing or security issues detected with the input string are
|
@@ -738,45 +1047,249 @@ uspoof_checkUTF8(const USpoofChecker *sc,
|
|
738
1047
|
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
739
1048
|
* will be zero if the input string passes all of the
|
740
1049
|
* enabled checks.
|
1050
|
+
* @see uspoof_check2UnicodeString
|
741
1051
|
* @stable ICU 4.2
|
742
1052
|
*/
|
743
1053
|
U_STABLE int32_t U_EXPORT2
|
744
1054
|
uspoof_checkUnicodeString(const USpoofChecker *sc,
|
745
|
-
const icu::UnicodeString &id,
|
1055
|
+
const icu::UnicodeString &id,
|
746
1056
|
int32_t *position,
|
747
1057
|
UErrorCode *status);
|
1058
|
+
#endif
|
1059
|
+
|
1060
|
+
|
1061
|
+
#ifndef U_HIDE_DRAFT_API
|
1062
|
+
/**
|
1063
|
+
* Check the specified string for possible security issues.
|
1064
|
+
* The text to be checked will typically be an identifier of some sort.
|
1065
|
+
* The set of checks to be performed is specified with uspoof_setChecks().
|
1066
|
+
*
|
1067
|
+
* @param sc The USpoofChecker
|
1068
|
+
* @param id The identifier to be checked for possible security issues,
|
1069
|
+
* in UTF-16 format.
|
1070
|
+
* @param length the length of the string to be checked, or -1 if the string is
|
1071
|
+
* zero terminated.
|
1072
|
+
* @param checkResult An instance of USpoofCheckResult to be filled with
|
1073
|
+
* details about the identifier. Can be NULL.
|
1074
|
+
* @param status The error code, set if an error occurred while attempting to
|
1075
|
+
* perform the check.
|
1076
|
+
* Spoofing or security issues detected with the input string are
|
1077
|
+
* not reported here, but through the function's return value.
|
1078
|
+
* @return An integer value with bits set for any potential security
|
1079
|
+
* or spoofing issues detected. The bits are defined by
|
1080
|
+
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
1081
|
+
* will be zero if the input string passes all of the
|
1082
|
+
* enabled checks. Any information in this bitmask will be
|
1083
|
+
* consistent with the information saved in the optional
|
1084
|
+
* checkResult parameter.
|
1085
|
+
* @see uspoof_openCheckResult
|
1086
|
+
* @see uspoof_check2UTF8
|
1087
|
+
* @see uspoof_check2UnicodeString
|
1088
|
+
* @draft ICU 58
|
1089
|
+
*/
|
1090
|
+
U_DRAFT int32_t U_EXPORT2
|
1091
|
+
uspoof_check2(const USpoofChecker *sc,
|
1092
|
+
const UChar* id, int32_t length,
|
1093
|
+
USpoofCheckResult* checkResult,
|
1094
|
+
UErrorCode *status);
|
748
1095
|
|
1096
|
+
/**
|
1097
|
+
* Check the specified string for possible security issues.
|
1098
|
+
* The text to be checked will typically be an identifier of some sort.
|
1099
|
+
* The set of checks to be performed is specified with uspoof_setChecks().
|
1100
|
+
*
|
1101
|
+
* This version of {@link uspoof_check} accepts a USpoofCheckResult, which
|
1102
|
+
* returns additional information about the identifier. For more
|
1103
|
+
* information, see {@link uspoof_openCheckResult}.
|
1104
|
+
*
|
1105
|
+
* @param sc The USpoofChecker
|
1106
|
+
* @param id An identifier to be checked for possible security issues, in UTF8 format.
|
1107
|
+
* @param length the length of the string to be checked, or -1 if the string is
|
1108
|
+
* zero terminated.
|
1109
|
+
* @param checkResult An instance of USpoofCheckResult to be filled with
|
1110
|
+
* details about the identifier. Can be NULL.
|
1111
|
+
* @param status The error code, set if an error occurred while attempting to
|
1112
|
+
* perform the check.
|
1113
|
+
* Spoofing or security issues detected with the input string are
|
1114
|
+
* not reported here, but through the function's return value.
|
1115
|
+
* @return An integer value with bits set for any potential security
|
1116
|
+
* or spoofing issues detected. The bits are defined by
|
1117
|
+
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
1118
|
+
* will be zero if the input string passes all of the
|
1119
|
+
* enabled checks. Any information in this bitmask will be
|
1120
|
+
* consistent with the information saved in the optional
|
1121
|
+
* checkResult parameter.
|
1122
|
+
* @see uspoof_openCheckResult
|
1123
|
+
* @see uspoof_check2
|
1124
|
+
* @see uspoof_check2UnicodeString
|
1125
|
+
* @draft ICU 58
|
1126
|
+
*/
|
1127
|
+
U_DRAFT int32_t U_EXPORT2
|
1128
|
+
uspoof_check2UTF8(const USpoofChecker *sc,
|
1129
|
+
const char *id, int32_t length,
|
1130
|
+
USpoofCheckResult* checkResult,
|
1131
|
+
UErrorCode *status);
|
1132
|
+
|
1133
|
+
#if U_SHOW_CPLUSPLUS_API
|
1134
|
+
/**
|
1135
|
+
* Check the specified string for possible security issues.
|
1136
|
+
* The text to be checked will typically be an identifier of some sort.
|
1137
|
+
* The set of checks to be performed is specified with uspoof_setChecks().
|
1138
|
+
*
|
1139
|
+
* @param sc The USpoofChecker
|
1140
|
+
* @param id An identifier to be checked for possible security issues.
|
1141
|
+
* @param checkResult An instance of USpoofCheckResult to be filled with
|
1142
|
+
* details about the identifier. Can be NULL.
|
1143
|
+
* @param status The error code, set if an error occurred while attempting to
|
1144
|
+
* perform the check.
|
1145
|
+
* Spoofing or security issues detected with the input string are
|
1146
|
+
* not reported here, but through the function's return value.
|
1147
|
+
* @return An integer value with bits set for any potential security
|
1148
|
+
* or spoofing issues detected. The bits are defined by
|
1149
|
+
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
1150
|
+
* will be zero if the input string passes all of the
|
1151
|
+
* enabled checks. Any information in this bitmask will be
|
1152
|
+
* consistent with the information saved in the optional
|
1153
|
+
* checkResult parameter.
|
1154
|
+
* @see uspoof_openCheckResult
|
1155
|
+
* @see uspoof_check2
|
1156
|
+
* @see uspoof_check2UTF8
|
1157
|
+
* @draft ICU 58
|
1158
|
+
*/
|
1159
|
+
U_DRAFT int32_t U_EXPORT2
|
1160
|
+
uspoof_check2UnicodeString(const USpoofChecker *sc,
|
1161
|
+
const icu::UnicodeString &id,
|
1162
|
+
USpoofCheckResult* checkResult,
|
1163
|
+
UErrorCode *status);
|
749
1164
|
#endif
|
750
1165
|
|
1166
|
+
/**
|
1167
|
+
* Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
|
1168
|
+
* information about the identifier. Information includes:
|
1169
|
+
* <ul>
|
1170
|
+
* <li>A bitmask of the checks that failed</li>
|
1171
|
+
* <li>The identifier's restriction level (UTS 39 section 5.2)</li>
|
1172
|
+
* <li>The set of numerics in the string (UTS 39 section 5.3)</li>
|
1173
|
+
* </ul>
|
1174
|
+
* The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
|
1175
|
+
* of {@link uspoof_check2}.
|
1176
|
+
*
|
1177
|
+
* @param status The error code, set if this function encounters a problem.
|
1178
|
+
* @return the newly created USpoofCheckResult
|
1179
|
+
* @see uspoof_check2
|
1180
|
+
* @see uspoof_check2UTF8
|
1181
|
+
* @see uspoof_check2UnicodeString
|
1182
|
+
* @draft ICU 58
|
1183
|
+
*/
|
1184
|
+
U_DRAFT USpoofCheckResult* U_EXPORT2
|
1185
|
+
uspoof_openCheckResult(UErrorCode *status);
|
1186
|
+
|
1187
|
+
/**
|
1188
|
+
* Close a USpoofCheckResult, freeing any memory that was being held by
|
1189
|
+
* its implementation.
|
1190
|
+
*
|
1191
|
+
* @param checkResult The instance of USpoofCheckResult to close
|
1192
|
+
* @draft ICU 58
|
1193
|
+
*/
|
1194
|
+
U_DRAFT void U_EXPORT2
|
1195
|
+
uspoof_closeCheckResult(USpoofCheckResult *checkResult);
|
1196
|
+
|
1197
|
+
#if U_SHOW_CPLUSPLUS_API
|
1198
|
+
|
1199
|
+
U_NAMESPACE_BEGIN
|
1200
|
+
|
1201
|
+
/**
|
1202
|
+
* \class LocalUSpoofCheckResultPointer
|
1203
|
+
* "Smart pointer" class, closes a USpoofCheckResult via {@link uspoof_closeCheckResult}.
|
1204
|
+
* For most methods see the LocalPointerBase base class.
|
1205
|
+
*
|
1206
|
+
* @see LocalPointerBase
|
1207
|
+
* @see LocalPointer
|
1208
|
+
* @draft ICU 58
|
1209
|
+
*/
|
1210
|
+
U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
|
1211
|
+
|
1212
|
+
U_NAMESPACE_END
|
1213
|
+
|
1214
|
+
#endif
|
1215
|
+
|
1216
|
+
/**
|
1217
|
+
* Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
|
1218
|
+
* in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
|
1219
|
+
*
|
1220
|
+
* @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
|
1221
|
+
* @param status The error code, set if an error occurred.
|
1222
|
+
* @return An integer value with bits set for any potential security
|
1223
|
+
* or spoofing issues detected. The bits are defined by
|
1224
|
+
* enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS)
|
1225
|
+
* will be zero if the input string passes all of the
|
1226
|
+
* enabled checks.
|
1227
|
+
* @see uspoof_setChecks
|
1228
|
+
* @draft ICU 58
|
1229
|
+
*/
|
1230
|
+
U_DRAFT int32_t U_EXPORT2
|
1231
|
+
uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
|
1232
|
+
|
1233
|
+
/**
|
1234
|
+
* Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
|
1235
|
+
* was enabled; otherwise, undefined.
|
1236
|
+
*
|
1237
|
+
* @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
|
1238
|
+
* @param status The error code, set if an error occurred.
|
1239
|
+
* @return The restriction level contained in the USpoofCheckResult
|
1240
|
+
* @see uspoof_setRestrictionLevel
|
1241
|
+
* @draft ICU 58
|
1242
|
+
*/
|
1243
|
+
U_DRAFT URestrictionLevel U_EXPORT2
|
1244
|
+
uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
|
1245
|
+
|
1246
|
+
/**
|
1247
|
+
* Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
|
1248
|
+
* otherwise, undefined. The set will contain the zero digit from each decimal number system found
|
1249
|
+
* in the input string. Ownership of the returned USet remains with the USpoofCheckResult.
|
1250
|
+
* The USet will be freed when {@link uspoof_closeCheckResult} is called.
|
1251
|
+
*
|
1252
|
+
* @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
|
1253
|
+
* @return The set of numerics contained in the USpoofCheckResult
|
1254
|
+
* @param status The error code, set if an error occurred.
|
1255
|
+
* @draft ICU 58
|
1256
|
+
*/
|
1257
|
+
U_DRAFT const USet* U_EXPORT2
|
1258
|
+
uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
|
1259
|
+
#endif /* U_HIDE_DRAFT_API */
|
1260
|
+
|
751
1261
|
|
752
1262
|
/**
|
753
1263
|
* Check whether two specified strings are visually confusable.
|
754
|
-
*
|
755
|
-
*
|
756
|
-
*
|
757
|
-
*
|
758
|
-
* The
|
759
|
-
*
|
760
|
-
*
|
761
|
-
*
|
762
|
-
*
|
763
|
-
*
|
764
|
-
*
|
765
|
-
*
|
766
|
-
*
|
767
|
-
*
|
1264
|
+
*
|
1265
|
+
* If the strings are confusable, the return value will be nonzero, as long as
|
1266
|
+
* {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
|
1267
|
+
*
|
1268
|
+
* The bits in the return value correspond to flags for each of the classes of
|
1269
|
+
* confusables applicable to the two input strings. According to UTS 39
|
1270
|
+
* section 4, the possible flags are:
|
1271
|
+
*
|
1272
|
+
* <ul>
|
1273
|
+
* <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
|
1274
|
+
* <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
|
1275
|
+
* <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
|
1276
|
+
* </ul>
|
1277
|
+
*
|
1278
|
+
* If one or more of the above flags were not listed in uspoof_setChecks(), this
|
1279
|
+
* function will never report that class of confusable. The check
|
1280
|
+
* {@link USPOOF_CONFUSABLE} enables all three flags.
|
768
1281
|
*
|
769
1282
|
*
|
770
1283
|
* @param sc The USpoofChecker
|
771
|
-
* @param id1 The first of the two identifiers to be compared for
|
1284
|
+
* @param id1 The first of the two identifiers to be compared for
|
772
1285
|
* confusability. The strings are in UTF-16 format.
|
773
1286
|
* @param length1 the length of the first identifier, expressed in
|
774
|
-
* 16 bit UTF-16 code units, or -1 if the string is
|
1287
|
+
* 16 bit UTF-16 code units, or -1 if the string is
|
775
1288
|
* nul terminated.
|
776
|
-
* @param id2 The second of the two identifiers to be compared for
|
1289
|
+
* @param id2 The second of the two identifiers to be compared for
|
777
1290
|
* confusability. The identifiers are in UTF-16 format.
|
778
1291
|
* @param length2 The length of the second identifier, expressed in
|
779
|
-
* 16 bit UTF-16 code units, or -1 if the string is
|
1292
|
+
* 16 bit UTF-16 code units, or -1 if the string is
|
780
1293
|
* nul terminated.
|
781
1294
|
* @param status The error code, set if an error occurred while attempting to
|
782
1295
|
* perform the check.
|
@@ -786,6 +1299,7 @@ uspoof_checkUnicodeString(const USpoofChecker *sc,
|
|
786
1299
|
* the type of confusability found, as defined by
|
787
1300
|
* enum USpoofChecks. Zero is returned if the identifiers
|
788
1301
|
* are not confusable.
|
1302
|
+
*
|
789
1303
|
* @stable ICU 4.2
|
790
1304
|
*/
|
791
1305
|
U_STABLE int32_t U_EXPORT2
|
@@ -797,19 +1311,16 @@ uspoof_areConfusable(const USpoofChecker *sc,
|
|
797
1311
|
|
798
1312
|
|
799
1313
|
/**
|
800
|
-
*
|
801
|
-
* The types of confusability to be tested - single script, mixed script,
|
802
|
-
* or whole script - are determined by the check options set for the
|
803
|
-
* USpoofChecker.
|
1314
|
+
* A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
|
804
1315
|
*
|
805
1316
|
* @param sc The USpoofChecker
|
806
|
-
* @param id1 The first of the two identifiers to be compared for
|
1317
|
+
* @param id1 The first of the two identifiers to be compared for
|
807
1318
|
* confusability. The strings are in UTF-8 format.
|
808
|
-
* @param length1 the length of the first identifiers, in bytes, or -1
|
1319
|
+
* @param length1 the length of the first identifier, in bytes, or -1
|
809
1320
|
* if the string is nul terminated.
|
810
|
-
* @param id2 The second of the two identifiers to be compared for
|
1321
|
+
* @param id2 The second of the two identifiers to be compared for
|
811
1322
|
* confusability. The strings are in UTF-8 format.
|
812
|
-
* @param length2 The length of the second string in bytes, or -1
|
1323
|
+
* @param length2 The length of the second string in bytes, or -1
|
813
1324
|
* if the string is nul terminated.
|
814
1325
|
* @param status The error code, set if an error occurred while attempting to
|
815
1326
|
* perform the check.
|
@@ -819,7 +1330,10 @@ uspoof_areConfusable(const USpoofChecker *sc,
|
|
819
1330
|
* the type of confusability found, as defined by
|
820
1331
|
* enum USpoofChecks. Zero is returned if the strings
|
821
1332
|
* are not confusable.
|
1333
|
+
*
|
822
1334
|
* @stable ICU 4.2
|
1335
|
+
*
|
1336
|
+
* @see uspoof_areConfusable
|
823
1337
|
*/
|
824
1338
|
U_STABLE int32_t U_EXPORT2
|
825
1339
|
uspoof_areConfusableUTF8(const USpoofChecker *sc,
|
@@ -832,15 +1346,12 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
|
|
832
1346
|
|
833
1347
|
#if U_SHOW_CPLUSPLUS_API
|
834
1348
|
/**
|
835
|
-
*
|
836
|
-
* The types of confusability to be tested - single script, mixed script,
|
837
|
-
* or whole script - are determined by the check options set for the
|
838
|
-
* USpoofChecker.
|
1349
|
+
* A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
|
839
1350
|
*
|
840
1351
|
* @param sc The USpoofChecker
|
841
|
-
* @param s1 The first of the two identifiers to be compared for
|
1352
|
+
* @param s1 The first of the two identifiers to be compared for
|
842
1353
|
* confusability. The strings are in UTF-8 format.
|
843
|
-
* @param s2 The second of the two identifiers to be compared for
|
1354
|
+
* @param s2 The second of the two identifiers to be compared for
|
844
1355
|
* confusability. The strings are in UTF-8 format.
|
845
1356
|
* @param status The error code, set if an error occurred while attempting to
|
846
1357
|
* perform the check.
|
@@ -850,7 +1361,10 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
|
|
850
1361
|
* the type of confusability found, as defined by
|
851
1362
|
* enum USpoofChecks. Zero is returned if the identifiers
|
852
1363
|
* are not confusable.
|
1364
|
+
*
|
853
1365
|
* @stable ICU 4.2
|
1366
|
+
*
|
1367
|
+
* @see uspoof_areConfusable
|
854
1368
|
*/
|
855
1369
|
U_STABLE int32_t U_EXPORT2
|
856
1370
|
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
|
@@ -861,112 +1375,107 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
|
|
861
1375
|
|
862
1376
|
|
863
1377
|
/**
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
*/
|
1378
|
+
* Get the "skeleton" for an identifier.
|
1379
|
+
* Skeletons are a transformation of the input identifier;
|
1380
|
+
* Two identifiers are confusable if their skeletons are identical.
|
1381
|
+
* See Unicode UAX #39 for additional information.
|
1382
|
+
*
|
1383
|
+
* Using skeletons directly makes it possible to quickly check
|
1384
|
+
* whether an identifier is confusable with any of some large
|
1385
|
+
* set of existing identifiers, by creating an efficiently
|
1386
|
+
* searchable collection of the skeletons.
|
1387
|
+
*
|
1388
|
+
* @param sc The USpoofChecker
|
1389
|
+
* @param type Deprecated in ICU 58. You may pass any number.
|
1390
|
+
* Originally, controlled which of the Unicode confusable data
|
1391
|
+
* tables to use.
|
1392
|
+
* @param id The input identifier whose skeleton will be computed.
|
1393
|
+
* @param length The length of the input identifier, expressed in 16 bit
|
1394
|
+
* UTF-16 code units, or -1 if the string is zero terminated.
|
1395
|
+
* @param dest The output buffer, to receive the skeleton string.
|
1396
|
+
* @param destCapacity The length of the output buffer, in 16 bit units.
|
1397
|
+
* The destCapacity may be zero, in which case the function will
|
1398
|
+
* return the actual length of the skeleton.
|
1399
|
+
* @param status The error code, set if an error occurred while attempting to
|
1400
|
+
* perform the check.
|
1401
|
+
* @return The length of the skeleton string. The returned length
|
1402
|
+
* is always that of the complete skeleton, even when the
|
1403
|
+
* supplied buffer is too small (or of zero length)
|
1404
|
+
*
|
1405
|
+
* @stable ICU 4.2
|
1406
|
+
* @see uspoof_areConfusable
|
1407
|
+
*/
|
895
1408
|
U_STABLE int32_t U_EXPORT2
|
896
1409
|
uspoof_getSkeleton(const USpoofChecker *sc,
|
897
1410
|
uint32_t type,
|
898
1411
|
const UChar *id, int32_t length,
|
899
1412
|
UChar *dest, int32_t destCapacity,
|
900
1413
|
UErrorCode *status);
|
901
|
-
|
1414
|
+
|
902
1415
|
/**
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
* @stable ICU 4.2
|
936
|
-
*/
|
1416
|
+
* Get the "skeleton" for an identifier.
|
1417
|
+
* Skeletons are a transformation of the input identifier;
|
1418
|
+
* Two identifiers are confusable if their skeletons are identical.
|
1419
|
+
* See Unicode UAX #39 for additional information.
|
1420
|
+
*
|
1421
|
+
* Using skeletons directly makes it possible to quickly check
|
1422
|
+
* whether an identifier is confusable with any of some large
|
1423
|
+
* set of existing identifiers, by creating an efficiently
|
1424
|
+
* searchable collection of the skeletons.
|
1425
|
+
*
|
1426
|
+
* @param sc The USpoofChecker
|
1427
|
+
* @param type Deprecated in ICU 58. You may pass any number.
|
1428
|
+
* Originally, controlled which of the Unicode confusable data
|
1429
|
+
* tables to use.
|
1430
|
+
* @param id The UTF-8 format identifier whose skeleton will be computed.
|
1431
|
+
* @param length The length of the input string, in bytes,
|
1432
|
+
* or -1 if the string is zero terminated.
|
1433
|
+
* @param dest The output buffer, to receive the skeleton string.
|
1434
|
+
* @param destCapacity The length of the output buffer, in bytes.
|
1435
|
+
* The destCapacity may be zero, in which case the function will
|
1436
|
+
* return the actual length of the skeleton.
|
1437
|
+
* @param status The error code, set if an error occurred while attempting to
|
1438
|
+
* perform the check. Possible Errors include U_INVALID_CHAR_FOUND
|
1439
|
+
* for invalid UTF-8 sequences, and
|
1440
|
+
* U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
|
1441
|
+
* to hold the complete skeleton.
|
1442
|
+
* @return The length of the skeleton string, in bytes. The returned length
|
1443
|
+
* is always that of the complete skeleton, even when the
|
1444
|
+
* supplied buffer is too small (or of zero length)
|
1445
|
+
*
|
1446
|
+
* @stable ICU 4.2
|
1447
|
+
*/
|
937
1448
|
U_STABLE int32_t U_EXPORT2
|
938
1449
|
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
|
939
1450
|
uint32_t type,
|
940
1451
|
const char *id, int32_t length,
|
941
1452
|
char *dest, int32_t destCapacity,
|
942
1453
|
UErrorCode *status);
|
943
|
-
|
1454
|
+
|
944
1455
|
#if U_SHOW_CPLUSPLUS_API
|
945
1456
|
/**
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
* @stable ICU 4.2
|
969
|
-
*/
|
1457
|
+
* Get the "skeleton" for an identifier.
|
1458
|
+
* Skeletons are a transformation of the input identifier;
|
1459
|
+
* Two identifiers are confusable if their skeletons are identical.
|
1460
|
+
* See Unicode UAX #39 for additional information.
|
1461
|
+
*
|
1462
|
+
* Using skeletons directly makes it possible to quickly check
|
1463
|
+
* whether an identifier is confusable with any of some large
|
1464
|
+
* set of existing identifiers, by creating an efficiently
|
1465
|
+
* searchable collection of the skeletons.
|
1466
|
+
*
|
1467
|
+
* @param sc The USpoofChecker.
|
1468
|
+
* @param type Deprecated in ICU 58. You may pass any number.
|
1469
|
+
* Originally, controlled which of the Unicode confusable data
|
1470
|
+
* tables to use.
|
1471
|
+
* @param id The input identifier whose skeleton will be computed.
|
1472
|
+
* @param dest The output identifier, to receive the skeleton string.
|
1473
|
+
* @param status The error code, set if an error occurred while attempting to
|
1474
|
+
* perform the check.
|
1475
|
+
* @return A reference to the destination (skeleton) string.
|
1476
|
+
*
|
1477
|
+
* @stable ICU 4.2
|
1478
|
+
*/
|
970
1479
|
U_I18N_API icu::UnicodeString & U_EXPORT2
|
971
1480
|
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
972
1481
|
uint32_t type,
|
@@ -977,7 +1486,8 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
|
977
1486
|
|
978
1487
|
/**
|
979
1488
|
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
|
980
|
-
* in
|
1489
|
+
* in http://unicode.org/Public/security/latest/xidmodifications.txt
|
1490
|
+
* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
|
981
1491
|
*
|
982
1492
|
* The returned set is frozen. Ownership of the set remains with the ICU library; it must not
|
983
1493
|
* be deleted by the caller.
|
@@ -991,7 +1501,8 @@ uspoof_getInclusionSet(UErrorCode *status);
|
|
991
1501
|
|
992
1502
|
/**
|
993
1503
|
* Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
|
994
|
-
* in
|
1504
|
+
* in http://unicode.org/Public/security/latest/xidmodifications.txt
|
1505
|
+
* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
|
995
1506
|
*
|
996
1507
|
* The returned set is frozen. Ownership of the set remains with the ICU library; it must not
|
997
1508
|
* be deleted by the caller.
|
@@ -1007,7 +1518,8 @@ uspoof_getRecommendedSet(UErrorCode *status);
|
|
1007
1518
|
|
1008
1519
|
/**
|
1009
1520
|
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
|
1010
|
-
* in
|
1521
|
+
* in http://unicode.org/Public/security/latest/xidmodifications.txt
|
1522
|
+
* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
|
1011
1523
|
*
|
1012
1524
|
* The returned set is frozen. Ownership of the set remains with the ICU library; it must not
|
1013
1525
|
* be deleted by the caller.
|
@@ -1021,7 +1533,8 @@ uspoof_getInclusionUnicodeSet(UErrorCode *status);
|
|
1021
1533
|
|
1022
1534
|
/**
|
1023
1535
|
* Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
|
1024
|
-
* in
|
1536
|
+
* in http://unicode.org/Public/security/latest/xidmodifications.txt
|
1537
|
+
* and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
|
1025
1538
|
*
|
1026
1539
|
* The returned set is frozen. Ownership of the set remains with the ICU library; it must not
|
1027
1540
|
* be deleted by the caller.
|
@@ -1041,7 +1554,7 @@ uspoof_getRecommendedUnicodeSet(UErrorCode *status);
|
|
1041
1554
|
* instantiate a new Spoof Detector.
|
1042
1555
|
*
|
1043
1556
|
* The serialized spoof checker includes only the data compiled from the
|
1044
|
-
* Unicode data tables by uspoof_openFromSource(); it does not include
|
1557
|
+
* Unicode data tables by uspoof_openFromSource(); it does not include
|
1045
1558
|
* any other state or configuration that may have been set.
|
1046
1559
|
*
|
1047
1560
|
* @param sc the Spoof Detector whose data is to be serialized.
|