utf8_validator 1.0.13 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/examples/fullstring.rb +4 -0
- data/lib/utf8_validator.rb +3 -1
- data/lib/validation/errors.rb +3 -1
- data/lib/validation/validator.rb +3 -0
- data/test/helper.rb +5 -0
- data/test/test_code_points.rb +127023 -0
- data/test/test_raise_request.rb +3 -0
- data/test/test_surrogate_half_first_point.rb +2094 -0
- data/test/test_utf8_validator.rb +578 -3
- data/utf8_validator.gemspec +7 -4
- data/utils/gencp.rb +59 -0
- metadata +5 -2
data/test/test_utf8_validator.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2016 Guy Allard
|
5
|
+
#--
|
3
6
|
if Kernel.respond_to?(:require_relative)
|
4
7
|
require_relative("./helper")
|
5
8
|
else
|
@@ -194,7 +197,7 @@ class TestUtf8Validator < Test::Unit::TestCase
|
|
194
197
|
|
195
198
|
#--
|
196
199
|
# I do not see a need to test UTF-16 surrogate pairs. They are guaranteed
|
197
|
-
# to always fail if the preceding test succeeds. This is because the
|
200
|
+
# to always fail if the preceding test succeeds. This is because the
|
198
201
|
# preceding test data values are always the first surrogate of the pair.
|
199
202
|
#
|
200
203
|
# UTF-16 surrogates are clearly something I do not understand.
|
@@ -384,7 +387,7 @@ straight from the Unicode 6.0 spec. See page 92.
|
|
384
387
|
]
|
385
388
|
good_data.each do |string|
|
386
389
|
assert @validator.valid_encoding?(string), "good unicode specs 01: #{string}"
|
387
|
-
assert string.force_encoding("UTF-8").valid_encoding?,
|
390
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
388
391
|
"good unicode specs 01 19: #{string}" if @vercheck
|
389
392
|
end
|
390
393
|
|
@@ -6181,5 +6184,577 @@ http://www.unicode.org/versions/Unicode7.0.0/
|
|
6181
6184
|
end
|
6182
6185
|
end
|
6183
6186
|
|
6184
|
-
|
6187
|
+
def test_0810_zero_len_fillers
|
6188
|
+
test_data = [
|
6189
|
+
"A\u200bZ", # zero len filler ?
|
6190
|
+
"A\u200cZ", # zero len filler ?
|
6191
|
+
"A\u200dZ", # zero len filler ?
|
6192
|
+
]
|
6193
|
+
test_data.each do |string|
|
6194
|
+
assert @validator.valid_encoding?(string), "zero_len_fillers A: #{string}"
|
6195
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
6196
|
+
"zero_len_fillers B: #{string}" if @vercheck
|
6197
|
+
end
|
6198
|
+
end
|
6199
|
+
|
6200
|
+
# grapheme clusters
|
6201
|
+
def test_0850_grapheme_clusters
|
6202
|
+
test_data = [
|
6203
|
+
"\u0067", # 0067 ( g ) LATIN SMALL LETTER G
|
6204
|
+
"\u0308", # 0308 ( ◌̈ ) COMBINING DIAERESIS
|
6205
|
+
"\u0067\u0308", # Combined
|
6206
|
+
"\uac01", # AC01 ( 각 ) HANGUL SYLLABLE GAG
|
6207
|
+
"\u1100", # 1100 ( ᄀ ) HANGUL CHOSEONG KIYEOK
|
6208
|
+
"\u1161", # 1161 ( ᅡ ) HANGUL JUNGSEONG A
|
6209
|
+
"\u11a8", # 11A8 ( ᆨ ) HANGUL JONGSEONG KIYEOK
|
6210
|
+
"\uac01\u1100\u1161\u11a8", # Combined
|
6211
|
+
# THAI
|
6212
|
+
"\u0e01", # ก 0E01 ( ก ) THAI CHARACTER KO KAI Thai ko
|
6213
|
+
# THAI
|
6214
|
+
"\u0e01", # ก 0E01 ( ก ) THAI CHARACTER KO KAI Thai ko
|
6215
|
+
"\u0e33", # 0E33 ( ำ ) THAI CHARACTER SARA AM
|
6216
|
+
"\u0e01\u0e33",
|
6217
|
+
# Extended grapheme clusters
|
6218
|
+
"\u0ba8", # நி 0BA8 ( ந ) TAMIL LETTER NA
|
6219
|
+
"\u0bbf", # 0BBF ( ி ) TAMIL VOWEL SIGN I Tamil ni
|
6220
|
+
"\u0e40", # เ 0E40 ( เ ) THAI CHARACTER SARA E Thai e
|
6221
|
+
"\u0e01", # กำ 0E01 ( ก ) THAI CHARACTER KO KAI
|
6222
|
+
"\u0e33", # 0E33 ( ำ ) THAI CHARACTER SARA AM Thai kam
|
6223
|
+
"\u0937", # षि 0937 ( ष ) DEVANAGARI LETTER SSA
|
6224
|
+
"\u093f", # 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari ssi
|
6225
|
+
"\u0ba8\u0bbf\u0e40\u0e01\u0e33\u0937\u093f", # Combined
|
6226
|
+
# Legacy grapheme clusters
|
6227
|
+
"\u0e33", # ำ 0E33 ( ำ ) THAI CHARACTER SARA AM Thai am
|
6228
|
+
"\u0937", # ष 0937 ( ष ) DEVANAGARI LETTER SSA Devanagari ssa
|
6229
|
+
"\u093f", # ि 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari i
|
6230
|
+
"\u0e33\u0937\u093f", # Combined
|
6231
|
+
# Tailored grapheme clusters
|
6232
|
+
"\u0063", # ch 0063 ( c ) LATIN SMALL LETTER C
|
6233
|
+
"\u0068", # 0068 ( h ) LATIN SMALL LETTER H Slovak ch digraph
|
6234
|
+
"\u0063\u0068", # Combined
|
6235
|
+
"\u006b", # kw 006B ( k ) LATIN SMALL LETTER K
|
6236
|
+
"\u02b7", # 02B7 ( ʷ ) MODIFIER LETTER SMALL W sequence with letter modifier
|
6237
|
+
"\u006b\u02b7", # Combined
|
6238
|
+
"\u0915", # क्षि 0915 ( क ) DEVANAGARI LETTER KA
|
6239
|
+
"\u094d", # 094D ( ् ) DEVANAGARI SIGN VIRAMA
|
6240
|
+
"\u0937", # 0937 ( ष ) DEVANAGARI LETTER SSA
|
6241
|
+
"\u093f", # 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari kshi
|
6242
|
+
"\u0915\u094d\u0937\u093f",
|
6243
|
+
]
|
6244
|
+
test_data.each do |string|
|
6245
|
+
assert @validator.valid_encoding?(string), "grapheme clusters A: #{string}"
|
6246
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
6247
|
+
"grapheme clusters B: #{string}" if @vercheck
|
6248
|
+
end
|
6249
|
+
end # of method
|
6250
|
+
|
6251
|
+
def test_0890_named_sequences
|
6252
|
+
test_data = [
|
6253
|
+
# NamedSequences-9.0.0.txt
|
6254
|
+
# Date: 2016-05-26, 00:00:00 GMT [KW, LI]
|
6255
|
+
# © 2016 Unicode®, Inc.
|
6256
|
+
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
6257
|
+
#
|
6258
|
+
# Unicode Character Database
|
6259
|
+
# For documentation, see http://www.unicode.org/reports/tr44/
|
6260
|
+
#
|
6261
|
+
# Unicode Named Character Sequences
|
6262
|
+
#
|
6263
|
+
# This file is a normative contributory data file in the Unicode
|
6264
|
+
# Character Database.
|
6265
|
+
#
|
6266
|
+
# Format:
|
6267
|
+
# Name of Sequence; Code Point Sequence for USI
|
6268
|
+
#
|
6269
|
+
# Code point sequences in the Unicode Character Database
|
6270
|
+
# use spaces as delimiters. The corresponding format for a
|
6271
|
+
# UCS Sequence Identifier (USI) in ISO/IEC 10646 uses
|
6272
|
+
# comma delimitation and angle brackets. Thus, a Unicode
|
6273
|
+
# named character sequence of the form:
|
6274
|
+
#
|
6275
|
+
# EXAMPLE NAME;1000 1001 1002
|
6276
|
+
#
|
6277
|
+
# in this data file, would correspond to an ISO/IEC 10646 USI
|
6278
|
+
# as follows:
|
6279
|
+
#
|
6280
|
+
# <1000, 1001, 1002>
|
6281
|
+
#
|
6282
|
+
# For more information, see UAX #34: Unicode Named Character
|
6283
|
+
# Sequences, at http://www.unicode.org/unicode/reports/tr34/
|
6284
|
+
#
|
6285
|
+
# Note: The order of entries in this file is not significant.
|
6286
|
+
# However, entries are generally in script order corresponding
|
6287
|
+
# to block order in the Unicode Standard, to make it easier
|
6288
|
+
# to find entries in the list.
|
6289
|
+
# ================================================
|
6290
|
+
# Latin letter plus accent combinations.
|
6291
|
+
# These are part of the original set of approved named sequences
|
6292
|
+
# for Unicode 4.1. 2005.
|
6293
|
+
"\u0100\u0300", # LATIN CAPITAL LETTER A WITH MACRON AND GRAVE
|
6294
|
+
"\u0101\u0300", # LATIN SMALL LETTER A WITH MACRON AND GRAVE
|
6295
|
+
"\u0045\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW
|
6296
|
+
"\u0065\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW
|
6297
|
+
"\u00C8\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE
|
6298
|
+
"\u00E8\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE
|
6299
|
+
"\u00C9\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE
|
6300
|
+
"\u00E9\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE
|
6301
|
+
"\u00CA\u0304", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON
|
6302
|
+
"\u00EA\u0304", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON
|
6303
|
+
"\u00CA\u030C", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON
|
6304
|
+
"\u00EA\u030C", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON
|
6305
|
+
"\u012A\u0300", # LATIN CAPITAL LETTER I WITH MACRON AND GRAVE
|
6306
|
+
"\u012B\u0300", # LATIN SMALL LETTER I WITH MACRON AND GRAVE
|
6307
|
+
"\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE
|
6308
|
+
"\u006E\u0360", # LATIN SMALL LETTER NG WITH TILDE ABOVE
|
6309
|
+
"\u004F\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW
|
6310
|
+
"\u006F\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW
|
6311
|
+
"\u00D2\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE
|
6312
|
+
"\u00F2\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE
|
6313
|
+
"\u00D3\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE
|
6314
|
+
"\u00F3\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE
|
6315
|
+
"\u0053\u0329", # LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW
|
6316
|
+
"\u0073\u0329", # LATIN SMALL LETTER S WITH VERTICAL LINE BELOW
|
6317
|
+
"\u016A\u0300", # LATIN CAPITAL LETTER U WITH MACRON AND GRAVE
|
6318
|
+
"\u016B\u0300", # LATIN SMALL LETTER U WITH MACRON AND GRAVE
|
6319
|
+
# Additions for Lithuanian.
|
6320
|
+
# Provisional 2006-05-18, Approved 2007-10-19
|
6321
|
+
"\u0104\u0301", # LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE
|
6322
|
+
"\u0105\u0301", # LATIN SMALL LETTER A WITH OGONEK AND ACUTE
|
6323
|
+
"\u0104\u0303", # LATIN CAPITAL LETTER A WITH OGONEK AND TILDE
|
6324
|
+
"\u0105\u0303", # LATIN SMALL LETTER A WITH OGONEK AND TILDE
|
6325
|
+
"\u0118\u0301", # LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE
|
6326
|
+
"\u0119\u0301", # LATIN SMALL LETTER E WITH OGONEK AND ACUTE
|
6327
|
+
"\u0118\u0303", # LATIN CAPITAL LETTER E WITH OGONEK AND TILDE
|
6328
|
+
"\u0119\u0303", # LATIN SMALL LETTER E WITH OGONEK AND TILDE
|
6329
|
+
"\u0116\u0301", # LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE
|
6330
|
+
"\u0117\u0301", # LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE
|
6331
|
+
"\u0116\u0303", # LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE
|
6332
|
+
"\u0117\u0303", # LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE
|
6333
|
+
"\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE
|
6334
|
+
"\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE
|
6335
|
+
"\u012E\u0301", # LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE
|
6336
|
+
"\u012F\u0307", # LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE
|
6337
|
+
"\u012E\u0303", # LATIN CAPITAL LETTER I WITH OGONEK AND TILDE
|
6338
|
+
"\u012F\u0307", # LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE
|
6339
|
+
"\u004A\u0303", # LATIN CAPITAL LETTER J WITH TILDE
|
6340
|
+
"\u006A\u0307", # LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE
|
6341
|
+
"\u004C\u0303", # LATIN CAPITAL LETTER L WITH TILDE
|
6342
|
+
"\u006C\u0303", # LATIN SMALL LETTER L WITH TILDE
|
6343
|
+
"\u004D\u0303", # LATIN CAPITAL LETTER M WITH TILDE
|
6344
|
+
"\u006D\u0303", # LATIN SMALL LETTER M WITH TILDE
|
6345
|
+
"\u0052\u0303", # LATIN CAPITAL LETTER R WITH TILDE
|
6346
|
+
"\u0072\u0303", # LATIN SMALL LETTER R WITH TILDE
|
6347
|
+
"\u0172\u0301", # LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE
|
6348
|
+
"\u0173\u0301", # LATIN SMALL LETTER U WITH OGONEK AND ACUTE
|
6349
|
+
"\u0172\u0303", # LATIN CAPITAL LETTER U WITH OGONEK AND TILDE
|
6350
|
+
"\u0173\u0303", # LATIN SMALL LETTER U WITH OGONEK AND TILDE
|
6351
|
+
"\u016A\u0301", # LATIN CAPITAL LETTER U WITH MACRON AND ACUTE
|
6352
|
+
"\u016B\u0301", # LATIN SMALL LETTER U WITH MACRON AND ACUTE
|
6353
|
+
"\u016A\u0303", # LATIN CAPITAL LETTER U WITH MACRON AND TILDE
|
6354
|
+
"\u016B\u0303", # LATIN SMALL LETTER U WITH MACRON AND TILDE
|
6355
|
+
# Entries for JIS X 0213 compatibility mapping.
|
6356
|
+
# Provisional 2008-11-07, Approved 2010-05-14
|
6357
|
+
"\u00E6\u0300", # LATIN SMALL LETTER AE WITH GRAVE
|
6358
|
+
"\u0254\u0300", # LATIN SMALL LETTER OPEN O WITH GRAVE
|
6359
|
+
"\u0254\u0301", # LATIN SMALL LETTER OPEN O WITH ACUTE
|
6360
|
+
"\u028C\u0300", # LATIN SMALL LETTER TURNED V WITH GRAVE
|
6361
|
+
"\u028C\u0301", # LATIN SMALL LETTER TURNED V WITH ACUTE
|
6362
|
+
"\u0259\u0300", # LATIN SMALL LETTER SCHWA WITH GRAVE
|
6363
|
+
"\u0259\u0301", # LATIN SMALL LETTER SCHWA WITH ACUTE
|
6364
|
+
"\u025A\u0300", # LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE
|
6365
|
+
"\u025A\u0301", # LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE
|
6366
|
+
# Entries for Uyghur and Chagatai.
|
6367
|
+
# Provisional N/A, Approved 2012-11-08
|
6368
|
+
"\u0626\u0627", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH ALEF
|
6369
|
+
"\u0626\u0648", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH WAW
|
6370
|
+
"\u0626\u0649", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA
|
6371
|
+
"\u0626\u06C6", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH OE
|
6372
|
+
"\u0626\u06C7", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH U
|
6373
|
+
"\u0626\u06C8", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH YU
|
6374
|
+
"\u0626\u06D0", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH E
|
6375
|
+
"\u0626\u06D5", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH AE
|
6376
|
+
"\u0646\u06A9", # ARABIC SEQUENCE NOON WITH KEHEH
|
6377
|
+
# Entry for a Bangla entity.
|
6378
|
+
# Provisional 2009-08-10, Approved 2010-05-14
|
6379
|
+
#
|
6380
|
+
# Note that this same sequence is also used for the ASSAMESE LETTER KSSA.
|
6381
|
+
"\u0995\u09CD", # BENGALI LETTER KHINYA
|
6382
|
+
# Additions for Tamil.
|
6383
|
+
# Provisional 2008-02-08, Approved 2009-08-14
|
6384
|
+
#
|
6385
|
+
# A visual display of the Tamil named character sequences is available
|
6386
|
+
# in the documentation for the Unicode Standard. See Section 12.6, Tamil in
|
6387
|
+
# http://www.unicode.org/versions/latest/
|
6388
|
+
"\u0B95\u0BCD", # TAMIL CONSONANT K
|
6389
|
+
"\u0B99\u0BCD", # TAMIL CONSONANT NG
|
6390
|
+
"\u0B9A\u0BCD", # TAMIL CONSONANT C
|
6391
|
+
"\u0B9E\u0BCD", # TAMIL CONSONANT NY
|
6392
|
+
"\u0B9F\u0BCD", # TAMIL CONSONANT TT
|
6393
|
+
"\u0BA3\u0BCD", # TAMIL CONSONANT NN
|
6394
|
+
"\u0BA4\u0BCD", # TAMIL CONSONANT T
|
6395
|
+
"\u0BA8\u0BCD", # TAMIL CONSONANT N
|
6396
|
+
"\u0BAA\u0BCD", # TAMIL CONSONANT P
|
6397
|
+
"\u0BAE\u0BCD", # TAMIL CONSONANT M
|
6398
|
+
"\u0BAF\u0BCD", # TAMIL CONSONANT Y
|
6399
|
+
"\u0BB0\u0BCD", # TAMIL CONSONANT R
|
6400
|
+
"\u0BB2\u0BCD", # TAMIL CONSONANT L
|
6401
|
+
"\u0BB5\u0BCD", # TAMIL CONSONANT V
|
6402
|
+
"\u0BB4\u0BCD", # TAMIL CONSONANT LLL
|
6403
|
+
"\u0BB3\u0BCD", # TAMIL CONSONANT LL
|
6404
|
+
"\u0BB1\u0BCD", # TAMIL CONSONANT RR
|
6405
|
+
"\u0BA9\u0BCD", # TAMIL CONSONANT NNN
|
6406
|
+
"\u0B9C\u0BCD", # TAMIL CONSONANT J
|
6407
|
+
"\u0BB6\u0BCD", # TAMIL CONSONANT SH
|
6408
|
+
"\u0BB7\u0BCD", # TAMIL CONSONANT SS
|
6409
|
+
"\u0BB8\u0BCD", # TAMIL CONSONANT S
|
6410
|
+
"\u0BB9\u0BCD", # TAMIL CONSONANT H
|
6411
|
+
"\u0B95\u0BCD", # TAMIL CONSONANT KSS
|
6412
|
+
"\u0B95\u0BBE", # TAMIL SYLLABLE KAA
|
6413
|
+
"\u0B95\u0BBF", # TAMIL SYLLABLE KI
|
6414
|
+
"\u0B95\u0BC0", # TAMIL SYLLABLE KII
|
6415
|
+
"\u0B95\u0BC1", # TAMIL SYLLABLE KU
|
6416
|
+
"\u0B95\u0BC2", # TAMIL SYLLABLE KUU
|
6417
|
+
"\u0B95\u0BC6", # TAMIL SYLLABLE KE
|
6418
|
+
"\u0B95\u0BC7", # TAMIL SYLLABLE KEE
|
6419
|
+
"\u0B95\u0BC8", # TAMIL SYLLABLE KAI
|
6420
|
+
"\u0B95\u0BCA", # TAMIL SYLLABLE KO
|
6421
|
+
"\u0B95\u0BCB", # TAMIL SYLLABLE KOO
|
6422
|
+
"\u0B95\u0BCC", # TAMIL SYLLABLE KAU
|
6423
|
+
"\u0B99\u0BBE", # TAMIL SYLLABLE NGAA
|
6424
|
+
"\u0B99\u0BBF", # TAMIL SYLLABLE NGI
|
6425
|
+
"\u0B99\u0BC0", # TAMIL SYLLABLE NGII
|
6426
|
+
"\u0B99\u0BC1", # TAMIL SYLLABLE NGU
|
6427
|
+
"\u0B99\u0BC2", # TAMIL SYLLABLE NGUU
|
6428
|
+
"\u0B99\u0BC6", # TAMIL SYLLABLE NGE
|
6429
|
+
"\u0B99\u0BC7", # TAMIL SYLLABLE NGEE
|
6430
|
+
"\u0B99\u0BC8", # TAMIL SYLLABLE NGAI
|
6431
|
+
"\u0B99\u0BCA", # TAMIL SYLLABLE NGO
|
6432
|
+
"\u0B99\u0BCB", # TAMIL SYLLABLE NGOO
|
6433
|
+
"\u0B99\u0BCC", # TAMIL SYLLABLE NGAU
|
6434
|
+
"\u0B9A\u0BBE", # TAMIL SYLLABLE CAA
|
6435
|
+
"\u0B9A\u0BBF", # TAMIL SYLLABLE CI
|
6436
|
+
"\u0B9A\u0BC0", # TAMIL SYLLABLE CII
|
6437
|
+
"\u0B9A\u0BC1", # TAMIL SYLLABLE CU
|
6438
|
+
"\u0B9A\u0BC2", # TAMIL SYLLABLE CUU
|
6439
|
+
"\u0B9A\u0BC6", # TAMIL SYLLABLE CE
|
6440
|
+
"\u0B9A\u0BC7", # TAMIL SYLLABLE CEE
|
6441
|
+
"\u0B9A\u0BC8", # TAMIL SYLLABLE CAI
|
6442
|
+
"\u0B9A\u0BCA", # TAMIL SYLLABLE CO
|
6443
|
+
"\u0B9A\u0BCB", # TAMIL SYLLABLE COO
|
6444
|
+
"\u0B9A\u0BCC", # TAMIL SYLLABLE CAU
|
6445
|
+
"\u0B9E\u0BBE", # TAMIL SYLLABLE NYAA
|
6446
|
+
"\u0B9E\u0BBF", # TAMIL SYLLABLE NYI
|
6447
|
+
"\u0B9E\u0BC0", # TAMIL SYLLABLE NYII
|
6448
|
+
"\u0B9E\u0BC1", # TAMIL SYLLABLE NYU
|
6449
|
+
"\u0B9E\u0BC2", # TAMIL SYLLABLE NYUU
|
6450
|
+
"\u0B9E\u0BC6", # TAMIL SYLLABLE NYE
|
6451
|
+
"\u0B9E\u0BC7", # TAMIL SYLLABLE NYEE
|
6452
|
+
"\u0B9E\u0BC8", # TAMIL SYLLABLE NYAI
|
6453
|
+
"\u0B9E\u0BCA", # TAMIL SYLLABLE NYO
|
6454
|
+
"\u0B9E\u0BCB", # TAMIL SYLLABLE NYOO
|
6455
|
+
"\u0B9E\u0BCC", # TAMIL SYLLABLE NYAU
|
6456
|
+
"\u0B9F\u0BBE", # TAMIL SYLLABLE TTAA
|
6457
|
+
"\u0B9F\u0BBF", # TAMIL SYLLABLE TTI
|
6458
|
+
"\u0B9F\u0BC0", # TAMIL SYLLABLE TTII
|
6459
|
+
"\u0B9F\u0BC1", # TAMIL SYLLABLE TTU
|
6460
|
+
"\u0B9F\u0BC2", # TAMIL SYLLABLE TTUU
|
6461
|
+
"\u0B9F\u0BC6", # TAMIL SYLLABLE TTE
|
6462
|
+
"\u0B9F\u0BC7", # TAMIL SYLLABLE TTEE
|
6463
|
+
"\u0B9F\u0BC8", # TAMIL SYLLABLE TTAI
|
6464
|
+
"\u0B9F\u0BCA", # TAMIL SYLLABLE TTO
|
6465
|
+
"\u0B9F\u0BCB", # TAMIL SYLLABLE TTOO
|
6466
|
+
"\u0B9F\u0BCC", # TAMIL SYLLABLE TTAU
|
6467
|
+
"\u0BA3\u0BBE", # TAMIL SYLLABLE NNAA
|
6468
|
+
"\u0BA3\u0BBF", # TAMIL SYLLABLE NNI
|
6469
|
+
"\u0BA3\u0BC0", # TAMIL SYLLABLE NNII
|
6470
|
+
"\u0BA3\u0BC1", # TAMIL SYLLABLE NNU
|
6471
|
+
"\u0BA3\u0BC2", # TAMIL SYLLABLE NNUU
|
6472
|
+
"\u0BA3\u0BC6", # TAMIL SYLLABLE NNE
|
6473
|
+
"\u0BA3\u0BC7", # TAMIL SYLLABLE NNEE
|
6474
|
+
"\u0BA3\u0BC8", # TAMIL SYLLABLE NNAI
|
6475
|
+
"\u0BA3\u0BCA", # TAMIL SYLLABLE NNO
|
6476
|
+
"\u0BA3\u0BCB", # TAMIL SYLLABLE NNOO
|
6477
|
+
"\u0BA3\u0BCC", # TAMIL SYLLABLE NNAU
|
6478
|
+
"\u0BA4\u0BBE", # TAMIL SYLLABLE TAA
|
6479
|
+
"\u0BA4\u0BBF", # TAMIL SYLLABLE TI
|
6480
|
+
"\u0BA4\u0BC0", # TAMIL SYLLABLE TII
|
6481
|
+
"\u0BA4\u0BC1", # TAMIL SYLLABLE TU
|
6482
|
+
"\u0BA4\u0BC2", # TAMIL SYLLABLE TUU
|
6483
|
+
"\u0BA4\u0BC6", # TAMIL SYLLABLE TE
|
6484
|
+
"\u0BA4\u0BC7", # TAMIL SYLLABLE TEE
|
6485
|
+
"\u0BA4\u0BC8", # TAMIL SYLLABLE TAI
|
6486
|
+
"\u0BA4\u0BCA", # TAMIL SYLLABLE TO
|
6487
|
+
"\u0BA4\u0BCB", # TAMIL SYLLABLE TOO
|
6488
|
+
"\u0BA4\u0BCC", # TAMIL SYLLABLE TAU
|
6489
|
+
"\u0BA8\u0BBE", # TAMIL SYLLABLE NAA
|
6490
|
+
"\u0BA8\u0BBF", # TAMIL SYLLABLE NI
|
6491
|
+
"\u0BA8\u0BC0", # TAMIL SYLLABLE NII
|
6492
|
+
"\u0BA8\u0BC1", # TAMIL SYLLABLE NU
|
6493
|
+
"\u0BA8\u0BC2", # TAMIL SYLLABLE NUU
|
6494
|
+
"\u0BA8\u0BC6", # TAMIL SYLLABLE NE
|
6495
|
+
"\u0BA8\u0BC7", # TAMIL SYLLABLE NEE
|
6496
|
+
"\u0BA8\u0BC8", # TAMIL SYLLABLE NAI
|
6497
|
+
"\u0BA8\u0BCA", # TAMIL SYLLABLE NO
|
6498
|
+
"\u0BA8\u0BCB", # TAMIL SYLLABLE NOO
|
6499
|
+
"\u0BA8\u0BCC", # TAMIL SYLLABLE NAU
|
6500
|
+
"\u0BAA\u0BBE", # TAMIL SYLLABLE PAA
|
6501
|
+
"\u0BAA\u0BBF", # TAMIL SYLLABLE PI
|
6502
|
+
"\u0BAA\u0BC0", # TAMIL SYLLABLE PII
|
6503
|
+
"\u0BAA\u0BC1", # TAMIL SYLLABLE PU
|
6504
|
+
"\u0BAA\u0BC2", # TAMIL SYLLABLE PUU
|
6505
|
+
"\u0BAA\u0BC6", # TAMIL SYLLABLE PE
|
6506
|
+
"\u0BAA\u0BC7", # TAMIL SYLLABLE PEE
|
6507
|
+
"\u0BAA\u0BC8", # TAMIL SYLLABLE PAI
|
6508
|
+
"\u0BAA\u0BCA", # TAMIL SYLLABLE PO
|
6509
|
+
"\u0BAA\u0BCB", # TAMIL SYLLABLE POO
|
6510
|
+
"\u0BAA\u0BCC", # TAMIL SYLLABLE PAU
|
6511
|
+
"\u0BAE\u0BBE", # TAMIL SYLLABLE MAA
|
6512
|
+
"\u0BAE\u0BBF", # TAMIL SYLLABLE MI
|
6513
|
+
"\u0BAE\u0BC0", # TAMIL SYLLABLE MII
|
6514
|
+
"\u0BAE\u0BC1", # TAMIL SYLLABLE MU
|
6515
|
+
"\u0BAE\u0BC2", # TAMIL SYLLABLE MUU
|
6516
|
+
"\u0BAE\u0BC6", # TAMIL SYLLABLE ME
|
6517
|
+
"\u0BAE\u0BC7", # TAMIL SYLLABLE MEE
|
6518
|
+
"\u0BAE\u0BC8", # TAMIL SYLLABLE MAI
|
6519
|
+
"\u0BAE\u0BCA", # TAMIL SYLLABLE MO
|
6520
|
+
"\u0BAE\u0BCB", # TAMIL SYLLABLE MOO
|
6521
|
+
"\u0BAE\u0BCC", # TAMIL SYLLABLE MAU
|
6522
|
+
"\u0BAF\u0BBE", # TAMIL SYLLABLE YAA
|
6523
|
+
"\u0BAF\u0BBF", # TAMIL SYLLABLE YI
|
6524
|
+
"\u0BAF\u0BC0", # TAMIL SYLLABLE YII
|
6525
|
+
"\u0BAF\u0BC1", # TAMIL SYLLABLE YU
|
6526
|
+
"\u0BAF\u0BC2", # TAMIL SYLLABLE YUU
|
6527
|
+
"\u0BAF\u0BC6", # TAMIL SYLLABLE YE
|
6528
|
+
"\u0BAF\u0BC7", # TAMIL SYLLABLE YEE
|
6529
|
+
"\u0BAF\u0BC8", # TAMIL SYLLABLE YAI
|
6530
|
+
"\u0BAF\u0BCA", # TAMIL SYLLABLE YO
|
6531
|
+
"\u0BAF\u0BCB", # TAMIL SYLLABLE YOO
|
6532
|
+
"\u0BAF\u0BCC", # TAMIL SYLLABLE YAU
|
6533
|
+
"\u0BB0\u0BBE", # TAMIL SYLLABLE RAA
|
6534
|
+
"\u0BB0\u0BBF", # TAMIL SYLLABLE RI
|
6535
|
+
"\u0BB0\u0BC0", # TAMIL SYLLABLE RII
|
6536
|
+
"\u0BB0\u0BC1", # TAMIL SYLLABLE RU
|
6537
|
+
"\u0BB0\u0BC2", # TAMIL SYLLABLE RUU
|
6538
|
+
"\u0BB0\u0BC6", # TAMIL SYLLABLE RE
|
6539
|
+
"\u0BB0\u0BC7", # TAMIL SYLLABLE REE
|
6540
|
+
"\u0BB0\u0BC8", # TAMIL SYLLABLE RAI
|
6541
|
+
"\u0BB0\u0BCA", # TAMIL SYLLABLE RO
|
6542
|
+
"\u0BB0\u0BCB", # TAMIL SYLLABLE ROO
|
6543
|
+
"\u0BB0\u0BCC", # TAMIL SYLLABLE RAU
|
6544
|
+
"\u0BB2\u0BBE", # TAMIL SYLLABLE LAA
|
6545
|
+
"\u0BB2\u0BBF", # TAMIL SYLLABLE LI
|
6546
|
+
"\u0BB2\u0BC0", # TAMIL SYLLABLE LII
|
6547
|
+
"\u0BB2\u0BC1", # TAMIL SYLLABLE LU
|
6548
|
+
"\u0BB2\u0BC2", # TAMIL SYLLABLE LUU
|
6549
|
+
"\u0BB2\u0BC6", # TAMIL SYLLABLE LE
|
6550
|
+
"\u0BB2\u0BC7", # TAMIL SYLLABLE LEE
|
6551
|
+
"\u0BB2\u0BC8", # TAMIL SYLLABLE LAI
|
6552
|
+
"\u0BB2\u0BCA", # TAMIL SYLLABLE LO
|
6553
|
+
"\u0BB2\u0BCB", # TAMIL SYLLABLE LOO
|
6554
|
+
"\u0BB2\u0BCC", # TAMIL SYLLABLE LAU
|
6555
|
+
"\u0BB5\u0BBE", # TAMIL SYLLABLE VAA
|
6556
|
+
"\u0BB5\u0BBF", # TAMIL SYLLABLE VI
|
6557
|
+
"\u0BB5\u0BC0", # TAMIL SYLLABLE VII
|
6558
|
+
"\u0BB5\u0BC1", # TAMIL SYLLABLE VU
|
6559
|
+
"\u0BB5\u0BC2", # TAMIL SYLLABLE VUU
|
6560
|
+
"\u0BB5\u0BC6", # TAMIL SYLLABLE VE
|
6561
|
+
"\u0BB5\u0BC7", # TAMIL SYLLABLE VEE
|
6562
|
+
"\u0BB5\u0BC8", # TAMIL SYLLABLE VAI
|
6563
|
+
"\u0BB5\u0BCA", # TAMIL SYLLABLE VO
|
6564
|
+
"\u0BB5\u0BCB", # TAMIL SYLLABLE VOO
|
6565
|
+
"\u0BB5\u0BCC", # TAMIL SYLLABLE VAU
|
6566
|
+
"\u0BB4\u0BBE", # TAMIL SYLLABLE LLLAA
|
6567
|
+
"\u0BB4\u0BBF", # TAMIL SYLLABLE LLLI
|
6568
|
+
"\u0BB4\u0BC0", # TAMIL SYLLABLE LLLII
|
6569
|
+
"\u0BB4\u0BC1", # TAMIL SYLLABLE LLLU
|
6570
|
+
"\u0BB4\u0BC2", # TAMIL SYLLABLE LLLUU
|
6571
|
+
"\u0BB4\u0BC6", # TAMIL SYLLABLE LLLE
|
6572
|
+
"\u0BB4\u0BC7", # TAMIL SYLLABLE LLLEE
|
6573
|
+
"\u0BB4\u0BC8", # TAMIL SYLLABLE LLLAI
|
6574
|
+
"\u0BB4\u0BCA", # TAMIL SYLLABLE LLLO
|
6575
|
+
"\u0BB4\u0BCB", # TAMIL SYLLABLE LLLOO
|
6576
|
+
"\u0BB4\u0BCC", # TAMIL SYLLABLE LLLAU
|
6577
|
+
"\u0BB3\u0BBE", # TAMIL SYLLABLE LLAA
|
6578
|
+
"\u0BB3\u0BBF", # TAMIL SYLLABLE LLI
|
6579
|
+
"\u0BB3\u0BC0", # TAMIL SYLLABLE LLII
|
6580
|
+
"\u0BB3\u0BC1", # TAMIL SYLLABLE LLU
|
6581
|
+
"\u0BB3\u0BC2", # TAMIL SYLLABLE LLUU
|
6582
|
+
"\u0BB3\u0BC6", # TAMIL SYLLABLE LLE
|
6583
|
+
"\u0BB3\u0BC7", # TAMIL SYLLABLE LLEE
|
6584
|
+
"\u0BB3\u0BC8", # TAMIL SYLLABLE LLAI
|
6585
|
+
"\u0BB3\u0BCA", # TAMIL SYLLABLE LLO
|
6586
|
+
"\u0BB3\u0BCB", # TAMIL SYLLABLE LLOO
|
6587
|
+
"\u0BB3\u0BCC", # TAMIL SYLLABLE LLAU
|
6588
|
+
"\u0BB1\u0BBE", # TAMIL SYLLABLE RRAA
|
6589
|
+
"\u0BB1\u0BBF", # TAMIL SYLLABLE RRI
|
6590
|
+
"\u0BB1\u0BC0", # TAMIL SYLLABLE RRII
|
6591
|
+
"\u0BB1\u0BC1", # TAMIL SYLLABLE RRU
|
6592
|
+
"\u0BB1\u0BC2", # TAMIL SYLLABLE RRUU
|
6593
|
+
"\u0BB1\u0BC6", # TAMIL SYLLABLE RRE
|
6594
|
+
"\u0BB1\u0BC7", # TAMIL SYLLABLE RREE
|
6595
|
+
"\u0BB1\u0BC8", # TAMIL SYLLABLE RRAI
|
6596
|
+
"\u0BB1\u0BCA", # TAMIL SYLLABLE RRO
|
6597
|
+
"\u0BB1\u0BCB", # TAMIL SYLLABLE RROO
|
6598
|
+
"\u0BB1\u0BCC", # TAMIL SYLLABLE RRAU
|
6599
|
+
"\u0BA9\u0BBE", # TAMIL SYLLABLE NNNAA
|
6600
|
+
"\u0BA9\u0BBF", # TAMIL SYLLABLE NNNI
|
6601
|
+
"\u0BA9\u0BC0", # TAMIL SYLLABLE NNNII
|
6602
|
+
"\u0BA9\u0BC1", # TAMIL SYLLABLE NNNU
|
6603
|
+
"\u0BA9\u0BC2", # TAMIL SYLLABLE NNNUU
|
6604
|
+
"\u0BA9\u0BC6", # TAMIL SYLLABLE NNNE
|
6605
|
+
"\u0BA9\u0BC7", # TAMIL SYLLABLE NNNEE
|
6606
|
+
"\u0BA9\u0BC8", # TAMIL SYLLABLE NNNAI
|
6607
|
+
"\u0BA9\u0BCA", # TAMIL SYLLABLE NNNO
|
6608
|
+
"\u0BA9\u0BCB", # TAMIL SYLLABLE NNNOO
|
6609
|
+
"\u0BA9\u0BCC", # TAMIL SYLLABLE NNNAU
|
6610
|
+
"\u0B9C\u0BBE", # TAMIL SYLLABLE JAA
|
6611
|
+
"\u0B9C\u0BBF", # TAMIL SYLLABLE JI
|
6612
|
+
"\u0B9C\u0BC0", # TAMIL SYLLABLE JII
|
6613
|
+
"\u0B9C\u0BC1", # TAMIL SYLLABLE JU
|
6614
|
+
"\u0B9C\u0BC2", # TAMIL SYLLABLE JUU
|
6615
|
+
"\u0B9C\u0BC6", # TAMIL SYLLABLE JE
|
6616
|
+
"\u0B9C\u0BC7", # TAMIL SYLLABLE JEE
|
6617
|
+
"\u0B9C\u0BC8", # TAMIL SYLLABLE JAI
|
6618
|
+
"\u0B9C\u0BCA", # TAMIL SYLLABLE JO
|
6619
|
+
"\u0B9C\u0BCB", # TAMIL SYLLABLE JOO
|
6620
|
+
"\u0B9C\u0BCC", # TAMIL SYLLABLE JAU
|
6621
|
+
"\u0BB6\u0BBE", # TAMIL SYLLABLE SHAA
|
6622
|
+
"\u0BB6\u0BBF", # TAMIL SYLLABLE SHI
|
6623
|
+
"\u0BB6\u0BC0", # TAMIL SYLLABLE SHII
|
6624
|
+
"\u0BB6\u0BC1", # TAMIL SYLLABLE SHU
|
6625
|
+
"\u0BB6\u0BC2", # TAMIL SYLLABLE SHUU
|
6626
|
+
"\u0BB6\u0BC6", # TAMIL SYLLABLE SHE
|
6627
|
+
"\u0BB6\u0BC7", # TAMIL SYLLABLE SHEE
|
6628
|
+
"\u0BB6\u0BC8", # TAMIL SYLLABLE SHAI
|
6629
|
+
"\u0BB6\u0BCA", # TAMIL SYLLABLE SHO
|
6630
|
+
"\u0BB6\u0BCB", # TAMIL SYLLABLE SHOO
|
6631
|
+
"\u0BB6\u0BCC", # TAMIL SYLLABLE SHAU
|
6632
|
+
"\u0BB7\u0BBE", # TAMIL SYLLABLE SSAA
|
6633
|
+
"\u0BB7\u0BBF", # TAMIL SYLLABLE SSI
|
6634
|
+
"\u0BB7\u0BC0", # TAMIL SYLLABLE SSII
|
6635
|
+
"\u0BB7\u0BC1", # TAMIL SYLLABLE SSU
|
6636
|
+
"\u0BB7\u0BC2", # TAMIL SYLLABLE SSUU
|
6637
|
+
"\u0BB7\u0BC6", # TAMIL SYLLABLE SSE
|
6638
|
+
"\u0BB7\u0BC7", # TAMIL SYLLABLE SSEE
|
6639
|
+
"\u0BB7\u0BC8", # TAMIL SYLLABLE SSAI
|
6640
|
+
"\u0BB7\u0BCA", # TAMIL SYLLABLE SSO
|
6641
|
+
"\u0BB7\u0BCB", # TAMIL SYLLABLE SSOO
|
6642
|
+
"\u0BB7\u0BCC", # TAMIL SYLLABLE SSAU
|
6643
|
+
"\u0BB8\u0BBE", # TAMIL SYLLABLE SAA
|
6644
|
+
"\u0BB8\u0BBF", # TAMIL SYLLABLE SI
|
6645
|
+
"\u0BB8\u0BC0", # TAMIL SYLLABLE SII
|
6646
|
+
"\u0BB8\u0BC1", # TAMIL SYLLABLE SU
|
6647
|
+
"\u0BB8\u0BC2", # TAMIL SYLLABLE SUU
|
6648
|
+
"\u0BB8\u0BC6", # TAMIL SYLLABLE SE
|
6649
|
+
"\u0BB8\u0BC7", # TAMIL SYLLABLE SEE
|
6650
|
+
"\u0BB8\u0BC8", # TAMIL SYLLABLE SAI
|
6651
|
+
"\u0BB8\u0BCA", # TAMIL SYLLABLE SO
|
6652
|
+
"\u0BB8\u0BCB", # TAMIL SYLLABLE SOO
|
6653
|
+
"\u0BB8\u0BCC", # TAMIL SYLLABLE SAU
|
6654
|
+
"\u0BB9\u0BBE", # TAMIL SYLLABLE HAA
|
6655
|
+
"\u0BB9\u0BBF", # TAMIL SYLLABLE HI
|
6656
|
+
"\u0BB9\u0BC0", # TAMIL SYLLABLE HII
|
6657
|
+
"\u0BB9\u0BC1", # TAMIL SYLLABLE HU
|
6658
|
+
"\u0BB9\u0BC2", # TAMIL SYLLABLE HUU
|
6659
|
+
"\u0BB9\u0BC6", # TAMIL SYLLABLE HE
|
6660
|
+
"\u0BB9\u0BC7", # TAMIL SYLLABLE HEE
|
6661
|
+
"\u0BB9\u0BC8", # TAMIL SYLLABLE HAI
|
6662
|
+
"\u0BB9\u0BCA", # TAMIL SYLLABLE HO
|
6663
|
+
"\u0BB9\u0BCB", # TAMIL SYLLABLE HOO
|
6664
|
+
"\u0BB9\u0BCC", # TAMIL SYLLABLE HAU
|
6665
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSA
|
6666
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSAA
|
6667
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSI
|
6668
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSII
|
6669
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSU
|
6670
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSUU
|
6671
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSE
|
6672
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSEE
|
6673
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSAI
|
6674
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSO
|
6675
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSOO
|
6676
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSAU
|
6677
|
+
"\u0BB6\u0BCD", # TAMIL SYLLABLE SHRII
|
6678
|
+
# Sinhala medial consonants and "reph" form.
|
6679
|
+
# Provisional 2010-05-13, Approved 2011-08-05
|
6680
|
+
"\u0DCA\u200D", # SINHALA CONSONANT SIGN YANSAYA
|
6681
|
+
"\u0DCA\u200D", # SINHALA CONSONANT SIGN RAKAARAANSAYA
|
6682
|
+
"\u0DBB\u0DCA", # SINHALA CONSONANT SIGN REPAYA
|
6683
|
+
# Georgian letter plus accent sequence.
|
6684
|
+
# This is part of the original set of approved named sequences
|
6685
|
+
# for Unicode 4.1. 2005.
|
6686
|
+
"\u10E3\u0302", # GEORGIAN LETTER U-BRJGU
|
6687
|
+
# Khmer subjoined forms and other sequences.
|
6688
|
+
# These are part of the original set of approved named sequences
|
6689
|
+
# for Unicode 4.1. 2005.
|
6690
|
+
"\u17D2\u1780", # KHMER CONSONANT SIGN COENG KA
|
6691
|
+
"\u17D2\u1781", # KHMER CONSONANT SIGN COENG KHA
|
6692
|
+
"\u17D2\u1782", # KHMER CONSONANT SIGN COENG KO
|
6693
|
+
"\u17D2\u1783", # KHMER CONSONANT SIGN COENG KHO
|
6694
|
+
"\u17D2\u1784", # KHMER CONSONANT SIGN COENG NGO
|
6695
|
+
"\u17D2\u1785", # KHMER CONSONANT SIGN COENG CA
|
6696
|
+
"\u17D2\u1786", # KHMER CONSONANT SIGN COENG CHA
|
6697
|
+
"\u17D2\u1787", # KHMER CONSONANT SIGN COENG CO
|
6698
|
+
"\u17D2\u1788", # KHMER CONSONANT SIGN COENG CHO
|
6699
|
+
"\u17D2\u1789", # KHMER CONSONANT SIGN COENG NYO
|
6700
|
+
"\u17D2\u178A", # KHMER CONSONANT SIGN COENG DA
|
6701
|
+
"\u17D2\u178B", # KHMER CONSONANT SIGN COENG TTHA
|
6702
|
+
"\u17D2\u178C", # KHMER CONSONANT SIGN COENG DO
|
6703
|
+
"\u17D2\u178D", # KHMER CONSONANT SIGN COENG TTHO
|
6704
|
+
"\u17D2\u178E", # KHMER CONSONANT SIGN COENG NA
|
6705
|
+
"\u17D2\u178F", # KHMER CONSONANT SIGN COENG TA
|
6706
|
+
"\u17D2\u1790", # KHMER CONSONANT SIGN COENG THA
|
6707
|
+
"\u17D2\u1791", # KHMER CONSONANT SIGN COENG TO
|
6708
|
+
"\u17D2\u1792", # KHMER CONSONANT SIGN COENG THO
|
6709
|
+
"\u17D2\u1793", # KHMER CONSONANT SIGN COENG NO
|
6710
|
+
"\u17D2\u1794", # KHMER CONSONANT SIGN COENG BA
|
6711
|
+
"\u17D2\u1795", # KHMER CONSONANT SIGN COENG PHA
|
6712
|
+
"\u17D2\u1796", # KHMER CONSONANT SIGN COENG PO
|
6713
|
+
"\u17D2\u1797", # KHMER CONSONANT SIGN COENG PHO
|
6714
|
+
"\u17D2\u1798", # KHMER CONSONANT SIGN COENG MO
|
6715
|
+
"\u17D2\u1799", # KHMER CONSONANT SIGN COENG YO
|
6716
|
+
"\u17D2\u179A", # KHMER CONSONANT SIGN COENG RO
|
6717
|
+
"\u17D2\u179B", # KHMER CONSONANT SIGN COENG LO
|
6718
|
+
"\u17D2\u179C", # KHMER CONSONANT SIGN COENG VO
|
6719
|
+
"\u17D2\u179D", # KHMER CONSONANT SIGN COENG SHA
|
6720
|
+
"\u17D2\u179E", # KHMER CONSONANT SIGN COENG SSA
|
6721
|
+
"\u17D2\u179F", # KHMER CONSONANT SIGN COENG SA
|
6722
|
+
"\u17D2\u17A0", # KHMER CONSONANT SIGN COENG HA
|
6723
|
+
"\u17D2\u17A1", # KHMER CONSONANT SIGN COENG LA
|
6724
|
+
"\u17D2\u17A2", # KHMER VOWEL SIGN COENG QA
|
6725
|
+
"\u17D2\u17A7", # KHMER INDEPENDENT VOWEL SIGN COENG QU
|
6726
|
+
"\u17D2\u17AB", # KHMER INDEPENDENT VOWEL SIGN COENG RY
|
6727
|
+
"\u17D2\u17AC", # KHMER INDEPENDENT VOWEL SIGN COENG RYY
|
6728
|
+
"\u17D2\u17AF", # KHMER INDEPENDENT VOWEL SIGN COENG QE
|
6729
|
+
"\u17BB\u17C6", # KHMER VOWEL SIGN OM
|
6730
|
+
"\u17B6\u17C6", # KHMER VOWEL SIGN AAM
|
6731
|
+
# Entries for JIS X 0213 compatibility mapping.
|
6732
|
+
# Provisional 2008-11-07, Approved 2010-05-14
|
6733
|
+
#
|
6734
|
+
# Two of these were part of the original set of approved named sequences
|
6735
|
+
# for Unicode 4.1. 2005.
|
6736
|
+
"\u304B\u309A", # HIRAGANA LETTER BIDAKUON NGA
|
6737
|
+
"\u304D\u309A", # HIRAGANA LETTER BIDAKUON NGI
|
6738
|
+
"\u304F\u309A", # HIRAGANA LETTER BIDAKUON NGU
|
6739
|
+
"\u3051\u309A", # HIRAGANA LETTER BIDAKUON NGE
|
6740
|
+
"\u3053\u309A", # HIRAGANA LETTER BIDAKUON NGO
|
6741
|
+
"\u30AB\u309A", # KATAKANA LETTER BIDAKUON NGA
|
6742
|
+
"\u30AD\u309A", # KATAKANA LETTER BIDAKUON NGI
|
6743
|
+
"\u30AF\u309A", # KATAKANA LETTER BIDAKUON NGU
|
6744
|
+
"\u30B1\u309A", # KATAKANA LETTER BIDAKUON NGE
|
6745
|
+
"\u30B3\u309A", # KATAKANA LETTER BIDAKUON NGO
|
6746
|
+
"\u30BB\u309A", # KATAKANA LETTER AINU CE
|
6747
|
+
"\u30C4\u309A", # KATAKANA LETTER AINU TU
|
6748
|
+
"\u30C8\u309A", # KATAKANA LETTER AINU TO
|
6749
|
+
"\u31F7\u309A", # KATAKANA LETTER AINU P
|
6750
|
+
"\u02E5\u02E9", # MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR
|
6751
|
+
"\u02E9\u02E5", # MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR
|
6752
|
+
]
|
6753
|
+
test_data.each do |string|
|
6754
|
+
assert @validator.valid_encoding?(string), "_named_sequences A: #{string}"
|
6755
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
6756
|
+
"_named_sequences B: #{string}" if @vercheck
|
6757
|
+
end
|
6758
|
+
end
|
6185
6759
|
|
6760
|
+
end # of class
|
data/utf8_validator.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: utf8_validator 1.0.13 ruby lib
|
5
|
+
# stub: utf8_validator 1.0.14 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "utf8_validator"
|
9
|
-
s.version = "1.0.13"
|
9
|
+
s.version = "1.0.14"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Guy Allard"]
|
14
|
-
s.date = "2016-07-
|
14
|
+
s.date = "2016-07-13"
|
15
15
|
s.description = "A State Machine implementation of a UTF-8 Encoding \nValidation algorithm."
|
16
16
|
s.email = "allard.guy.m@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -28,9 +28,12 @@ Gem::Specification.new do |s|
|
|
28
28
|
"lib/validation/errors.rb",
|
29
29
|
"lib/validation/validator.rb",
|
30
30
|
"test/helper.rb",
|
31
|
+
"test/test_code_points.rb",
|
31
32
|
"test/test_raise_request.rb",
|
33
|
+
"test/test_surrogate_half_first_point.rb",
|
32
34
|
"test/test_utf8_validator.rb",
|
33
|
-
"utf8_validator.gemspec"
|
35
|
+
"utf8_validator.gemspec",
|
36
|
+
"utils/gencp.rb"
|
34
37
|
]
|
35
38
|
s.homepage = "http://github.com/gmallard/utf8_validator"
|
36
39
|
s.licenses = ["MIT"]
|