utf8_validator 1.0.13 → 1.0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/examples/fullstring.rb +4 -0
- data/lib/utf8_validator.rb +3 -1
- data/lib/validation/errors.rb +3 -1
- data/lib/validation/validator.rb +3 -0
- data/test/helper.rb +5 -0
- data/test/test_code_points.rb +127023 -0
- data/test/test_raise_request.rb +3 -0
- data/test/test_surrogate_half_first_point.rb +2094 -0
- data/test/test_utf8_validator.rb +578 -3
- data/utf8_validator.gemspec +7 -4
- data/utils/gencp.rb +59 -0
- metadata +5 -2
data/test/test_utf8_validator.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2016 Guy Allard
|
5
|
+
#--
|
3
6
|
if Kernel.respond_to?(:require_relative)
|
4
7
|
require_relative("./helper")
|
5
8
|
else
|
@@ -194,7 +197,7 @@ class TestUtf8Validator < Test::Unit::TestCase
|
|
194
197
|
|
195
198
|
#--
|
196
199
|
# I do not see a need to test UTF-16 surrogate pairs. They are guaranteed
|
197
|
-
# to always fail if the preceding test succeeds. This is because the
|
200
|
+
# to always fail if the preceding test succeeds. This is because the
|
198
201
|
# preceding test data values are always the first surrogate of the pair.
|
199
202
|
#
|
200
203
|
# UTF-16 surrogates are clearly something I do not understand.
|
@@ -384,7 +387,7 @@ straight from the Unicode 6.0 spec. See page 92.
|
|
384
387
|
]
|
385
388
|
good_data.each do |string|
|
386
389
|
assert @validator.valid_encoding?(string), "good unicode specs 01: #{string}"
|
387
|
-
assert string.force_encoding("UTF-8").valid_encoding?,
|
390
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
388
391
|
"good unicode specs 01 19: #{string}" if @vercheck
|
389
392
|
end
|
390
393
|
|
@@ -6181,5 +6184,577 @@ http://www.unicode.org/versions/Unicode7.0.0/
|
|
6181
6184
|
end
|
6182
6185
|
end
|
6183
6186
|
|
6184
|
-
|
6187
|
+
def test_0810_zero_len_fillers
|
6188
|
+
test_data = [
|
6189
|
+
"A\u200bZ", # U+200B ZERO WIDTH SPACE
|
6190
|
+
"A\u200cZ", # U+200C ZERO WIDTH NON-JOINER
|
6191
|
+
"A\u200dZ", # U+200D ZERO WIDTH JOINER
|
6192
|
+
]
|
6193
|
+
test_data.each do |string|
|
6194
|
+
assert @validator.valid_encoding?(string), "zero_len_fillers A: #{string}"
|
6195
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
6196
|
+
"zero_len_fillers B: #{string}" if @vercheck
|
6197
|
+
end
|
6198
|
+
end
|
6199
|
+
|
6200
|
+
# grapheme clusters
|
6201
|
+
def test_0850_grapheme_clusters
|
6202
|
+
test_data = [
|
6203
|
+
"\u0067", # 0067 ( g ) LATIN SMALL LETTER G
|
6204
|
+
"\u0308", # 0308 ( ◌̈ ) COMBINING DIAERESIS
|
6205
|
+
"\u0067\u0308", # Combined
|
6206
|
+
"\uac01", # AC01 ( 각 ) HANGUL SYLLABLE GAG
|
6207
|
+
"\u1100", # 1100 ( ᄀ ) HANGUL CHOSEONG KIYEOK
|
6208
|
+
"\u1161", # 1161 ( ᅡ ) HANGUL JUNGSEONG A
|
6209
|
+
"\u11a8", # 11A8 ( ᆨ ) HANGUL JONGSEONG KIYEOK
|
6210
|
+
"\uac01\u1100\u1161\u11a8", # Combined
|
6211
|
+
# THAI
|
6212
|
+
"\u0e01", # ก 0E01 ( ก ) THAI CHARACTER KO KAI Thai ko
|
6213
|
+
# THAI
|
6214
|
+
"\u0e01", # ก 0E01 ( ก ) THAI CHARACTER KO KAI Thai ko
|
6215
|
+
"\u0e33", # 0E33 ( ำ ) THAI CHARACTER SARA AM
|
6216
|
+
"\u0e01\u0e33",
|
6217
|
+
# Extended grapheme clusters
|
6218
|
+
"\u0ba8", # நி 0BA8 ( ந ) TAMIL LETTER NA
|
6219
|
+
"\u0bbf", # 0BBF ( ி ) TAMIL VOWEL SIGN I Tamil ni
|
6220
|
+
"\u0e40", # เ 0E40 ( เ ) THAI CHARACTER SARA E Thai e
|
6221
|
+
"\u0e01", # กำ 0E01 ( ก ) THAI CHARACTER KO KAI
|
6222
|
+
"\u0e33", # 0E33 ( ำ ) THAI CHARACTER SARA AM Thai kam
|
6223
|
+
"\u0937", # षि 0937 ( ष ) DEVANAGARI LETTER SSA
|
6224
|
+
"\u093f", # 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari ssi
|
6225
|
+
"\u0ba8\u0bbf\u0e40\u0e01\u0e33\u0937\u093f", # Combined
|
6226
|
+
# Legacy grapheme clusters
|
6227
|
+
"\u0e33", # ำ 0E33 ( ำ ) THAI CHARACTER SARA AM Thai am
|
6228
|
+
"\u0937", # ष 0937 ( ष ) DEVANAGARI LETTER SSA Devanagari ssa
|
6229
|
+
"\u093f", # ि 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari i
|
6230
|
+
"\u0e33\u0937\u093f", # Combined
|
6231
|
+
# Tailored grapheme clusters
|
6232
|
+
"\u0063", # ch 0063 ( c ) LATIN SMALL LETTER C
|
6233
|
+
"\u0068", # 0068 ( h ) LATIN SMALL LETTER H Slovak ch digraph
|
6234
|
+
"\u0063\u0068", # Combined
|
6235
|
+
"\u006b", # kw 006B ( k ) LATIN SMALL LETTER K
|
6236
|
+
"\u02b7", # 02B7 ( ʷ ) MODIFIER LETTER SMALL W sequence with letter modifier
|
6237
|
+
"\u006b\u02b7", # Combined
|
6238
|
+
"\u0915", # क्षि 0915 ( क ) DEVANAGARI LETTER KA
|
6239
|
+
"\u094d", # 094D ( ् ) DEVANAGARI SIGN VIRAMA
|
6240
|
+
"\u0937", # 0937 ( ष ) DEVANAGARI LETTER SSA
|
6241
|
+
"\u093f", # 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari kshi
|
6242
|
+
"\u0915\u094d\u0937\u093f",
|
6243
|
+
]
|
6244
|
+
test_data.each do |string|
|
6245
|
+
assert @validator.valid_encoding?(string), "grapheme clusters A: #{string}"
|
6246
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
6247
|
+
"grapheme clusters B: #{string}" if @vercheck
|
6248
|
+
end
|
6249
|
+
end # of method
|
6250
|
+
|
6251
|
+
def test_0890_named_sequences
|
6252
|
+
test_data = [
|
6253
|
+
# NamedSequences-9.0.0.txt
|
6254
|
+
# Date: 2016-05-26, 00:00:00 GMT [KW, LI]
|
6255
|
+
# © 2016 Unicode®, Inc.
|
6256
|
+
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
6257
|
+
#
|
6258
|
+
# Unicode Character Database
|
6259
|
+
# For documentation, see http://www.unicode.org/reports/tr44/
|
6260
|
+
#
|
6261
|
+
# Unicode Named Character Sequences
|
6262
|
+
#
|
6263
|
+
# This file is a normative contributory data file in the Unicode
|
6264
|
+
# Character Database.
|
6265
|
+
#
|
6266
|
+
# Format:
|
6267
|
+
# Name of Sequence; Code Point Sequence for USI
|
6268
|
+
#
|
6269
|
+
# Code point sequences in the Unicode Character Database
|
6270
|
+
# use spaces as delimiters. The corresponding format for a
|
6271
|
+
# UCS Sequence Identifier (USI) in ISO/IEC 10646 uses
|
6272
|
+
# comma delimitation and angle brackets. Thus, a Unicode
|
6273
|
+
# named character sequence of the form:
|
6274
|
+
#
|
6275
|
+
# EXAMPLE NAME;1000 1001 1002
|
6276
|
+
#
|
6277
|
+
# in this data file, would correspond to an ISO/IEC 10646 USI
|
6278
|
+
# as follows:
|
6279
|
+
#
|
6280
|
+
# <1000, 1001, 1002>
|
6281
|
+
#
|
6282
|
+
# For more information, see UAX #34: Unicode Named Character
|
6283
|
+
# Sequences, at http://www.unicode.org/unicode/reports/tr34/
|
6284
|
+
#
|
6285
|
+
# Note: The order of entries in this file is not significant.
|
6286
|
+
# However, entries are generally in script order corresponding
|
6287
|
+
# to block order in the Unicode Standard, to make it easier
|
6288
|
+
# to find entries in the list.
|
6289
|
+
# ================================================
|
6290
|
+
# Latin letter plus accent combinations.
|
6291
|
+
# These are part of the original set of approved named sequences
|
6292
|
+
# for Unicode 4.1. 2005.
|
6293
|
+
"\u0100\u0300", # LATIN CAPITAL LETTER A WITH MACRON AND GRAVE
|
6294
|
+
"\u0101\u0300", # LATIN SMALL LETTER A WITH MACRON AND GRAVE
|
6295
|
+
"\u0045\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW
|
6296
|
+
"\u0065\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW
|
6297
|
+
"\u00C8\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE
|
6298
|
+
"\u00E8\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE
|
6299
|
+
"\u00C9\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE
|
6300
|
+
"\u00E9\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE
|
6301
|
+
"\u00CA\u0304", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON
|
6302
|
+
"\u00EA\u0304", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON
|
6303
|
+
"\u00CA\u030C", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON
|
6304
|
+
"\u00EA\u030C", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON
|
6305
|
+
"\u012A\u0300", # LATIN CAPITAL LETTER I WITH MACRON AND GRAVE
|
6306
|
+
"\u012B\u0300", # LATIN SMALL LETTER I WITH MACRON AND GRAVE
|
6307
|
+
"\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE
|
6308
|
+
"\u006E\u0360", # LATIN SMALL LETTER NG WITH TILDE ABOVE
|
6309
|
+
"\u004F\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW
|
6310
|
+
"\u006F\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW
|
6311
|
+
"\u00D2\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE
|
6312
|
+
"\u00F2\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE
|
6313
|
+
"\u00D3\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE
|
6314
|
+
"\u00F3\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE
|
6315
|
+
"\u0053\u0329", # LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW
|
6316
|
+
"\u0073\u0329", # LATIN SMALL LETTER S WITH VERTICAL LINE BELOW
|
6317
|
+
"\u016A\u0300", # LATIN CAPITAL LETTER U WITH MACRON AND GRAVE
|
6318
|
+
"\u016B\u0300", # LATIN SMALL LETTER U WITH MACRON AND GRAVE
|
6319
|
+
# Additions for Lithuanian.
|
6320
|
+
# Provisional 2006-05-18, Approved 2007-10-19
|
6321
|
+
"\u0104\u0301", # LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE
|
6322
|
+
"\u0105\u0301", # LATIN SMALL LETTER A WITH OGONEK AND ACUTE
|
6323
|
+
"\u0104\u0303", # LATIN CAPITAL LETTER A WITH OGONEK AND TILDE
|
6324
|
+
"\u0105\u0303", # LATIN SMALL LETTER A WITH OGONEK AND TILDE
|
6325
|
+
"\u0118\u0301", # LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE
|
6326
|
+
"\u0119\u0301", # LATIN SMALL LETTER E WITH OGONEK AND ACUTE
|
6327
|
+
"\u0118\u0303", # LATIN CAPITAL LETTER E WITH OGONEK AND TILDE
|
6328
|
+
"\u0119\u0303", # LATIN SMALL LETTER E WITH OGONEK AND TILDE
|
6329
|
+
"\u0116\u0301", # LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE
|
6330
|
+
"\u0117\u0301", # LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE
|
6331
|
+
"\u0116\u0303", # LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE
|
6332
|
+
"\u0117\u0303", # LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE
|
6333
|
+
"\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE
|
6334
|
+
"\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE
|
6335
|
+
"\u012E\u0301", # LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE
|
6336
|
+
"\u012F\u0307", # LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE
|
6337
|
+
"\u012E\u0303", # LATIN CAPITAL LETTER I WITH OGONEK AND TILDE
|
6338
|
+
"\u012F\u0307", # LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE
|
6339
|
+
"\u004A\u0303", # LATIN CAPITAL LETTER J WITH TILDE
|
6340
|
+
"\u006A\u0307", # LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE
|
6341
|
+
"\u004C\u0303", # LATIN CAPITAL LETTER L WITH TILDE
|
6342
|
+
"\u006C\u0303", # LATIN SMALL LETTER L WITH TILDE
|
6343
|
+
"\u004D\u0303", # LATIN CAPITAL LETTER M WITH TILDE
|
6344
|
+
"\u006D\u0303", # LATIN SMALL LETTER M WITH TILDE
|
6345
|
+
"\u0052\u0303", # LATIN CAPITAL LETTER R WITH TILDE
|
6346
|
+
"\u0072\u0303", # LATIN SMALL LETTER R WITH TILDE
|
6347
|
+
"\u0172\u0301", # LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE
|
6348
|
+
"\u0173\u0301", # LATIN SMALL LETTER U WITH OGONEK AND ACUTE
|
6349
|
+
"\u0172\u0303", # LATIN CAPITAL LETTER U WITH OGONEK AND TILDE
|
6350
|
+
"\u0173\u0303", # LATIN SMALL LETTER U WITH OGONEK AND TILDE
|
6351
|
+
"\u016A\u0301", # LATIN CAPITAL LETTER U WITH MACRON AND ACUTE
|
6352
|
+
"\u016B\u0301", # LATIN SMALL LETTER U WITH MACRON AND ACUTE
|
6353
|
+
"\u016A\u0303", # LATIN CAPITAL LETTER U WITH MACRON AND TILDE
|
6354
|
+
"\u016B\u0303", # LATIN SMALL LETTER U WITH MACRON AND TILDE
|
6355
|
+
# Entries for JIS X 0213 compatibility mapping.
|
6356
|
+
# Provisional 2008-11-07, Approved 2010-05-14
|
6357
|
+
"\u00E6\u0300", # LATIN SMALL LETTER AE WITH GRAVE
|
6358
|
+
"\u0254\u0300", # LATIN SMALL LETTER OPEN O WITH GRAVE
|
6359
|
+
"\u0254\u0301", # LATIN SMALL LETTER OPEN O WITH ACUTE
|
6360
|
+
"\u028C\u0300", # LATIN SMALL LETTER TURNED V WITH GRAVE
|
6361
|
+
"\u028C\u0301", # LATIN SMALL LETTER TURNED V WITH ACUTE
|
6362
|
+
"\u0259\u0300", # LATIN SMALL LETTER SCHWA WITH GRAVE
|
6363
|
+
"\u0259\u0301", # LATIN SMALL LETTER SCHWA WITH ACUTE
|
6364
|
+
"\u025A\u0300", # LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE
|
6365
|
+
"\u025A\u0301", # LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE
|
6366
|
+
# Entries for Uyghur and Chagatai.
|
6367
|
+
# Provisional N/A, Approved 2012-11-08
|
6368
|
+
"\u0626\u0627", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH ALEF
|
6369
|
+
"\u0626\u0648", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH WAW
|
6370
|
+
"\u0626\u0649", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA
|
6371
|
+
"\u0626\u06C6", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH OE
|
6372
|
+
"\u0626\u06C7", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH U
|
6373
|
+
"\u0626\u06C8", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH YU
|
6374
|
+
"\u0626\u06D0", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH E
|
6375
|
+
"\u0626\u06D5", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH AE
|
6376
|
+
"\u0646\u06A9", # ARABIC SEQUENCE NOON WITH KEHEH
|
6377
|
+
# Entry for a Bangla entity.
|
6378
|
+
# Provisional 2009-08-10, Approved 2010-05-14
|
6379
|
+
#
|
6380
|
+
# Note that this same sequence is also used for the ASSAMESE LETTER KSSA.
|
6381
|
+
"\u0995\u09CD", # BENGALI LETTER KHINYA
|
6382
|
+
# Additions for Tamil.
|
6383
|
+
# Provisional 2008-02-08, Approved 2009-08-14
|
6384
|
+
#
|
6385
|
+
# A visual display of the Tamil named character sequences is available
|
6386
|
+
# in the documentation for the Unicode Standard. See Section 12.6, Tamil in
|
6387
|
+
# http://www.unicode.org/versions/latest/
|
6388
|
+
"\u0B95\u0BCD", # TAMIL CONSONANT K
|
6389
|
+
"\u0B99\u0BCD", # TAMIL CONSONANT NG
|
6390
|
+
"\u0B9A\u0BCD", # TAMIL CONSONANT C
|
6391
|
+
"\u0B9E\u0BCD", # TAMIL CONSONANT NY
|
6392
|
+
"\u0B9F\u0BCD", # TAMIL CONSONANT TT
|
6393
|
+
"\u0BA3\u0BCD", # TAMIL CONSONANT NN
|
6394
|
+
"\u0BA4\u0BCD", # TAMIL CONSONANT T
|
6395
|
+
"\u0BA8\u0BCD", # TAMIL CONSONANT N
|
6396
|
+
"\u0BAA\u0BCD", # TAMIL CONSONANT P
|
6397
|
+
"\u0BAE\u0BCD", # TAMIL CONSONANT M
|
6398
|
+
"\u0BAF\u0BCD", # TAMIL CONSONANT Y
|
6399
|
+
"\u0BB0\u0BCD", # TAMIL CONSONANT R
|
6400
|
+
"\u0BB2\u0BCD", # TAMIL CONSONANT L
|
6401
|
+
"\u0BB5\u0BCD", # TAMIL CONSONANT V
|
6402
|
+
"\u0BB4\u0BCD", # TAMIL CONSONANT LLL
|
6403
|
+
"\u0BB3\u0BCD", # TAMIL CONSONANT LL
|
6404
|
+
"\u0BB1\u0BCD", # TAMIL CONSONANT RR
|
6405
|
+
"\u0BA9\u0BCD", # TAMIL CONSONANT NNN
|
6406
|
+
"\u0B9C\u0BCD", # TAMIL CONSONANT J
|
6407
|
+
"\u0BB6\u0BCD", # TAMIL CONSONANT SH
|
6408
|
+
"\u0BB7\u0BCD", # TAMIL CONSONANT SS
|
6409
|
+
"\u0BB8\u0BCD", # TAMIL CONSONANT S
|
6410
|
+
"\u0BB9\u0BCD", # TAMIL CONSONANT H
|
6411
|
+
"\u0B95\u0BCD", # TAMIL CONSONANT KSS
|
6412
|
+
"\u0B95\u0BBE", # TAMIL SYLLABLE KAA
|
6413
|
+
"\u0B95\u0BBF", # TAMIL SYLLABLE KI
|
6414
|
+
"\u0B95\u0BC0", # TAMIL SYLLABLE KII
|
6415
|
+
"\u0B95\u0BC1", # TAMIL SYLLABLE KU
|
6416
|
+
"\u0B95\u0BC2", # TAMIL SYLLABLE KUU
|
6417
|
+
"\u0B95\u0BC6", # TAMIL SYLLABLE KE
|
6418
|
+
"\u0B95\u0BC7", # TAMIL SYLLABLE KEE
|
6419
|
+
"\u0B95\u0BC8", # TAMIL SYLLABLE KAI
|
6420
|
+
"\u0B95\u0BCA", # TAMIL SYLLABLE KO
|
6421
|
+
"\u0B95\u0BCB", # TAMIL SYLLABLE KOO
|
6422
|
+
"\u0B95\u0BCC", # TAMIL SYLLABLE KAU
|
6423
|
+
"\u0B99\u0BBE", # TAMIL SYLLABLE NGAA
|
6424
|
+
"\u0B99\u0BBF", # TAMIL SYLLABLE NGI
|
6425
|
+
"\u0B99\u0BC0", # TAMIL SYLLABLE NGII
|
6426
|
+
"\u0B99\u0BC1", # TAMIL SYLLABLE NGU
|
6427
|
+
"\u0B99\u0BC2", # TAMIL SYLLABLE NGUU
|
6428
|
+
"\u0B99\u0BC6", # TAMIL SYLLABLE NGE
|
6429
|
+
"\u0B99\u0BC7", # TAMIL SYLLABLE NGEE
|
6430
|
+
"\u0B99\u0BC8", # TAMIL SYLLABLE NGAI
|
6431
|
+
"\u0B99\u0BCA", # TAMIL SYLLABLE NGO
|
6432
|
+
"\u0B99\u0BCB", # TAMIL SYLLABLE NGOO
|
6433
|
+
"\u0B99\u0BCC", # TAMIL SYLLABLE NGAU
|
6434
|
+
"\u0B9A\u0BBE", # TAMIL SYLLABLE CAA
|
6435
|
+
"\u0B9A\u0BBF", # TAMIL SYLLABLE CI
|
6436
|
+
"\u0B9A\u0BC0", # TAMIL SYLLABLE CII
|
6437
|
+
"\u0B9A\u0BC1", # TAMIL SYLLABLE CU
|
6438
|
+
"\u0B9A\u0BC2", # TAMIL SYLLABLE CUU
|
6439
|
+
"\u0B9A\u0BC6", # TAMIL SYLLABLE CE
|
6440
|
+
"\u0B9A\u0BC7", # TAMIL SYLLABLE CEE
|
6441
|
+
"\u0B9A\u0BC8", # TAMIL SYLLABLE CAI
|
6442
|
+
"\u0B9A\u0BCA", # TAMIL SYLLABLE CO
|
6443
|
+
"\u0B9A\u0BCB", # TAMIL SYLLABLE COO
|
6444
|
+
"\u0B9A\u0BCC", # TAMIL SYLLABLE CAU
|
6445
|
+
"\u0B9E\u0BBE", # TAMIL SYLLABLE NYAA
|
6446
|
+
"\u0B9E\u0BBF", # TAMIL SYLLABLE NYI
|
6447
|
+
"\u0B9E\u0BC0", # TAMIL SYLLABLE NYII
|
6448
|
+
"\u0B9E\u0BC1", # TAMIL SYLLABLE NYU
|
6449
|
+
"\u0B9E\u0BC2", # TAMIL SYLLABLE NYUU
|
6450
|
+
"\u0B9E\u0BC6", # TAMIL SYLLABLE NYE
|
6451
|
+
"\u0B9E\u0BC7", # TAMIL SYLLABLE NYEE
|
6452
|
+
"\u0B9E\u0BC8", # TAMIL SYLLABLE NYAI
|
6453
|
+
"\u0B9E\u0BCA", # TAMIL SYLLABLE NYO
|
6454
|
+
"\u0B9E\u0BCB", # TAMIL SYLLABLE NYOO
|
6455
|
+
"\u0B9E\u0BCC", # TAMIL SYLLABLE NYAU
|
6456
|
+
"\u0B9F\u0BBE", # TAMIL SYLLABLE TTAA
|
6457
|
+
"\u0B9F\u0BBF", # TAMIL SYLLABLE TTI
|
6458
|
+
"\u0B9F\u0BC0", # TAMIL SYLLABLE TTII
|
6459
|
+
"\u0B9F\u0BC1", # TAMIL SYLLABLE TTU
|
6460
|
+
"\u0B9F\u0BC2", # TAMIL SYLLABLE TTUU
|
6461
|
+
"\u0B9F\u0BC6", # TAMIL SYLLABLE TTE
|
6462
|
+
"\u0B9F\u0BC7", # TAMIL SYLLABLE TTEE
|
6463
|
+
"\u0B9F\u0BC8", # TAMIL SYLLABLE TTAI
|
6464
|
+
"\u0B9F\u0BCA", # TAMIL SYLLABLE TTO
|
6465
|
+
"\u0B9F\u0BCB", # TAMIL SYLLABLE TTOO
|
6466
|
+
"\u0B9F\u0BCC", # TAMIL SYLLABLE TTAU
|
6467
|
+
"\u0BA3\u0BBE", # TAMIL SYLLABLE NNAA
|
6468
|
+
"\u0BA3\u0BBF", # TAMIL SYLLABLE NNI
|
6469
|
+
"\u0BA3\u0BC0", # TAMIL SYLLABLE NNII
|
6470
|
+
"\u0BA3\u0BC1", # TAMIL SYLLABLE NNU
|
6471
|
+
"\u0BA3\u0BC2", # TAMIL SYLLABLE NNUU
|
6472
|
+
"\u0BA3\u0BC6", # TAMIL SYLLABLE NNE
|
6473
|
+
"\u0BA3\u0BC7", # TAMIL SYLLABLE NNEE
|
6474
|
+
"\u0BA3\u0BC8", # TAMIL SYLLABLE NNAI
|
6475
|
+
"\u0BA3\u0BCA", # TAMIL SYLLABLE NNO
|
6476
|
+
"\u0BA3\u0BCB", # TAMIL SYLLABLE NNOO
|
6477
|
+
"\u0BA3\u0BCC", # TAMIL SYLLABLE NNAU
|
6478
|
+
"\u0BA4\u0BBE", # TAMIL SYLLABLE TAA
|
6479
|
+
"\u0BA4\u0BBF", # TAMIL SYLLABLE TI
|
6480
|
+
"\u0BA4\u0BC0", # TAMIL SYLLABLE TII
|
6481
|
+
"\u0BA4\u0BC1", # TAMIL SYLLABLE TU
|
6482
|
+
"\u0BA4\u0BC2", # TAMIL SYLLABLE TUU
|
6483
|
+
"\u0BA4\u0BC6", # TAMIL SYLLABLE TE
|
6484
|
+
"\u0BA4\u0BC7", # TAMIL SYLLABLE TEE
|
6485
|
+
"\u0BA4\u0BC8", # TAMIL SYLLABLE TAI
|
6486
|
+
"\u0BA4\u0BCA", # TAMIL SYLLABLE TO
|
6487
|
+
"\u0BA4\u0BCB", # TAMIL SYLLABLE TOO
|
6488
|
+
"\u0BA4\u0BCC", # TAMIL SYLLABLE TAU
|
6489
|
+
"\u0BA8\u0BBE", # TAMIL SYLLABLE NAA
|
6490
|
+
"\u0BA8\u0BBF", # TAMIL SYLLABLE NI
|
6491
|
+
"\u0BA8\u0BC0", # TAMIL SYLLABLE NII
|
6492
|
+
"\u0BA8\u0BC1", # TAMIL SYLLABLE NU
|
6493
|
+
"\u0BA8\u0BC2", # TAMIL SYLLABLE NUU
|
6494
|
+
"\u0BA8\u0BC6", # TAMIL SYLLABLE NE
|
6495
|
+
"\u0BA8\u0BC7", # TAMIL SYLLABLE NEE
|
6496
|
+
"\u0BA8\u0BC8", # TAMIL SYLLABLE NAI
|
6497
|
+
"\u0BA8\u0BCA", # TAMIL SYLLABLE NO
|
6498
|
+
"\u0BA8\u0BCB", # TAMIL SYLLABLE NOO
|
6499
|
+
"\u0BA8\u0BCC", # TAMIL SYLLABLE NAU
|
6500
|
+
"\u0BAA\u0BBE", # TAMIL SYLLABLE PAA
|
6501
|
+
"\u0BAA\u0BBF", # TAMIL SYLLABLE PI
|
6502
|
+
"\u0BAA\u0BC0", # TAMIL SYLLABLE PII
|
6503
|
+
"\u0BAA\u0BC1", # TAMIL SYLLABLE PU
|
6504
|
+
"\u0BAA\u0BC2", # TAMIL SYLLABLE PUU
|
6505
|
+
"\u0BAA\u0BC6", # TAMIL SYLLABLE PE
|
6506
|
+
"\u0BAA\u0BC7", # TAMIL SYLLABLE PEE
|
6507
|
+
"\u0BAA\u0BC8", # TAMIL SYLLABLE PAI
|
6508
|
+
"\u0BAA\u0BCA", # TAMIL SYLLABLE PO
|
6509
|
+
"\u0BAA\u0BCB", # TAMIL SYLLABLE POO
|
6510
|
+
"\u0BAA\u0BCC", # TAMIL SYLLABLE PAU
|
6511
|
+
"\u0BAE\u0BBE", # TAMIL SYLLABLE MAA
|
6512
|
+
"\u0BAE\u0BBF", # TAMIL SYLLABLE MI
|
6513
|
+
"\u0BAE\u0BC0", # TAMIL SYLLABLE MII
|
6514
|
+
"\u0BAE\u0BC1", # TAMIL SYLLABLE MU
|
6515
|
+
"\u0BAE\u0BC2", # TAMIL SYLLABLE MUU
|
6516
|
+
"\u0BAE\u0BC6", # TAMIL SYLLABLE ME
|
6517
|
+
"\u0BAE\u0BC7", # TAMIL SYLLABLE MEE
|
6518
|
+
"\u0BAE\u0BC8", # TAMIL SYLLABLE MAI
|
6519
|
+
"\u0BAE\u0BCA", # TAMIL SYLLABLE MO
|
6520
|
+
"\u0BAE\u0BCB", # TAMIL SYLLABLE MOO
|
6521
|
+
"\u0BAE\u0BCC", # TAMIL SYLLABLE MAU
|
6522
|
+
"\u0BAF\u0BBE", # TAMIL SYLLABLE YAA
|
6523
|
+
"\u0BAF\u0BBF", # TAMIL SYLLABLE YI
|
6524
|
+
"\u0BAF\u0BC0", # TAMIL SYLLABLE YII
|
6525
|
+
"\u0BAF\u0BC1", # TAMIL SYLLABLE YU
|
6526
|
+
"\u0BAF\u0BC2", # TAMIL SYLLABLE YUU
|
6527
|
+
"\u0BAF\u0BC6", # TAMIL SYLLABLE YE
|
6528
|
+
"\u0BAF\u0BC7", # TAMIL SYLLABLE YEE
|
6529
|
+
"\u0BAF\u0BC8", # TAMIL SYLLABLE YAI
|
6530
|
+
"\u0BAF\u0BCA", # TAMIL SYLLABLE YO
|
6531
|
+
"\u0BAF\u0BCB", # TAMIL SYLLABLE YOO
|
6532
|
+
"\u0BAF\u0BCC", # TAMIL SYLLABLE YAU
|
6533
|
+
"\u0BB0\u0BBE", # TAMIL SYLLABLE RAA
|
6534
|
+
"\u0BB0\u0BBF", # TAMIL SYLLABLE RI
|
6535
|
+
"\u0BB0\u0BC0", # TAMIL SYLLABLE RII
|
6536
|
+
"\u0BB0\u0BC1", # TAMIL SYLLABLE RU
|
6537
|
+
"\u0BB0\u0BC2", # TAMIL SYLLABLE RUU
|
6538
|
+
"\u0BB0\u0BC6", # TAMIL SYLLABLE RE
|
6539
|
+
"\u0BB0\u0BC7", # TAMIL SYLLABLE REE
|
6540
|
+
"\u0BB0\u0BC8", # TAMIL SYLLABLE RAI
|
6541
|
+
"\u0BB0\u0BCA", # TAMIL SYLLABLE RO
|
6542
|
+
"\u0BB0\u0BCB", # TAMIL SYLLABLE ROO
|
6543
|
+
"\u0BB0\u0BCC", # TAMIL SYLLABLE RAU
|
6544
|
+
"\u0BB2\u0BBE", # TAMIL SYLLABLE LAA
|
6545
|
+
"\u0BB2\u0BBF", # TAMIL SYLLABLE LI
|
6546
|
+
"\u0BB2\u0BC0", # TAMIL SYLLABLE LII
|
6547
|
+
"\u0BB2\u0BC1", # TAMIL SYLLABLE LU
|
6548
|
+
"\u0BB2\u0BC2", # TAMIL SYLLABLE LUU
|
6549
|
+
"\u0BB2\u0BC6", # TAMIL SYLLABLE LE
|
6550
|
+
"\u0BB2\u0BC7", # TAMIL SYLLABLE LEE
|
6551
|
+
"\u0BB2\u0BC8", # TAMIL SYLLABLE LAI
|
6552
|
+
"\u0BB2\u0BCA", # TAMIL SYLLABLE LO
|
6553
|
+
"\u0BB2\u0BCB", # TAMIL SYLLABLE LOO
|
6554
|
+
"\u0BB2\u0BCC", # TAMIL SYLLABLE LAU
|
6555
|
+
"\u0BB5\u0BBE", # TAMIL SYLLABLE VAA
|
6556
|
+
"\u0BB5\u0BBF", # TAMIL SYLLABLE VI
|
6557
|
+
"\u0BB5\u0BC0", # TAMIL SYLLABLE VII
|
6558
|
+
"\u0BB5\u0BC1", # TAMIL SYLLABLE VU
|
6559
|
+
"\u0BB5\u0BC2", # TAMIL SYLLABLE VUU
|
6560
|
+
"\u0BB5\u0BC6", # TAMIL SYLLABLE VE
|
6561
|
+
"\u0BB5\u0BC7", # TAMIL SYLLABLE VEE
|
6562
|
+
"\u0BB5\u0BC8", # TAMIL SYLLABLE VAI
|
6563
|
+
"\u0BB5\u0BCA", # TAMIL SYLLABLE VO
|
6564
|
+
"\u0BB5\u0BCB", # TAMIL SYLLABLE VOO
|
6565
|
+
"\u0BB5\u0BCC", # TAMIL SYLLABLE VAU
|
6566
|
+
"\u0BB4\u0BBE", # TAMIL SYLLABLE LLLAA
|
6567
|
+
"\u0BB4\u0BBF", # TAMIL SYLLABLE LLLI
|
6568
|
+
"\u0BB4\u0BC0", # TAMIL SYLLABLE LLLII
|
6569
|
+
"\u0BB4\u0BC1", # TAMIL SYLLABLE LLLU
|
6570
|
+
"\u0BB4\u0BC2", # TAMIL SYLLABLE LLLUU
|
6571
|
+
"\u0BB4\u0BC6", # TAMIL SYLLABLE LLLE
|
6572
|
+
"\u0BB4\u0BC7", # TAMIL SYLLABLE LLLEE
|
6573
|
+
"\u0BB4\u0BC8", # TAMIL SYLLABLE LLLAI
|
6574
|
+
"\u0BB4\u0BCA", # TAMIL SYLLABLE LLLO
|
6575
|
+
"\u0BB4\u0BCB", # TAMIL SYLLABLE LLLOO
|
6576
|
+
"\u0BB4\u0BCC", # TAMIL SYLLABLE LLLAU
|
6577
|
+
"\u0BB3\u0BBE", # TAMIL SYLLABLE LLAA
|
6578
|
+
"\u0BB3\u0BBF", # TAMIL SYLLABLE LLI
|
6579
|
+
"\u0BB3\u0BC0", # TAMIL SYLLABLE LLII
|
6580
|
+
"\u0BB3\u0BC1", # TAMIL SYLLABLE LLU
|
6581
|
+
"\u0BB3\u0BC2", # TAMIL SYLLABLE LLUU
|
6582
|
+
"\u0BB3\u0BC6", # TAMIL SYLLABLE LLE
|
6583
|
+
"\u0BB3\u0BC7", # TAMIL SYLLABLE LLEE
|
6584
|
+
"\u0BB3\u0BC8", # TAMIL SYLLABLE LLAI
|
6585
|
+
"\u0BB3\u0BCA", # TAMIL SYLLABLE LLO
|
6586
|
+
"\u0BB3\u0BCB", # TAMIL SYLLABLE LLOO
|
6587
|
+
"\u0BB3\u0BCC", # TAMIL SYLLABLE LLAU
|
6588
|
+
"\u0BB1\u0BBE", # TAMIL SYLLABLE RRAA
|
6589
|
+
"\u0BB1\u0BBF", # TAMIL SYLLABLE RRI
|
6590
|
+
"\u0BB1\u0BC0", # TAMIL SYLLABLE RRII
|
6591
|
+
"\u0BB1\u0BC1", # TAMIL SYLLABLE RRU
|
6592
|
+
"\u0BB1\u0BC2", # TAMIL SYLLABLE RRUU
|
6593
|
+
"\u0BB1\u0BC6", # TAMIL SYLLABLE RRE
|
6594
|
+
"\u0BB1\u0BC7", # TAMIL SYLLABLE RREE
|
6595
|
+
"\u0BB1\u0BC8", # TAMIL SYLLABLE RRAI
|
6596
|
+
"\u0BB1\u0BCA", # TAMIL SYLLABLE RRO
|
6597
|
+
"\u0BB1\u0BCB", # TAMIL SYLLABLE RROO
|
6598
|
+
"\u0BB1\u0BCC", # TAMIL SYLLABLE RRAU
|
6599
|
+
"\u0BA9\u0BBE", # TAMIL SYLLABLE NNNAA
|
6600
|
+
"\u0BA9\u0BBF", # TAMIL SYLLABLE NNNI
|
6601
|
+
"\u0BA9\u0BC0", # TAMIL SYLLABLE NNNII
|
6602
|
+
"\u0BA9\u0BC1", # TAMIL SYLLABLE NNNU
|
6603
|
+
"\u0BA9\u0BC2", # TAMIL SYLLABLE NNNUU
|
6604
|
+
"\u0BA9\u0BC6", # TAMIL SYLLABLE NNNE
|
6605
|
+
"\u0BA9\u0BC7", # TAMIL SYLLABLE NNNEE
|
6606
|
+
"\u0BA9\u0BC8", # TAMIL SYLLABLE NNNAI
|
6607
|
+
"\u0BA9\u0BCA", # TAMIL SYLLABLE NNNO
|
6608
|
+
"\u0BA9\u0BCB", # TAMIL SYLLABLE NNNOO
|
6609
|
+
"\u0BA9\u0BCC", # TAMIL SYLLABLE NNNAU
|
6610
|
+
"\u0B9C\u0BBE", # TAMIL SYLLABLE JAA
|
6611
|
+
"\u0B9C\u0BBF", # TAMIL SYLLABLE JI
|
6612
|
+
"\u0B9C\u0BC0", # TAMIL SYLLABLE JII
|
6613
|
+
"\u0B9C\u0BC1", # TAMIL SYLLABLE JU
|
6614
|
+
"\u0B9C\u0BC2", # TAMIL SYLLABLE JUU
|
6615
|
+
"\u0B9C\u0BC6", # TAMIL SYLLABLE JE
|
6616
|
+
"\u0B9C\u0BC7", # TAMIL SYLLABLE JEE
|
6617
|
+
"\u0B9C\u0BC8", # TAMIL SYLLABLE JAI
|
6618
|
+
"\u0B9C\u0BCA", # TAMIL SYLLABLE JO
|
6619
|
+
"\u0B9C\u0BCB", # TAMIL SYLLABLE JOO
|
6620
|
+
"\u0B9C\u0BCC", # TAMIL SYLLABLE JAU
|
6621
|
+
"\u0BB6\u0BBE", # TAMIL SYLLABLE SHAA
|
6622
|
+
"\u0BB6\u0BBF", # TAMIL SYLLABLE SHI
|
6623
|
+
"\u0BB6\u0BC0", # TAMIL SYLLABLE SHII
|
6624
|
+
"\u0BB6\u0BC1", # TAMIL SYLLABLE SHU
|
6625
|
+
"\u0BB6\u0BC2", # TAMIL SYLLABLE SHUU
|
6626
|
+
"\u0BB6\u0BC6", # TAMIL SYLLABLE SHE
|
6627
|
+
"\u0BB6\u0BC7", # TAMIL SYLLABLE SHEE
|
6628
|
+
"\u0BB6\u0BC8", # TAMIL SYLLABLE SHAI
|
6629
|
+
"\u0BB6\u0BCA", # TAMIL SYLLABLE SHO
|
6630
|
+
"\u0BB6\u0BCB", # TAMIL SYLLABLE SHOO
|
6631
|
+
"\u0BB6\u0BCC", # TAMIL SYLLABLE SHAU
|
6632
|
+
"\u0BB7\u0BBE", # TAMIL SYLLABLE SSAA
|
6633
|
+
"\u0BB7\u0BBF", # TAMIL SYLLABLE SSI
|
6634
|
+
"\u0BB7\u0BC0", # TAMIL SYLLABLE SSII
|
6635
|
+
"\u0BB7\u0BC1", # TAMIL SYLLABLE SSU
|
6636
|
+
"\u0BB7\u0BC2", # TAMIL SYLLABLE SSUU
|
6637
|
+
"\u0BB7\u0BC6", # TAMIL SYLLABLE SSE
|
6638
|
+
"\u0BB7\u0BC7", # TAMIL SYLLABLE SSEE
|
6639
|
+
"\u0BB7\u0BC8", # TAMIL SYLLABLE SSAI
|
6640
|
+
"\u0BB7\u0BCA", # TAMIL SYLLABLE SSO
|
6641
|
+
"\u0BB7\u0BCB", # TAMIL SYLLABLE SSOO
|
6642
|
+
"\u0BB7\u0BCC", # TAMIL SYLLABLE SSAU
|
6643
|
+
"\u0BB8\u0BBE", # TAMIL SYLLABLE SAA
|
6644
|
+
"\u0BB8\u0BBF", # TAMIL SYLLABLE SI
|
6645
|
+
"\u0BB8\u0BC0", # TAMIL SYLLABLE SII
|
6646
|
+
"\u0BB8\u0BC1", # TAMIL SYLLABLE SU
|
6647
|
+
"\u0BB8\u0BC2", # TAMIL SYLLABLE SUU
|
6648
|
+
"\u0BB8\u0BC6", # TAMIL SYLLABLE SE
|
6649
|
+
"\u0BB8\u0BC7", # TAMIL SYLLABLE SEE
|
6650
|
+
"\u0BB8\u0BC8", # TAMIL SYLLABLE SAI
|
6651
|
+
"\u0BB8\u0BCA", # TAMIL SYLLABLE SO
|
6652
|
+
"\u0BB8\u0BCB", # TAMIL SYLLABLE SOO
|
6653
|
+
"\u0BB8\u0BCC", # TAMIL SYLLABLE SAU
|
6654
|
+
"\u0BB9\u0BBE", # TAMIL SYLLABLE HAA
|
6655
|
+
"\u0BB9\u0BBF", # TAMIL SYLLABLE HI
|
6656
|
+
"\u0BB9\u0BC0", # TAMIL SYLLABLE HII
|
6657
|
+
"\u0BB9\u0BC1", # TAMIL SYLLABLE HU
|
6658
|
+
"\u0BB9\u0BC2", # TAMIL SYLLABLE HUU
|
6659
|
+
"\u0BB9\u0BC6", # TAMIL SYLLABLE HE
|
6660
|
+
"\u0BB9\u0BC7", # TAMIL SYLLABLE HEE
|
6661
|
+
"\u0BB9\u0BC8", # TAMIL SYLLABLE HAI
|
6662
|
+
"\u0BB9\u0BCA", # TAMIL SYLLABLE HO
|
6663
|
+
"\u0BB9\u0BCB", # TAMIL SYLLABLE HOO
|
6664
|
+
"\u0BB9\u0BCC", # TAMIL SYLLABLE HAU
|
6665
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSA
|
6666
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSAA
|
6667
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSI
|
6668
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSII
|
6669
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSU
|
6670
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSUU
|
6671
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSE
|
6672
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSEE
|
6673
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSAI
|
6674
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSO
|
6675
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSOO
|
6676
|
+
"\u0B95\u0BCD", # TAMIL SYLLABLE KSSAU
|
6677
|
+
"\u0BB6\u0BCD", # TAMIL SYLLABLE SHRII
|
6678
|
+
# Sinhala medial consonants and "reph" form.
|
6679
|
+
# Provisional 2010-05-13, Approved 2011-08-05
|
6680
|
+
"\u0DCA\u200D", # SINHALA CONSONANT SIGN YANSAYA
|
6681
|
+
"\u0DCA\u200D", # SINHALA CONSONANT SIGN RAKAARAANSAYA
|
6682
|
+
"\u0DBB\u0DCA", # SINHALA CONSONANT SIGN REPAYA
|
6683
|
+
# Georgian letter plus accent sequence.
|
6684
|
+
# This is part of the original set of approved named sequences
|
6685
|
+
# for Unicode 4.1. 2005.
|
6686
|
+
"\u10E3\u0302", # GEORGIAN LETTER U-BRJGU
|
6687
|
+
# Khmer subjoined forms and other sequences.
|
6688
|
+
# These are part of the original set of approved named sequences
|
6689
|
+
# for Unicode 4.1. 2005.
|
6690
|
+
"\u17D2\u1780", # KHMER CONSONANT SIGN COENG KA
|
6691
|
+
"\u17D2\u1781", # KHMER CONSONANT SIGN COENG KHA
|
6692
|
+
"\u17D2\u1782", # KHMER CONSONANT SIGN COENG KO
|
6693
|
+
"\u17D2\u1783", # KHMER CONSONANT SIGN COENG KHO
|
6694
|
+
"\u17D2\u1784", # KHMER CONSONANT SIGN COENG NGO
|
6695
|
+
"\u17D2\u1785", # KHMER CONSONANT SIGN COENG CA
|
6696
|
+
"\u17D2\u1786", # KHMER CONSONANT SIGN COENG CHA
|
6697
|
+
"\u17D2\u1787", # KHMER CONSONANT SIGN COENG CO
|
6698
|
+
"\u17D2\u1788", # KHMER CONSONANT SIGN COENG CHO
|
6699
|
+
"\u17D2\u1789", # KHMER CONSONANT SIGN COENG NYO
|
6700
|
+
"\u17D2\u178A", # KHMER CONSONANT SIGN COENG DA
|
6701
|
+
"\u17D2\u178B", # KHMER CONSONANT SIGN COENG TTHA
|
6702
|
+
"\u17D2\u178C", # KHMER CONSONANT SIGN COENG DO
|
6703
|
+
"\u17D2\u178D", # KHMER CONSONANT SIGN COENG TTHO
|
6704
|
+
"\u17D2\u178E", # KHMER CONSONANT SIGN COENG NA
|
6705
|
+
"\u17D2\u178F", # KHMER CONSONANT SIGN COENG TA
|
6706
|
+
"\u17D2\u1790", # KHMER CONSONANT SIGN COENG THA
|
6707
|
+
"\u17D2\u1791", # KHMER CONSONANT SIGN COENG TO
|
6708
|
+
"\u17D2\u1792", # KHMER CONSONANT SIGN COENG THO
|
6709
|
+
"\u17D2\u1793", # KHMER CONSONANT SIGN COENG NO
|
6710
|
+
"\u17D2\u1794", # KHMER CONSONANT SIGN COENG BA
|
6711
|
+
"\u17D2\u1795", # KHMER CONSONANT SIGN COENG PHA
|
6712
|
+
"\u17D2\u1796", # KHMER CONSONANT SIGN COENG PO
|
6713
|
+
"\u17D2\u1797", # KHMER CONSONANT SIGN COENG PHO
|
6714
|
+
"\u17D2\u1798", # KHMER CONSONANT SIGN COENG MO
|
6715
|
+
"\u17D2\u1799", # KHMER CONSONANT SIGN COENG YO
|
6716
|
+
"\u17D2\u179A", # KHMER CONSONANT SIGN COENG RO
|
6717
|
+
"\u17D2\u179B", # KHMER CONSONANT SIGN COENG LO
|
6718
|
+
"\u17D2\u179C", # KHMER CONSONANT SIGN COENG VO
|
6719
|
+
"\u17D2\u179D", # KHMER CONSONANT SIGN COENG SHA
|
6720
|
+
"\u17D2\u179E", # KHMER CONSONANT SIGN COENG SSA
|
6721
|
+
"\u17D2\u179F", # KHMER CONSONANT SIGN COENG SA
|
6722
|
+
"\u17D2\u17A0", # KHMER CONSONANT SIGN COENG HA
|
6723
|
+
"\u17D2\u17A1", # KHMER CONSONANT SIGN COENG LA
|
6724
|
+
"\u17D2\u17A2", # KHMER VOWEL SIGN COENG QA
|
6725
|
+
"\u17D2\u17A7", # KHMER INDEPENDENT VOWEL SIGN COENG QU
|
6726
|
+
"\u17D2\u17AB", # KHMER INDEPENDENT VOWEL SIGN COENG RY
|
6727
|
+
"\u17D2\u17AC", # KHMER INDEPENDENT VOWEL SIGN COENG RYY
|
6728
|
+
"\u17D2\u17AF", # KHMER INDEPENDENT VOWEL SIGN COENG QE
|
6729
|
+
"\u17BB\u17C6", # KHMER VOWEL SIGN OM
|
6730
|
+
"\u17B6\u17C6", # KHMER VOWEL SIGN AAM
|
6731
|
+
# Entries for JIS X 0213 compatibility mapping.
|
6732
|
+
# Provisional 2008-11-07, Approved 2010-05-14
|
6733
|
+
#
|
6734
|
+
# Two of these were part of the original set of approved named sequences
|
6735
|
+
# for Unicode 4.1. 2005.
|
6736
|
+
"\u304B\u309A", # HIRAGANA LETTER BIDAKUON NGA
|
6737
|
+
"\u304D\u309A", # HIRAGANA LETTER BIDAKUON NGI
|
6738
|
+
"\u304F\u309A", # HIRAGANA LETTER BIDAKUON NGU
|
6739
|
+
"\u3051\u309A", # HIRAGANA LETTER BIDAKUON NGE
|
6740
|
+
"\u3053\u309A", # HIRAGANA LETTER BIDAKUON NGO
|
6741
|
+
"\u30AB\u309A", # KATAKANA LETTER BIDAKUON NGA
|
6742
|
+
"\u30AD\u309A", # KATAKANA LETTER BIDAKUON NGI
|
6743
|
+
"\u30AF\u309A", # KATAKANA LETTER BIDAKUON NGU
|
6744
|
+
"\u30B1\u309A", # KATAKANA LETTER BIDAKUON NGE
|
6745
|
+
"\u30B3\u309A", # KATAKANA LETTER BIDAKUON NGO
|
6746
|
+
"\u30BB\u309A", # KATAKANA LETTER AINU CE
|
6747
|
+
"\u30C4\u309A", # KATAKANA LETTER AINU TU
|
6748
|
+
"\u30C8\u309A", # KATAKANA LETTER AINU TO
|
6749
|
+
"\u31F7\u309A", # KATAKANA LETTER AINU P
|
6750
|
+
"\u02E5\u02E9", # MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR
|
6751
|
+
"\u02E9\u02E5", # MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR
|
6752
|
+
]
|
6753
|
+
test_data.each do |string|
|
6754
|
+
assert @validator.valid_encoding?(string), "_named_sequences A: #{string}"
|
6755
|
+
assert string.force_encoding("UTF-8").valid_encoding?,
|
6756
|
+
"_named_sequences B: #{string}" if @vercheck
|
6757
|
+
end
|
6758
|
+
end
|
6185
6759
|
|
6760
|
+
end # of class
|
data/utf8_validator.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: utf8_validator 1.0.13 ruby lib
|
5
|
+
# stub: utf8_validator 1.0.14 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "utf8_validator"
|
9
|
-
s.version = "1.0.13"
|
9
|
+
s.version = "1.0.14"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Guy Allard"]
|
14
|
-
s.date = "2016-07-
|
14
|
+
s.date = "2016-07-13"
|
15
15
|
s.description = "A State Machine implementation of a UTF-8 Encoding \nValidation algorithm."
|
16
16
|
s.email = "allard.guy.m@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -28,9 +28,12 @@ Gem::Specification.new do |s|
|
|
28
28
|
"lib/validation/errors.rb",
|
29
29
|
"lib/validation/validator.rb",
|
30
30
|
"test/helper.rb",
|
31
|
+
"test/test_code_points.rb",
|
31
32
|
"test/test_raise_request.rb",
|
33
|
+
"test/test_surrogate_half_first_point.rb",
|
32
34
|
"test/test_utf8_validator.rb",
|
33
|
-
"utf8_validator.gemspec"
|
35
|
+
"utf8_validator.gemspec",
|
36
|
+
"utils/gencp.rb"
|
34
37
|
]
|
35
38
|
s.homepage = "http://github.com/gmallard/utf8_validator"
|
36
39
|
s.licenses = ["MIT"]
|