utf8_validator 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,8 @@
1
1
  # encoding: utf-8
2
2
  #
3
+ #--
4
+ # Copyright (c) 2016 Guy Allard
5
+ #--
3
6
  if Kernel.respond_to?(:require_relative)
4
7
  require_relative("./helper")
5
8
  else
@@ -194,7 +197,7 @@ class TestUtf8Validator < Test::Unit::TestCase
194
197
 
195
198
  #--
196
199
  # I do not see a need to test UTF-16 surrogate pairs. They are guaranteed
197
- # to always fail if the preceding test succeeds. This is because the
200
+ # to always fail if the preceding test succeeds. This is because the
198
201
  # preceding test data values are always the first surrogate of the pair.
199
202
  #
200
203
  # UTF-16 surrogates are clearly something I do not understand.
@@ -384,7 +387,7 @@ straight from the Unicode 6.0 spec. See page 92.
384
387
  ]
385
388
  good_data.each do |string|
386
389
  assert @validator.valid_encoding?(string), "good unicode specs 01: #{string}"
387
- assert string.force_encoding("UTF-8").valid_encoding?,
390
+ assert string.force_encoding("UTF-8").valid_encoding?,
388
391
  "good unicode specs 01 19: #{string}" if @vercheck
389
392
  end
390
393
 
@@ -6181,5 +6184,577 @@ http://www.unicode.org/versions/Unicode7.0.0/
6181
6184
  end
6182
6185
  end
6183
6186
 
6184
- end
6187
+ def test_0810_zero_len_fillers
6188
+ test_data = [
6189
+ "A\u200bZ", # U+200B ZERO WIDTH SPACE
6190
+ "A\u200cZ", # U+200C ZERO WIDTH NON-JOINER
6191
+ "A\u200dZ", # U+200D ZERO WIDTH JOINER
6192
+ ]
6193
+ test_data.each do |string|
6194
+ assert @validator.valid_encoding?(string), "zero_len_fillers A: #{string}"
6195
+ assert string.force_encoding("UTF-8").valid_encoding?,
6196
+ "zero_len_fillers B: #{string}" if @vercheck
6197
+ end
6198
+ end
6199
+
6200
+ # grapheme clusters
6201
+ def test_0850_grapheme_clusters
6202
+ test_data = [
6203
+ "\u0067", # 0067 ( g ) LATIN SMALL LETTER G
6204
+ "\u0308", # 0308 ( ◌̈ ) COMBINING DIAERESIS
6205
+ "\u0067\u0308", # Combined
6206
+ "\uac01", # AC01 ( 각 ) HANGUL SYLLABLE GAG
6207
+ "\u1100", # 1100 ( ᄀ ) HANGUL CHOSEONG KIYEOK
6208
+ "\u1161", # 1161 ( ᅡ ) HANGUL JUNGSEONG A
6209
+ "\u11a8", # 11A8 ( ᆨ ) HANGUL JONGSEONG KIYEOK
6210
+ "\uac01\u1100\u1161\u11a8", # Combined
6211
+ # THAI
6212
+ "\u0e01", # ก 0E01 ( ก ) THAI CHARACTER KO KAI Thai ko
6213
+ # THAI
6214
+ "\u0e01", # ก 0E01 ( ก ) THAI CHARACTER KO KAI Thai ko
6215
+ "\u0e33", # 0E33 ( ำ ) THAI CHARACTER SARA AM
6216
+ "\u0e01\u0e33",
6217
+ # Extended grapheme clusters
6218
+ "\u0ba8", # நி 0BA8 ( ந ) TAMIL LETTER NA
6219
+ "\u0bbf", # 0BBF ( ி ) TAMIL VOWEL SIGN I Tamil ni
6220
+ "\u0e40", # เ 0E40 ( เ ) THAI CHARACTER SARA E Thai e
6221
+ "\u0e01", # กำ 0E01 ( ก ) THAI CHARACTER KO KAI
6222
+ "\u0e33", # 0E33 ( ำ ) THAI CHARACTER SARA AM Thai kam
6223
+ "\u0937", # षि 0937 ( ष ) DEVANAGARI LETTER SSA
6224
+ "\u093f", # 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari ssi
6225
+ "\u0ba8\u0bbf\u0e40\u0e01\u0e33\u0937\u093f", # Combined
6226
+ # Legacy grapheme clusters
6227
+ "\u0e33", # ำ 0E33 ( ำ ) THAI CHARACTER SARA AM Thai am
6228
+ "\u0937", # ष 0937 ( ष ) DEVANAGARI LETTER SSA Devanagari ssa
6229
+ "\u093f", # ि 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari i
6230
+ "\u0e33\u0937\u093f", # Combined
6231
+ # Tailored grapheme clusters
6232
+ "\u0063", # ch 0063 ( c ) LATIN SMALL LETTER C
6233
+ "\u0068", # 0068 ( h ) LATIN SMALL LETTER H Slovak ch digraph
6234
+ "\u0063\u0068", # Combined
6235
+ "\u006b", # kw 006B ( k ) LATIN SMALL LETTER K
6236
+ "\u02b7", # 02B7 ( ʷ ) MODIFIER LETTER SMALL W sequence with letter modifier
6237
+ "\u006b\u02b7", # Combined
6238
+ "\u0915", # क्षि 0915 ( क ) DEVANAGARI LETTER KA
6239
+ "\u094d", # 094D ( ् ) DEVANAGARI SIGN VIRAMA
6240
+ "\u0937", # 0937 ( ष ) DEVANAGARI LETTER SSA
6241
+ "\u093f", # 093F ( ि ) DEVANAGARI VOWEL SIGN I Devanagari kshi
6242
+ "\u0915\u094d\u0937\u093f",
6243
+ ]
6244
+ test_data.each do |string|
6245
+ assert @validator.valid_encoding?(string), "grapheme clusters A: #{string}"
6246
+ assert string.force_encoding("UTF-8").valid_encoding?,
6247
+ "grapheme clusters B: #{string}" if @vercheck
6248
+ end
6249
+ end # of method
6250
+
6251
+ def test_0890_named_sequences
6252
+ test_data = [
6253
+ # NamedSequences-9.0.0.txt
6254
+ # Date: 2016-05-26, 00:00:00 GMT [KW, LI]
6255
+ # © 2016 Unicode®, Inc.
6256
+ # For terms of use, see http://www.unicode.org/terms_of_use.html
6257
+ #
6258
+ # Unicode Character Database
6259
+ # For documentation, see http://www.unicode.org/reports/tr44/
6260
+ #
6261
+ # Unicode Named Character Sequences
6262
+ #
6263
+ # This file is a normative contributory data file in the Unicode
6264
+ # Character Database.
6265
+ #
6266
+ # Format:
6267
+ # Name of Sequence; Code Point Sequence for USI
6268
+ #
6269
+ # Code point sequences in the Unicode Character Database
6270
+ # use spaces as delimiters. The corresponding format for a
6271
+ # UCS Sequence Identifier (USI) in ISO/IEC 10646 uses
6272
+ # comma delimitation and angle brackets. Thus, a Unicode
6273
+ # named character sequence of the form:
6274
+ #
6275
+ # EXAMPLE NAME;1000 1001 1002
6276
+ #
6277
+ # in this data file, would correspond to an ISO/IEC 10646 USI
6278
+ # as follows:
6279
+ #
6280
+ # <1000, 1001, 1002>
6281
+ #
6282
+ # For more information, see UAX #34: Unicode Named Character
6283
+ # Sequences, at http://www.unicode.org/unicode/reports/tr34/
6284
+ #
6285
+ # Note: The order of entries in this file is not significant.
6286
+ # However, entries are generally in script order corresponding
6287
+ # to block order in the Unicode Standard, to make it easier
6288
+ # to find entries in the list.
6289
+ # ================================================
6290
+ # Latin letter plus accent combinations.
6291
+ # These are part of the original set of approved named sequences
6292
+ # for Unicode 4.1. 2005.
6293
+ "\u0100\u0300", # LATIN CAPITAL LETTER A WITH MACRON AND GRAVE
6294
+ "\u0101\u0300", # LATIN SMALL LETTER A WITH MACRON AND GRAVE
6295
+ "\u0045\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW
6296
+ "\u0065\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW
6297
+ "\u00C8\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE
6298
+ "\u00E8\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE
6299
+ "\u00C9\u0329", # LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE
6300
+ "\u00E9\u0329", # LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE
6301
+ "\u00CA\u0304", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON
6302
+ "\u00EA\u0304", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON
6303
+ "\u00CA\u030C", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON
6304
+ "\u00EA\u030C", # LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON
6305
+ "\u012A\u0300", # LATIN CAPITAL LETTER I WITH MACRON AND GRAVE
6306
+ "\u012B\u0300", # LATIN SMALL LETTER I WITH MACRON AND GRAVE
6307
+ "\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE (truncated; full sequence is 0069 0307 0301)
6308
+ "\u006E\u0360", # LATIN SMALL LETTER NG WITH TILDE ABOVE (truncated; full sequence is 006E 0360 0067)
6309
+ "\u004F\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW
6310
+ "\u006F\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW
6311
+ "\u00D2\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE
6312
+ "\u00F2\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE
6313
+ "\u00D3\u0329", # LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE
6314
+ "\u00F3\u0329", # LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE
6315
+ "\u0053\u0329", # LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW
6316
+ "\u0073\u0329", # LATIN SMALL LETTER S WITH VERTICAL LINE BELOW
6317
+ "\u016A\u0300", # LATIN CAPITAL LETTER U WITH MACRON AND GRAVE
6318
+ "\u016B\u0300", # LATIN SMALL LETTER U WITH MACRON AND GRAVE
6319
+ # Additions for Lithuanian.
6320
+ # Provisional 2006-05-18, Approved 2007-10-19
6321
+ "\u0104\u0301", # LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE
6322
+ "\u0105\u0301", # LATIN SMALL LETTER A WITH OGONEK AND ACUTE
6323
+ "\u0104\u0303", # LATIN CAPITAL LETTER A WITH OGONEK AND TILDE
6324
+ "\u0105\u0303", # LATIN SMALL LETTER A WITH OGONEK AND TILDE
6325
+ "\u0118\u0301", # LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE
6326
+ "\u0119\u0301", # LATIN SMALL LETTER E WITH OGONEK AND ACUTE
6327
+ "\u0118\u0303", # LATIN CAPITAL LETTER E WITH OGONEK AND TILDE
6328
+ "\u0119\u0303", # LATIN SMALL LETTER E WITH OGONEK AND TILDE
6329
+ "\u0116\u0301", # LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE
6330
+ "\u0117\u0301", # LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE
6331
+ "\u0116\u0303", # LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE
6332
+ "\u0117\u0303", # LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE
6333
+ "\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE (truncated; full sequence is 0069 0307 0300)
6334
+ "\u0069\u0307", # LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE (truncated; full sequence is 0069 0307 0303)
6335
+ "\u012E\u0301", # LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE
6336
+ "\u012F\u0307", # LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE
6337
+ "\u012E\u0303", # LATIN CAPITAL LETTER I WITH OGONEK AND TILDE
6338
+ "\u012F\u0307", # LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE
6339
+ "\u004A\u0303", # LATIN CAPITAL LETTER J WITH TILDE
6340
+ "\u006A\u0307", # LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE
6341
+ "\u004C\u0303", # LATIN CAPITAL LETTER L WITH TILDE
6342
+ "\u006C\u0303", # LATIN SMALL LETTER L WITH TILDE
6343
+ "\u004D\u0303", # LATIN CAPITAL LETTER M WITH TILDE
6344
+ "\u006D\u0303", # LATIN SMALL LETTER M WITH TILDE
6345
+ "\u0052\u0303", # LATIN CAPITAL LETTER R WITH TILDE
6346
+ "\u0072\u0303", # LATIN SMALL LETTER R WITH TILDE
6347
+ "\u0172\u0301", # LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE
6348
+ "\u0173\u0301", # LATIN SMALL LETTER U WITH OGONEK AND ACUTE
6349
+ "\u0172\u0303", # LATIN CAPITAL LETTER U WITH OGONEK AND TILDE
6350
+ "\u0173\u0303", # LATIN SMALL LETTER U WITH OGONEK AND TILDE
6351
+ "\u016A\u0301", # LATIN CAPITAL LETTER U WITH MACRON AND ACUTE
6352
+ "\u016B\u0301", # LATIN SMALL LETTER U WITH MACRON AND ACUTE
6353
+ "\u016A\u0303", # LATIN CAPITAL LETTER U WITH MACRON AND TILDE
6354
+ "\u016B\u0303", # LATIN SMALL LETTER U WITH MACRON AND TILDE
6355
+ # Entries for JIS X 0213 compatibility mapping.
6356
+ # Provisional 2008-11-07, Approved 2010-05-14
6357
+ "\u00E6\u0300", # LATIN SMALL LETTER AE WITH GRAVE
6358
+ "\u0254\u0300", # LATIN SMALL LETTER OPEN O WITH GRAVE
6359
+ "\u0254\u0301", # LATIN SMALL LETTER OPEN O WITH ACUTE
6360
+ "\u028C\u0300", # LATIN SMALL LETTER TURNED V WITH GRAVE
6361
+ "\u028C\u0301", # LATIN SMALL LETTER TURNED V WITH ACUTE
6362
+ "\u0259\u0300", # LATIN SMALL LETTER SCHWA WITH GRAVE
6363
+ "\u0259\u0301", # LATIN SMALL LETTER SCHWA WITH ACUTE
6364
+ "\u025A\u0300", # LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE
6365
+ "\u025A\u0301", # LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE
6366
+ # Entries for Uyghur and Chagatai.
6367
+ # Provisional N/A, Approved 2012-11-08
6368
+ "\u0626\u0627", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH ALEF
6369
+ "\u0626\u0648", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH WAW
6370
+ "\u0626\u0649", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA
6371
+ "\u0626\u06C6", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH OE
6372
+ "\u0626\u06C7", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH U
6373
+ "\u0626\u06C8", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH YU
6374
+ "\u0626\u06D0", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH E
6375
+ "\u0626\u06D5", # ARABIC SEQUENCE YEH WITH HAMZA ABOVE WITH AE
6376
+ "\u0646\u06A9", # ARABIC SEQUENCE NOON WITH KEHEH
6377
+ # Entry for a Bangla entity.
6378
+ # Provisional 2009-08-10, Approved 2010-05-14
6379
+ #
6380
+ # Note that this same sequence is also used for the ASSAMESE LETTER KSSA.
6381
+ "\u0995\u09CD", # BENGALI LETTER KHINYA
6382
+ # Additions for Tamil.
6383
+ # Provisional 2008-02-08, Approved 2009-08-14
6384
+ #
6385
+ # A visual display of the Tamil named character sequences is available
6386
+ # in the documentation for the Unicode Standard. See Section 12.6, Tamil in
6387
+ # http://www.unicode.org/versions/latest/
6388
+ "\u0B95\u0BCD", # TAMIL CONSONANT K
6389
+ "\u0B99\u0BCD", # TAMIL CONSONANT NG
6390
+ "\u0B9A\u0BCD", # TAMIL CONSONANT C
6391
+ "\u0B9E\u0BCD", # TAMIL CONSONANT NY
6392
+ "\u0B9F\u0BCD", # TAMIL CONSONANT TT
6393
+ "\u0BA3\u0BCD", # TAMIL CONSONANT NN
6394
+ "\u0BA4\u0BCD", # TAMIL CONSONANT T
6395
+ "\u0BA8\u0BCD", # TAMIL CONSONANT N
6396
+ "\u0BAA\u0BCD", # TAMIL CONSONANT P
6397
+ "\u0BAE\u0BCD", # TAMIL CONSONANT M
6398
+ "\u0BAF\u0BCD", # TAMIL CONSONANT Y
6399
+ "\u0BB0\u0BCD", # TAMIL CONSONANT R
6400
+ "\u0BB2\u0BCD", # TAMIL CONSONANT L
6401
+ "\u0BB5\u0BCD", # TAMIL CONSONANT V
6402
+ "\u0BB4\u0BCD", # TAMIL CONSONANT LLL
6403
+ "\u0BB3\u0BCD", # TAMIL CONSONANT LL
6404
+ "\u0BB1\u0BCD", # TAMIL CONSONANT RR
6405
+ "\u0BA9\u0BCD", # TAMIL CONSONANT NNN
6406
+ "\u0B9C\u0BCD", # TAMIL CONSONANT J
6407
+ "\u0BB6\u0BCD", # TAMIL CONSONANT SH
6408
+ "\u0BB7\u0BCD", # TAMIL CONSONANT SS
6409
+ "\u0BB8\u0BCD", # TAMIL CONSONANT S
6410
+ "\u0BB9\u0BCD", # TAMIL CONSONANT H
6411
+ "\u0B95\u0BCD", # TAMIL CONSONANT KSS
6412
+ "\u0B95\u0BBE", # TAMIL SYLLABLE KAA
6413
+ "\u0B95\u0BBF", # TAMIL SYLLABLE KI
6414
+ "\u0B95\u0BC0", # TAMIL SYLLABLE KII
6415
+ "\u0B95\u0BC1", # TAMIL SYLLABLE KU
6416
+ "\u0B95\u0BC2", # TAMIL SYLLABLE KUU
6417
+ "\u0B95\u0BC6", # TAMIL SYLLABLE KE
6418
+ "\u0B95\u0BC7", # TAMIL SYLLABLE KEE
6419
+ "\u0B95\u0BC8", # TAMIL SYLLABLE KAI
6420
+ "\u0B95\u0BCA", # TAMIL SYLLABLE KO
6421
+ "\u0B95\u0BCB", # TAMIL SYLLABLE KOO
6422
+ "\u0B95\u0BCC", # TAMIL SYLLABLE KAU
6423
+ "\u0B99\u0BBE", # TAMIL SYLLABLE NGAA
6424
+ "\u0B99\u0BBF", # TAMIL SYLLABLE NGI
6425
+ "\u0B99\u0BC0", # TAMIL SYLLABLE NGII
6426
+ "\u0B99\u0BC1", # TAMIL SYLLABLE NGU
6427
+ "\u0B99\u0BC2", # TAMIL SYLLABLE NGUU
6428
+ "\u0B99\u0BC6", # TAMIL SYLLABLE NGE
6429
+ "\u0B99\u0BC7", # TAMIL SYLLABLE NGEE
6430
+ "\u0B99\u0BC8", # TAMIL SYLLABLE NGAI
6431
+ "\u0B99\u0BCA", # TAMIL SYLLABLE NGO
6432
+ "\u0B99\u0BCB", # TAMIL SYLLABLE NGOO
6433
+ "\u0B99\u0BCC", # TAMIL SYLLABLE NGAU
6434
+ "\u0B9A\u0BBE", # TAMIL SYLLABLE CAA
6435
+ "\u0B9A\u0BBF", # TAMIL SYLLABLE CI
6436
+ "\u0B9A\u0BC0", # TAMIL SYLLABLE CII
6437
+ "\u0B9A\u0BC1", # TAMIL SYLLABLE CU
6438
+ "\u0B9A\u0BC2", # TAMIL SYLLABLE CUU
6439
+ "\u0B9A\u0BC6", # TAMIL SYLLABLE CE
6440
+ "\u0B9A\u0BC7", # TAMIL SYLLABLE CEE
6441
+ "\u0B9A\u0BC8", # TAMIL SYLLABLE CAI
6442
+ "\u0B9A\u0BCA", # TAMIL SYLLABLE CO
6443
+ "\u0B9A\u0BCB", # TAMIL SYLLABLE COO
6444
+ "\u0B9A\u0BCC", # TAMIL SYLLABLE CAU
6445
+ "\u0B9E\u0BBE", # TAMIL SYLLABLE NYAA
6446
+ "\u0B9E\u0BBF", # TAMIL SYLLABLE NYI
6447
+ "\u0B9E\u0BC0", # TAMIL SYLLABLE NYII
6448
+ "\u0B9E\u0BC1", # TAMIL SYLLABLE NYU
6449
+ "\u0B9E\u0BC2", # TAMIL SYLLABLE NYUU
6450
+ "\u0B9E\u0BC6", # TAMIL SYLLABLE NYE
6451
+ "\u0B9E\u0BC7", # TAMIL SYLLABLE NYEE
6452
+ "\u0B9E\u0BC8", # TAMIL SYLLABLE NYAI
6453
+ "\u0B9E\u0BCA", # TAMIL SYLLABLE NYO
6454
+ "\u0B9E\u0BCB", # TAMIL SYLLABLE NYOO
6455
+ "\u0B9E\u0BCC", # TAMIL SYLLABLE NYAU
6456
+ "\u0B9F\u0BBE", # TAMIL SYLLABLE TTAA
6457
+ "\u0B9F\u0BBF", # TAMIL SYLLABLE TTI
6458
+ "\u0B9F\u0BC0", # TAMIL SYLLABLE TTII
6459
+ "\u0B9F\u0BC1", # TAMIL SYLLABLE TTU
6460
+ "\u0B9F\u0BC2", # TAMIL SYLLABLE TTUU
6461
+ "\u0B9F\u0BC6", # TAMIL SYLLABLE TTE
6462
+ "\u0B9F\u0BC7", # TAMIL SYLLABLE TTEE
6463
+ "\u0B9F\u0BC8", # TAMIL SYLLABLE TTAI
6464
+ "\u0B9F\u0BCA", # TAMIL SYLLABLE TTO
6465
+ "\u0B9F\u0BCB", # TAMIL SYLLABLE TTOO
6466
+ "\u0B9F\u0BCC", # TAMIL SYLLABLE TTAU
6467
+ "\u0BA3\u0BBE", # TAMIL SYLLABLE NNAA
6468
+ "\u0BA3\u0BBF", # TAMIL SYLLABLE NNI
6469
+ "\u0BA3\u0BC0", # TAMIL SYLLABLE NNII
6470
+ "\u0BA3\u0BC1", # TAMIL SYLLABLE NNU
6471
+ "\u0BA3\u0BC2", # TAMIL SYLLABLE NNUU
6472
+ "\u0BA3\u0BC6", # TAMIL SYLLABLE NNE
6473
+ "\u0BA3\u0BC7", # TAMIL SYLLABLE NNEE
6474
+ "\u0BA3\u0BC8", # TAMIL SYLLABLE NNAI
6475
+ "\u0BA3\u0BCA", # TAMIL SYLLABLE NNO
6476
+ "\u0BA3\u0BCB", # TAMIL SYLLABLE NNOO
6477
+ "\u0BA3\u0BCC", # TAMIL SYLLABLE NNAU
6478
+ "\u0BA4\u0BBE", # TAMIL SYLLABLE TAA
6479
+ "\u0BA4\u0BBF", # TAMIL SYLLABLE TI
6480
+ "\u0BA4\u0BC0", # TAMIL SYLLABLE TII
6481
+ "\u0BA4\u0BC1", # TAMIL SYLLABLE TU
6482
+ "\u0BA4\u0BC2", # TAMIL SYLLABLE TUU
6483
+ "\u0BA4\u0BC6", # TAMIL SYLLABLE TE
6484
+ "\u0BA4\u0BC7", # TAMIL SYLLABLE TEE
6485
+ "\u0BA4\u0BC8", # TAMIL SYLLABLE TAI
6486
+ "\u0BA4\u0BCA", # TAMIL SYLLABLE TO
6487
+ "\u0BA4\u0BCB", # TAMIL SYLLABLE TOO
6488
+ "\u0BA4\u0BCC", # TAMIL SYLLABLE TAU
6489
+ "\u0BA8\u0BBE", # TAMIL SYLLABLE NAA
6490
+ "\u0BA8\u0BBF", # TAMIL SYLLABLE NI
6491
+ "\u0BA8\u0BC0", # TAMIL SYLLABLE NII
6492
+ "\u0BA8\u0BC1", # TAMIL SYLLABLE NU
6493
+ "\u0BA8\u0BC2", # TAMIL SYLLABLE NUU
6494
+ "\u0BA8\u0BC6", # TAMIL SYLLABLE NE
6495
+ "\u0BA8\u0BC7", # TAMIL SYLLABLE NEE
6496
+ "\u0BA8\u0BC8", # TAMIL SYLLABLE NAI
6497
+ "\u0BA8\u0BCA", # TAMIL SYLLABLE NO
6498
+ "\u0BA8\u0BCB", # TAMIL SYLLABLE NOO
6499
+ "\u0BA8\u0BCC", # TAMIL SYLLABLE NAU
6500
+ "\u0BAA\u0BBE", # TAMIL SYLLABLE PAA
6501
+ "\u0BAA\u0BBF", # TAMIL SYLLABLE PI
6502
+ "\u0BAA\u0BC0", # TAMIL SYLLABLE PII
6503
+ "\u0BAA\u0BC1", # TAMIL SYLLABLE PU
6504
+ "\u0BAA\u0BC2", # TAMIL SYLLABLE PUU
6505
+ "\u0BAA\u0BC6", # TAMIL SYLLABLE PE
6506
+ "\u0BAA\u0BC7", # TAMIL SYLLABLE PEE
6507
+ "\u0BAA\u0BC8", # TAMIL SYLLABLE PAI
6508
+ "\u0BAA\u0BCA", # TAMIL SYLLABLE PO
6509
+ "\u0BAA\u0BCB", # TAMIL SYLLABLE POO
6510
+ "\u0BAA\u0BCC", # TAMIL SYLLABLE PAU
6511
+ "\u0BAE\u0BBE", # TAMIL SYLLABLE MAA
6512
+ "\u0BAE\u0BBF", # TAMIL SYLLABLE MI
6513
+ "\u0BAE\u0BC0", # TAMIL SYLLABLE MII
6514
+ "\u0BAE\u0BC1", # TAMIL SYLLABLE MU
6515
+ "\u0BAE\u0BC2", # TAMIL SYLLABLE MUU
6516
+ "\u0BAE\u0BC6", # TAMIL SYLLABLE ME
6517
+ "\u0BAE\u0BC7", # TAMIL SYLLABLE MEE
6518
+ "\u0BAE\u0BC8", # TAMIL SYLLABLE MAI
6519
+ "\u0BAE\u0BCA", # TAMIL SYLLABLE MO
6520
+ "\u0BAE\u0BCB", # TAMIL SYLLABLE MOO
6521
+ "\u0BAE\u0BCC", # TAMIL SYLLABLE MAU
6522
+ "\u0BAF\u0BBE", # TAMIL SYLLABLE YAA
6523
+ "\u0BAF\u0BBF", # TAMIL SYLLABLE YI
6524
+ "\u0BAF\u0BC0", # TAMIL SYLLABLE YII
6525
+ "\u0BAF\u0BC1", # TAMIL SYLLABLE YU
6526
+ "\u0BAF\u0BC2", # TAMIL SYLLABLE YUU
6527
+ "\u0BAF\u0BC6", # TAMIL SYLLABLE YE
6528
+ "\u0BAF\u0BC7", # TAMIL SYLLABLE YEE
6529
+ "\u0BAF\u0BC8", # TAMIL SYLLABLE YAI
6530
+ "\u0BAF\u0BCA", # TAMIL SYLLABLE YO
6531
+ "\u0BAF\u0BCB", # TAMIL SYLLABLE YOO
6532
+ "\u0BAF\u0BCC", # TAMIL SYLLABLE YAU
6533
+ "\u0BB0\u0BBE", # TAMIL SYLLABLE RAA
6534
+ "\u0BB0\u0BBF", # TAMIL SYLLABLE RI
6535
+ "\u0BB0\u0BC0", # TAMIL SYLLABLE RII
6536
+ "\u0BB0\u0BC1", # TAMIL SYLLABLE RU
6537
+ "\u0BB0\u0BC2", # TAMIL SYLLABLE RUU
6538
+ "\u0BB0\u0BC6", # TAMIL SYLLABLE RE
6539
+ "\u0BB0\u0BC7", # TAMIL SYLLABLE REE
6540
+ "\u0BB0\u0BC8", # TAMIL SYLLABLE RAI
6541
+ "\u0BB0\u0BCA", # TAMIL SYLLABLE RO
6542
+ "\u0BB0\u0BCB", # TAMIL SYLLABLE ROO
6543
+ "\u0BB0\u0BCC", # TAMIL SYLLABLE RAU
6544
+ "\u0BB2\u0BBE", # TAMIL SYLLABLE LAA
6545
+ "\u0BB2\u0BBF", # TAMIL SYLLABLE LI
6546
+ "\u0BB2\u0BC0", # TAMIL SYLLABLE LII
6547
+ "\u0BB2\u0BC1", # TAMIL SYLLABLE LU
6548
+ "\u0BB2\u0BC2", # TAMIL SYLLABLE LUU
6549
+ "\u0BB2\u0BC6", # TAMIL SYLLABLE LE
6550
+ "\u0BB2\u0BC7", # TAMIL SYLLABLE LEE
6551
+ "\u0BB2\u0BC8", # TAMIL SYLLABLE LAI
6552
+ "\u0BB2\u0BCA", # TAMIL SYLLABLE LO
6553
+ "\u0BB2\u0BCB", # TAMIL SYLLABLE LOO
6554
+ "\u0BB2\u0BCC", # TAMIL SYLLABLE LAU
6555
+ "\u0BB5\u0BBE", # TAMIL SYLLABLE VAA
6556
+ "\u0BB5\u0BBF", # TAMIL SYLLABLE VI
6557
+ "\u0BB5\u0BC0", # TAMIL SYLLABLE VII
6558
+ "\u0BB5\u0BC1", # TAMIL SYLLABLE VU
6559
+ "\u0BB5\u0BC2", # TAMIL SYLLABLE VUU
6560
+ "\u0BB5\u0BC6", # TAMIL SYLLABLE VE
6561
+ "\u0BB5\u0BC7", # TAMIL SYLLABLE VEE
6562
+ "\u0BB5\u0BC8", # TAMIL SYLLABLE VAI
6563
+ "\u0BB5\u0BCA", # TAMIL SYLLABLE VO
6564
+ "\u0BB5\u0BCB", # TAMIL SYLLABLE VOO
6565
+ "\u0BB5\u0BCC", # TAMIL SYLLABLE VAU
6566
+ "\u0BB4\u0BBE", # TAMIL SYLLABLE LLLAA
6567
+ "\u0BB4\u0BBF", # TAMIL SYLLABLE LLLI
6568
+ "\u0BB4\u0BC0", # TAMIL SYLLABLE LLLII
6569
+ "\u0BB4\u0BC1", # TAMIL SYLLABLE LLLU
6570
+ "\u0BB4\u0BC2", # TAMIL SYLLABLE LLLUU
6571
+ "\u0BB4\u0BC6", # TAMIL SYLLABLE LLLE
6572
+ "\u0BB4\u0BC7", # TAMIL SYLLABLE LLLEE
6573
+ "\u0BB4\u0BC8", # TAMIL SYLLABLE LLLAI
6574
+ "\u0BB4\u0BCA", # TAMIL SYLLABLE LLLO
6575
+ "\u0BB4\u0BCB", # TAMIL SYLLABLE LLLOO
6576
+ "\u0BB4\u0BCC", # TAMIL SYLLABLE LLLAU
6577
+ "\u0BB3\u0BBE", # TAMIL SYLLABLE LLAA
6578
+ "\u0BB3\u0BBF", # TAMIL SYLLABLE LLI
6579
+ "\u0BB3\u0BC0", # TAMIL SYLLABLE LLII
6580
+ "\u0BB3\u0BC1", # TAMIL SYLLABLE LLU
6581
+ "\u0BB3\u0BC2", # TAMIL SYLLABLE LLUU
6582
+ "\u0BB3\u0BC6", # TAMIL SYLLABLE LLE
6583
+ "\u0BB3\u0BC7", # TAMIL SYLLABLE LLEE
6584
+ "\u0BB3\u0BC8", # TAMIL SYLLABLE LLAI
6585
+ "\u0BB3\u0BCA", # TAMIL SYLLABLE LLO
6586
+ "\u0BB3\u0BCB", # TAMIL SYLLABLE LLOO
6587
+ "\u0BB3\u0BCC", # TAMIL SYLLABLE LLAU
6588
+ "\u0BB1\u0BBE", # TAMIL SYLLABLE RRAA
6589
+ "\u0BB1\u0BBF", # TAMIL SYLLABLE RRI
6590
+ "\u0BB1\u0BC0", # TAMIL SYLLABLE RRII
6591
+ "\u0BB1\u0BC1", # TAMIL SYLLABLE RRU
6592
+ "\u0BB1\u0BC2", # TAMIL SYLLABLE RRUU
6593
+ "\u0BB1\u0BC6", # TAMIL SYLLABLE RRE
6594
+ "\u0BB1\u0BC7", # TAMIL SYLLABLE RREE
6595
+ "\u0BB1\u0BC8", # TAMIL SYLLABLE RRAI
6596
+ "\u0BB1\u0BCA", # TAMIL SYLLABLE RRO
6597
+ "\u0BB1\u0BCB", # TAMIL SYLLABLE RROO
6598
+ "\u0BB1\u0BCC", # TAMIL SYLLABLE RRAU
6599
+ "\u0BA9\u0BBE", # TAMIL SYLLABLE NNNAA
6600
+ "\u0BA9\u0BBF", # TAMIL SYLLABLE NNNI
6601
+ "\u0BA9\u0BC0", # TAMIL SYLLABLE NNNII
6602
+ "\u0BA9\u0BC1", # TAMIL SYLLABLE NNNU
6603
+ "\u0BA9\u0BC2", # TAMIL SYLLABLE NNNUU
6604
+ "\u0BA9\u0BC6", # TAMIL SYLLABLE NNNE
6605
+ "\u0BA9\u0BC7", # TAMIL SYLLABLE NNNEE
6606
+ "\u0BA9\u0BC8", # TAMIL SYLLABLE NNNAI
6607
+ "\u0BA9\u0BCA", # TAMIL SYLLABLE NNNO
6608
+ "\u0BA9\u0BCB", # TAMIL SYLLABLE NNNOO
6609
+ "\u0BA9\u0BCC", # TAMIL SYLLABLE NNNAU
6610
+ "\u0B9C\u0BBE", # TAMIL SYLLABLE JAA
6611
+ "\u0B9C\u0BBF", # TAMIL SYLLABLE JI
6612
+ "\u0B9C\u0BC0", # TAMIL SYLLABLE JII
6613
+ "\u0B9C\u0BC1", # TAMIL SYLLABLE JU
6614
+ "\u0B9C\u0BC2", # TAMIL SYLLABLE JUU
6615
+ "\u0B9C\u0BC6", # TAMIL SYLLABLE JE
6616
+ "\u0B9C\u0BC7", # TAMIL SYLLABLE JEE
6617
+ "\u0B9C\u0BC8", # TAMIL SYLLABLE JAI
6618
+ "\u0B9C\u0BCA", # TAMIL SYLLABLE JO
6619
+ "\u0B9C\u0BCB", # TAMIL SYLLABLE JOO
6620
+ "\u0B9C\u0BCC", # TAMIL SYLLABLE JAU
6621
+ "\u0BB6\u0BBE", # TAMIL SYLLABLE SHAA
6622
+ "\u0BB6\u0BBF", # TAMIL SYLLABLE SHI
6623
+ "\u0BB6\u0BC0", # TAMIL SYLLABLE SHII
6624
+ "\u0BB6\u0BC1", # TAMIL SYLLABLE SHU
6625
+ "\u0BB6\u0BC2", # TAMIL SYLLABLE SHUU
6626
+ "\u0BB6\u0BC6", # TAMIL SYLLABLE SHE
6627
+ "\u0BB6\u0BC7", # TAMIL SYLLABLE SHEE
6628
+ "\u0BB6\u0BC8", # TAMIL SYLLABLE SHAI
6629
+ "\u0BB6\u0BCA", # TAMIL SYLLABLE SHO
6630
+ "\u0BB6\u0BCB", # TAMIL SYLLABLE SHOO
6631
+ "\u0BB6\u0BCC", # TAMIL SYLLABLE SHAU
6632
+ "\u0BB7\u0BBE", # TAMIL SYLLABLE SSAA
6633
+ "\u0BB7\u0BBF", # TAMIL SYLLABLE SSI
6634
+ "\u0BB7\u0BC0", # TAMIL SYLLABLE SSII
6635
+ "\u0BB7\u0BC1", # TAMIL SYLLABLE SSU
6636
+ "\u0BB7\u0BC2", # TAMIL SYLLABLE SSUU
6637
+ "\u0BB7\u0BC6", # TAMIL SYLLABLE SSE
6638
+ "\u0BB7\u0BC7", # TAMIL SYLLABLE SSEE
6639
+ "\u0BB7\u0BC8", # TAMIL SYLLABLE SSAI
6640
+ "\u0BB7\u0BCA", # TAMIL SYLLABLE SSO
6641
+ "\u0BB7\u0BCB", # TAMIL SYLLABLE SSOO
6642
+ "\u0BB7\u0BCC", # TAMIL SYLLABLE SSAU
6643
+ "\u0BB8\u0BBE", # TAMIL SYLLABLE SAA
6644
+ "\u0BB8\u0BBF", # TAMIL SYLLABLE SI
6645
+ "\u0BB8\u0BC0", # TAMIL SYLLABLE SII
6646
+ "\u0BB8\u0BC1", # TAMIL SYLLABLE SU
6647
+ "\u0BB8\u0BC2", # TAMIL SYLLABLE SUU
6648
+ "\u0BB8\u0BC6", # TAMIL SYLLABLE SE
6649
+ "\u0BB8\u0BC7", # TAMIL SYLLABLE SEE
6650
+ "\u0BB8\u0BC8", # TAMIL SYLLABLE SAI
6651
+ "\u0BB8\u0BCA", # TAMIL SYLLABLE SO
6652
+ "\u0BB8\u0BCB", # TAMIL SYLLABLE SOO
6653
+ "\u0BB8\u0BCC", # TAMIL SYLLABLE SAU
6654
+ "\u0BB9\u0BBE", # TAMIL SYLLABLE HAA
6655
+ "\u0BB9\u0BBF", # TAMIL SYLLABLE HI
6656
+ "\u0BB9\u0BC0", # TAMIL SYLLABLE HII
6657
+ "\u0BB9\u0BC1", # TAMIL SYLLABLE HU
6658
+ "\u0BB9\u0BC2", # TAMIL SYLLABLE HUU
6659
+ "\u0BB9\u0BC6", # TAMIL SYLLABLE HE
6660
+ "\u0BB9\u0BC7", # TAMIL SYLLABLE HEE
6661
+ "\u0BB9\u0BC8", # TAMIL SYLLABLE HAI
6662
+ "\u0BB9\u0BCA", # TAMIL SYLLABLE HO
6663
+ "\u0BB9\u0BCB", # TAMIL SYLLABLE HOO
6664
+ "\u0BB9\u0BCC", # TAMIL SYLLABLE HAU
6665
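+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSA (truncated; full sequence is 0B95 0BCD 0BB7 -- the KSS* entries that follow repeat this same two-code-point prefix)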
6666
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSAA
6667
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSI
6668
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSII
6669
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSU
6670
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSUU
6671
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSE
6672
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSEE
6673
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSAI
6674
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSO
6675
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSOO
6676
+ "\u0B95\u0BCD", # TAMIL SYLLABLE KSSAU
6677
+ "\u0BB6\u0BCD", # TAMIL SYLLABLE SHRII
6678
+ # Sinhala medial consonants and "reph" form.
6679
+ # Provisional 2010-05-13, Approved 2011-08-05
6680
+ "\u0DCA\u200D", # SINHALA CONSONANT SIGN YANSAYA
6681
+ "\u0DCA\u200D", # SINHALA CONSONANT SIGN RAKAARAANSAYA
6682
+ "\u0DBB\u0DCA", # SINHALA CONSONANT SIGN REPAYA
6683
+ # Georgian letter plus accent sequence.
6684
+ # This is part of the original set of approved named sequences
6685
+ # for Unicode 4.1. 2005.
6686
+ "\u10E3\u0302", # GEORGIAN LETTER U-BRJGU
6687
+ # Khmer subjoined forms and other sequences.
6688
+ # These are part of the original set of approved named sequences
6689
+ # for Unicode 4.1. 2005.
6690
+ "\u17D2\u1780", # KHMER CONSONANT SIGN COENG KA
6691
+ "\u17D2\u1781", # KHMER CONSONANT SIGN COENG KHA
6692
+ "\u17D2\u1782", # KHMER CONSONANT SIGN COENG KO
6693
+ "\u17D2\u1783", # KHMER CONSONANT SIGN COENG KHO
6694
+ "\u17D2\u1784", # KHMER CONSONANT SIGN COENG NGO
6695
+ "\u17D2\u1785", # KHMER CONSONANT SIGN COENG CA
6696
+ "\u17D2\u1786", # KHMER CONSONANT SIGN COENG CHA
6697
+ "\u17D2\u1787", # KHMER CONSONANT SIGN COENG CO
6698
+ "\u17D2\u1788", # KHMER CONSONANT SIGN COENG CHO
6699
+ "\u17D2\u1789", # KHMER CONSONANT SIGN COENG NYO
6700
+ "\u17D2\u178A", # KHMER CONSONANT SIGN COENG DA
6701
+ "\u17D2\u178B", # KHMER CONSONANT SIGN COENG TTHA
6702
+ "\u17D2\u178C", # KHMER CONSONANT SIGN COENG DO
6703
+ "\u17D2\u178D", # KHMER CONSONANT SIGN COENG TTHO
6704
+ "\u17D2\u178E", # KHMER CONSONANT SIGN COENG NA
6705
+ "\u17D2\u178F", # KHMER CONSONANT SIGN COENG TA
6706
+ "\u17D2\u1790", # KHMER CONSONANT SIGN COENG THA
6707
+ "\u17D2\u1791", # KHMER CONSONANT SIGN COENG TO
6708
+ "\u17D2\u1792", # KHMER CONSONANT SIGN COENG THO
6709
+ "\u17D2\u1793", # KHMER CONSONANT SIGN COENG NO
6710
+ "\u17D2\u1794", # KHMER CONSONANT SIGN COENG BA
6711
+ "\u17D2\u1795", # KHMER CONSONANT SIGN COENG PHA
6712
+ "\u17D2\u1796", # KHMER CONSONANT SIGN COENG PO
6713
+ "\u17D2\u1797", # KHMER CONSONANT SIGN COENG PHO
6714
+ "\u17D2\u1798", # KHMER CONSONANT SIGN COENG MO
6715
+ "\u17D2\u1799", # KHMER CONSONANT SIGN COENG YO
6716
+ "\u17D2\u179A", # KHMER CONSONANT SIGN COENG RO
6717
+ "\u17D2\u179B", # KHMER CONSONANT SIGN COENG LO
6718
+ "\u17D2\u179C", # KHMER CONSONANT SIGN COENG VO
6719
+ "\u17D2\u179D", # KHMER CONSONANT SIGN COENG SHA
6720
+ "\u17D2\u179E", # KHMER CONSONANT SIGN COENG SSA
6721
+ "\u17D2\u179F", # KHMER CONSONANT SIGN COENG SA
6722
+ "\u17D2\u17A0", # KHMER CONSONANT SIGN COENG HA
6723
+ "\u17D2\u17A1", # KHMER CONSONANT SIGN COENG LA
6724
+ "\u17D2\u17A2", # KHMER VOWEL SIGN COENG QA
6725
+ "\u17D2\u17A7", # KHMER INDEPENDENT VOWEL SIGN COENG QU
6726
+ "\u17D2\u17AB", # KHMER INDEPENDENT VOWEL SIGN COENG RY
6727
+ "\u17D2\u17AC", # KHMER INDEPENDENT VOWEL SIGN COENG RYY
6728
+ "\u17D2\u17AF", # KHMER INDEPENDENT VOWEL SIGN COENG QE
6729
+ "\u17BB\u17C6", # KHMER VOWEL SIGN OM
6730
+ "\u17B6\u17C6", # KHMER VOWEL SIGN AAM
6731
+ # Entries for JIS X 0213 compatibility mapping.
6732
+ # Provisional 2008-11-07, Approved 2010-05-14
6733
+ #
6734
+ # Two of these were part of the original set of approved named sequences
6735
+ # for Unicode 4.1. 2005.
6736
+ "\u304B\u309A", # HIRAGANA LETTER BIDAKUON NGA
6737
+ "\u304D\u309A", # HIRAGANA LETTER BIDAKUON NGI
6738
+ "\u304F\u309A", # HIRAGANA LETTER BIDAKUON NGU
6739
+ "\u3051\u309A", # HIRAGANA LETTER BIDAKUON NGE
6740
+ "\u3053\u309A", # HIRAGANA LETTER BIDAKUON NGO
6741
+ "\u30AB\u309A", # KATAKANA LETTER BIDAKUON NGA
6742
+ "\u30AD\u309A", # KATAKANA LETTER BIDAKUON NGI
6743
+ "\u30AF\u309A", # KATAKANA LETTER BIDAKUON NGU
6744
+ "\u30B1\u309A", # KATAKANA LETTER BIDAKUON NGE
6745
+ "\u30B3\u309A", # KATAKANA LETTER BIDAKUON NGO
6746
+ "\u30BB\u309A", # KATAKANA LETTER AINU CE
6747
+ "\u30C4\u309A", # KATAKANA LETTER AINU TU
6748
+ "\u30C8\u309A", # KATAKANA LETTER AINU TO
6749
+ "\u31F7\u309A", # KATAKANA LETTER AINU P
6750
+ "\u02E5\u02E9", # MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR
6751
+ "\u02E9\u02E5", # MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR
6752
+ ]
6753
+ test_data.each do |string|
6754
+ assert @validator.valid_encoding?(string), "_named_sequences A: #{string}"
6755
+ assert string.force_encoding("UTF-8").valid_encoding?,
6756
+ "_named_sequences B: #{string}" if @vercheck
6757
+ end
6758
+ end
6185
6759
 
6760
+ end # of class
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: utf8_validator 1.0.13 ruby lib
5
+ # stub: utf8_validator 1.0.14 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "utf8_validator"
9
- s.version = "1.0.13"
9
+ s.version = "1.0.14"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Guy Allard"]
14
- s.date = "2016-07-08"
14
+ s.date = "2016-07-13"
15
15
  s.description = "A State Machine implementation of a UTF-8 Encoding \nValidation algorithm."
16
16
  s.email = "allard.guy.m@gmail.com"
17
17
  s.extra_rdoc_files = [
@@ -28,9 +28,12 @@ Gem::Specification.new do |s|
28
28
  "lib/validation/errors.rb",
29
29
  "lib/validation/validator.rb",
30
30
  "test/helper.rb",
31
+ "test/test_code_points.rb",
31
32
  "test/test_raise_request.rb",
33
+ "test/test_surrogate_half_first_point.rb",
32
34
  "test/test_utf8_validator.rb",
33
- "utf8_validator.gemspec"
35
+ "utf8_validator.gemspec",
36
+ "utils/gencp.rb"
34
37
  ]
35
38
  s.homepage = "http://github.com/gmallard/utf8_validator"
36
39
  s.licenses = ["MIT"]