twitter_cldr 1.3.6 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +47 -2
- data/lib/twitter_cldr/core_ext/calendars/datetime.rb +2 -2
- data/lib/twitter_cldr/core_ext/calendars/timespan.rb +11 -13
- data/lib/twitter_cldr/normalizers.rb +3 -0
- data/lib/twitter_cldr/normalizers/base.rb +34 -0
- data/lib/twitter_cldr/normalizers/nfc.rb +24 -0
- data/lib/twitter_cldr/normalizers/nfd.rb +1 -1
- data/lib/twitter_cldr/normalizers/nfkc.rb +126 -0
- data/lib/twitter_cldr/normalizers/nfkd.rb +9 -17
- data/lib/twitter_cldr/shared.rb +1 -1
- data/lib/twitter_cldr/shared/code_point.rb +116 -0
- data/lib/twitter_cldr/tokenizers/base.rb +2 -2
- data/lib/twitter_cldr/utils.rb +8 -0
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/unicode_data/blocks_hangul.yml +46 -0
- data/resources/unicode_data/composition_exclusions.yml +293 -0
- data/resources/unicode_data/decomposition_map.yml +4565 -0
- data/spec/normalizers/NormalizationTestShort.txt +66 -66
- data/spec/normalizers/base_spec.rb +17 -0
- data/spec/normalizers/normalization_spec.rb +10 -0
- data/spec/readme_spec.rb +26 -1
- data/spec/shared/code_point_spec.rb +152 -0
- data/spec/tokenizers/base_spec.rb +0 -10
- data/spec/utils/{code_point_spec.rb → code_points_spec.rb} +0 -0
- data/spec/utils_spec.rb +10 -0
- metadata +16 -10
- data/lib/twitter_cldr/shared/unicode_data.rb +0 -64
- data/spec/normalizers/nfd_spec.rb +0 -21
- data/spec/shared/unicode_data_spec.rb +0 -51
@@ -509,71 +509,71 @@ FF35;FF35;FF35;0055;0055; # (U; U; U; U; U; ) FULLWIDTH LATIN CAPITAL LETT
|
|
509
509
|
FF95;FF95;FF95;30E6;30E6; # (ユ; ユ; ユ; ユ; ユ; ) HALFWIDTH KATAKANA LETTER YU
|
510
510
|
FFED;FFED;FFED;25A0;25A0; # (■; ■; ■; ■; ■; ) HALFWIDTH BLACK SQUARE
|
511
511
|
FFA5;FFA5;FFA5;11AC;11AC; # (ᆬ; ᆬ; ᆬ; ᆬ; ᆬ; ) HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
512
|
-
110AB;110AB;110A5 110BA;110AB;110A5 110BA; # (
|
513
|
-
1109C;1109C;1109B 110BA;1109C;1109B 110BA; # (
|
514
|
-
1109A;1109A;11099 110BA;1109A;11099 110BA; # (
|
515
|
-
1112F;1112F;11132 11127;1112F;11132 11127; # (
|
516
|
-
1112E;1112E;11131 11127;1112E;11131 11127; # (
|
517
|
-
1D15F;1D158 1D165;1D158 1D165;1D158 1D165;1D158 1D165; # (
|
518
|
-
1D161;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F; # (
|
519
|
-
1D1BB;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165; # (
|
520
|
-
1D160;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E; # (
|
521
|
-
1D162;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170; # (
|
522
|
-
1D163;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171; # (
|
523
|
-
1D1BC;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165; # (
|
524
|
-
1D15E;1D157 1D165;1D157 1D165;1D157 1D165;1D157 1D165; # (
|
525
|
-
1D1BE;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E; # (
|
526
|
-
1D164;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172; # (
|
527
|
-
1D50D;1D50D;1D50D;004A;004A; # (
|
528
|
-
1D538;1D538;1D538;0041;0041; # (
|
529
|
-
1D65B;1D65B;1D65B;0066;0066; # (
|
530
|
-
1D73E;1D73E;1D73E;03B9;03B9; # (
|
531
|
-
1D57F;1D57F;1D57F;0054;0054; # (
|
532
|
-
1D6FE;1D6FE;1D6FE;03B3;03B3; # (
|
533
|
-
1D78A;1D78A;1D78A;03B5;03B5; # (
|
534
|
-
1D7F3;1D7F3;1D7F3;0037;0037; # (
|
535
|
-
1D778;1D778;1D778;03B9;03B9; # (
|
536
|
-
1D512;1D512;1D512;004F;004F; # (
|
537
|
-
1EE27;1EE27;1EE27;062D;062D; # (
|
538
|
-
1EE89;1EE89;1EE89;064A;064A; # (
|
539
|
-
1EE08;1EE08;1EE08;0637;0637; # (
|
540
|
-
1EEB0;1EEB0;1EEB0;0641;0641; # (
|
541
|
-
1EEBA;1EEBA;1EEBA;0638;0638; # (
|
542
|
-
1EE11;1EE11;1EE11;0635;0635; # (
|
543
|
-
1EE98;1EE98;1EE98;0630;0630; # (
|
544
|
-
1EEA2;1EEA2;1EEA2;062C;062C; # (
|
545
|
-
1EE4D;1EE4D;1EE4D;0646;0646; # (
|
546
|
-
1EE4F;1EE4F;1EE4F;0639;0639; # (
|
547
|
-
1F132;1F132;1F132;0043;0043; # (
|
548
|
-
1F13E;1F13E;1F13E;004F;004F; # (
|
549
|
-
1F11C;1F11C;1F11C;0028 004D 0029;0028 004D 0029; # (
|
550
|
-
1F102;1F102;1F102;0031 002C;0031 002C; # (
|
551
|
-
1F16B;1F16B;1F16B;004D 0044;004D 0044; # (
|
552
|
-
1F11D;1F11D;1F11D;0028 004E 0029;0028 004E 0029; # (
|
553
|
-
1F146;1F146;1F146;0057;0057; # (
|
554
|
-
1F107;1F107;1F107;0036 002C;0036 002C; # (
|
555
|
-
1F145;1F145;1F145;0056;0056; # (
|
556
|
-
1F112;1F112;1F112;0028 0043 0029;0028 0043 0029; # (
|
557
|
-
1F231;1F231;1F231;6253;6253; # (
|
558
|
-
1F22B;1F22B;1F22B;904A;904A; # (
|
559
|
-
1F243;1F243;1F243;3014 5B89 3015;3014 5B89 3015; # (
|
560
|
-
1F234;1F234;1F234;5408;5408; # (
|
561
|
-
1F238;1F238;1F238;7533;7533; # (
|
562
|
-
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015; # (
|
563
|
-
1F217;1F217;1F217;5929;5929; # (
|
564
|
-
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015; # (
|
565
|
-
1F224;1F224;1F224;58F0;58F0; # (
|
566
|
-
1F213;1F213;1F213;30C7;30C6 3099; # (
|
567
|
-
2F984;440B;440B;440B;440B; # (
|
568
|
-
2F9A4;26C36;26C36;26C36;26C36; # (
|
569
|
-
2F910;23F5E;23F5E;23F5E;23F5E; # (
|
570
|
-
2F9F7;2921A;2921A;2921A;2921A; # (
|
571
|
-
2F97F;8070;8070;8070;8070; # (
|
572
|
-
2F954;2569A;2569A;2569A;2569A; # (
|
573
|
-
2F96C;7D63;7D63;7D63;7D63; # (
|
574
|
-
2FA1B;9F16;9F16;9F16;9F16; # (
|
575
|
-
2F92D;3EB8;3EB8;3EB8;3EB8; # (
|
576
|
-
2F9C7;88DE;88DE;88DE;88DE; # (
|
512
|
+
110AB;110AB;110A5 110BA;110AB;110A5 110BA; # (Ⴋ; Ⴋ; Ⴅ◌Ⴚ; Ⴋ; Ⴅ◌Ⴚ; ) KAITHI LETTER VA
|
513
|
+
1109C;1109C;1109B 110BA;1109C;1109B 110BA; # (ႜ; ႜ; ႛ◌Ⴚ; ႜ; ႛ◌Ⴚ; ) KAITHI LETTER RHA
|
514
|
+
1109A;1109A;11099 110BA;1109A;11099 110BA; # (ႚ; ႚ; ႙◌Ⴚ; ႚ; ႙◌Ⴚ; ) KAITHI LETTER DDDHA
|
515
|
+
1112F;1112F;11132 11127;1112F;11132 11127; # (◌ᄯ; ◌ᄯ; ◌ᄲ◌ᄧ; ◌ᄯ; ◌ᄲ◌ᄧ; ) CHAKMA VOWEL SIGN AU
|
516
|
+
1112E;1112E;11131 11127;1112E;11131 11127; # (◌ᄮ; ◌ᄮ; ◌ᄱ◌ᄧ; ◌ᄮ; ◌ᄱ◌ᄧ; ) CHAKMA VOWEL SIGN O
|
517
|
+
1D15F;1D158 1D165;1D158 1D165;1D158 1D165;1D158 1D165; # (텟; 텘텥; 텘텥; 텘텥; 텘텥; ) MUSICAL SYMBOL QUARTER NOTE
|
518
|
+
1D161;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F; # (텡; 텘텥텯; 텘텥텯; 텘텥텯; 텘텥텯; ) MUSICAL SYMBOL SIXTEENTH NOTE
|
519
|
+
1D1BB;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165; # (톻; 톹텥; 톹텥; 톹텥; 톹텥; ) MUSICAL SYMBOL MINIMA
|
520
|
+
1D160;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E; # (텠; 텘텥텮; 텘텥텮; 텘텥텮; 텘텥텮; ) MUSICAL SYMBOL EIGHTH NOTE
|
521
|
+
1D162;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170; # (텢; 텘텥텰; 텘텥텰; 텘텥텰; 텘텥텰; ) MUSICAL SYMBOL THIRTY-SECOND NOTE
|
522
|
+
1D163;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171; # (텣; 텘텥텱; 텘텥텱; 텘텥텱; 텘텥텱; ) MUSICAL SYMBOL SIXTY-FOURTH NOTE
|
523
|
+
1D1BC;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165; # (톼; 톺텥; 톺텥; 톺텥; 톺텥; ) MUSICAL SYMBOL MINIMA BLACK
|
524
|
+
1D15E;1D157 1D165;1D157 1D165;1D157 1D165;1D157 1D165; # (텞; 텗텥; 텗텥; 텗텥; 텗텥; ) MUSICAL SYMBOL HALF NOTE
|
525
|
+
1D1BE;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E; # (톾; 톺텥텮; 톺텥텮; 톺텥텮; 톺텥텮; ) MUSICAL SYMBOL SEMIMINIMA BLACK
|
526
|
+
1D164;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172; # (텤; 텘텥텲; 텘텥텲; 텘텥텲; 텘텥텲; ) MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
527
|
+
1D50D;1D50D;1D50D;004A;004A; # (픍; 픍; 픍; J; J; ) MATHEMATICAL FRAKTUR CAPITAL J
|
528
|
+
1D538;1D538;1D538;0041;0041; # (픸; 픸; 픸; A; A; ) MATHEMATICAL DOUBLE-STRUCK CAPITAL A
|
529
|
+
1D65B;1D65B;1D65B;0066;0066; # (홛; 홛; 홛; f; f; ) MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL F
|
530
|
+
1D73E;1D73E;1D73E;03B9;03B9; # (휾; 휾; 휾; ι; ι; ) MATHEMATICAL BOLD ITALIC SMALL IOTA
|
531
|
+
1D57F;1D57F;1D57F;0054;0054; # (핿; 핿; 핿; T; T; ) MATHEMATICAL BOLD FRAKTUR CAPITAL T
|
532
|
+
1D6FE;1D6FE;1D6FE;03B3;03B3; # (훾; 훾; 훾; γ; γ; ) MATHEMATICAL ITALIC SMALL GAMMA
|
533
|
+
1D78A;1D78A;1D78A;03B5;03B5; # (힊; 힊; 힊; ε; ε; ) MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL
|
534
|
+
1D7F3;1D7F3;1D7F3;0037;0037; # (ퟳ; ퟳ; ퟳ; 7; 7; ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN
|
535
|
+
1D778;1D778;1D778;03B9;03B9; # (흸; 흸; 흸; ι; ι; ) MATHEMATICAL SANS-SERIF BOLD SMALL IOTA
|
536
|
+
1D512;1D512;1D512;004F;004F; # (픒; 픒; 픒; O; O; ) MATHEMATICAL FRAKTUR CAPITAL O
|
537
|
+
1EE27;1EE27;1EE27;062D;062D; # (; ; ; ح; ح; ) ARABIC MATHEMATICAL INITIAL HAH
|
538
|
+
1EE89;1EE89;1EE89;064A;064A; # (; ; ; ي; ي; ) ARABIC MATHEMATICAL LOOPED YEH
|
539
|
+
1EE08;1EE08;1EE08;0637;0637; # (; ; ; ط; ط; ) ARABIC MATHEMATICAL TAH
|
540
|
+
1EEB0;1EEB0;1EEB0;0641;0641; # (; ; ; ف; ف; ) ARABIC MATHEMATICAL DOUBLE-STRUCK FEH
|
541
|
+
1EEBA;1EEBA;1EEBA;0638;0638; # (; ; ; ظ; ظ; ) ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH
|
542
|
+
1EE11;1EE11;1EE11;0635;0635; # (; ; ; ص; ص; ) ARABIC MATHEMATICAL SAD
|
543
|
+
1EE98;1EE98;1EE98;0630;0630; # (; ; ; ذ; ذ; ) ARABIC MATHEMATICAL LOOPED THAL
|
544
|
+
1EEA2;1EEA2;1EEA2;062C;062C; # (; ; ; ج; ج; ) ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM
|
545
|
+
1EE4D;1EE4D;1EE4D;0646;0646; # (; ; ; ن; ن; ) ARABIC MATHEMATICAL TAILED NOON
|
546
|
+
1EE4F;1EE4F;1EE4F;0639;0639; # (; ; ; ع; ع; ) ARABIC MATHEMATICAL TAILED AIN
|
547
|
+
1F132;1F132;1F132;0043;0043; # (; ; ; C; C; ) SQUARED LATIN CAPITAL LETTER C
|
548
|
+
1F13E;1F13E;1F13E;004F;004F; # (; ; ; O; O; ) SQUARED LATIN CAPITAL LETTER O
|
549
|
+
1F11C;1F11C;1F11C;0028 004D 0029;0028 004D 0029; # (; ; ; (M); (M); ) PARENTHESIZED LATIN CAPITAL LETTER M
|
550
|
+
1F102;1F102;1F102;0031 002C;0031 002C; # (; ; ; 1,; 1,; ) DIGIT ONE COMMA
|
551
|
+
1F16B;1F16B;1F16B;004D 0044;004D 0044; # (; ; ; MD; MD; ) RAISED MD SIGN
|
552
|
+
1F11D;1F11D;1F11D;0028 004E 0029;0028 004E 0029; # (; ; ; (N); (N); ) PARENTHESIZED LATIN CAPITAL LETTER N
|
553
|
+
1F146;1F146;1F146;0057;0057; # (; ; ; W; W; ) SQUARED LATIN CAPITAL LETTER W
|
554
|
+
1F107;1F107;1F107;0036 002C;0036 002C; # (; ; ; 6,; 6,; ) DIGIT SIX COMMA
|
555
|
+
1F145;1F145;1F145;0056;0056; # (; ; ; V; V; ) SQUARED LATIN CAPITAL LETTER V
|
556
|
+
1F112;1F112;1F112;0028 0043 0029;0028 0043 0029; # (; ; ; (C); (C); ) PARENTHESIZED LATIN CAPITAL LETTER C
|
557
|
+
1F231;1F231;1F231;6253;6253; # (; ; ; 打; 打; ) SQUARED CJK UNIFIED IDEOGRAPH-6253
|
558
|
+
1F22B;1F22B;1F22B;904A;904A; # (; ; ; 遊; 遊; ) SQUARED CJK UNIFIED IDEOGRAPH-904A
|
559
|
+
1F243;1F243;1F243;3014 5B89 3015;3014 5B89 3015; # (; ; ; 〔安〕; 〔安〕; ) TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-5B89
|
560
|
+
1F234;1F234;1F234;5408;5408; # (; ; ; 合; 合; ) SQUARED CJK UNIFIED IDEOGRAPH-5408
|
561
|
+
1F238;1F238;1F238;7533;7533; # (; ; ; 申; 申; ) SQUARED CJK UNIFIED IDEOGRAPH-7533
|
562
|
+
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015; # (; ; ; 〔勝〕; 〔勝〕; ) TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-52DD
|
563
|
+
1F217;1F217;1F217;5929;5929; # (; ; ; 天; 天; ) SQUARED CJK UNIFIED IDEOGRAPH-5929
|
564
|
+
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015; # (; ; ; 〔敗〕; 〔敗〕; ) TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
565
|
+
1F224;1F224;1F224;58F0;58F0; # (; ; ; 声; 声; ) SQUARED CJK UNIFIED IDEOGRAPH-58F0
|
566
|
+
1F213;1F213;1F213;30C7;30C6 3099; # (; ; ; デ; テ◌゙; ) SQUARED KATAKANA DE
|
567
|
+
2F984;440B;440B;440B;440B; # (濾; 䐋; 䐋; 䐋; 䐋; ) CJK COMPATIBILITY IDEOGRAPH-2F984
|
568
|
+
2F9A4;26C36;26C36;26C36;26C36; # (捻; 氶; 氶; 氶; 氶; ) CJK COMPATIBILITY IDEOGRAPH-2F9A4
|
569
|
+
2F910;23F5E;23F5E;23F5E;23F5E; # (蘿; 㽞; 㽞; 㽞; 㽞; ) CJK COMPATIBILITY IDEOGRAPH-2F910
|
570
|
+
2F9F7;2921A;2921A;2921A;2921A; # (立; 鈚; 鈚; 鈚; 鈚; ) CJK COMPATIBILITY IDEOGRAPH-2F9F7
|
571
|
+
2F97F;8070;8070;8070;8070; # (勵; 聰; 聰; 聰; 聰; ) CJK COMPATIBILITY IDEOGRAPH-2F97F
|
572
|
+
2F954;2569A;2569A;2569A;2569A; # (凜; 嚚; 嚚; 嚚; 嚚; ) CJK COMPATIBILITY IDEOGRAPH-2F954
|
573
|
+
2F96C;7D63;7D63;7D63;7D63; # (塞; 絣; 絣; 絣; 絣; ) CJK COMPATIBILITY IDEOGRAPH-2F96C
|
574
|
+
2FA1B;9F16;9F16;9F16;9F16; # (福; 鼖; 鼖; 鼖; 鼖; ) CJK COMPATIBILITY IDEOGRAPH-2FA1B
|
575
|
+
2F92D;3EB8;3EB8;3EB8;3EB8; # (來; 㺸; 㺸; 㺸; 㺸; ) CJK COMPATIBILITY IDEOGRAPH-2F92D
|
576
|
+
2F9C7;88DE;88DE;88DE;88DE; # (劉; 裞; 裞; 裞; 裞; ) CJK COMPATIBILITY IDEOGRAPH-2F9C7
|
577
577
|
#
|
578
578
|
@Part2 # Canonical Order Test
|
579
579
|
#
|
@@ -582,7 +582,7 @@ FFA5;FFA5;FFA5;11AC;11AC; # (ᆬ; ᆬ; ᆬ; ᆬ; ᆬ; ) HALFWIDTH HANGUL LETTER
|
|
582
582
|
0061 1DC5 0315 0300 05AE 0062;0061 05AE 1DC5 0300 0315 0062;0061 05AE 1DC5 0300 0315 0062;0061 05AE 1DC5 0300 0315 0062;0061 05AE 1DC5 0300 0315 0062; # (a◌᷅◌̕◌̀◌֮b; a◌֮◌᷅◌̀◌̕b; a◌֮◌᷅◌̀◌̕b; a◌֮◌᷅◌̀◌̕b; a◌֮◌᷅◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GRAVE-MACRON, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
|
583
583
|
0061 FE20 0315 0300 05AE 0062;0061 05AE FE20 0300 0315 0062;0061 05AE FE20 0300 0315 0062;0061 05AE FE20 0300 0315 0062;0061 05AE FE20 0300 0315 0062; # (a◌︠◌̕◌̀◌֮b; a◌֮◌︠◌̀◌̕b; a◌֮◌︠◌̀◌̕b; a◌֮◌︠◌̀◌̕b; a◌֮◌︠◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LIGATURE LEFT HALF, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
|
584
584
|
0061 0315 0300 05AE 0658 0062;00E0 05AE 0658 0315 0062;0061 05AE 0300 0658 0315 0062;00E0 05AE 0658 0315 0062;0061 05AE 0300 0658 0315 0062; # (a◌̕◌̀◌֮◌٘b; à◌֮◌٘◌̕b; a◌֮◌̀◌٘◌̕b; à◌֮◌٘◌̕b; a◌֮◌̀◌٘◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC MARK NOON GHUNNA, LATIN SMALL LETTER B
|
585
|
-
0061 1D182 059A 0316 302A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062; # (a
|
585
|
+
0061 1D182 059A 0316 302A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062; # (a◌톂◌֚◌̖◌〪b; a◌〪◌톂◌̖◌֚b; a◌〪◌톂◌̖◌֚b; a◌〪◌톂◌̖◌֚b; a◌〪◌톂◌̖◌֚b; ) LATIN SMALL LETTER A, MUSICAL SYMBOL COMBINING LOURE, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, IDEOGRAPHIC LEVEL TONE MARK, LATIN SMALL LETTER B
|
586
586
|
0061 0315 0300 05AE 1DFE 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062; # (a◌̕◌̀◌֮◌᷾b; à◌֮◌᷾◌̕b; a◌֮◌̀◌᷾◌̕b; à◌֮◌᷾◌̕b; a◌֮◌̀◌᷾◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT ARROWHEAD ABOVE, LATIN SMALL LETTER B
|
587
587
|
0061 0315 0300 05AE A679 0062;00E0 05AE A679 0315 0062;0061 05AE 0300 A679 0315 0062;00E0 05AE A679 0315 0062;0061 05AE 0300 A679 0315 0062; # (a◌̕◌̀◌֮◌ꙹb; à◌֮◌ꙹ◌̕b; a◌֮◌̀◌ꙹ◌̕b; à◌֮◌ꙹ◌̕b; a◌֮◌̀◌ꙹ◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING CYRILLIC LETTER YERU, LATIN SMALL LETTER B
|
588
588
|
0061 0315 0300 05AE 0F87 0062;00E0 05AE 0F87 0315 0062;0061 05AE 0300 0F87 0315 0062;00E0 05AE 0F87 0315 0062;0061 05AE 0300 0F87 0315 0062; # (a◌̕◌̀◌֮◌྇b; à◌֮◌྇◌̕b; a◌֮◌̀◌྇◌̕b; à◌֮◌྇◌̕b; a◌֮◌̀◌྇◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TIBETAN SIGN YANG RTAGS, LATIN SMALL LETTER B
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Normalizers
|
9
|
+
|
10
|
+
describe Base do
|
11
|
+
describe "#combining_class_for" do
|
12
|
+
it "returns the correct combining class for select code points" do
|
13
|
+
Base.combining_class_for("0303").should == 230 # combining tilde
|
14
|
+
Base.combining_class_for("006E").should == 0 # latin letter n
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -38,6 +38,16 @@ describe 'Unicode Normalization Algorithms' do
|
|
38
38
|
it_behaves_like 'a normalization algorithm'
|
39
39
|
end
|
40
40
|
|
41
|
+
describe NFC do
|
42
|
+
let(:invariants) { { 2 => [1, 2, 3], 4 => [4, 5] } }
|
43
|
+
it_behaves_like 'a normalization algorithm'
|
44
|
+
end
|
45
|
+
|
46
|
+
describe NFKC do
|
47
|
+
let(:invariants) { { 4 => [1, 2, 3, 4, 5] } }
|
48
|
+
it_behaves_like 'a normalization algorithm'
|
49
|
+
end
|
50
|
+
|
41
51
|
# Runs standard Unicode normalization tests from `file_path` for a given `normalizer`. Expected invariants are
|
42
52
|
# specified via `invariants` hash.
|
43
53
|
#
|
data/spec/readme_spec.rb
CHANGED
@@ -74,6 +74,31 @@ describe "README" do
|
|
74
74
|
dt.to_short_s.should == "12/12/11 21:44"
|
75
75
|
end
|
76
76
|
|
77
|
+
it "verifies relative time spans" do
|
78
|
+
(DateTime.now - 1).localize.ago.should == "1 day ago"
|
79
|
+
(DateTime.now - 0.5).localize.ago.should == "12 hours ago" # (i.e. half a day)
|
80
|
+
|
81
|
+
(DateTime.now + 1).localize.until.should == "In 1 day"
|
82
|
+
(DateTime.now + 0.5).localize.until.should == "In 12 hours"
|
83
|
+
|
84
|
+
(DateTime.now - 1).localize(:de).ago.should == "Vor 1 Tag"
|
85
|
+
(DateTime.now + 1).localize(:de).until.should == "In 1 Tag"
|
86
|
+
|
87
|
+
(DateTime.now - 1).localize(:de).ago(:unit => :hour).should == "Vor 24 Stunden"
|
88
|
+
(DateTime.now + 1).localize(:de).until(:unit => :hour).should == "In 24 Stunden"
|
89
|
+
|
90
|
+
# 86400 = 1 day in seconds, 259200 = 3 days in seconds
|
91
|
+
(Time.now + 86400).localize(:de).ago(:unit => :hour, :base_time => (Time.now + 259200)).should == "Vor 48 Stunden"
|
92
|
+
|
93
|
+
ts = TwitterCldr::LocalizedTimespan.new(86400, :de)
|
94
|
+
ts.to_s.should == "In 1 Tag"
|
95
|
+
ts.to_s(:hour).should == "In 24 Stunden"
|
96
|
+
|
97
|
+
ts = TwitterCldr::LocalizedTimespan.new(-86400, :de)
|
98
|
+
ts.to_s.should == "Vor 1 Tag"
|
99
|
+
ts.to_s(:hour).should == "Vor 24 Stunden"
|
100
|
+
end
|
101
|
+
|
77
102
|
it "verifies plural rules" do
|
78
103
|
1.localize(:ru).plural_rule.should == :one
|
79
104
|
2.localize(:ru).plural_rule.should == :few
|
@@ -149,7 +174,7 @@ describe "README" do
|
|
149
174
|
end
|
150
175
|
|
151
176
|
it "verifies code point conversions" do
|
152
|
-
code_point = TwitterCldr::Shared::
|
177
|
+
code_point = TwitterCldr::Shared::CodePoint.for_hex("1F3E9")
|
153
178
|
code_point.name.should == "LOVE HOTEL"
|
154
179
|
code_point.bidi_mirrored.should == "N"
|
155
180
|
code_point.category.should == "So"
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Shared
|
9
|
+
|
10
|
+
describe CodePoint do
|
11
|
+
describe "#for_hex" do
|
12
|
+
it "should retrieve information for any valid code point" do
|
13
|
+
data = CodePoint.for_hex('0301')
|
14
|
+
data.should be_a(CodePoint)
|
15
|
+
data.values.length.should == 15
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should return nil for invalid code points" do
|
19
|
+
CodePoint.for_hex('abcd').should be_nil
|
20
|
+
CodePoint.for_hex('FFFFFFF').should be_nil
|
21
|
+
CodePoint.for_hex('uytukhil123').should be_nil
|
22
|
+
end
|
23
|
+
|
24
|
+
it "fetches valid information for the specified code point" do
|
25
|
+
test_data = {
|
26
|
+
'17D1' => ['17D1','KHMER SIGN VIRIAM','Mn','0','NSM',"","","","",'N',"","","","",""],
|
27
|
+
'FE91' => ['FE91','ARABIC LETTER BEH INITIAL FORM','Lo','0','AL','<initial> 0628',"","","",'N','GLYPH FOR INITIAL ARABIC BAA',"","","",""],
|
28
|
+
'24B5' => ['24B5','PARENTHESIZED LATIN SMALL LETTER Z','So','0','L','<compat> 0028 007A 0029',"","","",'N',"","","","",""],
|
29
|
+
'2128' => ['2128','BLACK-LETTER CAPITAL Z','Lu','0','L','<font> 005A',"","","",'N','BLACK-LETTER Z',"","","",""],
|
30
|
+
'1F241'=> ['1F241','TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09','So','0','L','<compat> 3014 4E09 3015',"","","",'N',"","","","",""]
|
31
|
+
}
|
32
|
+
test_data.each_pair do |code_point, data|
|
33
|
+
cp_data = CodePoint.for_hex(code_point)
|
34
|
+
cp_data.code_point.should == data[0]
|
35
|
+
cp_data.name.should == data[1]
|
36
|
+
cp_data.category.should == data[2]
|
37
|
+
cp_data.combining_class.should == data[3]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it "fetches valid information for a code point within a range" do
|
42
|
+
test_data = {
|
43
|
+
'4E11' => ["4E11","<CJK Ideograph>","Lo","0","L","","","","","N","","","","",""],
|
44
|
+
'AC55' => ["AC55","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
|
45
|
+
'D7A1' => ["D7A1","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
|
46
|
+
'DAAA' => ["DAAA","<Non Private Use High Surrogate>","Cs","0","L","","","","","N","","","","",""],
|
47
|
+
'F8FE' => ["F8FE","<Private Use>","Co","0","L","","","","","N","","","","",""]
|
48
|
+
}
|
49
|
+
|
50
|
+
test_data.each_pair do |code_point, data|
|
51
|
+
cp_data = CodePoint.for_hex(code_point)
|
52
|
+
cp_data.code_point.should == data[0]
|
53
|
+
cp_data.name.should == data[1]
|
54
|
+
cp_data.category.should == data[2]
|
55
|
+
cp_data.combining_class.should == data[3]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe "#for_decomposition" do
|
61
|
+
let(:decomp_map) { { :"YYYY ZZZZ" => "0ABC" } }
|
62
|
+
|
63
|
+
before(:each) do
|
64
|
+
# clear the decomposition map after each test so mocks/stubs work
|
65
|
+
CodePoint.instance_variable_set(:@decomposition_map, nil)
|
66
|
+
stub(CodePoint).for_hex { |code_point| "I'm code point #{code_point}" }
|
67
|
+
end
|
68
|
+
|
69
|
+
after(:each) do
|
70
|
+
# clear the decomposition map after each test so mocks/stubs work
|
71
|
+
CodePoint.instance_variable_set(:@decomposition_map, nil)
|
72
|
+
end
|
73
|
+
|
74
|
+
context "with a stubbed decomposition map" do
|
75
|
+
before(:each) do
|
76
|
+
stub(TwitterCldr).get_resource(:unicode_data, :decomposition_map) { decomp_map }
|
77
|
+
end
|
78
|
+
|
79
|
+
it "should return a code point with the correct value" do
|
80
|
+
CodePoint.for_decomposition(["YYYY", "ZZZZ"]).should == "I'm code point 0ABC"
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should return nil if no decomposition mapping exists" do
|
84
|
+
CodePoint.for_decomposition(["NO"]).should be_nil
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should cache the decomposition map" do
|
89
|
+
mock(TwitterCldr).get_resource(:unicode_data, :decomposition_map) { decomp_map }.once
|
90
|
+
CodePoint.for_decomposition(["NO"]).should be_nil
|
91
|
+
CodePoint.for_decomposition(["NO"]).should be_nil
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
describe "#hangul_type" do
|
96
|
+
before(:each) do
|
97
|
+
stub(CodePoint).hangul_blocks { { :lparts => [1..10],
|
98
|
+
:vparts => [21..30],
|
99
|
+
:tparts => [41..50],
|
100
|
+
:compositions => [1..30],
|
101
|
+
:decompositions => [31..50] } }
|
102
|
+
end
|
103
|
+
|
104
|
+
it "returns nil if not part of a hangul block" do
|
105
|
+
CodePoint.hangul_type(100.to_s(16)).should == nil
|
106
|
+
end
|
107
|
+
|
108
|
+
it "returns the correct part (i.e. lpart, vpart, or tpart) before composition or decomposition" do
|
109
|
+
CodePoint.hangul_type(5.to_s(16)).should == :lparts
|
110
|
+
CodePoint.hangul_type(30.to_s(16)).should == :vparts
|
111
|
+
CodePoint.hangul_type(41.to_s(16)).should == :tparts
|
112
|
+
end
|
113
|
+
|
114
|
+
it "returns composition or decomposition if no part can be found" do
|
115
|
+
CodePoint.hangul_type(11.to_s(16)).should == :compositions
|
116
|
+
CodePoint.hangul_type(40.to_s(16)).should == :decompositions
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "#excluded_from_composition?" do
|
121
|
+
it "excludes anything in the list of ranges" do
|
122
|
+
stub(CodePoint).composition_exclusions { [10..10, 13..14, 20..30] }
|
123
|
+
CodePoint.excluded_from_composition?(10.to_s(16)).should be_true
|
124
|
+
CodePoint.excluded_from_composition?(13.to_s(16)).should be_true
|
125
|
+
CodePoint.excluded_from_composition?(14.to_s(16)).should be_true
|
126
|
+
CodePoint.excluded_from_composition?(15.to_s(16)).should be_false
|
127
|
+
CodePoint.excluded_from_composition?(19.to_s(16)).should be_false
|
128
|
+
CodePoint.excluded_from_composition?(100.to_s(16)).should be_false
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
describe "#get_block" do
|
133
|
+
it "finds the block that corresponds to the code point" do
|
134
|
+
stub(TwitterCldr).get_resource(:unicode_data, :blocks) { [[:klingon, 122..307], [:hirogen, 1337..2200]] }
|
135
|
+
CodePoint.send(:get_block, 200.to_s(16)).should == [:klingon, 122..307]
|
136
|
+
CodePoint.send(:get_block, 2199.to_s(16)).should == [:hirogen, 1337..2200]
|
137
|
+
CodePoint.send(:get_block, 100.to_s(16)).should be_nil
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
describe "#get_range_start" do
|
142
|
+
it "returns the data for a non-explicit range" do
|
143
|
+
block_data = { "0" => ["1337", "<CJK Ideograph Extension A, First>"] }
|
144
|
+
CodePoint.send(:get_range_start, "ABC", block_data).should == ["ABC", "<CJK Ideograph Extension A>"]
|
145
|
+
end
|
146
|
+
|
147
|
+
it "returns nil if the block data doesn't contain a non-explicit range" do
|
148
|
+
block_data = { "0" => ["1337", "<CJK Ideograph Extension A>"] }
|
149
|
+
CodePoint.send(:get_range_start, "ABC", block_data).should == nil
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
@@ -147,16 +147,6 @@ describe Base do
|
|
147
147
|
end
|
148
148
|
end
|
149
149
|
|
150
|
-
describe "#compute_cache_key" do
|
151
|
-
it "returns a ruby hash of all the pieces concatenated with pipe characters" do
|
152
|
-
@base.send(:compute_cache_key, "space", "the", "final", "frontier").should == "space|the|final|frontier".hash
|
153
|
-
end
|
154
|
-
|
155
|
-
it "returns zero if no arguments are passed" do
|
156
|
-
@base.send(:compute_cache_key).should == 0
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
150
|
describe "#traverse" do
|
161
151
|
before(:each) do
|
162
152
|
@tree = { :admiral => { :captain => { :commander => { :lieutenant => "Found Me!" } } } }
|
File without changes
|
data/spec/utils_spec.rb
CHANGED
@@ -29,4 +29,14 @@ describe TwitterCldr::Utils do
|
|
29
29
|
end
|
30
30
|
|
31
31
|
end
|
32
|
+
|
33
|
+
describe "#compute_cache_key" do
|
34
|
+
it "returns a ruby hash of all the pieces concatenated with pipe characters" do
|
35
|
+
TwitterCldr::Utils.compute_cache_key("space", "the", "final", "frontier").should == "space|the|final|frontier".hash
|
36
|
+
end
|
37
|
+
|
38
|
+
it "returns zero if no arguments are passed" do
|
39
|
+
TwitterCldr::Utils.compute_cache_key.should == 0
|
40
|
+
end
|
41
|
+
end
|
32
42
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_cldr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 1.
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 1.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Cameron Dutro
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-06-
|
18
|
+
date: 2012-06-15 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: json
|
@@ -105,7 +105,7 @@ dependencies:
|
|
105
105
|
requirements:
|
106
106
|
- - ~>
|
107
107
|
- !ruby/object:Gem::Version
|
108
|
-
hash:
|
108
|
+
hash: 3424229193
|
109
109
|
segments:
|
110
110
|
- 0
|
111
111
|
- 9
|
@@ -221,16 +221,19 @@ files:
|
|
221
221
|
- lib/twitter_cldr/formatters/plurals/rules.rb
|
222
222
|
- lib/twitter_cldr/formatters/plurals.rb
|
223
223
|
- lib/twitter_cldr/formatters.rb
|
224
|
+
- lib/twitter_cldr/normalizers/base.rb
|
225
|
+
- lib/twitter_cldr/normalizers/nfc.rb
|
224
226
|
- lib/twitter_cldr/normalizers/nfd.rb
|
227
|
+
- lib/twitter_cldr/normalizers/nfkc.rb
|
225
228
|
- lib/twitter_cldr/normalizers/nfkd.rb
|
226
229
|
- lib/twitter_cldr/normalizers.rb
|
227
230
|
- lib/twitter_cldr/shared/calendar.rb
|
231
|
+
- lib/twitter_cldr/shared/code_point.rb
|
228
232
|
- lib/twitter_cldr/shared/currencies.rb
|
229
233
|
- lib/twitter_cldr/shared/languages.rb
|
230
234
|
- lib/twitter_cldr/shared/numbers.rb
|
231
235
|
- lib/twitter_cldr/shared/resources.rb
|
232
236
|
- lib/twitter_cldr/shared/timezones.rb
|
233
|
-
- lib/twitter_cldr/shared/unicode_data.rb
|
234
237
|
- lib/twitter_cldr/shared.rb
|
235
238
|
- lib/twitter_cldr/tokenizers/base.rb
|
236
239
|
- lib/twitter_cldr/tokenizers/calendars/date_tokenizer.rb
|
@@ -267,17 +270,17 @@ files:
|
|
267
270
|
- spec/formatters/numbers/percent_formatter_spec.rb
|
268
271
|
- spec/formatters/plurals/plural_formatter_spec.rb
|
269
272
|
- spec/formatters/plurals/rules_spec.rb
|
270
|
-
- spec/normalizers/
|
273
|
+
- spec/normalizers/base_spec.rb
|
271
274
|
- spec/normalizers/normalization_spec.rb
|
272
275
|
- spec/normalizers/NormalizationTest.txt
|
273
276
|
- spec/normalizers/NormalizationTestShort.txt
|
274
277
|
- spec/readme_spec.rb
|
275
278
|
- spec/shared/calendar_spec.rb
|
279
|
+
- spec/shared/code_point_spec.rb
|
276
280
|
- spec/shared/currencies_spec.rb
|
277
281
|
- spec/shared/languages_spec.rb
|
278
282
|
- spec/shared/numbers_spec.rb
|
279
283
|
- spec/shared/resources_spec.rb
|
280
|
-
- spec/shared/unicode_data_spec.rb
|
281
284
|
- spec/spec_helper.rb
|
282
285
|
- spec/tokenizers/base_spec.rb
|
283
286
|
- spec/tokenizers/calendars/date_tokenizer_spec.rb
|
@@ -288,7 +291,7 @@ files:
|
|
288
291
|
- spec/tokenizers/numbers/number_tokenizer_spec.rb
|
289
292
|
- spec/tokenizers/token_spec.rb
|
290
293
|
- spec/twitter_cldr_spec.rb
|
291
|
-
- spec/utils/
|
294
|
+
- spec/utils/code_points_spec.rb
|
292
295
|
- spec/utils/interpolation_spec.rb
|
293
296
|
- spec/utils_spec.rb
|
294
297
|
- resources/locales/ar/calendars.yml
|
@@ -453,6 +456,7 @@ files:
|
|
453
456
|
- resources/unicode_data/bengali.yml
|
454
457
|
- resources/unicode_data/block_elements.yml
|
455
458
|
- resources/unicode_data/blocks.yml
|
459
|
+
- resources/unicode_data/blocks_hangul.yml
|
456
460
|
- resources/unicode_data/bopomofo.yml
|
457
461
|
- resources/unicode_data/bopomofo_extended.yml
|
458
462
|
- resources/unicode_data/box_drawing.yml
|
@@ -482,6 +486,7 @@ files:
|
|
482
486
|
- resources/unicode_data/combining_diacritical_marks_supplement.yml
|
483
487
|
- resources/unicode_data/combining_half_marks.yml
|
484
488
|
- resources/unicode_data/common_indic_number_forms.yml
|
489
|
+
- resources/unicode_data/composition_exclusions.yml
|
485
490
|
- resources/unicode_data/control_pictures.yml
|
486
491
|
- resources/unicode_data/coptic.yml
|
487
492
|
- resources/unicode_data/counting_rod_numerals.yml
|
@@ -493,6 +498,7 @@ files:
|
|
493
498
|
- resources/unicode_data/cyrillic_extended_a.yml
|
494
499
|
- resources/unicode_data/cyrillic_extended_b.yml
|
495
500
|
- resources/unicode_data/cyrillic_supplement.yml
|
501
|
+
- resources/unicode_data/decomposition_map.yml
|
496
502
|
- resources/unicode_data/deseret.yml
|
497
503
|
- resources/unicode_data/devanagari.yml
|
498
504
|
- resources/unicode_data/devanagari_extended.yml
|