twitter_cldr 1.3.6 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +47 -2
- data/lib/twitter_cldr/core_ext/calendars/datetime.rb +2 -2
- data/lib/twitter_cldr/core_ext/calendars/timespan.rb +11 -13
- data/lib/twitter_cldr/normalizers.rb +3 -0
- data/lib/twitter_cldr/normalizers/base.rb +34 -0
- data/lib/twitter_cldr/normalizers/nfc.rb +24 -0
- data/lib/twitter_cldr/normalizers/nfd.rb +1 -1
- data/lib/twitter_cldr/normalizers/nfkc.rb +126 -0
- data/lib/twitter_cldr/normalizers/nfkd.rb +9 -17
- data/lib/twitter_cldr/shared.rb +1 -1
- data/lib/twitter_cldr/shared/code_point.rb +116 -0
- data/lib/twitter_cldr/tokenizers/base.rb +2 -2
- data/lib/twitter_cldr/utils.rb +8 -0
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/unicode_data/blocks_hangul.yml +46 -0
- data/resources/unicode_data/composition_exclusions.yml +293 -0
- data/resources/unicode_data/decomposition_map.yml +4565 -0
- data/spec/normalizers/NormalizationTestShort.txt +66 -66
- data/spec/normalizers/base_spec.rb +17 -0
- data/spec/normalizers/normalization_spec.rb +10 -0
- data/spec/readme_spec.rb +26 -1
- data/spec/shared/code_point_spec.rb +152 -0
- data/spec/tokenizers/base_spec.rb +0 -10
- data/spec/utils/{code_point_spec.rb → code_points_spec.rb} +0 -0
- data/spec/utils_spec.rb +10 -0
- metadata +16 -10
- data/lib/twitter_cldr/shared/unicode_data.rb +0 -64
- data/spec/normalizers/nfd_spec.rb +0 -21
- data/spec/shared/unicode_data_spec.rb +0 -51
@@ -509,71 +509,71 @@ FF35;FF35;FF35;0055;0055; # (U; U; U; U; U; ) FULLWIDTH LATIN CAPITAL LETT
|
|
509
509
|
FF95;FF95;FF95;30E6;30E6; # (ユ; ユ; ユ; ユ; ユ; ) HALFWIDTH KATAKANA LETTER YU
|
510
510
|
FFED;FFED;FFED;25A0;25A0; # (■; ■; ■; ■; ■; ) HALFWIDTH BLACK SQUARE
|
511
511
|
FFA5;FFA5;FFA5;11AC;11AC; # (ᆬ; ᆬ; ᆬ; ᆬ; ᆬ; ) HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
512
|
-
110AB;110AB;110A5 110BA;110AB;110A5 110BA; # (
|
513
|
-
1109C;1109C;1109B 110BA;1109C;1109B 110BA; # (
|
514
|
-
1109A;1109A;11099 110BA;1109A;11099 110BA; # (
|
515
|
-
1112F;1112F;11132 11127;1112F;11132 11127; # (
|
516
|
-
1112E;1112E;11131 11127;1112E;11131 11127; # (
|
517
|
-
1D15F;1D158 1D165;1D158 1D165;1D158 1D165;1D158 1D165; # (
|
518
|
-
1D161;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F; # (
|
519
|
-
1D1BB;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165; # (
|
520
|
-
1D160;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E; # (
|
521
|
-
1D162;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170; # (
|
522
|
-
1D163;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171; # (
|
523
|
-
1D1BC;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165; # (
|
524
|
-
1D15E;1D157 1D165;1D157 1D165;1D157 1D165;1D157 1D165; # (
|
525
|
-
1D1BE;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E; # (
|
526
|
-
1D164;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172; # (
|
527
|
-
1D50D;1D50D;1D50D;004A;004A; # (
|
528
|
-
1D538;1D538;1D538;0041;0041; # (
|
529
|
-
1D65B;1D65B;1D65B;0066;0066; # (
|
530
|
-
1D73E;1D73E;1D73E;03B9;03B9; # (
|
531
|
-
1D57F;1D57F;1D57F;0054;0054; # (
|
532
|
-
1D6FE;1D6FE;1D6FE;03B3;03B3; # (
|
533
|
-
1D78A;1D78A;1D78A;03B5;03B5; # (
|
534
|
-
1D7F3;1D7F3;1D7F3;0037;0037; # (
|
535
|
-
1D778;1D778;1D778;03B9;03B9; # (
|
536
|
-
1D512;1D512;1D512;004F;004F; # (
|
537
|
-
1EE27;1EE27;1EE27;062D;062D; # (
|
538
|
-
1EE89;1EE89;1EE89;064A;064A; # (
|
539
|
-
1EE08;1EE08;1EE08;0637;0637; # (
|
540
|
-
1EEB0;1EEB0;1EEB0;0641;0641; # (
|
541
|
-
1EEBA;1EEBA;1EEBA;0638;0638; # (
|
542
|
-
1EE11;1EE11;1EE11;0635;0635; # (
|
543
|
-
1EE98;1EE98;1EE98;0630;0630; # (
|
544
|
-
1EEA2;1EEA2;1EEA2;062C;062C; # (
|
545
|
-
1EE4D;1EE4D;1EE4D;0646;0646; # (
|
546
|
-
1EE4F;1EE4F;1EE4F;0639;0639; # (
|
547
|
-
1F132;1F132;1F132;0043;0043; # (
|
548
|
-
1F13E;1F13E;1F13E;004F;004F; # (
|
549
|
-
1F11C;1F11C;1F11C;0028 004D 0029;0028 004D 0029; # (
|
550
|
-
1F102;1F102;1F102;0031 002C;0031 002C; # (
|
551
|
-
1F16B;1F16B;1F16B;004D 0044;004D 0044; # (
|
552
|
-
1F11D;1F11D;1F11D;0028 004E 0029;0028 004E 0029; # (
|
553
|
-
1F146;1F146;1F146;0057;0057; # (
|
554
|
-
1F107;1F107;1F107;0036 002C;0036 002C; # (
|
555
|
-
1F145;1F145;1F145;0056;0056; # (
|
556
|
-
1F112;1F112;1F112;0028 0043 0029;0028 0043 0029; # (
|
557
|
-
1F231;1F231;1F231;6253;6253; # (
|
558
|
-
1F22B;1F22B;1F22B;904A;904A; # (
|
559
|
-
1F243;1F243;1F243;3014 5B89 3015;3014 5B89 3015; # (
|
560
|
-
1F234;1F234;1F234;5408;5408; # (
|
561
|
-
1F238;1F238;1F238;7533;7533; # (
|
562
|
-
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015; # (
|
563
|
-
1F217;1F217;1F217;5929;5929; # (
|
564
|
-
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015; # (
|
565
|
-
1F224;1F224;1F224;58F0;58F0; # (
|
566
|
-
1F213;1F213;1F213;30C7;30C6 3099; # (
|
567
|
-
2F984;440B;440B;440B;440B; # (
|
568
|
-
2F9A4;26C36;26C36;26C36;26C36; # (
|
569
|
-
2F910;23F5E;23F5E;23F5E;23F5E; # (
|
570
|
-
2F9F7;2921A;2921A;2921A;2921A; # (
|
571
|
-
2F97F;8070;8070;8070;8070; # (
|
572
|
-
2F954;2569A;2569A;2569A;2569A; # (
|
573
|
-
2F96C;7D63;7D63;7D63;7D63; # (
|
574
|
-
2FA1B;9F16;9F16;9F16;9F16; # (
|
575
|
-
2F92D;3EB8;3EB8;3EB8;3EB8; # (
|
576
|
-
2F9C7;88DE;88DE;88DE;88DE; # (
|
512
|
+
110AB;110AB;110A5 110BA;110AB;110A5 110BA; # (Ⴋ; Ⴋ; Ⴅ◌Ⴚ; Ⴋ; Ⴅ◌Ⴚ; ) KAITHI LETTER VA
|
513
|
+
1109C;1109C;1109B 110BA;1109C;1109B 110BA; # (ႜ; ႜ; ႛ◌Ⴚ; ႜ; ႛ◌Ⴚ; ) KAITHI LETTER RHA
|
514
|
+
1109A;1109A;11099 110BA;1109A;11099 110BA; # (ႚ; ႚ; ႙◌Ⴚ; ႚ; ႙◌Ⴚ; ) KAITHI LETTER DDDHA
|
515
|
+
1112F;1112F;11132 11127;1112F;11132 11127; # (◌ᄯ; ◌ᄯ; ◌ᄲ◌ᄧ; ◌ᄯ; ◌ᄲ◌ᄧ; ) CHAKMA VOWEL SIGN AU
|
516
|
+
1112E;1112E;11131 11127;1112E;11131 11127; # (◌ᄮ; ◌ᄮ; ◌ᄱ◌ᄧ; ◌ᄮ; ◌ᄱ◌ᄧ; ) CHAKMA VOWEL SIGN O
|
517
|
+
1D15F;1D158 1D165;1D158 1D165;1D158 1D165;1D158 1D165; # (텟; 텘텥; 텘텥; 텘텥; 텘텥; ) MUSICAL SYMBOL QUARTER NOTE
|
518
|
+
1D161;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F;1D158 1D165 1D16F; # (텡; 텘텥텯; 텘텥텯; 텘텥텯; 텘텥텯; ) MUSICAL SYMBOL SIXTEENTH NOTE
|
519
|
+
1D1BB;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165;1D1B9 1D165; # (톻; 톹텥; 톹텥; 톹텥; 톹텥; ) MUSICAL SYMBOL MINIMA
|
520
|
+
1D160;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E; # (텠; 텘텥텮; 텘텥텮; 텘텥텮; 텘텥텮; ) MUSICAL SYMBOL EIGHTH NOTE
|
521
|
+
1D162;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170;1D158 1D165 1D170; # (텢; 텘텥텰; 텘텥텰; 텘텥텰; 텘텥텰; ) MUSICAL SYMBOL THIRTY-SECOND NOTE
|
522
|
+
1D163;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171;1D158 1D165 1D171; # (텣; 텘텥텱; 텘텥텱; 텘텥텱; 텘텥텱; ) MUSICAL SYMBOL SIXTY-FOURTH NOTE
|
523
|
+
1D1BC;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165;1D1BA 1D165; # (톼; 톺텥; 톺텥; 톺텥; 톺텥; ) MUSICAL SYMBOL MINIMA BLACK
|
524
|
+
1D15E;1D157 1D165;1D157 1D165;1D157 1D165;1D157 1D165; # (텞; 텗텥; 텗텥; 텗텥; 텗텥; ) MUSICAL SYMBOL HALF NOTE
|
525
|
+
1D1BE;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E;1D1BA 1D165 1D16E; # (톾; 톺텥텮; 톺텥텮; 톺텥텮; 톺텥텮; ) MUSICAL SYMBOL SEMIMINIMA BLACK
|
526
|
+
1D164;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172;1D158 1D165 1D172; # (텤; 텘텥텲; 텘텥텲; 텘텥텲; 텘텥텲; ) MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
527
|
+
1D50D;1D50D;1D50D;004A;004A; # (픍; 픍; 픍; J; J; ) MATHEMATICAL FRAKTUR CAPITAL J
|
528
|
+
1D538;1D538;1D538;0041;0041; # (픸; 픸; 픸; A; A; ) MATHEMATICAL DOUBLE-STRUCK CAPITAL A
|
529
|
+
1D65B;1D65B;1D65B;0066;0066; # (홛; 홛; 홛; f; f; ) MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL F
|
530
|
+
1D73E;1D73E;1D73E;03B9;03B9; # (휾; 휾; 휾; ι; ι; ) MATHEMATICAL BOLD ITALIC SMALL IOTA
|
531
|
+
1D57F;1D57F;1D57F;0054;0054; # (핿; 핿; 핿; T; T; ) MATHEMATICAL BOLD FRAKTUR CAPITAL T
|
532
|
+
1D6FE;1D6FE;1D6FE;03B3;03B3; # (훾; 훾; 훾; γ; γ; ) MATHEMATICAL ITALIC SMALL GAMMA
|
533
|
+
1D78A;1D78A;1D78A;03B5;03B5; # (힊; 힊; 힊; ε; ε; ) MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL
|
534
|
+
1D7F3;1D7F3;1D7F3;0037;0037; # (ퟳ; ퟳ; ퟳ; 7; 7; ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN
|
535
|
+
1D778;1D778;1D778;03B9;03B9; # (흸; 흸; 흸; ι; ι; ) MATHEMATICAL SANS-SERIF BOLD SMALL IOTA
|
536
|
+
1D512;1D512;1D512;004F;004F; # (픒; 픒; 픒; O; O; ) MATHEMATICAL FRAKTUR CAPITAL O
|
537
|
+
1EE27;1EE27;1EE27;062D;062D; # (; ; ; ح; ح; ) ARABIC MATHEMATICAL INITIAL HAH
|
538
|
+
1EE89;1EE89;1EE89;064A;064A; # (; ; ; ي; ي; ) ARABIC MATHEMATICAL LOOPED YEH
|
539
|
+
1EE08;1EE08;1EE08;0637;0637; # (; ; ; ط; ط; ) ARABIC MATHEMATICAL TAH
|
540
|
+
1EEB0;1EEB0;1EEB0;0641;0641; # (; ; ; ف; ف; ) ARABIC MATHEMATICAL DOUBLE-STRUCK FEH
|
541
|
+
1EEBA;1EEBA;1EEBA;0638;0638; # (; ; ; ظ; ظ; ) ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH
|
542
|
+
1EE11;1EE11;1EE11;0635;0635; # (; ; ; ص; ص; ) ARABIC MATHEMATICAL SAD
|
543
|
+
1EE98;1EE98;1EE98;0630;0630; # (; ; ; ذ; ذ; ) ARABIC MATHEMATICAL LOOPED THAL
|
544
|
+
1EEA2;1EEA2;1EEA2;062C;062C; # (; ; ; ج; ج; ) ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM
|
545
|
+
1EE4D;1EE4D;1EE4D;0646;0646; # (; ; ; ن; ن; ) ARABIC MATHEMATICAL TAILED NOON
|
546
|
+
1EE4F;1EE4F;1EE4F;0639;0639; # (; ; ; ع; ع; ) ARABIC MATHEMATICAL TAILED AIN
|
547
|
+
1F132;1F132;1F132;0043;0043; # (; ; ; C; C; ) SQUARED LATIN CAPITAL LETTER C
|
548
|
+
1F13E;1F13E;1F13E;004F;004F; # (; ; ; O; O; ) SQUARED LATIN CAPITAL LETTER O
|
549
|
+
1F11C;1F11C;1F11C;0028 004D 0029;0028 004D 0029; # (; ; ; (M); (M); ) PARENTHESIZED LATIN CAPITAL LETTER M
|
550
|
+
1F102;1F102;1F102;0031 002C;0031 002C; # (; ; ; 1,; 1,; ) DIGIT ONE COMMA
|
551
|
+
1F16B;1F16B;1F16B;004D 0044;004D 0044; # (; ; ; MD; MD; ) RAISED MD SIGN
|
552
|
+
1F11D;1F11D;1F11D;0028 004E 0029;0028 004E 0029; # (; ; ; (N); (N); ) PARENTHESIZED LATIN CAPITAL LETTER N
|
553
|
+
1F146;1F146;1F146;0057;0057; # (; ; ; W; W; ) SQUARED LATIN CAPITAL LETTER W
|
554
|
+
1F107;1F107;1F107;0036 002C;0036 002C; # (; ; ; 6,; 6,; ) DIGIT SIX COMMA
|
555
|
+
1F145;1F145;1F145;0056;0056; # (; ; ; V; V; ) SQUARED LATIN CAPITAL LETTER V
|
556
|
+
1F112;1F112;1F112;0028 0043 0029;0028 0043 0029; # (; ; ; (C); (C); ) PARENTHESIZED LATIN CAPITAL LETTER C
|
557
|
+
1F231;1F231;1F231;6253;6253; # (; ; ; 打; 打; ) SQUARED CJK UNIFIED IDEOGRAPH-6253
|
558
|
+
1F22B;1F22B;1F22B;904A;904A; # (; ; ; 遊; 遊; ) SQUARED CJK UNIFIED IDEOGRAPH-904A
|
559
|
+
1F243;1F243;1F243;3014 5B89 3015;3014 5B89 3015; # (; ; ; 〔安〕; 〔安〕; ) TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-5B89
|
560
|
+
1F234;1F234;1F234;5408;5408; # (; ; ; 合; 合; ) SQUARED CJK UNIFIED IDEOGRAPH-5408
|
561
|
+
1F238;1F238;1F238;7533;7533; # (; ; ; 申; 申; ) SQUARED CJK UNIFIED IDEOGRAPH-7533
|
562
|
+
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015; # (; ; ; 〔勝〕; 〔勝〕; ) TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-52DD
|
563
|
+
1F217;1F217;1F217;5929;5929; # (; ; ; 天; 天; ) SQUARED CJK UNIFIED IDEOGRAPH-5929
|
564
|
+
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015; # (; ; ; 〔敗〕; 〔敗〕; ) TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
565
|
+
1F224;1F224;1F224;58F0;58F0; # (; ; ; 声; 声; ) SQUARED CJK UNIFIED IDEOGRAPH-58F0
|
566
|
+
1F213;1F213;1F213;30C7;30C6 3099; # (; ; ; デ; テ◌゙; ) SQUARED KATAKANA DE
|
567
|
+
2F984;440B;440B;440B;440B; # (濾; 䐋; 䐋; 䐋; 䐋; ) CJK COMPATIBILITY IDEOGRAPH-2F984
|
568
|
+
2F9A4;26C36;26C36;26C36;26C36; # (捻; 氶; 氶; 氶; 氶; ) CJK COMPATIBILITY IDEOGRAPH-2F9A4
|
569
|
+
2F910;23F5E;23F5E;23F5E;23F5E; # (蘿; 㽞; 㽞; 㽞; 㽞; ) CJK COMPATIBILITY IDEOGRAPH-2F910
|
570
|
+
2F9F7;2921A;2921A;2921A;2921A; # (立; 鈚; 鈚; 鈚; 鈚; ) CJK COMPATIBILITY IDEOGRAPH-2F9F7
|
571
|
+
2F97F;8070;8070;8070;8070; # (勵; 聰; 聰; 聰; 聰; ) CJK COMPATIBILITY IDEOGRAPH-2F97F
|
572
|
+
2F954;2569A;2569A;2569A;2569A; # (凜; 嚚; 嚚; 嚚; 嚚; ) CJK COMPATIBILITY IDEOGRAPH-2F954
|
573
|
+
2F96C;7D63;7D63;7D63;7D63; # (塞; 絣; 絣; 絣; 絣; ) CJK COMPATIBILITY IDEOGRAPH-2F96C
|
574
|
+
2FA1B;9F16;9F16;9F16;9F16; # (福; 鼖; 鼖; 鼖; 鼖; ) CJK COMPATIBILITY IDEOGRAPH-2FA1B
|
575
|
+
2F92D;3EB8;3EB8;3EB8;3EB8; # (來; 㺸; 㺸; 㺸; 㺸; ) CJK COMPATIBILITY IDEOGRAPH-2F92D
|
576
|
+
2F9C7;88DE;88DE;88DE;88DE; # (劉; 裞; 裞; 裞; 裞; ) CJK COMPATIBILITY IDEOGRAPH-2F9C7
|
577
577
|
#
|
578
578
|
@Part2 # Canonical Order Test
|
579
579
|
#
|
@@ -582,7 +582,7 @@ FFA5;FFA5;FFA5;11AC;11AC; # (ᆬ; ᆬ; ᆬ; ᆬ; ᆬ; ) HALFWIDTH HANGUL LETTER
|
|
582
582
|
0061 1DC5 0315 0300 05AE 0062;0061 05AE 1DC5 0300 0315 0062;0061 05AE 1DC5 0300 0315 0062;0061 05AE 1DC5 0300 0315 0062;0061 05AE 1DC5 0300 0315 0062; # (a◌᷅◌̕◌̀◌֮b; a◌֮◌᷅◌̀◌̕b; a◌֮◌᷅◌̀◌̕b; a◌֮◌᷅◌̀◌̕b; a◌֮◌᷅◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GRAVE-MACRON, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
|
583
583
|
0061 FE20 0315 0300 05AE 0062;0061 05AE FE20 0300 0315 0062;0061 05AE FE20 0300 0315 0062;0061 05AE FE20 0300 0315 0062;0061 05AE FE20 0300 0315 0062; # (a◌︠◌̕◌̀◌֮b; a◌֮◌︠◌̀◌̕b; a◌֮◌︠◌̀◌̕b; a◌֮◌︠◌̀◌̕b; a◌֮◌︠◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LIGATURE LEFT HALF, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
|
584
584
|
0061 0315 0300 05AE 0658 0062;00E0 05AE 0658 0315 0062;0061 05AE 0300 0658 0315 0062;00E0 05AE 0658 0315 0062;0061 05AE 0300 0658 0315 0062; # (a◌̕◌̀◌֮◌٘b; à◌֮◌٘◌̕b; a◌֮◌̀◌٘◌̕b; à◌֮◌٘◌̕b; a◌֮◌̀◌٘◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC MARK NOON GHUNNA, LATIN SMALL LETTER B
|
585
|
-
0061 1D182 059A 0316 302A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062; # (a
|
585
|
+
0061 1D182 059A 0316 302A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062;0061 302A 1D182 0316 059A 0062; # (a◌톂◌֚◌̖◌〪b; a◌〪◌톂◌̖◌֚b; a◌〪◌톂◌̖◌֚b; a◌〪◌톂◌̖◌֚b; a◌〪◌톂◌̖◌֚b; ) LATIN SMALL LETTER A, MUSICAL SYMBOL COMBINING LOURE, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, IDEOGRAPHIC LEVEL TONE MARK, LATIN SMALL LETTER B
|
586
586
|
0061 0315 0300 05AE 1DFE 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062; # (a◌̕◌̀◌֮◌᷾b; à◌֮◌᷾◌̕b; a◌֮◌̀◌᷾◌̕b; à◌֮◌᷾◌̕b; a◌֮◌̀◌᷾◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT ARROWHEAD ABOVE, LATIN SMALL LETTER B
|
587
587
|
0061 0315 0300 05AE A679 0062;00E0 05AE A679 0315 0062;0061 05AE 0300 A679 0315 0062;00E0 05AE A679 0315 0062;0061 05AE 0300 A679 0315 0062; # (a◌̕◌̀◌֮◌ꙹb; à◌֮◌ꙹ◌̕b; a◌֮◌̀◌ꙹ◌̕b; à◌֮◌ꙹ◌̕b; a◌֮◌̀◌ꙹ◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING CYRILLIC LETTER YERU, LATIN SMALL LETTER B
|
588
588
|
0061 0315 0300 05AE 0F87 0062;00E0 05AE 0F87 0315 0062;0061 05AE 0300 0F87 0315 0062;00E0 05AE 0F87 0315 0062;0061 05AE 0300 0F87 0315 0062; # (a◌̕◌̀◌֮◌྇b; à◌֮◌྇◌̕b; a◌֮◌̀◌྇◌̕b; à◌֮◌྇◌̕b; a◌֮◌̀◌྇◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TIBETAN SIGN YANG RTAGS, LATIN SMALL LETTER B
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Normalizers
|
9
|
+
|
10
|
+
describe Base do
|
11
|
+
describe "#combining_class_for" do
|
12
|
+
it "returns the correct combining class for select code points" do
|
13
|
+
Base.combining_class_for("0303").should == 230 # combining tilde
|
14
|
+
Base.combining_class_for("006E").should == 0 # latin letter n
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -38,6 +38,16 @@ describe 'Unicode Normalization Algorithms' do
|
|
38
38
|
it_behaves_like 'a normalization algorithm'
|
39
39
|
end
|
40
40
|
|
41
|
+
describe NFC do
|
42
|
+
let(:invariants) { { 2 => [1, 2, 3], 4 => [4, 5] } }
|
43
|
+
it_behaves_like 'a normalization algorithm'
|
44
|
+
end
|
45
|
+
|
46
|
+
describe NFKC do
|
47
|
+
let(:invariants) { { 4 => [1, 2, 3, 4, 5] } }
|
48
|
+
it_behaves_like 'a normalization algorithm'
|
49
|
+
end
|
50
|
+
|
41
51
|
# Runs standard Unicode normalization tests from `file_path` for a given `normalizer`. Expected invariants are
|
42
52
|
# specified via `invariants` hash.
|
43
53
|
#
|
data/spec/readme_spec.rb
CHANGED
@@ -74,6 +74,31 @@ describe "README" do
|
|
74
74
|
dt.to_short_s.should == "12/12/11 21:44"
|
75
75
|
end
|
76
76
|
|
77
|
+
it "verifies relative time spans" do
|
78
|
+
(DateTime.now - 1).localize.ago.should == "1 day ago"
|
79
|
+
(DateTime.now - 0.5).localize.ago.should == "12 hours ago" # (i.e. half a day)
|
80
|
+
|
81
|
+
(DateTime.now + 1).localize.until.should == "In 1 day"
|
82
|
+
(DateTime.now + 0.5).localize.until.should == "In 12 hours"
|
83
|
+
|
84
|
+
(DateTime.now - 1).localize(:de).ago.should == "Vor 1 Tag"
|
85
|
+
(DateTime.now + 1).localize(:de).until.should == "In 1 Tag"
|
86
|
+
|
87
|
+
(DateTime.now - 1).localize(:de).ago(:unit => :hour).should == "Vor 24 Stunden"
|
88
|
+
(DateTime.now + 1).localize(:de).until(:unit => :hour).should == "In 24 Stunden"
|
89
|
+
|
90
|
+
# 86400 = 1 day in seconds, 259200 = 3 days in seconds
|
91
|
+
(Time.now + 86400).localize(:de).ago(:unit => :hour, :base_time => (Time.now + 259200)).should == "Vor 48 Stunden"
|
92
|
+
|
93
|
+
ts = TwitterCldr::LocalizedTimespan.new(86400, :de)
|
94
|
+
ts.to_s.should == "In 1 Tag"
|
95
|
+
ts.to_s(:hour).should == "In 24 Stunden"
|
96
|
+
|
97
|
+
ts = TwitterCldr::LocalizedTimespan.new(-86400, :de)
|
98
|
+
ts.to_s.should == "Vor 1 Tag"
|
99
|
+
ts.to_s(:hour).should == "Vor 24 Stunden"
|
100
|
+
end
|
101
|
+
|
77
102
|
it "verifies plural rules" do
|
78
103
|
1.localize(:ru).plural_rule.should == :one
|
79
104
|
2.localize(:ru).plural_rule.should == :few
|
@@ -149,7 +174,7 @@ describe "README" do
|
|
149
174
|
end
|
150
175
|
|
151
176
|
it "verifies code point conversions" do
|
152
|
-
code_point = TwitterCldr::Shared::
|
177
|
+
code_point = TwitterCldr::Shared::CodePoint.for_hex("1F3E9")
|
153
178
|
code_point.name.should == "LOVE HOTEL"
|
154
179
|
code_point.bidi_mirrored.should == "N"
|
155
180
|
code_point.category.should == "So"
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Shared
|
9
|
+
|
10
|
+
describe CodePoint do
|
11
|
+
describe "#for_hex" do
|
12
|
+
it "should retrieve information for any valid code point" do
|
13
|
+
data = CodePoint.for_hex('0301')
|
14
|
+
data.should be_a(CodePoint)
|
15
|
+
data.values.length.should == 15
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should return nil for invalid code points" do
|
19
|
+
CodePoint.for_hex('abcd').should be_nil
|
20
|
+
CodePoint.for_hex('FFFFFFF').should be_nil
|
21
|
+
CodePoint.for_hex('uytukhil123').should be_nil
|
22
|
+
end
|
23
|
+
|
24
|
+
it "fetches valid information for the specified code point" do
|
25
|
+
test_data = {
|
26
|
+
'17D1' => ['17D1','KHMER SIGN VIRIAM','Mn','0','NSM',"","","","",'N',"","","","",""],
|
27
|
+
'FE91' => ['FE91','ARABIC LETTER BEH INITIAL FORM','Lo','0','AL','<initial> 0628',"","","",'N','GLYPH FOR INITIAL ARABIC BAA',"","","",""],
|
28
|
+
'24B5' => ['24B5','PARENTHESIZED LATIN SMALL LETTER Z','So','0','L','<compat> 0028 007A 0029',"","","",'N',"","","","",""],
|
29
|
+
'2128' => ['2128','BLACK-LETTER CAPITAL Z','Lu','0','L','<font> 005A',"","","",'N','BLACK-LETTER Z',"","","",""],
|
30
|
+
'1F241'=> ['1F241','TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09','So','0','L','<compat> 3014 4E09 3015',"","","",'N',"","","","",""]
|
31
|
+
}
|
32
|
+
test_data.each_pair do |code_point, data|
|
33
|
+
cp_data = CodePoint.for_hex(code_point)
|
34
|
+
cp_data.code_point.should == data[0]
|
35
|
+
cp_data.name.should == data[1]
|
36
|
+
cp_data.category.should == data[2]
|
37
|
+
cp_data.combining_class.should == data[3]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it "fetches valid information for a code point within a range" do
|
42
|
+
test_data = {
|
43
|
+
'4E11' => ["4E11","<CJK Ideograph>","Lo","0","L","","","","","N","","","","",""],
|
44
|
+
'AC55' => ["AC55","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
|
45
|
+
'D7A1' => ["D7A1","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
|
46
|
+
'DAAA' => ["DAAA","<Non Private Use High Surrogate>","Cs","0","L","","","","","N","","","","",""],
|
47
|
+
'F8FE' => ["F8FE","<Private Use>","Co","0","L","","","","","N","","","","",""]
|
48
|
+
}
|
49
|
+
|
50
|
+
test_data.each_pair do |code_point, data|
|
51
|
+
cp_data = CodePoint.for_hex(code_point)
|
52
|
+
cp_data.code_point.should == data[0]
|
53
|
+
cp_data.name.should == data[1]
|
54
|
+
cp_data.category.should == data[2]
|
55
|
+
cp_data.combining_class.should == data[3]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe "#for_decomposition" do
|
61
|
+
let(:decomp_map) { { :"YYYY ZZZZ" => "0ABC" } }
|
62
|
+
|
63
|
+
before(:each) do
|
64
|
+
# clear the decomposition map after each test so mocks/stubs work
|
65
|
+
CodePoint.instance_variable_set(:@decomposition_map, nil)
|
66
|
+
stub(CodePoint).for_hex { |code_point| "I'm code point #{code_point}" }
|
67
|
+
end
|
68
|
+
|
69
|
+
after(:each) do
|
70
|
+
# clear the decomposition map after each test so mocks/stubs work
|
71
|
+
CodePoint.instance_variable_set(:@decomposition_map, nil)
|
72
|
+
end
|
73
|
+
|
74
|
+
context "with a stubbed decomposition map" do
|
75
|
+
before(:each) do
|
76
|
+
stub(TwitterCldr).get_resource(:unicode_data, :decomposition_map) { decomp_map }
|
77
|
+
end
|
78
|
+
|
79
|
+
it "should return a code point with the correct value" do
|
80
|
+
CodePoint.for_decomposition(["YYYY", "ZZZZ"]).should == "I'm code point 0ABC"
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should return nil if no decomposition mapping exists" do
|
84
|
+
CodePoint.for_decomposition(["NO"]).should be_nil
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should cache the decomposition map" do
|
89
|
+
mock(TwitterCldr).get_resource(:unicode_data, :decomposition_map) { decomp_map }.once
|
90
|
+
CodePoint.for_decomposition(["NO"]).should be_nil
|
91
|
+
CodePoint.for_decomposition(["NO"]).should be_nil
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
describe "#hangul_type" do
|
96
|
+
before(:each) do
|
97
|
+
stub(CodePoint).hangul_blocks { { :lparts => [1..10],
|
98
|
+
:vparts => [21..30],
|
99
|
+
:tparts => [41..50],
|
100
|
+
:compositions => [1..30],
|
101
|
+
:decompositions => [31..50] } }
|
102
|
+
end
|
103
|
+
|
104
|
+
it "returns nil if not part of a hangul block" do
|
105
|
+
CodePoint.hangul_type(100.to_s(16)).should == nil
|
106
|
+
end
|
107
|
+
|
108
|
+
it "returns the correct part (i.e. lpart, vpart, or tpart) before composition or decomposition" do
|
109
|
+
CodePoint.hangul_type(5.to_s(16)).should == :lparts
|
110
|
+
CodePoint.hangul_type(30.to_s(16)).should == :vparts
|
111
|
+
CodePoint.hangul_type(41.to_s(16)).should == :tparts
|
112
|
+
end
|
113
|
+
|
114
|
+
it "returns composition or decomposition if no part can be found" do
|
115
|
+
CodePoint.hangul_type(11.to_s(16)).should == :compositions
|
116
|
+
CodePoint.hangul_type(40.to_s(16)).should == :decompositions
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "#excluded_from_composition?" do
|
121
|
+
it "excludes anything in the list of ranges" do
|
122
|
+
stub(CodePoint).composition_exclusions { [10..10, 13..14, 20..30] }
|
123
|
+
CodePoint.excluded_from_composition?(10.to_s(16)).should be_true
|
124
|
+
CodePoint.excluded_from_composition?(13.to_s(16)).should be_true
|
125
|
+
CodePoint.excluded_from_composition?(14.to_s(16)).should be_true
|
126
|
+
CodePoint.excluded_from_composition?(15.to_s(16)).should be_false
|
127
|
+
CodePoint.excluded_from_composition?(19.to_s(16)).should be_false
|
128
|
+
CodePoint.excluded_from_composition?(100.to_s(16)).should be_false
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
describe "#get_block" do
|
133
|
+
it "finds the block that corresponds to the code point" do
|
134
|
+
stub(TwitterCldr).get_resource(:unicode_data, :blocks) { [[:klingon, 122..307], [:hirogen, 1337..2200]] }
|
135
|
+
CodePoint.send(:get_block, 200.to_s(16)).should == [:klingon, 122..307]
|
136
|
+
CodePoint.send(:get_block, 2199.to_s(16)).should == [:hirogen, 1337..2200]
|
137
|
+
CodePoint.send(:get_block, 100.to_s(16)).should be_nil
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
describe "#get_range_start" do
|
142
|
+
it "returns the data for a non-explicit range" do
|
143
|
+
block_data = { "0" => ["1337", "<CJK Ideograph Extension A, First>"] }
|
144
|
+
CodePoint.send(:get_range_start, "ABC", block_data).should == ["ABC", "<CJK Ideograph Extension A>"]
|
145
|
+
end
|
146
|
+
|
147
|
+
it "returns nil if the block data doesn't contain a non-explicit range" do
|
148
|
+
block_data = { "0" => ["1337", "<CJK Ideograph Extension A>"] }
|
149
|
+
CodePoint.send(:get_range_start, "ABC", block_data).should == nil
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
@@ -147,16 +147,6 @@ describe Base do
|
|
147
147
|
end
|
148
148
|
end
|
149
149
|
|
150
|
-
describe "#compute_cache_key" do
|
151
|
-
it "returns a ruby hash of all the pieces concatenated with pipe characters" do
|
152
|
-
@base.send(:compute_cache_key, "space", "the", "final", "frontier").should == "space|the|final|frontier".hash
|
153
|
-
end
|
154
|
-
|
155
|
-
it "returns zero if no arguments are passed" do
|
156
|
-
@base.send(:compute_cache_key).should == 0
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
150
|
describe "#traverse" do
|
161
151
|
before(:each) do
|
162
152
|
@tree = { :admiral => { :captain => { :commander => { :lieutenant => "Found Me!" } } } }
|
File without changes
|
data/spec/utils_spec.rb
CHANGED
@@ -29,4 +29,14 @@ describe TwitterCldr::Utils do
|
|
29
29
|
end
|
30
30
|
|
31
31
|
end
|
32
|
+
|
33
|
+
describe "#compute_cache_key" do
|
34
|
+
it "returns a ruby hash of all the pieces concatenated with pipe characters" do
|
35
|
+
TwitterCldr::Utils.compute_cache_key("space", "the", "final", "frontier").should == "space|the|final|frontier".hash
|
36
|
+
end
|
37
|
+
|
38
|
+
it "returns zero if no arguments are passed" do
|
39
|
+
TwitterCldr::Utils.compute_cache_key.should == 0
|
40
|
+
end
|
41
|
+
end
|
32
42
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_cldr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 1.
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 1.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Cameron Dutro
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-06-
|
18
|
+
date: 2012-06-15 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: json
|
@@ -105,7 +105,7 @@ dependencies:
|
|
105
105
|
requirements:
|
106
106
|
- - ~>
|
107
107
|
- !ruby/object:Gem::Version
|
108
|
-
hash:
|
108
|
+
hash: 3424229193
|
109
109
|
segments:
|
110
110
|
- 0
|
111
111
|
- 9
|
@@ -221,16 +221,19 @@ files:
|
|
221
221
|
- lib/twitter_cldr/formatters/plurals/rules.rb
|
222
222
|
- lib/twitter_cldr/formatters/plurals.rb
|
223
223
|
- lib/twitter_cldr/formatters.rb
|
224
|
+
- lib/twitter_cldr/normalizers/base.rb
|
225
|
+
- lib/twitter_cldr/normalizers/nfc.rb
|
224
226
|
- lib/twitter_cldr/normalizers/nfd.rb
|
227
|
+
- lib/twitter_cldr/normalizers/nfkc.rb
|
225
228
|
- lib/twitter_cldr/normalizers/nfkd.rb
|
226
229
|
- lib/twitter_cldr/normalizers.rb
|
227
230
|
- lib/twitter_cldr/shared/calendar.rb
|
231
|
+
- lib/twitter_cldr/shared/code_point.rb
|
228
232
|
- lib/twitter_cldr/shared/currencies.rb
|
229
233
|
- lib/twitter_cldr/shared/languages.rb
|
230
234
|
- lib/twitter_cldr/shared/numbers.rb
|
231
235
|
- lib/twitter_cldr/shared/resources.rb
|
232
236
|
- lib/twitter_cldr/shared/timezones.rb
|
233
|
-
- lib/twitter_cldr/shared/unicode_data.rb
|
234
237
|
- lib/twitter_cldr/shared.rb
|
235
238
|
- lib/twitter_cldr/tokenizers/base.rb
|
236
239
|
- lib/twitter_cldr/tokenizers/calendars/date_tokenizer.rb
|
@@ -267,17 +270,17 @@ files:
|
|
267
270
|
- spec/formatters/numbers/percent_formatter_spec.rb
|
268
271
|
- spec/formatters/plurals/plural_formatter_spec.rb
|
269
272
|
- spec/formatters/plurals/rules_spec.rb
|
270
|
-
- spec/normalizers/
|
273
|
+
- spec/normalizers/base_spec.rb
|
271
274
|
- spec/normalizers/normalization_spec.rb
|
272
275
|
- spec/normalizers/NormalizationTest.txt
|
273
276
|
- spec/normalizers/NormalizationTestShort.txt
|
274
277
|
- spec/readme_spec.rb
|
275
278
|
- spec/shared/calendar_spec.rb
|
279
|
+
- spec/shared/code_point_spec.rb
|
276
280
|
- spec/shared/currencies_spec.rb
|
277
281
|
- spec/shared/languages_spec.rb
|
278
282
|
- spec/shared/numbers_spec.rb
|
279
283
|
- spec/shared/resources_spec.rb
|
280
|
-
- spec/shared/unicode_data_spec.rb
|
281
284
|
- spec/spec_helper.rb
|
282
285
|
- spec/tokenizers/base_spec.rb
|
283
286
|
- spec/tokenizers/calendars/date_tokenizer_spec.rb
|
@@ -288,7 +291,7 @@ files:
|
|
288
291
|
- spec/tokenizers/numbers/number_tokenizer_spec.rb
|
289
292
|
- spec/tokenizers/token_spec.rb
|
290
293
|
- spec/twitter_cldr_spec.rb
|
291
|
-
- spec/utils/
|
294
|
+
- spec/utils/code_points_spec.rb
|
292
295
|
- spec/utils/interpolation_spec.rb
|
293
296
|
- spec/utils_spec.rb
|
294
297
|
- resources/locales/ar/calendars.yml
|
@@ -453,6 +456,7 @@ files:
|
|
453
456
|
- resources/unicode_data/bengali.yml
|
454
457
|
- resources/unicode_data/block_elements.yml
|
455
458
|
- resources/unicode_data/blocks.yml
|
459
|
+
- resources/unicode_data/blocks_hangul.yml
|
456
460
|
- resources/unicode_data/bopomofo.yml
|
457
461
|
- resources/unicode_data/bopomofo_extended.yml
|
458
462
|
- resources/unicode_data/box_drawing.yml
|
@@ -482,6 +486,7 @@ files:
|
|
482
486
|
- resources/unicode_data/combining_diacritical_marks_supplement.yml
|
483
487
|
- resources/unicode_data/combining_half_marks.yml
|
484
488
|
- resources/unicode_data/common_indic_number_forms.yml
|
489
|
+
- resources/unicode_data/composition_exclusions.yml
|
485
490
|
- resources/unicode_data/control_pictures.yml
|
486
491
|
- resources/unicode_data/coptic.yml
|
487
492
|
- resources/unicode_data/counting_rod_numerals.yml
|
@@ -493,6 +498,7 @@ files:
|
|
493
498
|
- resources/unicode_data/cyrillic_extended_a.yml
|
494
499
|
- resources/unicode_data/cyrillic_extended_b.yml
|
495
500
|
- resources/unicode_data/cyrillic_supplement.yml
|
501
|
+
- resources/unicode_data/decomposition_map.yml
|
496
502
|
- resources/unicode_data/deseret.yml
|
497
503
|
- resources/unicode_data/devanagari.yml
|
498
504
|
- resources/unicode_data/devanagari_extended.yml
|