twitter_cldr 1.3.6 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,64 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- # Copyright 2012 Twitter, Inc
4
- # http://www.apache.org/licenses/LICENSE-2.0
5
-
6
module TwitterCldr
  module Shared
    # Looks up the attribute record of a single Unicode code point from the
    # :unicode_data resources exposed by TwitterCldr.get_resource.
    module UnicodeData

      # Accepts only strings made up entirely of hexadecimal digits.
      # String#to_i(16) on its own is lenient: it silently stops at the first
      # non-hex character (and yields 0 when there is none), so it cannot be
      # used to validate input.
      HEX_CODE_POINT = /\A[0-9A-Fa-f]+\z/

      class << self

        # Returns the Attributes record for +code_point+, or nil when nothing
        # is known about it.
        #
        # code_point - the code point as a string of hex digits, e.g. '0301'.
        #              Lookup in the block data is by the symbolized string as
        #              given, so letter case and zero padding must match the
        #              resource keys.
        #
        # Returns nil when the argument is not purely hexadecimal, when no
        # block range contains the value, or when the block has neither an
        # entry for the code point nor a range-start entry.
        def for_code_point(code_point)
          hex = code_point.to_s
          return nil unless hex =~ HEX_CODE_POINT

          # Convert once instead of once per block inside the search below.
          value = hex.to_i(16)
          blocks = TwitterCldr.get_resource(:unicode_data, :blocks)

          # Find the block whose range contains the code point.
          target = blocks.find { |_block_name, range| range.include?(value) }
          return nil unless target

          block_data = TwitterCldr.get_resource(:unicode_data, target.first)
          code_point_data = block_data.fetch(hex.to_sym) do |code_point_sym|
            get_range_start(code_point_sym, block_data)
          end
          Attributes.new(*code_point_data) if code_point_data
        end

        private

        # Check if block constitutes a range. The code point beginning a range
        # will have a name enclosed in <>, ending with 'First',
        # eg: <CJK Ideograph Extension A, First>
        # http://unicode.org/reports/tr44/#Code_Point_Ranges
        #
        # Returns a copy of the range-start record with its code point replaced
        # by +code_point+ and ', First' removed from its name, or nil when the
        # block is empty or its lowest entry does not start a range.
        #
        # NOTE(review): only the block's lowest entry is inspected; a matching
        # ', Last' entry is not consulted, so every missing key in such a block
        # is treated as a member of the range - confirm that is intended.
        def get_range_start(code_point, block_data)
          start_code_point = block_data.keys.min_by { |key| key.to_s.to_i(16) }
          start_data = block_data[start_code_point] if start_code_point
          return nil unless start_data && start_data[1] =~ /<.*, First>/

          # Work on a copy so the resource data itself is never modified.
          range_data = start_data.clone
          range_data[0] = code_point.to_s
          range_data[1] = range_data[1].sub(', First', '')
          range_data
        end

      end

      # Positional record for one code point. The member order has to match
      # the order of the values stored in the block data, because records are
      # built with Attributes.new(*values).
      # Presumably this mirrors the field order of UnicodeData.txt - confirm.
      Attributes = Struct.new(
        :code_point,
        :name,
        :category,
        :combining_class,
        :bidi_class,
        :decomposition,
        :digit_value,
        :non_decimal_digit_value,
        :numeric_value,
        :bidi_mirrored,
        :unicode1_name,
        :iso_comment,
        :simple_uppercase_map,
        :simple_lowercase_map,
        :simple_titlecase_map
      )

    end
  end
end
@@ -1,21 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- # Copyright 2012 Twitter, Inc
4
- # http://www.apache.org/licenses/LICENSE-2.0
5
-
6
- require 'spec_helper'
7
-
8
- include TwitterCldr::Normalizers
9
-
10
describe NFD do

  describe "#normalize" do
    # The expectations live inside an example so that they run as part of the
    # suite and are reported individually. Placed directly in the describe
    # body they would execute while the file is being loaded instead.
    it "returns these strings unchanged" do
      NFD.normalize("庠摪饢鼢豦樄澸脧鱵礩翜艰").should == "庠摪饢鼢豦樄澸脧鱵礩翜艰"
      NFD.normalize("䷙䷿").should == "䷙䷿"
      NFD.normalize("ᎿᎲᎪᏨᎨᏪᎧᎵᏥ").should == "ᎿᎲᎪᏨᎨᏪᎧᎵᏥ"
      NFD.normalize("ᆙᅓᆼᄋᇶ").should == "ᆙᅓᆼᄋᇶ"
      # NOTE(review): the line break inside the next two literals is part of
      # the string data; presumably it is a Unicode separator character rather
      # than a plain newline - confirm before reformatting.
      NFD.normalize("…‾⁋
⁒‒′‾⁖").should == "…‾⁋
⁒‒′‾⁖"
      NFD.normalize("ⶾⷕⶱⷀ").should == "ⶾⷕⶱⷀ"
    end
  end

end
@@ -1,51 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- # Copyright 2012 Twitter, Inc
4
- # http://www.apache.org/licenses/LICENSE-2.0
5
-
6
- require 'spec_helper'
7
-
8
- include TwitterCldr::Shared
9
-
10
describe UnicodeData do
  describe "#for_code_point" do
    # A known code point yields a Struct carrying all fifteen attributes.
    it "should retrieve information for any valid code point" do
      attributes = UnicodeData.for_code_point('0301')
      attributes.should be_a(Struct)
      attributes.length.should == 15
    end

    # None of these inputs resolves to a record.
    it "should return nil for invalid code points" do
      ['abcd', 'FFFFFFF', 'uytukhil123'].each do |invalid_input|
        UnicodeData.for_code_point(invalid_input).should be_nil
      end
    end

    # Code points that have their own entry in the block data.
    it "fetches valid information for the specified code point" do
      expected_records = {
        '17D1' => ['17D1','KHMER SIGN VIRIAM','Mn','0','NSM',"","","","",'N',"","","","",""],
        'FE91' => ['FE91','ARABIC LETTER BEH INITIAL FORM','Lo','0','AL','<initial> 0628',"","","",'N','GLYPH FOR INITIAL ARABIC BAA',"","","",""],
        '24B5' => ['24B5','PARENTHESIZED LATIN SMALL LETTER Z','So','0','L','<compat> 0028 007A 0029',"","","",'N',"","","","",""],
        '2128' => ['2128','BLACK-LETTER CAPITAL Z','Lu','0','L','<font> 005A',"","","",'N','BLACK-LETTER Z',"","","",""],
        '1F241'=> ['1F241','TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09','So','0','L','<compat> 3014 4E09 3015',"","","",'N',"","","","",""]
      }

      expected_records.each do |hex, fields|
        UnicodeData.for_code_point(hex).values.should == fields
      end
    end

    # Code points without their own entry, expected to take the record of the
    # range they fall in, with the requested code point filled in.
    it "fetches valid information for a code point within a range" do
      expected_records = {
        '4E11' => ["4E11","<CJK Ideograph>","Lo","0","L","","","","","N","","","","",""],
        'AC55' => ["AC55","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
        'D7A1' => ["D7A1","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
        'DAAA' => ["DAAA","<Non Private Use High Surrogate>","Cs","0","L","","","","","N","","","","",""],
        'F8FE' => ["F8FE","<Private Use>","Co","0","L","","","","","N","","","","",""]
      }

      expected_records.each do |hex, fields|
        UnicodeData.for_code_point(hex).values.should == fields
      end
    end
  end
end