twitter_cldr 5.1.0 → 5.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/Rakefile +5 -5
  3. data/lib/twitter_cldr.rb +1 -0
  4. data/lib/twitter_cldr/resources.rb +2 -8
  5. data/lib/twitter_cldr/resources/loader.rb +6 -4
  6. data/lib/twitter_cldr/resources/locales_resources_importer.rb +0 -1
  7. data/lib/twitter_cldr/resources/segment_rules_importer.rb +202 -0
  8. data/lib/twitter_cldr/resources/segment_tests_importer.rb +3 -1
  9. data/lib/twitter_cldr/segmentation.rb +10 -8
  10. data/lib/twitter_cldr/segmentation/break_iterator.rb +15 -11
  11. data/lib/twitter_cldr/segmentation/category_table.rb +56 -0
  12. data/lib/twitter_cldr/segmentation/cursor.rb +10 -5
  13. data/lib/twitter_cldr/segmentation/metadata.rb +20 -0
  14. data/lib/twitter_cldr/segmentation/null_suppressions.rb +18 -0
  15. data/lib/twitter_cldr/segmentation/rule_set.rb +23 -79
  16. data/lib/twitter_cldr/segmentation/state_machine.rb +125 -0
  17. data/lib/twitter_cldr/segmentation/state_table.rb +46 -0
  18. data/lib/twitter_cldr/segmentation/status_table.rb +30 -0
  19. data/lib/twitter_cldr/segmentation/suppressions.rb +79 -0
  20. data/lib/twitter_cldr/shared/caser.rb +1 -1
  21. data/lib/twitter_cldr/shared/locale.rb +6 -2
  22. data/lib/twitter_cldr/version.rb +1 -1
  23. data/resources/shared/segments/rules/el/sentence.yml +723 -0
  24. data/resources/shared/segments/rules/en-US-POSIX/word.yml +527 -0
  25. data/resources/shared/segments/rules/ja/line.yml +964 -0
  26. data/resources/shared/segments/rules/ja/word.yml +527 -0
  27. data/resources/shared/segments/rules/root/grapheme.yml +463 -0
  28. data/resources/shared/segments/rules/root/line.yml +964 -0
  29. data/resources/shared/segments/rules/root/sentence.yml +723 -0
  30. data/resources/shared/segments/rules/root/word.yml +527 -0
  31. data/resources/shared/segments/rules/zh-Hant/line.yml +964 -0
  32. data/resources/shared/segments/rules/zh/line.yml +964 -0
  33. data/resources/shared/segments/suppressions/de/sentence.yml +5 -0
  34. data/resources/shared/segments/suppressions/en/sentence.yml +5 -0
  35. data/resources/shared/segments/suppressions/es/sentence.yml +5 -0
  36. data/resources/shared/segments/suppressions/fr/sentence.yml +5 -0
  37. data/resources/shared/segments/suppressions/it/sentence.yml +5 -0
  38. data/resources/shared/segments/suppressions/pt/sentence.yml +5 -0
  39. data/resources/shared/segments/suppressions/ru/sentence.yml +5 -0
  40. data/resources/shared/segments/tests/grapheme_break_test.yml +603 -0
  41. data/resources/shared/segments/tests/line_break_test.yml +7348 -0
  42. data/resources/uli/segments/de.yml +5 -230
  43. data/resources/uli/segments/en.yml +3 -154
  44. data/resources/uli/segments/es.yml +5 -145
  45. data/resources/uli/segments/fr.yml +5 -68
  46. data/resources/uli/segments/it.yml +3 -48
  47. data/resources/uli/segments/pt.yml +5 -173
  48. data/resources/uli/segments/ru.yml +3 -10
  49. data/spec/segmentation/rule_set_spec.rb +54 -27
  50. metadata +29 -9
  51. data/lib/twitter_cldr/resources/uli.rb +0 -12
  52. data/lib/twitter_cldr/resources/uli/segment_exceptions_importer.rb +0 -59
  53. data/lib/twitter_cldr/segmentation/parser.rb +0 -71
  54. data/lib/twitter_cldr/segmentation/rule.rb +0 -79
  55. data/lib/twitter_cldr/segmentation/rule_set_builder.rb +0 -142
  56. data/resources/shared/segments/segments_root.yml +0 -869
  57. data/spec/segmentation/parser_spec.rb +0 -104
@@ -1,104 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- # Copyright 2012 Twitter, Inc
4
- # http://www.apache.org/licenses/LICENSE-2.0
5
-
6
- require 'spec_helper'
7
-
8
- describe 'Segmentation' do
9
- let(:parser) { TwitterCldr::Segmentation::Parser.new }
10
-
11
- def parse(tokens, options = {})
12
- parser.parse(tokens, options)
13
- end
14
-
15
- describe TwitterCldr::Segmentation::Parser do
16
- let(:symbol_table) do
17
- TwitterCldr::Parsers::SymbolTable.new({
18
- "$FOO" => parser.tokenize_regex("[abc]")
19
- })
20
- end
21
-
22
- describe "#parse" do
23
- it "should parse a rule with a break" do
24
- rule = parse("[a-z] ÷ [0-9]")
25
- expect(rule.left.to_regexp_str).to eq("(?:[\\u{0061}-\\u{007a}])")
26
- expect(rule.right.to_regexp_str).to eq("(?:[\\u{0030}-\\u{0039}])")
27
- expect(rule.boundary_symbol).to eq(:break)
28
- end
29
-
30
- it "should parse a rule with a non-break" do
31
- rule = parse("[a-z] × [0-9]")
32
-
33
- expect(rule.left.to_regexp_str).to eq(
34
- "(?:[\\u{0061}-\\u{007a}])"
35
- )
36
-
37
- expect(rule.right.to_regexp_str).to eq(
38
- "(?:[\\u{0030}-\\u{0039}])"
39
- )
40
-
41
- expect(rule.boundary_symbol).to eq(:no_break)
42
- end
43
-
44
- it "should parse a rule containing a variable" do
45
- rule = parse("$FOO × bar", symbol_table: symbol_table)
46
-
47
- expect(rule.left.to_regexp_str).to eq(
48
- "(?:[\\u{0061}-\\u{0063}])"
49
- )
50
-
51
- expect(rule.right.to_regexp_str).to eq(
52
- "(?:\\u{0062})(?:\\u{0061})(?:\\u{0072})"
53
- )
54
-
55
- expect(rule.boundary_symbol).to eq(:no_break)
56
- end
57
- end
58
- end
59
-
60
- describe TwitterCldr::Segmentation::BreakRule do
61
- describe "#match" do
62
- let(:rule) { parse("[a-z] ÷ [0-9]") }
63
-
64
- it "rule should be the right type" do
65
- expect(rule).to be_a(TwitterCldr::Segmentation::BreakRule)
66
- end
67
-
68
- it "should match and return the right offset and text" do
69
- cursor = TwitterCldr::Segmentation::Cursor.new("c7")
70
- match = rule.match(cursor)
71
- expect(match.boundary_offset).to eq([0, 2])
72
- expect(match.boundary_position).to eq(1)
73
- end
74
-
75
- it "should not match if the input string doesn't contain a matching right- and/or left-hand side" do
76
- expect(rule.match(TwitterCldr::Segmentation::Cursor.new("C7"))).to be_nil
77
- expect(rule.match(TwitterCldr::Segmentation::Cursor.new("cc"))).to be_nil
78
- expect(rule.match(TwitterCldr::Segmentation::Cursor.new("CC"))).to be_nil
79
- end
80
- end
81
- end
82
-
83
- describe TwitterCldr::Segmentation::NoBreakRule do
84
- describe "#match" do
85
- let(:rule) { parse("[a-z] × [0-9]") }
86
-
87
- it "rule should be the right type" do
88
- expect(rule).to be_a(TwitterCldr::Segmentation::NoBreakRule)
89
- end
90
-
91
- it "should match and return the right offset and text" do
92
- match = rule.match(TwitterCldr::Segmentation::Cursor.new("c7"))
93
- expect(match.boundary_offset).to eq([0, 2])
94
- expect(match.boundary_position).to eq(1)
95
- end
96
-
97
- it "should not match if the input string doesn't contain matching text" do
98
- expect(rule.match(TwitterCldr::Segmentation::Cursor.new("C7"))).to be_nil
99
- expect(rule.match(TwitterCldr::Segmentation::Cursor.new("cc"))).to be_nil
100
- expect(rule.match(TwitterCldr::Segmentation::Cursor.new("CC"))).to be_nil
101
- end
102
- end
103
- end
104
- end