normalize_country 0.2.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,120 @@
1
+ module NormalizeCountry
2
# StringScanner lives in the strscan standard library and is not loaded by
# default; requiring it here keeps this file usable on its own.
require "strscan"

# Splits a string into word tokens, one at a time, with single-token lookahead.
class Tokenizer # :nodoc: all
  # A token is a run of word characters.
  # TODO: "-" must also be accepted to match emoji names.
  # TODO: emoji themselves need a pattern of their own to be matched.
  PATTERN = /[[:word:]]+/

  # s - the text to tokenize; nil is treated as an empty string.
  def initialize(s)
    @scanner = StringScanner.new(s.to_s)
  end

  # Consumes and returns the next token, skipping anything before it that
  # does not match PATTERN. Returns nil when no token is left.
  def scan
    @scanner.scan_until(PATTERN) && @scanner.matched
  end

  # Returns the next token without consuming it, or nil when no token is left.
  def peek
    # check_until matches like scan_until but leaves the scan pointer alone,
    # so there is nothing to undo afterwards.
    @scanner.check_until(PATTERN) && @scanner.matched
  end

  # True when no further token can be read.
  def end?
    peek.nil?
  end
end
26
+
27
# Finds country names and codes inside free-form text and converts each one
# to a target format.
class Scanner
  # options - optional Hash:
  #   :to   - format every match is converted to (default: NormalizeCountry.to)
  #   :from - format, or Array of formats, to look for in the text
  #           (default: NormalizeCountry.formats)
  def initialize(options = nil)
    options ||= {}

    @to = options[:to] || NormalizeCountry.to
    # TODO: aliases are not part of the table yet.
    @table = lookup_table(options[:from] || NormalizeCountry.formats)
  end

  # Scans text for known countries.
  #
  # text - the String to search; nil is treated as an empty string.
  #
  # Returns an Array of Hashes, one per match in order of appearance, with
  # :matched (the words as they appear in text, joined by a single space)
  # and :converted (the result of NormalizeCountry for those words).
  def convert(text)
    tokenizer = Tokenizer.new(text.to_s)
    matches = []

    until tokenizer.end?
      node = @table[tokenizer.peek.downcase]
      unless node
        # This word cannot start any known name.
        tokenizer.scan
        next
      end

      words = [tokenizer.scan]
      # Number of leading words that form a complete name so far.
      matched_size = node[:match] ? 1 : 0

      # Follow the table for as long as the upcoming word continues a name.
      while (upcoming = tokenizer.peek) && node[upcoming.downcase]
        node = node[upcoming.downcase]
        words << tokenizer.scan
        matched_size = words.size if node[:match]
      end

      next if matched_size.zero?

      # Report the longest complete name seen, even when a longer candidate
      # was started and then turned out not to match.
      name = words.first(matched_size).join(" ")
      matches << { :matched => name, :converted => NormalizeCountry(name, :to => @to) }
    end

    matches
  end

  private

  # Builds a nested Hash keyed by downcased word: each level holds the words
  # that may follow the path so far, and a level whose path spells a complete
  # name carries :match => true.
  #
  # from_formats - a format or an Array of formats whose values are indexed.
  def lookup_table(from_formats)
    formats = Array(from_formats)

    NormalizeCountry::Countries.values.uniq.each_with_object({}) do |country, table|
      formats.each do |format|
        name = country[format]
        next unless name

        # TODO: optionally keep codes (alpha2, alpha3, fifa, ioc) case sensitive.
        words = name.to_s.downcase.split(/[[:space:]]+/)
        next if words.empty?

        *path, last = words
        parent = path.inject(table) { |level, word| level[word] ||= {} }
        (parent[last] ||= {})[:match] = true
      end
    end
  end
end
120
+ end
@@ -4,7 +4,7 @@ require "minitest/autorun"
4
4
  require "normalize_country"
5
5
 
6
6
  describe NormalizeCountry do
7
- COUNTRY_COUNT = 247
7
+ COUNTRY_COUNT = 249
8
8
 
9
9
  it "normalizes to a country's ISO name by default" do
10
10
  NormalizeCountry.convert("USA").must_equal("United States")
@@ -110,15 +110,9 @@ describe NormalizeCountry do
110
110
 
111
111
  describe ".formats" do
112
112
  it "returns a list of supported formats" do
113
- expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :simple, :emoji]
113
+ expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :shortcode, :simple, :emoji]
114
114
  formats = NormalizeCountry.formats
115
-
116
- # Ugh, support this in 1.8.7 for a least one version
117
- if Symbol < Comparable
118
- formats.sort.must_equal(expected.sort)
119
- else
120
- formats.sort_by { |f| f.to_s }.must_equal(expected.sort_by { |f| f.to_s })
121
- end
115
+ formats.sort.must_equal(expected.sort)
122
116
  end
123
117
  end
124
118
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: normalize_country
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Skye Shaw
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-13 00:00:00.000000000 Z
11
+ date: 2021-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.9'
19
+ version: 12.3.3
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.9'
26
+ version: 12.3.3
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -54,6 +54,7 @@ files:
54
54
  - bin/normalize_country
55
55
  - lib/normalize_country.rb
56
56
  - lib/normalize_country/countries/en.yml
57
+ - lib/normalize_country/scanner.rb
57
58
  - spec/normalize_country_spec.rb
58
59
  homepage: http://github.com/sshaw/normalize_country
59
60
  licenses:
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
76
  version: '0'
76
77
  requirements: []
77
78
  rubyforge_project:
78
- rubygems_version: 2.5.1
79
+ rubygems_version: 2.7.6
79
80
  signing_key:
80
81
  specification_version: 4
81
82
  summary: Convert country names and codes to a standard