normalize_country 0.2.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.rdoc +6 -0
- data/lib/normalize_country.rb +5 -3
- data/lib/normalize_country/countries/en.yml +341 -11
- data/lib/normalize_country/scanner.rb +120 -0
- data/spec/normalize_country_spec.rb +3 -9
- metadata +8 -7
@@ -0,0 +1,120 @@
|
|
1
|
+
module NormalizeCountry
|
2
|
+
class Tokenizer # :nodoc: all
  # Splits a string into "words": maximal runs of [[:word:]] characters.
  # Everything between two words (whitespace, punctuation, symbols) is skipped.
  #
  # TODO: PATTERN does not match "-", which is needed to match emoji names.
  # TODO: PATTERN does not match emoji characters themselves.
  PATTERN = /[[:word:]]+/

  # s:: the text to tokenize. It is coerced with +to_s+, so +nil+ behaves
  #     like an empty string instead of raising a TypeError.
  def initialize(s)
    @scanner = StringScanner.new(s.to_s)
  end

  # Consume and return the next word, or +nil+ when no words remain.
  def scan
    @scanner.matched if @scanner.scan_until(PATTERN)
  end

  # Return the next word without consuming it, or +nil+ when no words remain.
  def peek
    # check_until reports what scan_until would match but leaves the scan
    # pointer where it is, so no unscan is required afterwards.
    @scanner.matched if @scanner.check_until(PATTERN)
  end

  # True when there are no more words to scan.
  def end?
    peek.nil?
  end
end
|
26
|
+
|
27
|
+
# Finds country names and codes inside free-form text and converts each one
# with NormalizeCountry().
class Scanner
  # options:: +nil+ or a Hash with the following optional keys:
  #           [:to]   format passed as +:to+ to NormalizeCountry() for every
  #                   match. Defaults to NormalizeCountry.to.
  #           [:from] a format, or a list of formats, whose values are
  #                   recognised in the text. Defaults to
  #                   NormalizeCountry.formats.
  def initialize(options = nil)
    options ||= {}

    @to = options[:to] || NormalizeCountry.to
    # TODO: aliases are needed here too, but there is no way to get them
    # from a country by format.
    @table = lookup_table(options[:from] || NormalizeCountry.formats)
  end

  # Scan +text+ for known country names.
  #
  # Returns an Array with one Hash per match, in order of appearance:
  # [:matched]   the matched words from +text+, joined with a single space
  # [:converted] the result of NormalizeCountry() for the matched country
  #
  # Matching is case-insensitive, and the longest name starting at a given
  # word wins. If a longer candidate turns out not to match, the scanner
  # falls back to the longest name that did match, and the words after it
  # are scanned again. A +nil+ +text+ yields an empty Array.
  def convert(text)
    tokens = tokenize(text)
    matches = []
    start = 0

    while start < tokens.size
      length, name = longest_match(tokens, start)

      if length
        matched = tokens[start, length].join(" ")
        matches << { :matched => matched, :converted => NormalizeCountry(name, :to => @to) }
        start += length
      else
        start += 1
      end
    end

    matches
  end

  private

  # Split +text+ into an Array of words using Tokenizer.
  def tokenize(text)
    tokenizer = Tokenizer.new(text.to_s)
    tokens = []

    while (token = tokenizer.scan)
      tokens << token
    end

    tokens
  end

  # Walk the lookup table along the words beginning at +start+.
  #
  # Returns [word_count, name] for the longest known name found, or +nil+
  # when no known name begins at +start+.
  def longest_match(tokens, start)
    node = @table
    best = nil
    index = start

    while index < tokens.size && (node = node[tokens[index].downcase])
      index += 1
      # Remember every complete name seen on the way down so a dead end
      # further along does not lose it.
      best = [index - start, node[:match]] if node[:match]
    end

    best
  end

  # Build a trie of nested Hashes keyed by downcased word. The node reached
  # by the last word of a name stores that name under +:match+.
  #
  # Names are split with Tokenizer::PATTERN, the same rule used on the
  # scanned text, so names containing punctuation can still be matched.
  # The original name is kept for the NormalizeCountry() lookup because the
  # re-joined words may not equal it.
  #
  # TODO: support case sensitive matching for abbreviation formats, e.g.
  #   options[:case_sensitive_codes]   = [x, y] # or true
  #   options[:case_sensitive_formats] = [x, y]
  #   options[:case_sensitive_formats] = true   # alpha2, alpha3, fifa, ioc
  def lookup_table(from_formats)
    formats = Array(from_formats)

    NormalizeCountry::Countries.values.uniq.each_with_object({}) do |country, table|
      formats.each do |format|
        name = country[format]
        next unless name

        words = name.scan(Tokenizer::PATTERN)
        next if words.empty?

        terminal = words.inject(table) { |node, word| node[word.downcase] ||= {} }
        terminal[:match] ||= name
      end
    end
  end
end
|
119
|
+
end
|
120
|
+
end
|
@@ -4,7 +4,7 @@ require "minitest/autorun"
|
|
4
4
|
require "normalize_country"
|
5
5
|
|
6
6
|
describe NormalizeCountry do
|
7
|
-
COUNTRY_COUNT =
|
7
|
+
COUNTRY_COUNT = 249
|
8
8
|
|
9
9
|
it "normalizes to a country's ISO name by default" do
|
10
10
|
NormalizeCountry.convert("USA").must_equal("United States")
|
@@ -110,15 +110,9 @@ describe NormalizeCountry do
|
|
110
110
|
|
111
111
|
describe ".formats" do
|
112
112
|
it "returns a list of supported formats" do
|
113
|
-
expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :simple, :emoji]
|
113
|
+
expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :shortcode, :simple, :emoji]
|
114
114
|
formats = NormalizeCountry.formats
|
115
|
-
|
116
|
-
# Ugh, support this in 1.8.7 for a least one version
|
117
|
-
if Symbol < Comparable
|
118
|
-
formats.sort.must_equal(expected.sort)
|
119
|
-
else
|
120
|
-
formats.sort_by { |f| f.to_s }.must_equal(expected.sort_by { |f| f.to_s })
|
121
|
-
end
|
115
|
+
formats.sort.must_equal(expected.sort)
|
122
116
|
end
|
123
117
|
end
|
124
118
|
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalize_country
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Skye Shaw
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 12.3.3
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 12.3.3
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- bin/normalize_country
|
55
55
|
- lib/normalize_country.rb
|
56
56
|
- lib/normalize_country/countries/en.yml
|
57
|
+
- lib/normalize_country/scanner.rb
|
57
58
|
- spec/normalize_country_spec.rb
|
58
59
|
homepage: http://github.com/sshaw/normalize_country
|
59
60
|
licenses:
|
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
version: '0'
|
76
77
|
requirements: []
|
77
78
|
rubyforge_project:
|
78
|
-
rubygems_version: 2.
|
79
|
+
rubygems_version: 2.7.6
|
79
80
|
signing_key:
|
80
81
|
specification_version: 4
|
81
82
|
summary: Convert country names and codes to a standard
|