normalize_country 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.rdoc +4 -0
- data/lib/normalize_country.rb +2 -2
- data/lib/normalize_country/countries/en.yml +4 -0
- data/lib/normalize_country/scanner.rb +121 -0
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6124424b5e181869babd13d860f376b0f50f556897a1d9eb2d81137d12a85971
|
4
|
+
data.tar.gz: d0459074064cdcf9e8c9aa6a20f47a15b750be93b11406faccf281990dc2572a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb739c3d7b976793f8b479d4e271a704733d85183c3c55ea3b9e768b03c8992a8c5ed5fdc9d08a03e14757ae4ae5df821f0d5a671bda1a04d7acdf2f419583d9
|
7
|
+
data.tar.gz: bfdedc36669f192bd2c2efc85fd51e7b99340b14f614e2f6ffe8f726a0db29989dcc699c0fd4d41bb0f288487b4325e1474e41212604d6268824597754620743
|
data/README.rdoc
CHANGED
@@ -123,3 +123,7 @@ Upon further investigation I've found the following:
|
|
123
123
|
* {country_codes}[https://github.com/SunDawg/country_codes] ISO country names and currency data
|
124
124
|
* {i18n_data}[https://github.com/grosser/i18n_data]: ISO country names in different languages, includes alpha codes
|
125
125
|
* {ModelUN}[https://github.com/uhhuhyeah/model_un]: Similar to this gem but with less support for conversion, it does include US states
|
126
|
+
|
127
|
+
=== See Also
|
128
|
+
|
129
|
+
* {National Colors}[https://github.com/sshaw/national_colors]
|
data/lib/normalize_country.rb
CHANGED
data/lib/normalize_country/countries/en.yml
CHANGED
@@ -1897,6 +1897,8 @@ NI:
|
|
1897
1897
|
emoji: "\U0001F1F3\U0001F1EE"
|
1898
1898
|
shortcode: ":flag-ni:"
|
1899
1899
|
NL:
|
1900
|
+
aliases:
|
1901
|
+
- Holland
|
1900
1902
|
alpha2: NL
|
1901
1903
|
alpha3: NLD
|
1902
1904
|
fifa: NED
|
@@ -2627,6 +2629,8 @@ TZ:
|
|
2627
2629
|
emoji: "\U0001F1F9\U0001F1FF"
|
2628
2630
|
shortcode: ":flag-tz:"
|
2629
2631
|
UA:
|
2632
|
+
aliases:
|
2633
|
+
- The Ukraine
|
2630
2634
|
alpha2: UA
|
2631
2635
|
alpha3: UKR
|
2632
2636
|
fifa: UKR
|
data/lib/normalize_country/scanner.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
module NormalizeCountry
|
2
|
+
require "strscan" # StringScanner lives in the strscan library

# Splits a string into word tokens, one at a time, with one-token lookahead.
class Tokenizer
  # A token is a run of word characters, so digits (needed to match numeric
  # ISO codes) are included.
  #
  # TODO: "-" is not a word character, so hyphenated emoji names are split
  # at the hyphen, and emojis themselves are not matched at all. The pattern
  # needs to be extended to support those formats.
  PATTERN = /[[:word:]]+/

  # s - the text to tokenize; nil is treated as an empty string.
  def initialize(s)
    @scanner = StringScanner.new(s.to_s)
  end

  # Consumes and returns the next token, skipping any non-word characters
  # in front of it. Returns nil when no tokens remain.
  def scan
    @scanner.scan_until(PATTERN)
    @scanner.matched
  end

  # Returns the next token without consuming it, or nil when no tokens remain.
  def peek
    # check_until sets the match data but leaves the scan pointer in place,
    # so there is nothing to undo afterwards.
    @scanner.check_until(PATTERN) && @scanner.matched
  end

  # Returns true when no tokens remain.
  def end?
    peek.nil?
  end
end
|
27
|
+
|
28
|
+
# Finds country names/codes inside free text and converts each one.
#
# Known names are stored in a trie keyed by lowercased words; a node that
# ends a complete name carries a true :match entry.
class Scanner
  # options - optional Hash:
  #   :to   - format to convert matches to (default: NormalizeCountry.to)
  #   :from - formats whose values are searched for in the text
  #           (default: NormalizeCountry.formats)
  #
  # NOTE: aliases are not part of the lookup table; only the values returned
  # by country[format] for each requested format are indexed.
  def initialize(options = nil)
    options ||= {}

    @to = options[:to] || NormalizeCountry.to
    @table = lookup_table(options[:from] || NormalizeCountry.formats)
  end

  # Scans text for known countries.
  #
  # Matching is leftmost-longest: at each word the longest complete name
  # starting there wins. When a longer candidate turns out to be incomplete
  # ("United States Minor league"), the longest complete prefix
  # ("United States") is still reported, and scanning resumes right after it.
  #
  # Returns an Array of Hashes, in order of appearance, each with:
  #   :matched   - the matched words as they appear in text, joined by " "
  #   :converted - the result of NormalizeCountry(matched, :to => @to)
  def convert(text)
    words = tokenize(text)
    matches = []
    position = 0

    while position < words.size
      length = match_length(words, position)

      if length.zero?
        position += 1
        next
      end

      matched = words[position, length].join(" ")
      matches << { :matched => matched, :converted => NormalizeCountry(matched, :to => @to) }
      position += length
    end

    matches
  end

  private

  # Collects every token of text into an Array so matching can backtrack.
  def tokenize(text)
    tokenizer = Tokenizer.new(text)
    words = []
    words << tokenizer.scan until tokenizer.end?
    words
  end

  # Returns how many words, starting at words[start], make up the longest
  # complete name in the lookup table; 0 when no name starts there.
  def match_length(words, start)
    node = @table
    longest = 0
    index = start

    while index < words.size && (node = node[words[index].downcase])
      index += 1
      longest = index - start if node[:match]
    end

    longest
  end

  # Builds the trie of lowercased words for the given formats.
  def lookup_table(from_formats)
    NormalizeCountry::Countries.values.uniq.each_with_object({}) do |country, table|
      from_formats.each do |format|
        name = country[format]
        next unless name

        # Descend one level per word, creating nodes as needed.
        leaf = name.split(/[[:space:]]+/).inject(table) do |node, word|
          node[word.downcase] ||= {}
        end

        # A name with no words leaves us at the root; nothing to mark.
        leaf[:match] = true unless leaf.equal?(table)
      end
    end
  end
end
|
121
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalize_country
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Skye Shaw
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 12.3.3
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 12.3.3
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- bin/normalize_country
|
55
55
|
- lib/normalize_country.rb
|
56
56
|
- lib/normalize_country/countries/en.yml
|
57
|
+
- lib/normalize_country/scanner.rb
|
57
58
|
- spec/normalize_country_spec.rb
|
58
59
|
homepage: http://github.com/sshaw/normalize_country
|
59
60
|
licenses:
|
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
version: '0'
|
76
77
|
requirements: []
|
77
78
|
rubyforge_project:
|
78
|
-
rubygems_version: 2.6
|
79
|
+
rubygems_version: 2.7.6
|
79
80
|
signing_key:
|
80
81
|
specification_version: 4
|
81
82
|
summary: Convert country names and codes to a standard
|