normalize_country 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +6 -2
- data/lib/normalize_country/countries/en.yml +2 -0
- data/lib/normalize_country.rb +2 -2
- metadata +6 -8
- data/lib/normalize_country/scanner.rb +0 -120
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 793c9aaa61fc028c685a5a27681a670e12675e3f2d5db2bb4343167da1d24d94
|
4
|
+
data.tar.gz: 3a8c27532a071ec3baf5114a0ff1c35c643b85944fc45f0cb69586261eb1616c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 800a8cd65c7ed894a1af4950e03e795a86a2debbec7641aba2d676d6ebd99061a28e0672590d6ce087078813ac365ef1eff3f4a19adbb925cc8d7b8022a7fd98
|
7
|
+
data.tar.gz: ec34c172d11d352a1107254f9e9382a8a6e50538cf6cdf4b9a354cb751d6f8cce1b422900182dc530ff051fd6f9e08b2093ba5f5571c43a8c0b1b765222840f6
|
data/README.rdoc
CHANGED
@@ -2,8 +2,7 @@
|
|
2
2
|
|
3
3
|
Convert country names and codes to a standard.
|
4
4
|
|
5
|
-
{<img src="https://
|
6
|
-
{<img src="https://codeclimate.com/github/sshaw/normalize_country.svg" />}[https://codeclimate.com/github/sshaw/normalize_country]
|
5
|
+
{<img src="https://github.com/sshaw/normalize_country/actions/workflows/ci.yml/badge.svg" alt="Build Status" />}[https://github.com/sshaw/normalize_country]
|
7
6
|
|
8
7
|
=== Overview
|
9
8
|
|
@@ -59,6 +58,11 @@ will convert to/from the following:
|
|
59
58
|
|
60
59
|
A list of valid formats can be obtained by calling +NormalizeCountry.formats+.
|
61
60
|
|
61
|
+
=== Custom Database
|
62
|
+
|
63
|
+
Set the +NORMALIZE_COUNTRY_DB+ environment variable.
|
64
|
+
See <code>lib/normalize_country/countries/en.yml</code> for the expected format.
|
65
|
+
|
62
66
|
=== Obtaining an Array or Hash
|
63
67
|
|
64
68
|
NormalizeCountry.to_a # Defaults to NormalizeCountry.to
|
data/lib/normalize_country.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require "yaml"
|
4
4
|
|
5
5
|
module NormalizeCountry
|
6
|
-
VERSION = "0.3.2"
|
6
|
+
VERSION = "0.3.3"
|
7
7
|
Countries = {}
|
8
8
|
|
9
9
|
class << self
|
@@ -78,7 +78,7 @@ module NormalizeCountry
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
|
-
path = File.join(File.dirname(__FILE__), "normalize_country", "countries", "en.yml")
|
81
|
+
path = ENV["NORMALIZE_COUNTRY_DB"] || File.join(File.dirname(__FILE__), "normalize_country", "countries", "en.yml")
|
82
82
|
data = YAML.load_file(path)
|
83
83
|
data.values.each do |mapping|
|
84
84
|
country = Country.new(mapping)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalize_country
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Skye Shaw
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -54,13 +54,12 @@ files:
|
|
54
54
|
- bin/normalize_country
|
55
55
|
- lib/normalize_country.rb
|
56
56
|
- lib/normalize_country/countries/en.yml
|
57
|
-
- lib/normalize_country/scanner.rb
|
58
57
|
- spec/normalize_country_spec.rb
|
59
58
|
homepage: http://github.com/sshaw/normalize_country
|
60
59
|
licenses:
|
61
60
|
- MIT
|
62
61
|
metadata: {}
|
63
|
-
post_install_message:
|
62
|
+
post_install_message:
|
64
63
|
rdoc_options: []
|
65
64
|
require_paths:
|
66
65
|
- lib
|
@@ -75,9 +74,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
74
|
- !ruby/object:Gem::Version
|
76
75
|
version: '0'
|
77
76
|
requirements: []
|
78
|
-
|
79
|
-
|
80
|
-
signing_key:
|
77
|
+
rubygems_version: 3.5.16
|
78
|
+
signing_key:
|
81
79
|
specification_version: 4
|
82
80
|
summary: Convert country names and codes to a standard
|
83
81
|
test_files:
|
@@ -1,120 +0,0 @@
|
|
1
|
-
module NormalizeCountry
|
2
|
-
class Tokenizer # :nodoc: all
|
3
|
-
# Need "-" to match emoji names
|
4
|
-
# Need regex to match emojis
|
5
|
-
PATTERN = /[[:word:]]+/
|
6
|
-
|
7
|
-
def initialize(s)
|
8
|
-
@scanner = StringScanner.new(s)
|
9
|
-
end
|
10
|
-
|
11
|
-
def scan
|
12
|
-
@scanner.scan_until(PATTERN)
|
13
|
-
@scanner.matched
|
14
|
-
end
|
15
|
-
|
16
|
-
def peek
|
17
|
-
match = scan
|
18
|
-
@scanner.unscan if match
|
19
|
-
match
|
20
|
-
end
|
21
|
-
|
22
|
-
def end?
|
23
|
-
peek.nil?
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
class Scanner
|
28
|
-
def initialize(options = nil)
|
29
|
-
options ||= {}
|
30
|
-
|
31
|
-
@to = options[:to] || NormalizeCountry.to
|
32
|
-
@table = lookup_table(options[:from] || NormalizeCountry.formats) # need aliases!!!
|
33
|
-
end
|
34
|
-
|
35
|
-
def convert(text)
|
36
|
-
s = Tokenizer.new(text)
|
37
|
-
|
38
|
-
matches = []
|
39
|
-
stack = []
|
40
|
-
match_position = @table
|
41
|
-
|
42
|
-
while !s.end?
|
43
|
-
word = s.peek.downcase
|
44
|
-
if !match_position[word]
|
45
|
-
s.scan
|
46
|
-
next
|
47
|
-
end
|
48
|
-
|
49
|
-
stack << s.scan
|
50
|
-
alternatives = match_position[stack[-1].downcase]
|
51
|
-
|
52
|
-
peek = s.peek
|
53
|
-
if alternatives && peek && alternatives[peek.downcase]
|
54
|
-
match_position = alternatives
|
55
|
-
next
|
56
|
-
end
|
57
|
-
|
58
|
-
if match_position[stack[-1].downcase][:match]
|
59
|
-
text = stack.join(" ")
|
60
|
-
matches << { :matched => text, :converted => NormalizeCountry(text, :to => @to) }
|
61
|
-
|
62
|
-
end
|
63
|
-
|
64
|
-
stack.clear
|
65
|
-
match_position = @table
|
66
|
-
end
|
67
|
-
|
68
|
-
matches
|
69
|
-
end
|
70
|
-
|
71
|
-
private
|
72
|
-
|
73
|
-
def lookup_table(from_formats)
|
74
|
-
table = {}
|
75
|
-
|
76
|
-
NormalizeCountry::Countries.values.uniq.each_with_object(table) do |country, o|
|
77
|
-
# country.name # eq all
|
78
|
-
# no way to get aliases
|
79
|
-
from_formats.each do |format|
|
80
|
-
name = country[format]
|
81
|
-
next unless name
|
82
|
-
|
83
|
-
head = o
|
84
|
-
parts = name.split(/[[:space:]]+/)
|
85
|
-
parts.each_with_index do |word, i|
|
86
|
-
# options[:case_sensitive_codes] = [x,y] # or true
|
87
|
-
# options[:case_sensitive_formats] = [x,y]
|
88
|
-
# options[:case_sensitive_formats] = true # alpha2, alpha3, fifa, ioc
|
89
|
-
word.downcase! #unless parts.size == 1 && ABBRV_FORMATS.include?(format)
|
90
|
-
|
91
|
-
# if head[word]
|
92
|
-
# if i == parts.size - 1
|
93
|
-
# head[word][:match] = true
|
94
|
-
# else
|
95
|
-
# head = head[word]
|
96
|
-
# end
|
97
|
-
# else
|
98
|
-
# head[word] = {}
|
99
|
-
# if i == parts.size - 1
|
100
|
-
# head[word][:match] = true
|
101
|
-
# else
|
102
|
-
# head = head[word]
|
103
|
-
# end
|
104
|
-
# end
|
105
|
-
|
106
|
-
head[word] = {} unless head[word]
|
107
|
-
|
108
|
-
if i == parts.size - 1
|
109
|
-
head[word][:match] = true
|
110
|
-
else
|
111
|
-
head = head[word]
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
o
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|