normalize_country 0.2.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ module NormalizeCountry
2
+ class Tokenizer # :nodoc: all
3
+ # Need "-" to match emoji names
4
+ # Need regex to match emojis
5
+ PATTERN = /[[:word:]]+/
6
+
7
+ def initialize(s)
8
+ @scanner = StringScanner.new(s)
9
+ end
10
+
11
+ def scan
12
+ @scanner.scan_until(PATTERN)
13
+ @scanner.matched
14
+ end
15
+
16
+ def peek
17
+ match = scan
18
+ @scanner.unscan if match
19
+ match
20
+ end
21
+
22
+ def end?
23
+ peek.nil?
24
+ end
25
+ end
26
+
27
+ class Scanner
28
+ def initialize(options = nil)
29
+ options ||= {}
30
+
31
+ @to = options[:to] || NormalizeCountry.to
32
+ @table = lookup_table(options[:from] || NormalizeCountry.formats) # need aliases!!!
33
+ end
34
+
35
+ def convert(text)
36
+ s = Tokenizer.new(text)
37
+
38
+ matches = []
39
+ stack = []
40
+ match_position = @table
41
+
42
+ while !s.end?
43
+ word = s.peek.downcase
44
+ if !match_position[word]
45
+ s.scan
46
+ next
47
+ end
48
+
49
+ stack << s.scan
50
+ alternatives = match_position[stack[-1].downcase]
51
+
52
+ peek = s.peek
53
+ if alternatives && peek && alternatives[peek.downcase]
54
+ match_position = alternatives
55
+ next
56
+ end
57
+
58
+ if match_position[stack[-1].downcase][:match]
59
+ text = stack.join(" ")
60
+ matches << { :matched => text, :converted => NormalizeCountry(text, :to => @to) }
61
+
62
+ end
63
+
64
+ stack.clear
65
+ match_position = @table
66
+ end
67
+
68
+ matches
69
+ end
70
+
71
+ private
72
+
73
+ def lookup_table(from_formats)
74
+ table = {}
75
+
76
+ NormalizeCountry::Countries.values.uniq.each_with_object(table) do |country, o|
77
+ # country.name # eq all
78
+ # no way to get aliases
79
+ from_formats.each do |format|
80
+ name = country[format]
81
+ next unless name
82
+
83
+ head = o
84
+ parts = name.split(/[[:space:]]+/)
85
+ parts.each_with_index do |word, i|
86
+ # options[:case_sensitive_codes] = [x,y] # or true
87
+ # options[:case_sensitive_formats] = [x,y]
88
+ # options[:case_sensitive_formats] = true # alpha2, alpha3, fifa, ioc
89
+ word.downcase! #unless parts.size == 1 && ABBRV_FORMATS.include?(format)
90
+
91
+ # if head[word]
92
+ # if i == parts.size - 1
93
+ # head[word][:match] = true
94
+ # else
95
+ # head = head[word]
96
+ # end
97
+ # else
98
+ # head[word] = {}
99
+ # if i == parts.size - 1
100
+ # head[word][:match] = true
101
+ # else
102
+ # head = head[word]
103
+ # end
104
+ # end
105
+
106
+ head[word] = {} unless head[word]
107
+
108
+ if i == parts.size - 1
109
+ head[word][:match] = true
110
+ else
111
+ head = head[word]
112
+ end
113
+ end
114
+ end
115
+
116
+ o
117
+ end
118
+ end
119
+ end
120
+ end
@@ -4,7 +4,7 @@ require "minitest/autorun"
4
4
  require "normalize_country"
5
5
 
6
6
  describe NormalizeCountry do
7
- COUNTRY_COUNT = 247
7
+ COUNTRY_COUNT = 249
8
8
 
9
9
  it "normalizes to a country's ISO name by default" do
10
10
  NormalizeCountry.convert("USA").must_equal("United States")
@@ -110,15 +110,9 @@ describe NormalizeCountry do
110
110
 
111
111
  describe ".formats" do
112
112
  it "returns a list of supported formats" do
113
- expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :simple, :emoji]
113
+ expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :shortcode, :simple, :emoji]
114
114
  formats = NormalizeCountry.formats
115
-
116
- # Ugh, support this in 1.8.7 for a least one version
117
- if Symbol < Comparable
118
- formats.sort.must_equal(expected.sort)
119
- else
120
- formats.sort_by { |f| f.to_s }.must_equal(expected.sort_by { |f| f.to_s })
121
- end
115
+ formats.sort.must_equal(expected.sort)
122
116
  end
123
117
  end
124
118
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: normalize_country
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Skye Shaw
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-13 00:00:00.000000000 Z
11
+ date: 2021-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.9'
19
+ version: 12.3.3
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.9'
26
+ version: 12.3.3
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -54,6 +54,7 @@ files:
54
54
  - bin/normalize_country
55
55
  - lib/normalize_country.rb
56
56
  - lib/normalize_country/countries/en.yml
57
+ - lib/normalize_country/scanner.rb
57
58
  - spec/normalize_country_spec.rb
58
59
  homepage: http://github.com/sshaw/normalize_country
59
60
  licenses:
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
76
  version: '0'
76
77
  requirements: []
77
78
  rubyforge_project:
78
- rubygems_version: 2.5.1
79
+ rubygems_version: 2.7.6
79
80
  signing_key:
80
81
  specification_version: 4
81
82
  summary: Convert country names and codes to a standard