normalize_country 0.2.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,121 @@
1
+ module NormalizeCountry
2
# StringScanner is provided by the strscan standard library. It is required
# here so this class does not rely on another file having loaded it first.
require "strscan"

# Splits a string into word tokens, one at a time, with single-token
# lookahead. Anything that is not matched by PATTERN (whitespace,
# punctuation, ...) is skipped and never returned.
class Tokenizer
  # [[:word:]] covers letters, marks, digits and connector punctuation,
  # so numeric ISO codes are tokenized as well.
  # TODO: need "-" to match emoji names
  # TODO: need a regex to match emojis
  PATTERN = /[[:word:]]+/

  # @param s [String] the text to tokenize
  def initialize(s)
    @scanner = StringScanner.new(s)
  end

  # Consumes the next token.
  #
  # @return [String, nil] the next token, or nil when no token is left
  def scan
    @scanner.scan_until(PATTERN) && @scanner.matched
  end

  # Looks at the next token without consuming it.
  #
  # check_until leaves the scan pointer where it is, so no scan/unscan
  # round trip is needed to restore the position.
  #
  # @return [String, nil] the next token, or nil when no token is left
  def peek
    @scanner.check_until(PATTERN) && @scanner.matched
  end

  # @return [Boolean] true when no further token can be read
  def end?
    peek.nil?
  end
end
27
+
28
# Finds country names and codes embedded in free-form text and converts
# each one with NormalizeCountry().
#
# Known names are stored in a trie of nested Hashes keyed by downcased
# word; a node that ends a complete name carries :match => true.
#
# NOTE(review): names are split on whitespace when the table is built,
# while Tokenizer::PATTERN only yields word characters. A name word that
# contains other characters (a hyphen, comma, apostrophe, ...) therefore
# looks unreachable by any token -- confirm and decide how to handle it.
class Scanner
  # @param options [Hash, nil]
  #   :to   - format handed to NormalizeCountry() for every match;
  #           defaults to NormalizeCountry.to
  #   :from - format, or list of formats, whose values are recognized;
  #           defaults to NormalizeCountry.formats
  def initialize(options = nil)
    options ||= {}

    @to = options[:to] || NormalizeCountry.to
    # Array() lets :from be a single format as well as a list.
    # TODO: aliases are not part of the lookup table yet
    @table = lookup_table(Array(options[:from] || NormalizeCountry.formats))
  end

  # Scans +text+ for known countries.
  #
  # At every token the longest name starting there wins. When a longer
  # candidate turns out not to match, the longest complete name seen so
  # far is still reported, and scanning resumes right after it.
  #
  # @param text [String] text to search
  # @return [Array<Hash>] one Hash per match, in order of appearance:
  #   :matched is the matched tokens joined by a single space and
  #   :converted is the result of NormalizeCountry(matched, :to => ...)
  def convert(text)
    tokens = tokenize(text)
    matches = []
    start = 0

    while start < tokens.size
      length = longest_match(tokens, start)

      if length.zero?
        start += 1
        next
      end

      matched = tokens[start, length].join(" ")
      matches << { :matched => matched, :converted => NormalizeCountry(matched, :to => @to) }
      start += length
    end

    matches
  end

  private

  # Reads every token of +text+ up front so matching can look ahead and
  # fall back without having to rewind the tokenizer.
  def tokenize(text)
    tokenizer = Tokenizer.new(text)
    tokens = []
    tokens << tokenizer.scan until tokenizer.end?
    tokens
  end

  # Returns how many tokens, beginning at +start+, make up the longest
  # known name; 0 when no name begins there.
  def longest_match(tokens, start)
    node = @table
    length = 0
    index = start

    while index < tokens.size && (node = node[tokens[index].downcase])
      index += 1
      length = index - start if node[:match]
    end

    length
  end

  # Builds the trie used by #convert from the values each country has for
  # the given formats.
  #
  # TODO: options[:case_sensitive_codes] / options[:case_sensitive_formats]
  # to keep the abbreviation formats (alpha2, alpha3, fifa, ioc)
  # case-sensitive; for now every word is downcased.
  def lookup_table(from_formats)
    NormalizeCountry::Countries.values.uniq.each_with_object({}) do |country, table|
      # TODO: there is no way to get a country's aliases here
      from_formats.each do |format|
        name = country[format]
        next unless name

        words = name.split(/[[:space:]]+/).reject(&:empty?).map(&:downcase)
        # Without this guard a blank name would mark the root as a match.
        next if words.empty?

        leaf = words.reduce(table) { |node, word| node[word] ||= {} }
        leaf[:match] = true
      end
    end
  end
end
121
+ end
@@ -4,7 +4,7 @@ require "minitest/autorun"
4
4
  require "normalize_country"
5
5
 
6
6
  describe NormalizeCountry do
7
- COUNTRY_COUNT = 247
7
+ COUNTRY_COUNT = 249
8
8
 
9
9
  it "normalizes to a country's ISO name by default" do
10
10
  NormalizeCountry.convert("USA").must_equal("United States")
@@ -110,15 +110,9 @@ describe NormalizeCountry do
110
110
 
111
111
  describe ".formats" do
112
112
  it "returns a list of supported formats" do
113
- expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :simple, :emoji]
113
+ expected = [:alpha2, :alpha3, :fifa, :ioc, :iso_name, :numeric, :official, :short, :shortcode, :simple, :emoji]
114
114
  formats = NormalizeCountry.formats
115
-
116
- # Ugh, support this in 1.8.7 for a least one version
117
- if Symbol < Comparable
118
- formats.sort.must_equal(expected.sort)
119
- else
120
- formats.sort_by { |f| f.to_s }.must_equal(expected.sort_by { |f| f.to_s })
121
- end
115
+ formats.sort.must_equal(expected.sort)
122
116
  end
123
117
  end
124
118
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: normalize_country
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Skye Shaw
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-13 00:00:00.000000000 Z
11
+ date: 2021-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.9'
19
+ version: 12.3.3
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.9'
26
+ version: 12.3.3
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -54,6 +54,7 @@ files:
54
54
  - bin/normalize_country
55
55
  - lib/normalize_country.rb
56
56
  - lib/normalize_country/countries/en.yml
57
+ - lib/normalize_country/scanner.rb
57
58
  - spec/normalize_country_spec.rb
58
59
  homepage: http://github.com/sshaw/normalize_country
59
60
  licenses:
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
76
  version: '0'
76
77
  requirements: []
77
78
  rubyforge_project:
78
- rubygems_version: 2.5.1
79
+ rubygems_version: 2.7.6
79
80
  signing_key:
80
81
  specification_version: 4
81
82
  summary: Convert country names and codes to a standard