pumi 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ <%# Wikitext template: for each province in `provinces`, emits a level-2 heading linked to the province's Wikipedia page and a summary sentence with a gazetteer citation; then, for each district, a level-3 heading (linked only when the district has a Wikipedia link), a summary sentence with citation, and a sortable wikitable with one row per commune (row number, English name, Khmer name, administrative unit, geocode). NOTE(review): province.links[:wikipedia] is parsed unconditionally, unlike the guarded district and commune links; confirm every province carries that link. %><div id="communes-list">
2
+ <% provinces.each do |province| %>
3
+ <% province_page = URI.parse(province.links[:wikipedia]).path.split("/").last %>
4
+ ==[[<%= province_page %>|<%= province.full_name_en %>]]==
5
+ <div id=province-communes-<%= province.id %>>
6
+ <%= province.name_en %> contains <%= province.communes_summary %>.<ref>{{cite web|url=http://db.ncdd.gov.kh/gazetteer/view/province.castle?pv=<%= province.id %> |title=<%= province.name_en %> |publisher=National Committee for Sub-National Democratic Development }}</ref>
7
+
8
+ <% province.districts.each do |district| %>
9
+ <% if district.links[:wikipedia] %>
10
+ <% district_page = URI.parse(district.links[:wikipedia]).path.split("/").last %>
11
+ ===[[<%= district_page %>|<%= district.full_name_en %>]]===
12
+ <% else %>
13
+ ===<%= district.full_name_en %>===
14
+ <% end %>
15
+
16
+ <div id=district-communes-<%= district.id %>>
17
+ <%= district.name_en %> contains <%= district.communes_summary %>.<ref>{{cite web|url=http://db.ncdd.gov.kh/gazetteer/view/district.castle?ds=<%= district.id %> |title=<%= district.name_en %> |publisher=National Committee for Sub-National Democratic Development }}</ref>
18
+
19
+ {| class="wikitable sortable"
20
+ |-
21
+ ! #
22
+ ! Name
23
+ ! Khmer
24
+ ! Administrative Unit
25
+ ! Geocode
26
+ |-
27
+
28
+ <% district.communes.each_with_index do |commune, index| %>
29
+ | <%= index + 1 %>
30
+ <% if commune.links[:wikipedia] %>
31
+ <% commune_page = URI.parse(commune.links[:wikipedia]).path.split("/").last.gsub("_", " ") %>
32
+ | [[<%= commune_page %>|<%= commune.name_en %>]]
33
+ <% else %>
34
+ | <%= commune.name_en %>
35
+ <% end %>
36
+ | <%= commune.name_km %>
37
+ | <%= "#{commune.administrative_unit.name_en} (#{commune.administrative_unit.name_km} #{commune.administrative_unit.name_latin})" %>
38
+ | <%= commune.id %>
39
+ |-
40
+ <% end %>
41
+ |}
42
+ </div>
43
+ <% end %>
44
+ </div>
45
+ <% end %>
46
+ </div>
@@ -0,0 +1,27 @@
1
+ <%# Wikitext template for one province's districts: emits a summary sentence with a gazetteer citation, then a sortable wikitable with one row per district (row number, English name linked only when the district has a Wikipedia link, Khmer name, administrative unit, geocode). Reads `province`, `districts` and `districts_summary` from the rendering context. %><div id=province-districts-<%= province.id %>>
2
+ <%= province.name_en %> contains <%= districts_summary %>. <ref>{{cite web|url=http://db.ncdd.gov.kh/gazetteer/view/province.castle?pv=<%= province.id %> |title=<%= province.name_en %> |publisher=National Committee for Sub-National Democratic Development }}</ref>
3
+
4
+ {| class="wikitable sortable"
5
+ |-
6
+ ! #
7
+ ! Name
8
+ ! Khmer
9
+ ! Administrative Unit
10
+ ! Geocode
11
+ |-
12
+
13
+ <% districts.each_with_index do |district, index| %>
14
+ | <%= index + 1 %>
15
+ <% if district.links[:wikipedia] %>
16
+ <% district_page = URI.parse(district.links[:wikipedia]).path.split("/").last.gsub("_", " ") %>
17
+ | [[<%= district_page %>|<%= district.name_en %>]]
18
+ <% else %>
19
+ | <%= district.name_en %>
20
+ <% end %>
21
+ | <%= district.name_km %>
22
+ | <%= "#{district.administrative_unit.name_en} (#{district.administrative_unit.name_km} #{district.administrative_unit.name_latin})" %>
23
+ | <%= district.id %>
24
+ |-
25
+ <% end %>
26
+ |}
27
+ </div>
@@ -0,0 +1,10 @@
1
module Pumi
  module Bot
    # Namespace for Wikipedia-related bot code.
    module Wikipedia; end
  end
end
7
+
8
+ require_relative "wikipedia/article"
9
+ require_relative "wikipedia/communes_in_cambodia_article"
10
+ require_relative "wikipedia/districts_in_cambodia_article"
data/lib/pumi/bot.rb ADDED
@@ -0,0 +1,6 @@
1
module Pumi
  # Namespace for bot code.
  module Bot; end
end
5
+
6
+ require_relative "bot/wikipedia"
@@ -0,0 +1,251 @@
1
+ require "geocoder"
2
+
3
+ module Pumi
4
+ module DataSource
5
+ class Geocoder
6
+ Result = Struct.new(:code, :lat, :long, :bounding_box, keyword_init: true)
7
+
8
+ class AbstractGeocoder
9
+ Result = Struct.new(
10
+ :lat, :long, :bounding_box, :country_code,
11
+ :types, :iso3166_2, :district_name_en,
12
+ :name,
13
+ keyword_init: true
14
+ )
15
+
16
+ Misspelling = Struct.new(:incorrect_text, :correct_text, keyword_init: true)
17
+
18
+ MISSPELLINGS = []
19
+
20
+ class AbstractProvider
21
+ attr_reader :geocoder, :name
22
+
23
+ def initialize(geocoder:, name:)
24
+ @geocoder = geocoder
25
+ @name = name
26
+ end
27
+
28
+ def search(term)
29
+ geocoder.search(term, lookup: name).map do |result|
30
+ build_result(result.data)
31
+ end
32
+ end
33
+ end
34
+
35
+ class Google < AbstractProvider
36
+ private
37
+
38
+ def build_result(data)
39
+ province_name_en = find_address_component(
40
+ data,
41
+ "administrative_area_level_1"
42
+ )&.fetch("long_name")
43
+ province = Pumi::Province.where(full_name_en: province_name_en).first
44
+ Result.new(
45
+ name: data.dig("address_components", 0, "long_name"),
46
+ lat: data.dig("geometry", "location", "lat"),
47
+ long: data.dig("geometry", "location", "lng"),
48
+ bounding_box: [
49
+ data.dig("geometry", "bounds", "northeast", "lat"),
50
+ data.dig("geometry", "bounds", "northeast", "lng"),
51
+ data.dig("geometry", "bounds", "southwest", "lat"),
52
+ data.dig("geometry", "bounds", "southwest", "lng")
53
+ ],
54
+ country_code: find_address_component(data, "country").fetch("short_name").upcase,
55
+ district_name_en: find_address_component(
56
+ data,
57
+ "administrative_area_level_2"
58
+ )&.fetch("long_name"),
59
+ types: data["types"],
60
+ iso3166_2: province&.iso3166_2
61
+ )
62
+ end
63
+
64
+ def find_address_component(data, type)
65
+ data.fetch("address_components").find do |c|
66
+ c.fetch("types").include?(type)
67
+ end
68
+ end
69
+ end
70
+
71
+ class Nominatim < AbstractProvider
72
+ private
73
+
74
+ def build_result(data)
75
+ Result.new(
76
+ name: nil,
77
+ lat: data["lat"],
78
+ long: data["lon"],
79
+ bounding_box: data["boundingbox"],
80
+ types: Array(data["type"]),
81
+ iso3166_2: data.dig("address", "ISO3166-2-lvl4"),
82
+ country_code: data.dig("address", "country_code")&.upcase,
83
+ district_name_en: data.dig("address", "county")
84
+ )
85
+ end
86
+ end
87
+
88
+ PROVIDERS = {
89
+ nominatim: Nominatim,
90
+ google: Google
91
+ }.freeze
92
+
93
+ attr_reader :providers, :options
94
+
95
+ def initialize(geocoder: ::Geocoder, providers: PROVIDERS.keys, **options)
96
+ @options = options
97
+
98
+ geocoder.configure(
99
+ google: {
100
+ api_key: ENV["GOOGLE_API_KEY"]
101
+ }
102
+ )
103
+
104
+ @providers = Array(providers).map do |name|
105
+ PROVIDERS.fetch(name).new(geocoder:, name:)
106
+ end
107
+ end
108
+
109
+ def geocode_all
110
+ locations.each_with_object([]).with_index do |(location, results), _index|
111
+ next if !options[:regeocode] && !location.geodata.nil?
112
+
113
+ geocoder_result = geocode(location)
114
+
115
+ if geocoder_result.nil?
116
+ ungeocoded_locations << location
117
+ next
118
+ end
119
+
120
+ results << build_result(code: location.id, geocoder_result:)
121
+ end
122
+ end
123
+
124
+ private
125
+
126
+ def geocode(location)
127
+ providers.each do |provider|
128
+ Array(build_search_term(location)).each do |search_term|
129
+ all_results = provider.search(search_term)
130
+ geocoder_result = filter(location, all_results)
131
+
132
+ return geocoder_result unless geocoder_result.nil?
133
+ end
134
+ end
135
+
136
+ nil
137
+ end
138
+
139
+ def build_result(code:, geocoder_result:)
140
+ Geocoder::Result.new(
141
+ code:,
142
+ lat: geocoder_result.lat,
143
+ long: geocoder_result.long,
144
+ bounding_box: geocoder_result.bounding_box
145
+ )
146
+ end
147
+
148
+ def build_search_term(location)
149
+ [location.full_name_km, location.name_km].map do |term|
150
+ MISSPELLINGS.find { |m| m.correct_text == term }&.incorrect_text || term
151
+ end
152
+ end
153
+
154
+ def ungeocoded_locations
155
+ @ungeocoded_locations ||= []
156
+ end
157
+ end
158
+
159
+ class CambodianProvinces < AbstractGeocoder
160
+ private
161
+
162
+ def locations
163
+ @locations ||= Pumi::Province.all
164
+ end
165
+
166
+ def build_search_term(province)
167
+ province.iso3166_2
168
+ end
169
+
170
+ def filter(province, geocoder_results)
171
+ geocoder_results.find do |r|
172
+ r.iso3166_2 == province.iso3166_2 && r.types.include?("administrative")
173
+ end
174
+ end
175
+ end
176
+
177
+ class CambodianDistricts < AbstractGeocoder
178
+ private
179
+
180
+ def locations
181
+ @locations ||= Pumi::District.all
182
+ end
183
+
184
+ def filter(district, geocoder_results)
185
+ geocoder_results.find do |r|
186
+ r.country_code == "KH" &&
187
+ r.iso3166_2 == district.province.iso3166_2 && (
188
+ %w[administrative_area_level_2 town city administrative].any? do |type|
189
+ r.types.include?(type)
190
+ end || (%w[locality political].sort == r.types.sort)
191
+ )
192
+ end
193
+ end
194
+ end
195
+
196
+ class CambodianCommunes < AbstractGeocoder
197
+ private
198
+
199
+ def locations
200
+ @locations ||= Pumi::Commune.all
201
+ end
202
+
203
+ def filter(commune, geocoder_results)
204
+ geocoder_results.find do |r|
205
+ r.country_code == "KH" &&
206
+ (r.iso3166_2 == commune.province.iso3166_2 || r.district_name_en.to_s.downcase.include?(commune.district.name_en.downcase)) &&
207
+ %w[administrative_area_level_3 village suburb neighbourhood].any? do |type|
208
+ r.types.include?(type)
209
+ end
210
+ end
211
+ end
212
+ end
213
+
214
+ attr_reader :data_file, :geocoder
215
+
216
+ def initialize(data_file:, geocoder:)
217
+ @data_file = data_file
218
+ @geocoder = geocoder
219
+ end
220
+
221
+ def load_data!(output_dir: "data")
222
+ data.each do |code, attributes|
223
+ geocoded_result = geocoded_results.find { |r| r.code == code }
224
+
225
+ next if geocoded_result.nil?
226
+
227
+ attributes["geodata"] ||= {}
228
+ attributes["geodata"]["lat"] = geocoded_result.lat
229
+ attributes["geodata"]["long"] = geocoded_result.long
230
+ attributes["geodata"]["bounding_box"] = geocoded_result.bounding_box
231
+ end
232
+
233
+ write_data!(output_dir)
234
+ end
235
+
236
+ private
237
+
238
+ def data
239
+ @data ||= data_file.read
240
+ end
241
+
242
+ def write_data!(data_directory)
243
+ data_file.write(data, data_directory:)
244
+ end
245
+
246
+ def geocoded_results
247
+ @geocoded_results ||= geocoder.geocode_all
248
+ end
249
+ end
250
+ end
251
+ end
@@ -0,0 +1,29 @@
1
module Pumi
  module DataSource
    # Stamps every entry of a data file with an "iso3166_2" attribute of the
    # form "KH-<code as integer>" and writes the file back out.
    class ISO31662
      attr_reader :data_file

      # @param data_file [#read, #write] defaults to the provinces data file
      def initialize(data_file: Pumi::DataFile.new(:provinces))
        @data_file = data_file
      end

      # Sets "iso3166_2" on each entry, then writes the data.
      #
      # @param output_dir [String] directory handed to the data file writer
      # @return whatever +data_file.write+ returns
      def load_data!(output_dir: "data")
        entries.each do |code, attributes|
          # to_i drops leading zeros, e.g. "01" becomes 1.
          attributes["iso3166_2"] = format("KH-%d", code.to_i)
        end

        data_file.write(entries, data_directory: output_dir)
      end

      private

      # Memoized contents of the data file.
      def entries
        @data ||= data_file.read
      end
    end
  end
end