pumi 0.19.0 → 0.20.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
+ <%# Wikitext template. For each province in `provinces` it emits a level-2 wiki heading linked to the last path segment of the province's Wikipedia URL, a summary sentence with an NCDD gazetteer citation, and then for every district a level-3 heading (linked only when the district has a Wikipedia link) followed by a sortable wikitable of its communes (row number, name, Khmer name, administrative unit, geocode). NOTE(review): `provinces` is not defined in this template; presumably it is supplied by the rendering code - confirm. %><div id="communes-list">
2
+ <% provinces.each do |province| %>
3
+ <% province_page = URI.parse(province.links[:wikipedia]).path.split("/").last %>
4
+ ==[[<%= province_page %>|<%= province.full_name_en %>]]==
5
+ <div id=province-communes-<%= province.id %>>
6
+ <%= province.name_en %> contains <%= province.communes_summary %>.<ref>{{cite web|url=http://db.ncdd.gov.kh/gazetteer/view/province.castle?pv=<%= province.id %> |title=<%= province.name_en %> |publisher=National Committee for Sub-National Democratic Development }}</ref>
7
+
8
+ <% province.districts.each do |district| %>
9
+ <% if district.links[:wikipedia] %>
10
+ <% district_page = URI.parse(district.links[:wikipedia]).path.split("/").last %>
11
+ ===[[<%= district_page %>|<%= district.full_name_en %>]]===
12
+ <% else %>
13
+ ===<%= district.full_name_en %>===
14
+ <% end %>
15
+
16
+ <div id=district-communes-<%= district.id %>>
17
+ <%= district.name_en %> contains <%= district.communes_summary %>.<ref>{{cite web|url=http://db.ncdd.gov.kh/gazetteer/view/district.castle?ds=<%= district.id %> |title=<%= district.name_en %> |publisher=National Committee for Sub-National Democratic Development }}</ref>
18
+
19
+ {| class="wikitable sortable"
20
+ |-
21
+ ! #
22
+ ! Name
23
+ ! Khmer
24
+ ! Administrative Unit
25
+ ! Geocode
26
+ |-
27
+
28
+ <% district.communes.each_with_index do |commune, index| %>
29
+ | <%= index + 1 %>
30
+ <% if commune.links[:wikipedia] %>
31
+ <% commune_page = URI.parse(commune.links[:wikipedia]).path.split("/").last.gsub("_", " ") %>
32
+ | [[<%= commune_page %>|<%= commune.name_en %>]]
33
+ <% else %>
34
+ | <%= commune.name_en %>
35
+ <% end %>
36
+ | <%= commune.name_km %>
37
+ | <%= "#{commune.administrative_unit.name_en} (#{commune.administrative_unit.name_km} #{commune.administrative_unit.name_latin})" %>
38
+ | <%= commune.id %>
39
+ |-
40
+ <% end %>
41
+ |}
42
+ </div>
43
+ <% end %>
44
+ </div>
45
+ <% end %>
46
+ </div>
@@ -0,0 +1,27 @@
1
+ <%# Wikitext template for one province's districts section: a summary sentence with an NCDD gazetteer citation, then a sortable wikitable with one row per entry of `districts` (row number, name linked to Wikipedia when a link exists, Khmer name, administrative unit, geocode). NOTE(review): `province`, `districts` and `districts_summary` are not defined in this template; presumably they are supplied by the rendering code - confirm. %><div id=province-districts-<%= province.id %>>
2
+ <%= province.name_en %> contains <%= districts_summary %>. <ref>{{cite web|url=http://db.ncdd.gov.kh/gazetteer/view/province.castle?pv=<%= province.id %> |title=<%= province.name_en %> |publisher=National Committee for Sub-National Democratic Development }}</ref>
3
+
4
+ {| class="wikitable sortable"
5
+ |-
6
+ ! #
7
+ ! Name
8
+ ! Khmer
9
+ ! Administrative Unit
10
+ ! Geocode
11
+ |-
12
+
13
+ <% districts.each_with_index do |district, index| %>
14
+ | <%= index + 1 %>
15
+ <% if district.links[:wikipedia] %>
16
+ <% district_page = URI.parse(district.links[:wikipedia]).path.split("/").last.gsub("_", " ") %>
17
+ | [[<%= district_page %>|<%= district.name_en %>]]
18
+ <% else %>
19
+ | <%= district.name_en %>
20
+ <% end %>
21
+ | <%= district.name_km %>
22
+ | <%= "#{district.administrative_unit.name_en} (#{district.administrative_unit.name_km} #{district.administrative_unit.name_latin})" %>
23
+ | <%= district.id %>
24
+ |-
25
+ <% end %>
26
+ |}
27
+ </div>
@@ -0,0 +1,10 @@
1
module Pumi
  module Bot
    # Namespace for the Wikipedia bot classes; it is declared empty here so
    # the files required after this definition can reopen it.
    module Wikipedia
    end
  end
end
7
+
8
+ require_relative "wikipedia/article"
9
+ require_relative "wikipedia/communes_in_cambodia_article"
10
+ require_relative "wikipedia/districts_in_cambodia_article"
data/lib/pumi/bot.rb ADDED
@@ -0,0 +1,6 @@
1
module Pumi
  # Namespace for bots; it is declared empty here so the files required
  # after this definition can reopen it.
  module Bot
  end
end
5
+
6
+ require_relative "bot/wikipedia"
@@ -0,0 +1,251 @@
1
+ require "geocoder"
2
+
3
module Pumi
  module DataSource
    # Merges geocoding results (lat, long, bounding box) into the records of
    # a data file and writes the file back.
    #
    # Collaborators are injected:
    # * +data_file+ must respond to +read+ (returning a Hash of
    #   code => attributes) and +write(data, data_directory:)+.
    # * +geocoder+ must respond to +geocode_all+ and return objects that
    #   expose +code+, +lat+, +long+ and +bounding_box+ (see Result).
    class Geocoder
      # One geocoded location, keyed by the location's code.
      Result = Struct.new(:code, :lat, :long, :bounding_box, keyword_init: true)

      # Template for the concrete geocoders below. Subclasses must provide
      # the private methods +locations+ and +filter(location, results)+ and
      # may override +build_search_term+.
      class AbstractGeocoder
        # Provider-independent view of a single raw search hit.
        Result = Struct.new(
          :lat, :long, :bounding_box, :country_code,
          :types, :iso3166_2, :district_name_en,
          :name,
          keyword_init: true
        )

        Misspelling = Struct.new(:incorrect_text, :correct_text, keyword_init: true)

        # Known misspellings. When a search term equals an entry's
        # +correct_text+, the entry's +incorrect_text+ is searched instead
        # (see #build_search_term). Empty in this file.
        MISSPELLINGS = []

        # Wraps one lookup of the injected geocoder library. Subclasses
        # implement the private +build_result(data)+ that converts a raw hit.
        class AbstractProvider
          attr_reader :geocoder, :name

          # @param geocoder [#search] geocoding library entry point
          # @param name [Symbol] lookup name forwarded as +lookup:+
          def initialize(geocoder:, name:)
            @geocoder = geocoder
            @name = name
          end

          # @param term [String] text to search for
          # @return [Array<Result>] one normalised result per raw hit
          def search(term)
            geocoder.search(term, lookup: name).map do |result|
              build_result(result.data)
            end
          end
        end

        # Normalises hits whose +data+ has the "address_components" /
        # "geometry" / "types" layout read below.
        class Google < AbstractProvider
          private

          def build_result(data)
            province_name_en = find_address_component(
              data,
              "administrative_area_level_1"
            )&.fetch("long_name")
            province = Pumi::Province.where(full_name_en: province_name_en).first

            Result.new(
              name: data.dig("address_components", 0, "long_name"),
              lat: data.dig("geometry", "location", "lat"),
              long: data.dig("geometry", "location", "lng"),
              # Each entry is nil when the hit carries no "bounds".
              # NOTE(review): the element order here (NE lat, NE lng, SW lat,
              # SW lng) may differ from the Nominatim "boundingbox" array -
              # confirm what consumers expect.
              bounding_box: [
                data.dig("geometry", "bounds", "northeast", "lat"),
                data.dig("geometry", "bounds", "northeast", "lng"),
                data.dig("geometry", "bounds", "southwest", "lat"),
                data.dig("geometry", "bounds", "southwest", "lng")
              ],
              # A hit without a "country" component yields nil instead of
              # raising; the filters then reject it via their "KH" comparison.
              country_code: find_address_component(data, "country")&.fetch("short_name")&.upcase,
              district_name_en: find_address_component(
                data,
                "administrative_area_level_2"
              )&.fetch("long_name"),
              types: data["types"],
              iso3166_2: province&.iso3166_2
            )
          end

          # @return [Hash, nil] first address component tagged with +type+
          def find_address_component(data, type)
            data.fetch("address_components").find do |component|
              component.fetch("types").include?(type)
            end
          end
        end

        # Normalises hits whose +data+ has the flat "lat" / "lon" /
        # "boundingbox" / "address" layout read below.
        class Nominatim < AbstractProvider
          private

          def build_result(data)
            Result.new(
              name: nil,
              lat: data["lat"],
              long: data["lon"],
              bounding_box: data["boundingbox"],
              types: Array(data["type"]),
              iso3166_2: data.dig("address", "ISO3166-2-lvl4"),
              country_code: data.dig("address", "country_code")&.upcase,
              district_name_en: data.dig("address", "county")
            )
          end
        end

        # Provider classes by lookup name; hash order is the default order
        # in which providers are tried.
        PROVIDERS = {
          nominatim: Nominatim,
          google: Google
        }.freeze

        attr_reader :providers, :options

        # @param geocoder [#configure, #search] geocoding library entry point
        # @param providers [Symbol, Array<Symbol>] keys of PROVIDERS, tried in order
        # @param options [Hash] +regeocode: true+ also geocodes locations
        #   that already have geodata
        # @raise [KeyError] for a provider name missing from PROVIDERS
        def initialize(geocoder: ::Geocoder, providers: PROVIDERS.keys, **options)
          @options = options

          geocoder.configure(
            google: {
              api_key: ENV["GOOGLE_API_KEY"]
            }
          )

          @providers = Array(providers).map do |name|
            PROVIDERS.fetch(name).new(geocoder:, name:)
          end
        end

        # Geocodes every location that needs it.
        #
        # Locations with existing geodata are skipped unless
        # +options[:regeocode]+ is truthy. Locations for which no provider
        # returns an acceptable hit are collected in +ungeocoded_locations+.
        #
        # @return [Array<Geocoder::Result>]
        def geocode_all
          locations.each_with_object([]) do |location, results|
            next unless options[:regeocode] || location.geodata.nil?

            geocoder_result = geocode(location)

            if geocoder_result.nil?
              ungeocoded_locations << location
              next
            end

            results << build_result(code: location.id, geocoder_result:)
          end
        end

        private

        # Tries each provider with each search term and returns the first
        # hit accepted by +filter+, or nil when nothing matches.
        def geocode(location)
          providers.each do |provider|
            Array(build_search_term(location)).each do |search_term|
              all_results = provider.search(search_term)
              geocoder_result = filter(location, all_results)

              return geocoder_result unless geocoder_result.nil?
            end
          end

          nil
        end

        def build_result(code:, geocoder_result:)
          Geocoder::Result.new(
            code:,
            lat: geocoder_result.lat,
            long: geocoder_result.long,
            bounding_box: geocoder_result.bounding_box
          )
        end

        # Default search terms: the Khmer full name, then the Khmer name,
        # each swapped for its listed misspelling when one exists.
        def build_search_term(location)
          [location.full_name_km, location.name_km].map do |term|
            MISSPELLINGS.find { |m| m.correct_text == term }&.incorrect_text || term
          end
        end

        def ungeocoded_locations
          @ungeocoded_locations ||= []
        end
      end

      # Geocodes provinces, searching by ISO 3166-2 code.
      class CambodianProvinces < AbstractGeocoder
        private

        def locations
          @locations ||= Pumi::Province.all
        end

        def build_search_term(province)
          province.iso3166_2
        end

        def filter(province, geocoder_results)
          geocoder_results.find do |r|
            r.iso3166_2 == province.iso3166_2 && r.types.include?("administrative")
          end
        end
      end

      # Geocodes districts; a hit must be in Cambodia, in the district's
      # province, and carry one of the accepted place types.
      class CambodianDistricts < AbstractGeocoder
        private

        def locations
          @locations ||= Pumi::District.all
        end

        def filter(district, geocoder_results)
          geocoder_results.find do |r|
            r.country_code == "KH" &&
              r.iso3166_2 == district.province.iso3166_2 && (
                %w[administrative_area_level_2 town city administrative].any? { |type| r.types.include?(type) } ||
                (%w[locality political].sort == r.types.sort)
              )
          end
        end
      end

      # Geocodes communes; a hit must be in Cambodia, match either the
      # commune's province code or its district name, and carry one of the
      # accepted place types.
      class CambodianCommunes < AbstractGeocoder
        private

        def locations
          @locations ||= Pumi::Commune.all
        end

        def filter(commune, geocoder_results)
          geocoder_results.find do |r|
            r.country_code == "KH" &&
              (r.iso3166_2 == commune.province.iso3166_2 ||
                r.district_name_en.to_s.downcase.include?(commune.district.name_en.downcase)) &&
              %w[administrative_area_level_3 village suburb neighbourhood].any? { |type| r.types.include?(type) }
          end
        end
      end

      attr_reader :data_file, :geocoder

      # @param data_file [#read, #write] source and sink of the records
      # @param geocoder [#geocode_all] produces the results to merge
      def initialize(data_file:, geocoder:)
        @data_file = data_file
        @geocoder = geocoder
      end

      # Copies lat/long/bounding_box of each geocoded result into the
      # "geodata" hash of the record with the same code, then writes the
      # data file. Records without a result are left untouched.
      #
      # @param output_dir [String] forwarded to the data file as +data_directory:+
      # @return whatever +data_file.write+ returns
      def load_data!(output_dir: "data")
        data.each do |code, attributes|
          geocoded_result = geocoded_results_by_code[code]

          next if geocoded_result.nil?

          attributes["geodata"] ||= {}
          attributes["geodata"]["lat"] = geocoded_result.lat
          attributes["geodata"]["long"] = geocoded_result.long
          attributes["geodata"]["bounding_box"] = geocoded_result.bounding_box
        end

        write_data!(output_dir)
      end

      private

      def data
        @data ||= data_file.read
      end

      def write_data!(data_directory)
        data_file.write(data, data_directory:)
      end

      def geocoded_results
        @geocoded_results ||= geocoder.geocode_all
      end

      # Results indexed by code so each record is matched with one hash
      # lookup. The first result per code wins. Built lazily, so the
      # geocoder is not invoked when there are no records.
      def geocoded_results_by_code
        @geocoded_results_by_code ||= geocoded_results.each_with_object({}) do |result, index|
          index[result.code] ||= result
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Pumi
  module DataSource
    # Stamps every record of a data file with an "iso3166_2" attribute
    # derived from the record's code, then writes the file back.
    class ISO31662
      attr_reader :data_file

      # @param data_file [#read, #write] must respond to +read+ (returning a
      #   Hash of code => attributes) and +write(data, data_directory:)+;
      #   defaults to the provinces data file
      def initialize(data_file: Pumi::DataFile.new(:provinces))
        @data_file = data_file
      end

      # Sets +attributes["iso3166_2"]+ to "KH-<n>" for each record, where
      # <n> is the record code converted with +to_i+ (so "01" becomes
      # "KH-1"), and writes the result.
      #
      # @param output_dir [String] forwarded to the data file as +data_directory:+
      # @return whatever +data_file.write+ returns
      def load_data!(output_dir: "data")
        data.each do |code, attributes|
          attributes["iso3166_2"] = "KH-#{code.to_i}"
        end

        write_data!(output_dir)
      end

      private

      def data
        @data ||= data_file.read
      end

      def write_data!(data_directory)
        data_file.write(data, data_directory:)
      end
    end
  end
end