pumi 0.16.0 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/dependabot-auto-merge.yml +17 -0
- data/.github/workflows/update_data.yml +2 -2
- data/.tool-versions +1 -1
- data/CHANGELOG.md +14 -0
- data/README.md +5 -0
- data/bin/parse_data +16 -3
- data/data/communes.yml +562 -0
- data/data/districts.yml +402 -0
- data/data/provinces.yml +52 -2
- data/data/villages.yml +16 -0
- data/lib/pumi/data_file.rb +32 -0
- data/lib/pumi/data_source/ncdd.rb +100 -0
- data/lib/pumi/data_source/wikipedia.rb +665 -0
- data/lib/pumi/data_source.rb +7 -0
- data/lib/pumi/location.rb +3 -1
- data/lib/pumi/parser.rb +20 -13
- data/lib/pumi/scraper/result.rb +5 -0
- data/lib/pumi/version.rb +1 -1
- data/lib/pumi.rb +2 -1
- data/pumi.gemspec +1 -0
- metadata +24 -4
- data/lib/pumi/data_parser.rb +0 -75
data/data/provinces.yml
CHANGED
@@ -8,6 +8,8 @@ provinces:
|
|
8
8
|
km: ខេត្ត
|
9
9
|
latin: Khaet
|
10
10
|
en: Province
|
11
|
+
links:
|
12
|
+
wikipedia: https://en.wikipedia.org/wiki/Banteay_Meanchey_province
|
11
13
|
'02':
|
12
14
|
name:
|
13
15
|
km: បាត់ដំបង
|
@@ -16,6 +18,8 @@ provinces:
|
|
16
18
|
km: ខេត្ត
|
17
19
|
latin: Khaet
|
18
20
|
en: Province
|
21
|
+
links:
|
22
|
+
wikipedia: https://en.wikipedia.org/wiki/Battambang_province
|
19
23
|
'03':
|
20
24
|
name:
|
21
25
|
km: កំពង់ចាម
|
@@ -24,6 +28,8 @@ provinces:
|
|
24
28
|
km: ខេត្ត
|
25
29
|
latin: Khaet
|
26
30
|
en: Province
|
31
|
+
links:
|
32
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Cham_province
|
27
33
|
'04':
|
28
34
|
name:
|
29
35
|
km: កំពង់ឆ្នាំង
|
@@ -32,6 +38,8 @@ provinces:
|
|
32
38
|
km: ខេត្ត
|
33
39
|
latin: Khaet
|
34
40
|
en: Province
|
41
|
+
links:
|
42
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Chhnang_province
|
35
43
|
'05':
|
36
44
|
name:
|
37
45
|
km: កំពង់ស្ពឺ
|
@@ -40,6 +48,8 @@ provinces:
|
|
40
48
|
km: ខេត្ត
|
41
49
|
latin: Khaet
|
42
50
|
en: Province
|
51
|
+
links:
|
52
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Speu_province
|
43
53
|
'06':
|
44
54
|
name:
|
45
55
|
km: កំពង់ធំ
|
@@ -48,6 +58,8 @@ provinces:
|
|
48
58
|
km: ខេត្ត
|
49
59
|
latin: Khaet
|
50
60
|
en: Province
|
61
|
+
links:
|
62
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Thom_province
|
51
63
|
'07':
|
52
64
|
name:
|
53
65
|
km: កំពត
|
@@ -56,14 +68,18 @@ provinces:
|
|
56
68
|
km: ខេត្ត
|
57
69
|
latin: Khaet
|
58
70
|
en: Province
|
71
|
+
links:
|
72
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampot_province
|
59
73
|
'08':
|
60
74
|
name:
|
61
|
-
km:
|
75
|
+
km: កណ្តាល
|
62
76
|
latin: Kandal
|
63
77
|
administrative_unit:
|
64
78
|
km: ខេត្ត
|
65
79
|
latin: Khaet
|
66
80
|
en: Province
|
81
|
+
links:
|
82
|
+
wikipedia: https://en.wikipedia.org/wiki/Kandal_province
|
67
83
|
'09':
|
68
84
|
name:
|
69
85
|
km: កោះកុង
|
@@ -72,6 +88,8 @@ provinces:
|
|
72
88
|
km: ខេត្ត
|
73
89
|
latin: Khaet
|
74
90
|
en: Province
|
91
|
+
links:
|
92
|
+
wikipedia: https://en.wikipedia.org/wiki/Koh_Kong_province
|
75
93
|
'10':
|
76
94
|
name:
|
77
95
|
km: ក្រចេះ
|
@@ -80,6 +98,8 @@ provinces:
|
|
80
98
|
km: ខេត្ត
|
81
99
|
latin: Khaet
|
82
100
|
en: Province
|
101
|
+
links:
|
102
|
+
wikipedia: https://en.wikipedia.org/wiki/Krati%C3%A9_province
|
83
103
|
'11':
|
84
104
|
name:
|
85
105
|
km: មណ្ឌលគិរី
|
@@ -88,6 +108,8 @@ provinces:
|
|
88
108
|
km: ខេត្ត
|
89
109
|
latin: Khaet
|
90
110
|
en: Province
|
111
|
+
links:
|
112
|
+
wikipedia: https://en.wikipedia.org/wiki/Mondulkiri_province
|
91
113
|
'12':
|
92
114
|
name:
|
93
115
|
km: ភ្នំពេញ
|
@@ -96,6 +118,8 @@ provinces:
|
|
96
118
|
km: រាជធានី
|
97
119
|
latin: Reach Theani
|
98
120
|
en: Capital
|
121
|
+
links:
|
122
|
+
wikipedia: https://en.wikipedia.org/wiki/Phnom_Penh
|
99
123
|
'13':
|
100
124
|
name:
|
101
125
|
km: ព្រះវិហារ
|
@@ -104,6 +128,8 @@ provinces:
|
|
104
128
|
km: ខេត្ត
|
105
129
|
latin: Khaet
|
106
130
|
en: Province
|
131
|
+
links:
|
132
|
+
wikipedia: https://en.wikipedia.org/wiki/Preah_Vihear_province
|
107
133
|
'14':
|
108
134
|
name:
|
109
135
|
km: ព្រៃវែង
|
@@ -112,6 +138,8 @@ provinces:
|
|
112
138
|
km: ខេត្ត
|
113
139
|
latin: Khaet
|
114
140
|
en: Province
|
141
|
+
links:
|
142
|
+
wikipedia: https://en.wikipedia.org/wiki/Prey_Veng_province
|
115
143
|
'15':
|
116
144
|
name:
|
117
145
|
km: ពោធិ៍សាត់
|
@@ -120,6 +148,8 @@ provinces:
|
|
120
148
|
km: ខេត្ត
|
121
149
|
latin: Khaet
|
122
150
|
en: Province
|
151
|
+
links:
|
152
|
+
wikipedia: https://en.wikipedia.org/wiki/Pursat_province
|
123
153
|
'16':
|
124
154
|
name:
|
125
155
|
km: រតនគិរី
|
@@ -128,6 +158,8 @@ provinces:
|
|
128
158
|
km: ខេត្ត
|
129
159
|
latin: Khaet
|
130
160
|
en: Province
|
161
|
+
links:
|
162
|
+
wikipedia: https://en.wikipedia.org/wiki/Ratanakiri_province
|
131
163
|
'17':
|
132
164
|
name:
|
133
165
|
km: សៀមរាប
|
@@ -136,6 +168,8 @@ provinces:
|
|
136
168
|
km: ខេត្ត
|
137
169
|
latin: Khaet
|
138
170
|
en: Province
|
171
|
+
links:
|
172
|
+
wikipedia: https://en.wikipedia.org/wiki/Siem_Reap_province
|
139
173
|
'18':
|
140
174
|
name:
|
141
175
|
km: ព្រះសីហនុ
|
@@ -144,6 +178,8 @@ provinces:
|
|
144
178
|
km: ខេត្ត
|
145
179
|
latin: Khaet
|
146
180
|
en: Province
|
181
|
+
links:
|
182
|
+
wikipedia: https://en.wikipedia.org/wiki/Sihanoukville_province
|
147
183
|
'19':
|
148
184
|
name:
|
149
185
|
km: ស្ទឹងត្រែង
|
@@ -152,6 +188,8 @@ provinces:
|
|
152
188
|
km: ខេត្ត
|
153
189
|
latin: Khaet
|
154
190
|
en: Province
|
191
|
+
links:
|
192
|
+
wikipedia: https://en.wikipedia.org/wiki/Stung_Treng_province
|
155
193
|
'20':
|
156
194
|
name:
|
157
195
|
km: ស្វាយរៀង
|
@@ -160,6 +198,8 @@ provinces:
|
|
160
198
|
km: ខេត្ត
|
161
199
|
latin: Khaet
|
162
200
|
en: Province
|
201
|
+
links:
|
202
|
+
wikipedia: https://en.wikipedia.org/wiki/Svay_Rieng_province
|
163
203
|
'21':
|
164
204
|
name:
|
165
205
|
km: តាកែវ
|
@@ -168,14 +208,18 @@ provinces:
|
|
168
208
|
km: ខេត្ត
|
169
209
|
latin: Khaet
|
170
210
|
en: Province
|
211
|
+
links:
|
212
|
+
wikipedia: https://en.wikipedia.org/wiki/Tak%C3%A9o_province
|
171
213
|
'22':
|
172
214
|
name:
|
173
|
-
km:
|
215
|
+
km: ឧត្តរមានជ័យ
|
174
216
|
latin: Oddar Meanchey
|
175
217
|
administrative_unit:
|
176
218
|
km: ខេត្ត
|
177
219
|
latin: Khaet
|
178
220
|
en: Province
|
221
|
+
links:
|
222
|
+
wikipedia: https://en.wikipedia.org/wiki/Oddar_Meanchey_province
|
179
223
|
'23':
|
180
224
|
name:
|
181
225
|
km: កែប
|
@@ -184,6 +228,8 @@ provinces:
|
|
184
228
|
km: ខេត្ត
|
185
229
|
latin: Khaet
|
186
230
|
en: Province
|
231
|
+
links:
|
232
|
+
wikipedia: https://en.wikipedia.org/wiki/Kep_province
|
187
233
|
'24':
|
188
234
|
name:
|
189
235
|
km: ប៉ៃលិន
|
@@ -192,6 +238,8 @@ provinces:
|
|
192
238
|
km: ខេត្ត
|
193
239
|
latin: Khaet
|
194
240
|
en: Province
|
241
|
+
links:
|
242
|
+
wikipedia: https://en.wikipedia.org/wiki/Pailin_province
|
195
243
|
'25':
|
196
244
|
name:
|
197
245
|
km: ត្បូងឃ្មុំ
|
@@ -200,3 +248,5 @@ provinces:
|
|
200
248
|
km: ខេត្ត
|
201
249
|
latin: Khaet
|
202
250
|
en: Province
|
251
|
+
links:
|
252
|
+
wikipedia: https://en.wikipedia.org/wiki/Tboung_Khmum_province
|
data/data/villages.yml
CHANGED
@@ -53016,6 +53016,22 @@ villages:
|
|
53016
53016
|
km: ភូមិ
|
53017
53017
|
latin: Phum
|
53018
53018
|
en: Village
|
53019
|
+
'08130602':
|
53020
|
+
name:
|
53021
|
+
km: ស្វាយជ្រុំ
|
53022
|
+
latin: Svay Chrum
|
53023
|
+
administrative_unit:
|
53024
|
+
km: ភូមិ
|
53025
|
+
latin: Phum
|
53026
|
+
en: Village
|
53027
|
+
'08130603':
|
53028
|
+
name:
|
53029
|
+
km: បារាជ
|
53030
|
+
latin: Ba Reach
|
53031
|
+
administrative_unit:
|
53032
|
+
km: ភូមិ
|
53033
|
+
latin: Phum
|
53034
|
+
en: Village
|
53019
53035
|
'08130701':
|
53020
53036
|
name:
|
53021
53037
|
km: អរិយក្សត្រ
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "yaml"
|
2
|
+
require "pathname"
|
3
|
+
|
4
|
+
module Pumi
  # Reads and writes a single YAML data file ("<type>.yml") whose top-level
  # key is the type name and whose value is the data for that type.
  class DataFile
    # The `data` directory located two levels above this file's directory.
    DEFAULT_DATA_DIRECTORY = File.join(File.expand_path("..", File.dirname(__dir__)), "data")
    # The only file types that may be read or written.
    TYPES = %w[provinces districts communes villages].freeze

    attr_reader :type

    # @param type [#to_s] one of TYPES
    # @raise [ArgumentError] when the stringified type is not in TYPES
    def initialize(type)
      @type = type.to_s
      return if TYPES.include?(@type)

      raise ArgumentError, "#{type} is not included in #{TYPES}"
    end

    # Loads "<type>.yml" from the given directory and returns the value
    # stored under the top-level key named after the type.
    #
    # @param data_directory [String, Pathname] directory containing the file
    # @raise [KeyError] when the file has no top-level key for this type
    def read(data_directory: DEFAULT_DATA_DIRECTORY)
      contents = YAML.load_file(data_file(data_directory))
      contents.fetch(type)
    end

    # Writes the data, sorted, under a top-level key named after the type.
    # Nothing is written (and nil is returned) when the data is empty.
    #
    # @param data [Hash] entries to serialize
    # @param data_directory [String, Pathname] directory to write into
    def write(data, data_directory: DEFAULT_DATA_DIRECTORY)
      return if data.empty?

      sorted = data.sort.to_h
      document = { type => sorted }.to_yaml
      File.write(data_file(data_directory), document)
    end

    private

    # Path of this type's YAML file inside the given directory.
    def data_file(data_directory)
      Pathname(data_directory) + "#{type}.yml"
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require "pathname"
|
2
|
+
require "csv"
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
# https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
|
6
|
+
# https://en.wikipedia.org/wiki/Romanization_of_Khmer
|
7
|
+
# https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
|
8
|
+
|
9
|
+
module Pumi
  module DataSource
    # Parses CSV source files into district, commune and village data and
    # writes each group out through DataFile.
    class NCDD
      # Column names applied to every CSV row (the files are read without
      # relying on a header line of their own).
      CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze

      AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, :type, keyword_init: true)
      Row = Struct.new(:code, :name_km, :name_latin, :type, keyword_init: true) do
        # Looks up the unit for this row's type.
        # Raises KeyError when the type is not a key of ADMINISTRATIVE_UNITS.
        def administrative_unit
          ADMINISTRATIVE_UNITS.fetch(type)
        end
      end

      # Keyed by the value found in the CSV "type" column.
      # code_length is the code length a row must have to be accepted;
      # group is the DataFile type the row is written to.
      ADMINISTRATIVE_UNITS = {
        "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
        "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
        "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
        "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
        "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
        "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
      }.freeze

      # Parses every "*.csv" file in source_dir and writes the collected
      # locations to output_dir, one data file per group.
      #
      # @param source_dir [String] directory containing the CSV files
      # @param output_dir [String] directory handed to DataFile#write
      def load_data!(source_dir: "tmp", output_dir: "data")
        source_files(source_dir).each { |file| parse_source_file(file) }

        write_data!(output_dir)
      end

      private

      # Collects every usable row of one CSV file. Rows without a numeric
      # code, or whose code length does not match the length expected for
      # their administrative unit, are skipped.
      def parse_source_file(file)
        CSV.read(file, headers: CSV_HEADERS).each do |csv_row|
          row = build_row(csv_row)

          next unless row.code
          next if row.administrative_unit.code_length != row.code.length

          write_location(row)
        end
      end

      # Parsed locations: group name => { code => location attributes }.
      def data
        @data ||= {}
      end

      def build_row(row)
        Row.new(
          code: parse_location_code(row),
          name_km: row.fetch("name_km"),
          name_latin: row.fetch("name_latin"),
          type: row.fetch("type")
        )
      end

      # Returns the row's code, or nil when it contains no digit at all.
      # Codes with an odd number of characters get one leading "0".
      def parse_location_code(row)
        code = row.fetch("code")
        return unless code.to_s.match?(/\d/)

        code.length.odd? ? code.rjust(code.length + 1, "0") : code
      end

      def write_location(row)
        unit = row.administrative_unit

        data[unit.group] ||= {}
        data[unit.group][row.code] = {
          "name" => {
            "km" => row.name_km,
            "latin" => row.name_latin
          },
          "administrative_unit" => {
            "km" => unit.km,
            "latin" => unit.latin,
            "en" => unit.en
          }
        }
      end

      def source_files(source_dir)
        Pathname.glob("#{source_dir}/*.csv").select(&:file?)
      end

      # Writes each group exactly once. `uniq` must be followed by `each`:
      # passing the block straight to `uniq` would run it for every unit
      # (so a group shared by several units would be written repeatedly).
      # Groups for which no row was parsed are skipped instead of raising.
      def write_data!(output_dir)
        return if data.empty?

        ADMINISTRATIVE_UNITS.values.map(&:group).uniq.each do |data_group|
          group_data = data[data_group]
          next unless group_data

          DataFile.new(data_group).write(group_data, data_directory: output_dir)
        end
      end
    end
  end
end
|