pumi 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +5 -0
- data/bin/parse_data +16 -3
- data/data/communes.yml +562 -0
- data/data/districts.yml +402 -0
- data/data/provinces.yml +52 -2
- data/lib/pumi/data_file.rb +32 -0
- data/lib/pumi/data_source/ncdd.rb +100 -0
- data/lib/pumi/data_source/wikipedia.rb +665 -0
- data/lib/pumi/data_source.rb +7 -0
- data/lib/pumi/location.rb +3 -1
- data/lib/pumi/parser.rb +20 -13
- data/lib/pumi/scraper/result.rb +5 -0
- data/lib/pumi/version.rb +1 -1
- data/lib/pumi.rb +2 -1
- data/pumi.gemspec +1 -0
- metadata +21 -3
- data/lib/pumi/data_parser.rb +0 -75
data/data/provinces.yml
CHANGED
@@ -8,6 +8,8 @@ provinces:
|
|
8
8
|
km: ខេត្ត
|
9
9
|
latin: Khaet
|
10
10
|
en: Province
|
11
|
+
links:
|
12
|
+
wikipedia: https://en.wikipedia.org/wiki/Banteay_Meanchey_province
|
11
13
|
'02':
|
12
14
|
name:
|
13
15
|
km: បាត់ដំបង
|
@@ -16,6 +18,8 @@ provinces:
|
|
16
18
|
km: ខេត្ត
|
17
19
|
latin: Khaet
|
18
20
|
en: Province
|
21
|
+
links:
|
22
|
+
wikipedia: https://en.wikipedia.org/wiki/Battambang_province
|
19
23
|
'03':
|
20
24
|
name:
|
21
25
|
km: កំពង់ចាម
|
@@ -24,6 +28,8 @@ provinces:
|
|
24
28
|
km: ខេត្ត
|
25
29
|
latin: Khaet
|
26
30
|
en: Province
|
31
|
+
links:
|
32
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Cham_province
|
27
33
|
'04':
|
28
34
|
name:
|
29
35
|
km: កំពង់ឆ្នាំង
|
@@ -32,6 +38,8 @@ provinces:
|
|
32
38
|
km: ខេត្ត
|
33
39
|
latin: Khaet
|
34
40
|
en: Province
|
41
|
+
links:
|
42
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Chhnang_province
|
35
43
|
'05':
|
36
44
|
name:
|
37
45
|
km: កំពង់ស្ពឺ
|
@@ -40,6 +48,8 @@ provinces:
|
|
40
48
|
km: ខេត្ត
|
41
49
|
latin: Khaet
|
42
50
|
en: Province
|
51
|
+
links:
|
52
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Speu_province
|
43
53
|
'06':
|
44
54
|
name:
|
45
55
|
km: កំពង់ធំ
|
@@ -48,6 +58,8 @@ provinces:
|
|
48
58
|
km: ខេត្ត
|
49
59
|
latin: Khaet
|
50
60
|
en: Province
|
61
|
+
links:
|
62
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampong_Thom_province
|
51
63
|
'07':
|
52
64
|
name:
|
53
65
|
km: កំពត
|
@@ -56,14 +68,18 @@ provinces:
|
|
56
68
|
km: ខេត្ត
|
57
69
|
latin: Khaet
|
58
70
|
en: Province
|
71
|
+
links:
|
72
|
+
wikipedia: https://en.wikipedia.org/wiki/Kampot_province
|
59
73
|
'08':
|
60
74
|
name:
|
61
|
-
km:
|
75
|
+
km: កណ្តាល
|
62
76
|
latin: Kandal
|
63
77
|
administrative_unit:
|
64
78
|
km: ខេត្ត
|
65
79
|
latin: Khaet
|
66
80
|
en: Province
|
81
|
+
links:
|
82
|
+
wikipedia: https://en.wikipedia.org/wiki/Kandal_province
|
67
83
|
'09':
|
68
84
|
name:
|
69
85
|
km: កោះកុង
|
@@ -72,6 +88,8 @@ provinces:
|
|
72
88
|
km: ខេត្ត
|
73
89
|
latin: Khaet
|
74
90
|
en: Province
|
91
|
+
links:
|
92
|
+
wikipedia: https://en.wikipedia.org/wiki/Koh_Kong_province
|
75
93
|
'10':
|
76
94
|
name:
|
77
95
|
km: ក្រចេះ
|
@@ -80,6 +98,8 @@ provinces:
|
|
80
98
|
km: ខេត្ត
|
81
99
|
latin: Khaet
|
82
100
|
en: Province
|
101
|
+
links:
|
102
|
+
wikipedia: https://en.wikipedia.org/wiki/Krati%C3%A9_province
|
83
103
|
'11':
|
84
104
|
name:
|
85
105
|
km: មណ្ឌលគិរី
|
@@ -88,6 +108,8 @@ provinces:
|
|
88
108
|
km: ខេត្ត
|
89
109
|
latin: Khaet
|
90
110
|
en: Province
|
111
|
+
links:
|
112
|
+
wikipedia: https://en.wikipedia.org/wiki/Mondulkiri_province
|
91
113
|
'12':
|
92
114
|
name:
|
93
115
|
km: ភ្នំពេញ
|
@@ -96,6 +118,8 @@ provinces:
|
|
96
118
|
km: រាជធានី
|
97
119
|
latin: Reach Theani
|
98
120
|
en: Capital
|
121
|
+
links:
|
122
|
+
wikipedia: https://en.wikipedia.org/wiki/Phnom_Penh
|
99
123
|
'13':
|
100
124
|
name:
|
101
125
|
km: ព្រះវិហារ
|
@@ -104,6 +128,8 @@ provinces:
|
|
104
128
|
km: ខេត្ត
|
105
129
|
latin: Khaet
|
106
130
|
en: Province
|
131
|
+
links:
|
132
|
+
wikipedia: https://en.wikipedia.org/wiki/Preah_Vihear_province
|
107
133
|
'14':
|
108
134
|
name:
|
109
135
|
km: ព្រៃវែង
|
@@ -112,6 +138,8 @@ provinces:
|
|
112
138
|
km: ខេត្ត
|
113
139
|
latin: Khaet
|
114
140
|
en: Province
|
141
|
+
links:
|
142
|
+
wikipedia: https://en.wikipedia.org/wiki/Prey_Veng_province
|
115
143
|
'15':
|
116
144
|
name:
|
117
145
|
km: ពោធិ៍សាត់
|
@@ -120,6 +148,8 @@ provinces:
|
|
120
148
|
km: ខេត្ត
|
121
149
|
latin: Khaet
|
122
150
|
en: Province
|
151
|
+
links:
|
152
|
+
wikipedia: https://en.wikipedia.org/wiki/Pursat_province
|
123
153
|
'16':
|
124
154
|
name:
|
125
155
|
km: រតនគិរី
|
@@ -128,6 +158,8 @@ provinces:
|
|
128
158
|
km: ខេត្ត
|
129
159
|
latin: Khaet
|
130
160
|
en: Province
|
161
|
+
links:
|
162
|
+
wikipedia: https://en.wikipedia.org/wiki/Ratanakiri_province
|
131
163
|
'17':
|
132
164
|
name:
|
133
165
|
km: សៀមរាប
|
@@ -136,6 +168,8 @@ provinces:
|
|
136
168
|
km: ខេត្ត
|
137
169
|
latin: Khaet
|
138
170
|
en: Province
|
171
|
+
links:
|
172
|
+
wikipedia: https://en.wikipedia.org/wiki/Siem_Reap_province
|
139
173
|
'18':
|
140
174
|
name:
|
141
175
|
km: ព្រះសីហនុ
|
@@ -144,6 +178,8 @@ provinces:
|
|
144
178
|
km: ខេត្ត
|
145
179
|
latin: Khaet
|
146
180
|
en: Province
|
181
|
+
links:
|
182
|
+
wikipedia: https://en.wikipedia.org/wiki/Sihanoukville_province
|
147
183
|
'19':
|
148
184
|
name:
|
149
185
|
km: ស្ទឹងត្រែង
|
@@ -152,6 +188,8 @@ provinces:
|
|
152
188
|
km: ខេត្ត
|
153
189
|
latin: Khaet
|
154
190
|
en: Province
|
191
|
+
links:
|
192
|
+
wikipedia: https://en.wikipedia.org/wiki/Stung_Treng_province
|
155
193
|
'20':
|
156
194
|
name:
|
157
195
|
km: ស្វាយរៀង
|
@@ -160,6 +198,8 @@ provinces:
|
|
160
198
|
km: ខេត្ត
|
161
199
|
latin: Khaet
|
162
200
|
en: Province
|
201
|
+
links:
|
202
|
+
wikipedia: https://en.wikipedia.org/wiki/Svay_Rieng_province
|
163
203
|
'21':
|
164
204
|
name:
|
165
205
|
km: តាកែវ
|
@@ -168,14 +208,18 @@ provinces:
|
|
168
208
|
km: ខេត្ត
|
169
209
|
latin: Khaet
|
170
210
|
en: Province
|
211
|
+
links:
|
212
|
+
wikipedia: https://en.wikipedia.org/wiki/Tak%C3%A9o_province
|
171
213
|
'22':
|
172
214
|
name:
|
173
|
-
km:
|
215
|
+
km: ឧត្តរមានជ័យ
|
174
216
|
latin: Oddar Meanchey
|
175
217
|
administrative_unit:
|
176
218
|
km: ខេត្ត
|
177
219
|
latin: Khaet
|
178
220
|
en: Province
|
221
|
+
links:
|
222
|
+
wikipedia: https://en.wikipedia.org/wiki/Oddar_Meanchey_province
|
179
223
|
'23':
|
180
224
|
name:
|
181
225
|
km: កែប
|
@@ -184,6 +228,8 @@ provinces:
|
|
184
228
|
km: ខេត្ត
|
185
229
|
latin: Khaet
|
186
230
|
en: Province
|
231
|
+
links:
|
232
|
+
wikipedia: https://en.wikipedia.org/wiki/Kep_province
|
187
233
|
'24':
|
188
234
|
name:
|
189
235
|
km: ប៉ៃលិន
|
@@ -192,6 +238,8 @@ provinces:
|
|
192
238
|
km: ខេត្ត
|
193
239
|
latin: Khaet
|
194
240
|
en: Province
|
241
|
+
links:
|
242
|
+
wikipedia: https://en.wikipedia.org/wiki/Pailin_province
|
195
243
|
'25':
|
196
244
|
name:
|
197
245
|
km: ត្បូងឃ្មុំ
|
@@ -200,3 +248,5 @@ provinces:
|
|
200
248
|
km: ខេត្ត
|
201
249
|
latin: Khaet
|
202
250
|
en: Province
|
251
|
+
links:
|
252
|
+
wikipedia: https://en.wikipedia.org/wiki/Tboung_Khmum_province
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "yaml"
|
2
|
+
require "pathname"
|
3
|
+
|
4
|
+
module Pumi
  # Reads and writes a single YAML data file ("<type>.yml") inside a data
  # directory. The file holds one top-level key, the type name, whose value is
  # the data hash.
  class DataFile
    # "data" directory located two levels above this file's directory.
    DEFAULT_DATA_DIRECTORY = File.join(File.expand_path("..", File.dirname(__dir__)), "data")
    # The only type names accepted by the constructor.
    TYPES = %w[provinces districts communes villages].freeze

    attr_reader :type

    # @param type [#to_s] one of TYPES (symbols are accepted and stringified)
    # @raise [ArgumentError] when the stringified type is not listed in TYPES
    def initialize(type)
      @type = type.to_s
      return if TYPES.include?(@type)

      raise ArgumentError, "#{type} is not included in #{TYPES}"
    end

    # Loads "<type>.yml" from +data_directory+ and returns the value stored
    # under the top-level type key.
    #
    # @param data_directory [String, Pathname]
    # @raise [KeyError] when the file has no top-level key for this type
    def read(data_directory: DEFAULT_DATA_DIRECTORY)
      document = YAML.load_file(data_file(data_directory))
      document.fetch(type)
    end

    # Serialises +data+, sorted by key, under the type key and writes it to
    # "<type>.yml" in +data_directory+. Nothing is written when +data+ is empty.
    #
    # @param data [Hash]
    # @param data_directory [String, Pathname]
    def write(data, data_directory: DEFAULT_DATA_DIRECTORY)
      return if data.empty?

      document = { type => data.sort.to_h }
      File.write(data_file(data_directory), document.to_yaml)
    end

    private

    # Path of this type's YAML file inside +data_directory+.
    def data_file(data_directory)
      Pathname(data_directory).join("#{type}.yml")
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require "pathname"
|
2
|
+
require "csv"
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
# https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
|
6
|
+
# https://en.wikipedia.org/wiki/Romanization_of_Khmer
|
7
|
+
# https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
|
8
|
+
|
9
|
+
module Pumi
  module DataSource
    # Builds the gem's location data files from CSV source files.
    #
    # Every "*.csv" file in the source directory is read with the fixed column
    # layout in CSV_HEADERS. Rows are grouped by the administrative unit named
    # in their "type" column, and each group is handed to DataFile for writing.
    class NCDD
      # Column names applied to every CSV row, in order.
      CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze

      AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, :type, keyword_init: true)
      Row = Struct.new(:code, :name_km, :name_latin, :type, keyword_init: true) do
        # Looks up the unit definition for this row's "type" column.
        #
        # @raise [KeyError] when the type is not a key of ADMINISTRATIVE_UNITS
        def administrative_unit
          ADMINISTRATIVE_UNITS.fetch(type)
        end
      end

      # Keyed by the Khmer unit name as it appears in the CSV "type" column.
      # +code_length+ is the code length a row must have to be accepted;
      # +group+ is the data file the row is written to.
      ADMINISTRATIVE_UNITS = {
        "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
        "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
        "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
        "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
        "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
        "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
      }.freeze

      # Parses every CSV file in +source_dir+ and writes the collected
      # locations, one data file per group, into +output_dir+.
      #
      # @param source_dir [String] directory searched for "*.csv" files
      # @param output_dir [String] directory passed to DataFile#write
      def load_data!(source_dir: "tmp", output_dir: "data")
        source_files(source_dir).each do |file|
          parse_source_file(file)
        end

        write_data!(output_dir)
      end

      private

      # Collects every usable row of one CSV file into +data+.
      def parse_source_file(file)
        # The rows are matched against UTF-8 hash keys, so read as UTF-8
        # regardless of the process locale.
        CSV.read(file, headers: CSV_HEADERS, encoding: "UTF-8").each do |csv_row|
          row = build_row(csv_row)

          # Rows without a numeric code (e.g. a header line) are ignored.
          next unless row.code
          # The row's type must agree with the length of its code.
          next if row.administrative_unit.code_length != row.code.length

          write_location(row)
        end
      end

      # Accumulator: group name => { location code => location attributes }.
      def data
        @data ||= {}
      end

      # Converts a CSV row into a Row struct with a normalised code.
      def build_row(row)
        Row.new(
          code: parse_location_code(row),
          name_km: row.fetch("name_km"),
          name_latin: row.fetch("name_latin"),
          type: row.fetch("type")
        )
      end

      # Returns the row's code, or nil when the code column contains no digits.
      # Odd-length codes are left-padded with one "0" (presumably a leading
      # zero dropped by the source export — TODO confirm).
      def parse_location_code(row)
        code = row.fetch("code")
        return if code.to_s.gsub(/\D/, "").empty?

        code = code.rjust(code.length + 1, "0") if code.length.odd?
        code
      end

      # Stores the row's names and unit labels under its group and code.
      def write_location(row)
        data[row.administrative_unit.group] ||= {}
        data[row.administrative_unit.group][row.code] = {
          "name" => {
            "km" => row.name_km,
            "latin" => row.name_latin
          },
          "administrative_unit" => {
            "km" => row.administrative_unit.km,
            "latin" => row.administrative_unit.latin,
            "en" => row.administrative_unit.en
          }
        }
      end

      # All regular "*.csv" files directly inside +source_dir+.
      def source_files(source_dir)
        Pathname.glob("#{source_dir}/*.csv").select(&:file?)
      end

      # Writes each collected group exactly once.
      def write_data!(output_dir)
        return if data.empty?

        # `uniq.each`, not `uniq do ... end`: a block given to `uniq` is called
        # for every element as a uniqueness key, which would write the
        # districts file three times and the communes file twice.
        ADMINISTRATIVE_UNITS.values.map(&:group).uniq.each do |data_group|
          group_data = data[data_group]
          # Skip groups the source files had no rows for instead of raising.
          next unless group_data

          DataFile.new(data_group).write(group_data, data_directory: output_dir)
        end
      end
    end
  end
end
|