pumi 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
data/data/provinces.yml CHANGED
@@ -8,6 +8,8 @@ provinces:
8
8
  km: ខេត្ត
9
9
  latin: Khaet
10
10
  en: Province
11
+ links:
12
+ wikipedia: https://en.wikipedia.org/wiki/Banteay_Meanchey_province
11
13
  '02':
12
14
  name:
13
15
  km: បាត់ដំបង
@@ -16,6 +18,8 @@ provinces:
16
18
  km: ខេត្ត
17
19
  latin: Khaet
18
20
  en: Province
21
+ links:
22
+ wikipedia: https://en.wikipedia.org/wiki/Battambang_province
19
23
  '03':
20
24
  name:
21
25
  km: កំពង់ចាម
@@ -24,6 +28,8 @@ provinces:
24
28
  km: ខេត្ត
25
29
  latin: Khaet
26
30
  en: Province
31
+ links:
32
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Cham_province
27
33
  '04':
28
34
  name:
29
35
  km: កំពង់ឆ្នាំង
@@ -32,6 +38,8 @@ provinces:
32
38
  km: ខេត្ត
33
39
  latin: Khaet
34
40
  en: Province
41
+ links:
42
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Chhnang_province
35
43
  '05':
36
44
  name:
37
45
  km: កំពង់ស្ពឺ
@@ -40,6 +48,8 @@ provinces:
40
48
  km: ខេត្ត
41
49
  latin: Khaet
42
50
  en: Province
51
+ links:
52
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Speu_province
43
53
  '06':
44
54
  name:
45
55
  km: កំពង់ធំ
@@ -48,6 +58,8 @@ provinces:
48
58
  km: ខេត្ត
49
59
  latin: Khaet
50
60
  en: Province
61
+ links:
62
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Thom_province
51
63
  '07':
52
64
  name:
53
65
  km: កំពត
@@ -56,14 +68,18 @@ provinces:
56
68
  km: ខេត្ត
57
69
  latin: Khaet
58
70
  en: Province
71
+ links:
72
+ wikipedia: https://en.wikipedia.org/wiki/Kampot_province
59
73
  '08':
60
74
  name:
61
- km: កណ្ដាល
75
+ km: កណ្តាល
62
76
  latin: Kandal
63
77
  administrative_unit:
64
78
  km: ខេត្ត
65
79
  latin: Khaet
66
80
  en: Province
81
+ links:
82
+ wikipedia: https://en.wikipedia.org/wiki/Kandal_province
67
83
  '09':
68
84
  name:
69
85
  km: កោះកុង
@@ -72,6 +88,8 @@ provinces:
72
88
  km: ខេត្ត
73
89
  latin: Khaet
74
90
  en: Province
91
+ links:
92
+ wikipedia: https://en.wikipedia.org/wiki/Koh_Kong_province
75
93
  '10':
76
94
  name:
77
95
  km: ក្រចេះ
@@ -80,6 +98,8 @@ provinces:
80
98
  km: ខេត្ត
81
99
  latin: Khaet
82
100
  en: Province
101
+ links:
102
+ wikipedia: https://en.wikipedia.org/wiki/Krati%C3%A9_province
83
103
  '11':
84
104
  name:
85
105
  km: មណ្ឌលគិរី
@@ -88,6 +108,8 @@ provinces:
88
108
  km: ខេត្ត
89
109
  latin: Khaet
90
110
  en: Province
111
+ links:
112
+ wikipedia: https://en.wikipedia.org/wiki/Mondulkiri_province
91
113
  '12':
92
114
  name:
93
115
  km: ភ្នំពេញ
@@ -96,6 +118,8 @@ provinces:
96
118
  km: រាជធានី
97
119
  latin: Reach Theani
98
120
  en: Capital
121
+ links:
122
+ wikipedia: https://en.wikipedia.org/wiki/Phnom_Penh
99
123
  '13':
100
124
  name:
101
125
  km: ព្រះវិហារ
@@ -104,6 +128,8 @@ provinces:
104
128
  km: ខេត្ត
105
129
  latin: Khaet
106
130
  en: Province
131
+ links:
132
+ wikipedia: https://en.wikipedia.org/wiki/Preah_Vihear_province
107
133
  '14':
108
134
  name:
109
135
  km: ព្រៃវែង
@@ -112,6 +138,8 @@ provinces:
112
138
  km: ខេត្ត
113
139
  latin: Khaet
114
140
  en: Province
141
+ links:
142
+ wikipedia: https://en.wikipedia.org/wiki/Prey_Veng_province
115
143
  '15':
116
144
  name:
117
145
  km: ពោធិ៍សាត់
@@ -120,6 +148,8 @@ provinces:
120
148
  km: ខេត្ត
121
149
  latin: Khaet
122
150
  en: Province
151
+ links:
152
+ wikipedia: https://en.wikipedia.org/wiki/Pursat_province
123
153
  '16':
124
154
  name:
125
155
  km: រតនគិរី
@@ -128,6 +158,8 @@ provinces:
128
158
  km: ខេត្ត
129
159
  latin: Khaet
130
160
  en: Province
161
+ links:
162
+ wikipedia: https://en.wikipedia.org/wiki/Ratanakiri_province
131
163
  '17':
132
164
  name:
133
165
  km: សៀមរាប
@@ -136,6 +168,8 @@ provinces:
136
168
  km: ខេត្ត
137
169
  latin: Khaet
138
170
  en: Province
171
+ links:
172
+ wikipedia: https://en.wikipedia.org/wiki/Siem_Reap_province
139
173
  '18':
140
174
  name:
141
175
  km: ព្រះសីហនុ
@@ -144,6 +178,8 @@ provinces:
144
178
  km: ខេត្ត
145
179
  latin: Khaet
146
180
  en: Province
181
+ links:
182
+ wikipedia: https://en.wikipedia.org/wiki/Sihanoukville_province
147
183
  '19':
148
184
  name:
149
185
  km: ស្ទឹងត្រែង
@@ -152,6 +188,8 @@ provinces:
152
188
  km: ខេត្ត
153
189
  latin: Khaet
154
190
  en: Province
191
+ links:
192
+ wikipedia: https://en.wikipedia.org/wiki/Stung_Treng_province
155
193
  '20':
156
194
  name:
157
195
  km: ស្វាយរៀង
@@ -160,6 +198,8 @@ provinces:
160
198
  km: ខេត្ត
161
199
  latin: Khaet
162
200
  en: Province
201
+ links:
202
+ wikipedia: https://en.wikipedia.org/wiki/Svay_Rieng_province
163
203
  '21':
164
204
  name:
165
205
  km: តាកែវ
@@ -168,14 +208,18 @@ provinces:
168
208
  km: ខេត្ត
169
209
  latin: Khaet
170
210
  en: Province
211
+ links:
212
+ wikipedia: https://en.wikipedia.org/wiki/Tak%C3%A9o_province
171
213
  '22':
172
214
  name:
173
- km: ឧត្ដរមានជ័យ
215
+ km: ឧត្តរមានជ័យ
174
216
  latin: Oddar Meanchey
175
217
  administrative_unit:
176
218
  km: ខេត្ត
177
219
  latin: Khaet
178
220
  en: Province
221
+ links:
222
+ wikipedia: https://en.wikipedia.org/wiki/Oddar_Meanchey_province
179
223
  '23':
180
224
  name:
181
225
  km: កែប
@@ -184,6 +228,8 @@ provinces:
184
228
  km: ខេត្ត
185
229
  latin: Khaet
186
230
  en: Province
231
+ links:
232
+ wikipedia: https://en.wikipedia.org/wiki/Kep_province
187
233
  '24':
188
234
  name:
189
235
  km: ប៉ៃលិន
@@ -192,6 +238,8 @@ provinces:
192
238
  km: ខេត្ត
193
239
  latin: Khaet
194
240
  en: Province
241
+ links:
242
+ wikipedia: https://en.wikipedia.org/wiki/Pailin_province
195
243
  '25':
196
244
  name:
197
245
  km: ត្បូងឃ្មុំ
@@ -200,3 +248,5 @@ provinces:
200
248
  km: ខេត្ត
201
249
  latin: Khaet
202
250
  en: Province
251
+ links:
252
+ wikipedia: https://en.wikipedia.org/wiki/Tboung_Khmum_province
@@ -0,0 +1,32 @@
1
+ require "yaml"
2
+ require "pathname"
3
+
4
module Pumi
  # Reads and writes the YAML file that stores one type of location data
  # (one of TYPES). Each file is named "<type>.yml" and holds a single
  # top-level key equal to the type.
  class DataFile
    TYPES = %w[provinces districts communes villages].freeze
    DEFAULT_DATA_DIRECTORY = File.join(File.expand_path("..", File.dirname(__dir__)), "data")

    attr_reader :type

    # @param type [#to_s] one of TYPES (symbols are accepted and stringified)
    # @raise [ArgumentError] when the stringified type is not listed in TYPES
    def initialize(type)
      @type = type.to_s
      return if TYPES.include?(@type)

      raise ArgumentError, "#{type} is not included in #{TYPES}"
    end

    # Loads the YAML file for this type and returns the value stored under
    # the type's top-level key.
    #
    # @param data_directory [String, Pathname] directory containing the file
    # @raise [KeyError] when the file has no top-level key for this type
    def read(data_directory: DEFAULT_DATA_DIRECTORY)
      contents = YAML.load_file(data_file(data_directory))
      contents.fetch(type)
    end

    # Serialises +data+, sorted by key, under this type's top-level key.
    # Nothing is written (and nil is returned) when +data+ is empty.
    #
    # @param data [Hash] entries to store
    # @param data_directory [String, Pathname] directory to write into
    def write(data, data_directory: DEFAULT_DATA_DIRECTORY)
      unless data.empty?
        ordered = data.sort.to_h
        File.write(data_file(data_directory), { type => ordered }.to_yaml)
      end
    end

    private

    # Path of "<type>.yml" inside the given directory.
    def data_file(data_directory)
      Pathname(data_directory) + "#{type}.yml"
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require "pathname"
2
+ require "csv"
3
+ require "yaml"
4
+
5
+ # https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
6
+ # https://en.wikipedia.org/wiki/Romanization_of_Khmer
7
+ # https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
8
+
9
module Pumi
  module DataSource
    # Parses CSV source files into district, commune and village records and
    # hands each group of records to DataFile for writing.
    class NCDD
      # Column names applied positionally to every CSV row. Because an explicit
      # header list is supplied, no row of the file is consumed as a header.
      CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze

      # Describes one kind of administrative unit. +code_length+ is the length a
      # (zero-padded) location code must have to belong to the unit, and +group+
      # is the data group the unit is stored under. The +type+ member is not
      # assigned by the table below.
      AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, :type, keyword_init: true)

      # One parsed CSV row.
      Row = Struct.new(:code, :name_km, :name_latin, :type, keyword_init: true) do
        # @return [AdministrativeUnit] the unit registered for this row's type
        # @raise [KeyError] when the type is not a key of ADMINISTRATIVE_UNITS
        def administrative_unit
          ADMINISTRATIVE_UNITS.fetch(type)
        end
      end

      # Lookup table keyed by the Khmer unit name found in the CSV "type" column.
      ADMINISTRATIVE_UNITS = {
        "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
        "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
        "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
        "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
        "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
        "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
      }.freeze

      # Parses every "*.csv" file in +source_dir+ and writes the collected
      # records, one DataFile per group, into +output_dir+.
      #
      # @param source_dir [String] directory containing the CSV source files
      # @param output_dir [String] directory passed to DataFile#write
      # @raise [KeyError] when a row with a numeric code has an unknown type
      def load_data!(source_dir: "tmp", output_dir: "data")
        source_files(source_dir).each do |file|
          parse_source_file(file)
        end

        write_data!(output_dir)
      end

      private

      # Records every row of +file+ whose code length matches the length
      # expected for the row's administrative unit.
      def parse_source_file(file)
        CSV.read(file, headers: CSV_HEADERS).each do |csv_row|
          row = build_row(csv_row)

          # Rows without any digit in the code column (e.g. a heading row)
          # are skipped before the unit lookup, so they never raise.
          next unless row.code
          next if row.administrative_unit.code_length != row.code.length

          write_location(row)
        end
      end

      # Records accumulated so far: group name => { code => attributes }.
      def data
        @data ||= {}
      end

      def build_row(row)
        Row.new(
          code: parse_location_code(row),
          name_km: row.fetch("name_km"),
          name_latin: row.fetch("name_latin"),
          type: row.fetch("type")
        )
      end

      # Returns the row's code left-padded with one "0" when its length is odd,
      # or nil when the code column contains no digit at all.
      def parse_location_code(row)
        code = row.fetch("code")
        return unless code.to_s.match?(/\d/)

        code = code.rjust(code.length + 1, "0") if code.length.odd?
        code
      end

      def write_location(row)
        unit = row.administrative_unit

        data[unit.group] ||= {}
        data[unit.group][row.code] = {
          "name" => {
            "km" => row.name_km,
            "latin" => row.name_latin
          },
          "administrative_unit" => {
            "km" => unit.km,
            "latin" => unit.latin,
            "en" => unit.en
          }
        }
      end

      def source_files(source_dir)
        Pathname.glob("#{source_dir}/*.csv").select(&:file?)
      end

      # Writes each collected group exactly once.
      def write_data!(output_dir)
        return if data.empty?

        # `.uniq.each` is required here: handing the block straight to `uniq`
        # would run it for every unit, rewriting the same group file repeatedly.
        ADMINISTRATIVE_UNITS.values.map(&:group).uniq.each do |data_group|
          # A source may legitimately contain no rows for some group.
          next unless data.key?(data_group)

          DataFile.new(data_group).write(data.fetch(data_group), data_directory: output_dir)
        end
      end
    end
  end
end