pumi 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/data/provinces.yml CHANGED
@@ -8,6 +8,8 @@ provinces:
8
8
  km: ខេត្ត
9
9
  latin: Khaet
10
10
  en: Province
11
+ links:
12
+ wikipedia: https://en.wikipedia.org/wiki/Banteay_Meanchey_province
11
13
  '02':
12
14
  name:
13
15
  km: បាត់ដំបង
@@ -16,6 +18,8 @@ provinces:
16
18
  km: ខេត្ត
17
19
  latin: Khaet
18
20
  en: Province
21
+ links:
22
+ wikipedia: https://en.wikipedia.org/wiki/Battambang_province
19
23
  '03':
20
24
  name:
21
25
  km: កំពង់ចាម
@@ -24,6 +28,8 @@ provinces:
24
28
  km: ខេត្ត
25
29
  latin: Khaet
26
30
  en: Province
31
+ links:
32
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Cham_province
27
33
  '04':
28
34
  name:
29
35
  km: កំពង់ឆ្នាំង
@@ -32,6 +38,8 @@ provinces:
32
38
  km: ខេត្ត
33
39
  latin: Khaet
34
40
  en: Province
41
+ links:
42
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Chhnang_province
35
43
  '05':
36
44
  name:
37
45
  km: កំពង់ស្ពឺ
@@ -40,6 +48,8 @@ provinces:
40
48
  km: ខេត្ត
41
49
  latin: Khaet
42
50
  en: Province
51
+ links:
52
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Speu_province
43
53
  '06':
44
54
  name:
45
55
  km: កំពង់ធំ
@@ -48,6 +58,8 @@ provinces:
48
58
  km: ខេត្ត
49
59
  latin: Khaet
50
60
  en: Province
61
+ links:
62
+ wikipedia: https://en.wikipedia.org/wiki/Kampong_Thom_province
51
63
  '07':
52
64
  name:
53
65
  km: កំពត
@@ -56,14 +68,18 @@ provinces:
56
68
  km: ខេត្ត
57
69
  latin: Khaet
58
70
  en: Province
71
+ links:
72
+ wikipedia: https://en.wikipedia.org/wiki/Kampot_province
59
73
  '08':
60
74
  name:
61
- km: កណ្ដាល
75
+ km: កណ្តាល
62
76
  latin: Kandal
63
77
  administrative_unit:
64
78
  km: ខេត្ត
65
79
  latin: Khaet
66
80
  en: Province
81
+ links:
82
+ wikipedia: https://en.wikipedia.org/wiki/Kandal_province
67
83
  '09':
68
84
  name:
69
85
  km: កោះកុង
@@ -72,6 +88,8 @@ provinces:
72
88
  km: ខេត្ត
73
89
  latin: Khaet
74
90
  en: Province
91
+ links:
92
+ wikipedia: https://en.wikipedia.org/wiki/Koh_Kong_province
75
93
  '10':
76
94
  name:
77
95
  km: ក្រចេះ
@@ -80,6 +98,8 @@ provinces:
80
98
  km: ខេត្ត
81
99
  latin: Khaet
82
100
  en: Province
101
+ links:
102
+ wikipedia: https://en.wikipedia.org/wiki/Krati%C3%A9_province
83
103
  '11':
84
104
  name:
85
105
  km: មណ្ឌលគិរី
@@ -88,6 +108,8 @@ provinces:
88
108
  km: ខេត្ត
89
109
  latin: Khaet
90
110
  en: Province
111
+ links:
112
+ wikipedia: https://en.wikipedia.org/wiki/Mondulkiri_province
91
113
  '12':
92
114
  name:
93
115
  km: ភ្នំពេញ
@@ -96,6 +118,8 @@ provinces:
96
118
  km: រាជធានី
97
119
  latin: Reach Theani
98
120
  en: Capital
121
+ links:
122
+ wikipedia: https://en.wikipedia.org/wiki/Phnom_Penh
99
123
  '13':
100
124
  name:
101
125
  km: ព្រះវិហារ
@@ -104,6 +128,8 @@ provinces:
104
128
  km: ខេត្ត
105
129
  latin: Khaet
106
130
  en: Province
131
+ links:
132
+ wikipedia: https://en.wikipedia.org/wiki/Preah_Vihear_province
107
133
  '14':
108
134
  name:
109
135
  km: ព្រៃវែង
@@ -112,6 +138,8 @@ provinces:
112
138
  km: ខេត្ត
113
139
  latin: Khaet
114
140
  en: Province
141
+ links:
142
+ wikipedia: https://en.wikipedia.org/wiki/Prey_Veng_province
115
143
  '15':
116
144
  name:
117
145
  km: ពោធិ៍សាត់
@@ -120,6 +148,8 @@ provinces:
120
148
  km: ខេត្ត
121
149
  latin: Khaet
122
150
  en: Province
151
+ links:
152
+ wikipedia: https://en.wikipedia.org/wiki/Pursat_province
123
153
  '16':
124
154
  name:
125
155
  km: រតនគិរី
@@ -128,6 +158,8 @@ provinces:
128
158
  km: ខេត្ត
129
159
  latin: Khaet
130
160
  en: Province
161
+ links:
162
+ wikipedia: https://en.wikipedia.org/wiki/Ratanakiri_province
131
163
  '17':
132
164
  name:
133
165
  km: សៀមរាប
@@ -136,6 +168,8 @@ provinces:
136
168
  km: ខេត្ត
137
169
  latin: Khaet
138
170
  en: Province
171
+ links:
172
+ wikipedia: https://en.wikipedia.org/wiki/Siem_Reap_province
139
173
  '18':
140
174
  name:
141
175
  km: ព្រះសីហនុ
@@ -144,6 +178,8 @@ provinces:
144
178
  km: ខេត្ត
145
179
  latin: Khaet
146
180
  en: Province
181
+ links:
182
+ wikipedia: https://en.wikipedia.org/wiki/Sihanoukville_province
147
183
  '19':
148
184
  name:
149
185
  km: ស្ទឹងត្រែង
@@ -152,6 +188,8 @@ provinces:
152
188
  km: ខេត្ត
153
189
  latin: Khaet
154
190
  en: Province
191
+ links:
192
+ wikipedia: https://en.wikipedia.org/wiki/Stung_Treng_province
155
193
  '20':
156
194
  name:
157
195
  km: ស្វាយរៀង
@@ -160,6 +198,8 @@ provinces:
160
198
  km: ខេត្ត
161
199
  latin: Khaet
162
200
  en: Province
201
+ links:
202
+ wikipedia: https://en.wikipedia.org/wiki/Svay_Rieng_province
163
203
  '21':
164
204
  name:
165
205
  km: តាកែវ
@@ -168,14 +208,18 @@ provinces:
168
208
  km: ខេត្ត
169
209
  latin: Khaet
170
210
  en: Province
211
+ links:
212
+ wikipedia: https://en.wikipedia.org/wiki/Tak%C3%A9o_province
171
213
  '22':
172
214
  name:
173
- km: ឧត្ដរមានជ័យ
215
+ km: ឧត្តរមានជ័យ
174
216
  latin: Oddar Meanchey
175
217
  administrative_unit:
176
218
  km: ខេត្ត
177
219
  latin: Khaet
178
220
  en: Province
221
+ links:
222
+ wikipedia: https://en.wikipedia.org/wiki/Oddar_Meanchey_province
179
223
  '23':
180
224
  name:
181
225
  km: កែប
@@ -184,6 +228,8 @@ provinces:
184
228
  km: ខេត្ត
185
229
  latin: Khaet
186
230
  en: Province
231
+ links:
232
+ wikipedia: https://en.wikipedia.org/wiki/Kep_province
187
233
  '24':
188
234
  name:
189
235
  km: ប៉ៃលិន
@@ -192,6 +238,8 @@ provinces:
192
238
  km: ខេត្ត
193
239
  latin: Khaet
194
240
  en: Province
241
+ links:
242
+ wikipedia: https://en.wikipedia.org/wiki/Pailin_province
195
243
  '25':
196
244
  name:
197
245
  km: ត្បូងឃ្មុំ
@@ -200,3 +248,5 @@ provinces:
200
248
  km: ខេត្ត
201
249
  latin: Khaet
202
250
  en: Province
251
+ links:
252
+ wikipedia: https://en.wikipedia.org/wiki/Tboung_Khmum_province
@@ -0,0 +1,32 @@
1
+ require "yaml"
2
+ require "pathname"
3
+
4
module Pumi
  # Reads and writes the YAML file that stores one type of location data.
  #
  # Each file is named "<type>.yml" and holds a single top-level key (the type)
  # whose value is the data hash.
  class DataFile
    # The "data" directory two levels above the directory containing this file.
    DEFAULT_DATA_DIRECTORY = File.expand_path("../../data", __dir__)
    # The only values accepted as a data file type.
    TYPES = %w[provinces districts communes villages].freeze

    attr_reader :type

    # @param type [#to_s] one of TYPES (compared after conversion to a String)
    # @raise [ArgumentError] when the type is not listed in TYPES
    def initialize(type)
      @type = type.to_s
      raise ArgumentError, "#{type} is not included in #{TYPES}" unless TYPES.include?(@type)
    end

    # Loads the YAML file for this type and returns the value stored under the
    # type key.
    #
    # @param data_directory [String, Pathname] directory containing the file
    # @raise [KeyError] when the file has no top-level key for this type
    def read(data_directory: DEFAULT_DATA_DIRECTORY)
      YAML.load_file(data_file(data_directory)).fetch(type)
    end

    # Writes +data+, sorted by key, under the type key of the YAML file.
    # Nothing is written when +data+ is empty.
    #
    # @param data [Hash] entries to store
    # @param data_directory [String, Pathname] directory receiving the file
    def write(data, data_directory: DEFAULT_DATA_DIRECTORY)
      return if data.empty?

      File.write(data_file(data_directory), { type => data.sort.to_h }.to_yaml)
    end

    private

    # Path of "<type>.yml" inside +data_directory+.
    def data_file(data_directory)
      Pathname(data_directory).join("#{type}.yml")
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require "pathname"
2
+ require "csv"
3
+ require "yaml"
4
+
5
+ # https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
6
+ # https://en.wikipedia.org/wiki/Romanization_of_Khmer
7
+ # https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
8
+
9
module Pumi
  module DataSource
    # Collects districts, communes and villages from the CSV files found in a
    # source directory and writes each group through DataFile.
    class NCDD
      # Header names applied, in order, to the columns of every CSV row.
      CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze

      AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, :type, keyword_init: true)

      Row = Struct.new(:code, :name_km, :name_latin, :type, keyword_init: true) do
        # Looks up the unit by the row's type; raises KeyError when the type
        # is not a key of ADMINISTRATIVE_UNITS.
        def administrative_unit
          ADMINISTRATIVE_UNITS.fetch(type)
        end
      end

      # Keyed by the value found in the CSV "type" column. `code_length` is the
      # length a (zero-padded) code must have to be accepted for that unit, and
      # `group` is the data set the row is stored in.
      ADMINISTRATIVE_UNITS = {
        "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
        "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
        "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
        "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
        "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
        "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
      }.freeze

      # Parses every "*.csv" file in +source_dir+, then writes the collected
      # groups into +output_dir+.
      #
      # @param source_dir [String] directory holding the CSV source files
      # @param output_dir [String] directory passed to DataFile#write
      def load_data!(source_dir: "tmp", output_dir: "data")
        source_files(source_dir).each do |file|
          parse_source_file(file)
        end

        write_data!(output_dir)
      end

      private

      # Stores every row of +file+ that has a usable code of the length
      # expected for its administrative unit.
      def parse_source_file(file)
        CSV.read(file, headers: CSV_HEADERS).each do |csv_row|
          row = build_row(csv_row)

          # Rows without any digit in the code column carry no location.
          next unless row.code
          # Skip rows whose code length does not match their unit.
          next if row.administrative_unit.code_length != row.code.length

          write_location(row)
        end
      end

      # Collected locations: group name => { code => attributes }.
      def data
        @data ||= {}
      end

      # Converts a CSV row into a Row struct.
      def build_row(row)
        Row.new(
          code: parse_location_code(row),
          name_km: row.fetch("name_km"),
          name_latin: row.fetch("name_latin"),
          type: row.fetch("type")
        )
      end

      # Returns the code column, left-padded with one "0" when its length is
      # odd, or nil when the column contains no digit at all.
      def parse_location_code(row)
        code = row.fetch("code")
        return if code.to_s.gsub(/\D/, "").empty?

        code = code.rjust(code.length + 1, "0") if code.length.odd?
        code
      end

      # Records +row+ under its group, keyed by its code.
      def write_location(row)
        data[row.administrative_unit.group] ||= {}
        data[row.administrative_unit.group][row.code] = {
          "name" => {
            "km" => row.name_km,
            "latin" => row.name_latin
          },
          "administrative_unit" => {
            "km" => row.administrative_unit.km,
            "latin" => row.administrative_unit.latin,
            "en" => row.administrative_unit.en
          }
        }
      end

      # Regular files matching "*.csv" directly inside +source_dir+.
      def source_files(source_dir)
        Pathname.glob("#{source_dir}/*.csv").select(&:file?)
      end

      # Writes each group exactly once via DataFile. Does nothing when no row
      # was collected; raises KeyError when a group has no collected rows.
      def write_data!(output_dir)
        return if data.empty?

        # `uniq.each`, not `uniq { ... }`: a block given to `uniq` is called for
        # every element, duplicates included, so the same group would be
        # written several times.
        ADMINISTRATIVE_UNITS.values.map(&:group).uniq.each do |data_group|
          DataFile.new(data_group).write(data.fetch(data_group), data_directory: output_dir)
        end
      end
    end
  end
end