pumi 0.12.2 → 0.13.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,75 @@
1
+ require "pathname"
2
+ require "csv"
3
+ require "yaml"
4
+
5
+ # https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
6
+ # https://en.wikipedia.org/wiki/Romanization_of_Khmer
7
+ # https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
8
+
9
module Pumi
  # Converts CSV source files describing Cambodian administrative divisions
  # into the nested hashes / YAML documents used as location data.
  #
  # Each CSV row is read with the fixed column names in CSV_HEADERS. Rows are
  # grouped into "districts", "communes" and "villages" according to the
  # administrative unit named in their +type+ column.
  class DataParser
    # Column names applied to every row. The files are read without treating
    # any line as a header row, so title/header lines are ordinary rows that
    # get filtered out because their +code+ column contains no digits.
    CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze

    # The Khmer unit names below are UTF-8 literals, so the source files must
    # be decoded as UTF-8 for the +type+ column to match them, regardless of
    # the process locale.
    SOURCE_ENCODING = "UTF-8".freeze

    # Describes one kind of administrative unit:
    # * +en+, +km+, +latin+ - the unit's name in English, Khmer and romanized Khmer
    # * +code_length+       - number of characters in a (zero padded) code of this unit
    # * +group+             - key under which locations of this unit are collected
    AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, keyword_init: true)

    # Supported administrative units keyed by the Khmer name found in the
    # +type+ column of the source files.
    ADMINISTRATIVE_UNITS = {
      "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
      "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
      "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
      "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
      "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
      "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
    }.freeze

    # Parses every +*.csv+ file directly inside +source_dir+.
    #
    # Rows are skipped when their code contains no digits, or when the
    # (zero padded) code length does not match the code length of the row's
    # administrative unit. When the same code occurs more than once the last
    # occurrence wins; files are processed in sorted path order.
    #
    # @param source_dir [String] directory containing the CSV files
    # @return [Hash{String => Hash{String => Hash}}] group name => code =>
    #   location attributes ("name" and "administrative_unit"). Only groups
    #   with at least one location are present.
    # @raise [KeyError] if a row with a numeric code has a +type+ that is not
    #   listed in ADMINISTRATIVE_UNITS
    def load_data!(source_dir: "tmp")
      source_files(source_dir).each_with_object({}) do |file, data|
        CSV.read(file, headers: CSV_HEADERS, encoding: SOURCE_ENCODING).each do |row|
          code = normalize_code(row.fetch("code"))
          next unless code

          administrative_unit = ADMINISTRATIVE_UNITS.fetch(row.fetch("type"))
          next if administrative_unit.code_length != code.length

          locations = (data[administrative_unit.group] ||= {})
          locations[code] = build_location(row, administrative_unit)
        end
      end
    end

    # Writes one YAML file per group (+<group>.yml+) into +destination_dir+,
    # with the locations sorted by code. Does nothing when +data+ is empty.
    #
    # All documents are serialized before the first file is written, so a
    # missing group never leaves a partially updated destination directory.
    # The destination directory is created if it does not exist yet.
    #
    # @param data [Hash] data in the format returned by #load_data!
    # @param destination_dir [String] directory to write the YAML files to
    # @return [Array<String>, nil] the groups written, or nil if +data+ is empty
    # @raise [KeyError] if +data+ is not empty but lacks one of the groups
    def write_data!(data, destination_dir: "data")
      return if data.empty?

      documents = data_groups.map do |group|
        [group, { group => data.fetch(group).sort.to_h }.to_yaml]
      end

      destination = Pathname(destination_dir)
      destination.mkpath
      documents.each { |group, yaml| File.write(destination.join("#{group}.yml"), yaml) }

      documents.map(&:first)
    end

    private

    # CSV files directly inside +source_dir+, sorted so that the result of
    # #load_data! does not depend on file system enumeration order.
    def source_files(source_dir)
      Pathname.glob("#{source_dir}/*.csv").select(&:file?).sort
    end

    # Distinct group names in the order they first appear in ADMINISTRATIVE_UNITS.
    def data_groups
      ADMINISTRATIVE_UNITS.values.map(&:group).uniq
    end

    # Returns +raw_code+ left padded with a single "0" when it has an odd
    # number of characters, or nil when it contains no digit at all
    # (blank cells, title rows, column captions).
    def normalize_code(raw_code)
      return if raw_code.to_s.gsub(/\D/, "").empty?

      raw_code.length.odd? ? raw_code.rjust(raw_code.length + 1, "0") : raw_code
    end

    # Builds the attributes stored for a single location.
    def build_location(row, administrative_unit)
      {
        "name" => {
          "km" => row.fetch("name_km"),
          "latin" => row.fetch("name_latin")
        },
        "administrative_unit" => {
          "km" => administrative_unit.km,
          "latin" => administrative_unit.latin,
          "en" => administrative_unit.en
        }
      }
    end
  end
end
data/lib/pumi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pumi
2
- VERSION = "0.12.2".freeze
2
+ VERSION = "0.13.1".freeze
3
3
  end
data/lib/pumi.rb CHANGED
@@ -23,3 +23,4 @@ require_relative "pumi/district"
23
23
  require_relative "pumi/commune"
24
24
  require_relative "pumi/village"
25
25
  require_relative "pumi/parser"
26
+ require_relative "pumi/data_parser"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pumi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.2
4
+ version: 0.13.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Wilkie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-02-01 00:00:00.000000000 Z
11
+ date: 2023-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bump
@@ -274,6 +274,7 @@ files:
274
274
  - lib/pumi.rb
275
275
  - lib/pumi/administrative_unit.rb
276
276
  - lib/pumi/commune.rb
277
+ - lib/pumi/data_parser.rb
277
278
  - lib/pumi/data_store.rb
278
279
  - lib/pumi/district.rb
279
280
  - lib/pumi/location.rb