pumi 0.16.0 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/dependabot-auto-merge.yml +17 -0
- data/.github/workflows/update_data.yml +2 -2
- data/.tool-versions +1 -1
- data/CHANGELOG.md +14 -0
- data/README.md +5 -0
- data/bin/parse_data +16 -3
- data/data/communes.yml +562 -0
- data/data/districts.yml +402 -0
- data/data/provinces.yml +52 -2
- data/data/villages.yml +16 -0
- data/lib/pumi/data_file.rb +32 -0
- data/lib/pumi/data_source/ncdd.rb +100 -0
- data/lib/pumi/data_source/wikipedia.rb +665 -0
- data/lib/pumi/data_source.rb +7 -0
- data/lib/pumi/location.rb +3 -1
- data/lib/pumi/parser.rb +20 -13
- data/lib/pumi/scraper/result.rb +5 -0
- data/lib/pumi/version.rb +1 -1
- data/lib/pumi.rb +2 -1
- data/pumi.gemspec +1 -0
- metadata +24 -4
- data/lib/pumi/data_parser.rb +0 -75
data/lib/pumi/parser.rb
CHANGED
@@ -3,9 +3,12 @@ require "pathname"
|
|
3
3
|
|
4
4
|
module Pumi
|
5
5
|
class Parser
|
6
|
-
|
6
|
+
DEFAULT_DATA_DIRECTORY = File.join(File.expand_path("..", File.dirname(__dir__)), "data")
|
7
|
+
|
8
|
+
AdministrativeDivision = Struct.new(
|
9
|
+
:type, :name, :data_key, :id_length, :parent_divisions, keyword_init: true
|
10
|
+
)
|
7
11
|
|
8
|
-
AdministrativeDivision = Struct.new(:type, :name, :data_key, :id_length, :parent_divisions, keyword_init: true)
|
9
12
|
PROVINCE = AdministrativeDivision.new(
|
10
13
|
type: Province,
|
11
14
|
name: :province,
|
@@ -48,19 +51,16 @@ module Pumi
|
|
48
51
|
AddressType.new(locale: :en, default_delimiter: ", ")
|
49
52
|
].freeze
|
50
53
|
|
51
|
-
attr_reader :type, :administrative_division
|
54
|
+
attr_reader :type, :data_directory, :administrative_division
|
52
55
|
|
53
|
-
def initialize(type)
|
56
|
+
def initialize(type, data_directory: DEFAULT_DATA_DIRECTORY)
|
54
57
|
@type = type
|
58
|
+
@data_directory = Pathname(data_directory)
|
55
59
|
@administrative_division = ADMINISTRATIVE_DIVISIONS.fetch(type)
|
56
60
|
end
|
57
61
|
|
58
62
|
def load
|
59
|
-
data = YAML.load_file(
|
60
|
-
DATA_DIRECTORY.join("#{administrative_division.data_key}.yml")
|
61
|
-
).fetch(administrative_division.data_key.to_s)
|
62
|
-
|
63
|
-
data.each_with_object({}) do |(id, attributes), result|
|
63
|
+
raw_data.each_with_object({}) do |(id, attributes), result|
|
64
64
|
location_data = build_location_data(id, attributes)
|
65
65
|
add_parent_divisions(location_data)
|
66
66
|
add_addresses(location_data)
|
@@ -71,6 +71,12 @@ module Pumi
|
|
71
71
|
|
72
72
|
private
|
73
73
|
|
74
|
+
def raw_data
|
75
|
+
@raw_data ||= YAML.load_file(
|
76
|
+
data_directory.join("#{administrative_division.data_key}.yml")
|
77
|
+
).fetch(administrative_division.data_key.to_s)
|
78
|
+
end
|
79
|
+
|
74
80
|
def build_location_data(id, attributes)
|
75
81
|
name = attributes.fetch("name")
|
76
82
|
name_km = name.fetch("km")
|
@@ -80,10 +86,11 @@ module Pumi
|
|
80
86
|
)
|
81
87
|
|
82
88
|
{
|
83
|
-
id
|
84
|
-
administrative_unit
|
85
|
-
name_km
|
86
|
-
name_latin
|
89
|
+
id:,
|
90
|
+
administrative_unit:,
|
91
|
+
name_km:,
|
92
|
+
name_latin:,
|
93
|
+
links: attributes.fetch("links", {}).transform_keys(&:to_sym),
|
87
94
|
name_en: name_latin,
|
88
95
|
full_name_km: [
|
89
96
|
administrative_unit_name(name_km, administrative_unit.name_km),
|
data/lib/pumi/version.rb
CHANGED
data/lib/pumi.rb
CHANGED
data/pumi.gemspec
CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_development_dependency "capybara"
|
27
27
|
spec.add_development_dependency "coffee-rails"
|
28
28
|
spec.add_development_dependency "jquery-rails"
|
29
|
+
spec.add_development_dependency "nokogiri"
|
29
30
|
spec.add_development_dependency "pry"
|
30
31
|
spec.add_development_dependency "puma"
|
31
32
|
spec.add_development_dependency "rails"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pumi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.0
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Wilkie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: pry
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -230,7 +244,9 @@ files:
|
|
230
244
|
- ".github/actions/fetch_data/Dockerfile"
|
231
245
|
- ".github/actions/fetch_data/action.yml"
|
232
246
|
- ".github/actions/fetch_data/entrypoint.sh"
|
247
|
+
- ".github/dependabot.yml"
|
233
248
|
- ".github/workflows/build.yml"
|
249
|
+
- ".github/workflows/dependabot-auto-merge.yml"
|
234
250
|
- ".github/workflows/release.yml"
|
235
251
|
- ".github/workflows/update_data.yml"
|
236
252
|
- ".gitignore"
|
@@ -260,7 +276,10 @@ files:
|
|
260
276
|
- lib/pumi.rb
|
261
277
|
- lib/pumi/administrative_unit.rb
|
262
278
|
- lib/pumi/commune.rb
|
263
|
-
- lib/pumi/data_parser.rb
|
279
|
+
- lib/pumi/data_file.rb
|
280
|
+
- lib/pumi/data_source.rb
|
281
|
+
- lib/pumi/data_source/ncdd.rb
|
282
|
+
- lib/pumi/data_source/wikipedia.rb
|
264
283
|
- lib/pumi/data_store.rb
|
265
284
|
- lib/pumi/district.rb
|
266
285
|
- lib/pumi/location.rb
|
@@ -268,6 +287,7 @@ files:
|
|
268
287
|
- lib/pumi/province.rb
|
269
288
|
- lib/pumi/rails.rb
|
270
289
|
- lib/pumi/rails/engine.rb
|
290
|
+
- lib/pumi/scraper/result.rb
|
271
291
|
- lib/pumi/store_cache.rb
|
272
292
|
- lib/pumi/version.rb
|
273
293
|
- lib/pumi/village.rb
|
@@ -296,7 +316,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
296
316
|
- !ruby/object:Gem::Version
|
297
317
|
version: '0'
|
298
318
|
requirements: []
|
299
|
-
rubygems_version: 3.4.
|
319
|
+
rubygems_version: 3.4.10
|
300
320
|
signing_key:
|
301
321
|
specification_version: 4
|
302
322
|
summary: Villages (ភូមិ), Communes (ឃុំ - សង្កាត់), Districts (ស្រុក - ខណ្ឌ) and Provinces
|
data/lib/pumi/data_parser.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
require "pathname"
|
2
|
-
require "csv"
|
3
|
-
require "yaml"
|
4
|
-
|
5
|
-
# https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
|
6
|
-
# https://en.wikipedia.org/wiki/Romanization_of_Khmer
|
7
|
-
# https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
|
8
|
-
|
9
|
-
module Pumi
|
10
|
-
class DataParser
|
11
|
-
CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze
|
12
|
-
|
13
|
-
AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, keyword_init: true)
|
14
|
-
ADMINISTRATIVE_UNITS = {
|
15
|
-
"ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
|
16
|
-
"ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
|
17
|
-
"ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
|
18
|
-
"ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
|
19
|
-
"សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
|
20
|
-
"ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
|
21
|
-
}.freeze
|
22
|
-
|
23
|
-
def load_data!(source_dir: "tmp")
|
24
|
-
data = {}
|
25
|
-
|
26
|
-
source_files(source_dir).each do |file|
|
27
|
-
CSV.read(file, headers: CSV_HEADERS).each do |row|
|
28
|
-
code = row.fetch("code")
|
29
|
-
next if code.to_s.gsub(/\D/, "").empty?
|
30
|
-
|
31
|
-
code = code.rjust(code.length + 1, "0") if code.length.odd?
|
32
|
-
administrative_unit = ADMINISTRATIVE_UNITS.fetch(row.fetch("type"))
|
33
|
-
|
34
|
-
next if administrative_unit.code_length != code.length
|
35
|
-
|
36
|
-
data[administrative_unit.group] ||= {}
|
37
|
-
data[administrative_unit.group][code] = {
|
38
|
-
"name" => {
|
39
|
-
"km" => row.fetch("name_km"),
|
40
|
-
"latin" => row.fetch("name_latin")
|
41
|
-
},
|
42
|
-
"administrative_unit" => {
|
43
|
-
"km" => administrative_unit.km,
|
44
|
-
"latin" => administrative_unit.latin,
|
45
|
-
"en" => administrative_unit.en
|
46
|
-
}
|
47
|
-
}
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
data
|
52
|
-
end
|
53
|
-
|
54
|
-
def write_data!(data, destination_dir: "data")
|
55
|
-
return if data.empty?
|
56
|
-
|
57
|
-
data_groups.each do |group|
|
58
|
-
File.write(
|
59
|
-
"#{destination_dir}/#{group}.yml",
|
60
|
-
{ group => data.fetch(group).sort.to_h }.to_yaml
|
61
|
-
)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
private
|
66
|
-
|
67
|
-
def source_files(source_dir)
|
68
|
-
Pathname.glob("#{source_dir}/*.csv").select(&:file?)
|
69
|
-
end
|
70
|
-
|
71
|
-
def data_groups
|
72
|
-
ADMINISTRATIVE_UNITS.values.map(&:group).uniq
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|