pumi 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/pumi/parser.rb CHANGED
@@ -3,9 +3,12 @@ require "pathname"
3
3
 
4
4
  module Pumi
5
5
  class Parser
6
- DATA_DIRECTORY = Pathname.new(File.expand_path("..", File.dirname(__dir__))).join("data")
6
+ DEFAULT_DATA_DIRECTORY = File.join(File.expand_path("..", File.dirname(__dir__)), "data")
7
+
8
+ AdministrativeDivision = Struct.new(
9
+ :type, :name, :data_key, :id_length, :parent_divisions, keyword_init: true
10
+ )
7
11
 
8
- AdministrativeDivision = Struct.new(:type, :name, :data_key, :id_length, :parent_divisions, keyword_init: true)
9
12
  PROVINCE = AdministrativeDivision.new(
10
13
  type: Province,
11
14
  name: :province,
@@ -48,19 +51,16 @@ module Pumi
48
51
  AddressType.new(locale: :en, default_delimiter: ", ")
49
52
  ].freeze
50
53
 
51
- attr_reader :type, :administrative_division
54
+ attr_reader :type, :data_directory, :administrative_division
52
55
 
53
- def initialize(type)
56
+ def initialize(type, data_directory: DEFAULT_DATA_DIRECTORY)
54
57
  @type = type
58
+ @data_directory = Pathname(data_directory)
55
59
  @administrative_division = ADMINISTRATIVE_DIVISIONS.fetch(type)
56
60
  end
57
61
 
58
62
  def load
59
- data = YAML.load_file(
60
- DATA_DIRECTORY.join("#{administrative_division.data_key}.yml")
61
- ).fetch(administrative_division.data_key.to_s)
62
-
63
- data.each_with_object({}) do |(id, attributes), result|
63
+ raw_data.each_with_object({}) do |(id, attributes), result|
64
64
  location_data = build_location_data(id, attributes)
65
65
  add_parent_divisions(location_data)
66
66
  add_addresses(location_data)
@@ -71,6 +71,12 @@ module Pumi
71
71
 
72
72
  private
73
73
 
74
+ def raw_data
75
+ @raw_data ||= YAML.load_file(
76
+ data_directory.join("#{administrative_division.data_key}.yml")
77
+ ).fetch(administrative_division.data_key.to_s)
78
+ end
79
+
74
80
  def build_location_data(id, attributes)
75
81
  name = attributes.fetch("name")
76
82
  name_km = name.fetch("km")
@@ -80,10 +86,11 @@ module Pumi
80
86
  )
81
87
 
82
88
  {
83
- id: id,
84
- administrative_unit: administrative_unit,
85
- name_km: name_km,
86
- name_latin: name_latin,
89
+ id:,
90
+ administrative_unit:,
91
+ name_km:,
92
+ name_latin:,
93
+ links: attributes.fetch("links", {}).transform_keys(&:to_sym),
87
94
  name_en: name_latin,
88
95
  full_name_km: [
89
96
  administrative_unit_name(name_km, administrative_unit.name_km),
@@ -0,0 +1,5 @@
1
+ module Pumi
2
+ module Scraper
3
+ Result = Struct.new(:code, :wikipedia, keyword_init: true)
4
+ end
5
+ end
data/lib/pumi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pumi
2
- VERSION = "0.17.0".freeze
2
+ VERSION = "0.18.0".freeze
3
3
  end
data/lib/pumi.rb CHANGED
@@ -23,4 +23,5 @@ require_relative "pumi/district"
23
23
  require_relative "pumi/commune"
24
24
  require_relative "pumi/village"
25
25
  require_relative "pumi/parser"
26
- require_relative "pumi/data_parser"
26
+ require_relative "pumi/data_file"
27
+ require_relative "pumi/data_source"
data/pumi.gemspec CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.add_development_dependency "capybara"
27
27
  spec.add_development_dependency "coffee-rails"
28
28
  spec.add_development_dependency "jquery-rails"
29
+ spec.add_development_dependency "nokogiri"
29
30
  spec.add_development_dependency "pry"
30
31
  spec.add_development_dependency "puma"
31
32
  spec.add_development_dependency "rails"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pumi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Wilkie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-26 00:00:00.000000000 Z
11
+ date: 2023-05-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: pry
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -262,7 +276,10 @@ files:
262
276
  - lib/pumi.rb
263
277
  - lib/pumi/administrative_unit.rb
264
278
  - lib/pumi/commune.rb
265
- - lib/pumi/data_parser.rb
279
+ - lib/pumi/data_file.rb
280
+ - lib/pumi/data_source.rb
281
+ - lib/pumi/data_source/ncdd.rb
282
+ - lib/pumi/data_source/wikipedia.rb
266
283
  - lib/pumi/data_store.rb
267
284
  - lib/pumi/district.rb
268
285
  - lib/pumi/location.rb
@@ -270,6 +287,7 @@ files:
270
287
  - lib/pumi/province.rb
271
288
  - lib/pumi/rails.rb
272
289
  - lib/pumi/rails/engine.rb
290
+ - lib/pumi/scraper/result.rb
273
291
  - lib/pumi/store_cache.rb
274
292
  - lib/pumi/version.rb
275
293
  - lib/pumi/village.rb
@@ -1,75 +0,0 @@
1
- require "pathname"
2
- require "csv"
3
- require "yaml"
4
-
5
- # https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
6
- # https://en.wikipedia.org/wiki/Romanization_of_Khmer
7
- # https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
8
-
9
- module Pumi
10
- class DataParser
11
- CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze
12
-
13
- AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, keyword_init: true)
14
- ADMINISTRATIVE_UNITS = {
15
- "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
16
- "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
17
- "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
18
- "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
19
- "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
20
- "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
21
- }.freeze
22
-
23
- def load_data!(source_dir: "tmp")
24
- data = {}
25
-
26
- source_files(source_dir).each do |file|
27
- CSV.read(file, headers: CSV_HEADERS).each do |row|
28
- code = row.fetch("code")
29
- next if code.to_s.gsub(/\D/, "").empty?
30
-
31
- code = code.rjust(code.length + 1, "0") if code.length.odd?
32
- administrative_unit = ADMINISTRATIVE_UNITS.fetch(row.fetch("type"))
33
-
34
- next if administrative_unit.code_length != code.length
35
-
36
- data[administrative_unit.group] ||= {}
37
- data[administrative_unit.group][code] = {
38
- "name" => {
39
- "km" => row.fetch("name_km"),
40
- "latin" => row.fetch("name_latin")
41
- },
42
- "administrative_unit" => {
43
- "km" => administrative_unit.km,
44
- "latin" => administrative_unit.latin,
45
- "en" => administrative_unit.en
46
- }
47
- }
48
- end
49
- end
50
-
51
- data
52
- end
53
-
54
- def write_data!(data, destination_dir: "data")
55
- return if data.empty?
56
-
57
- data_groups.each do |group|
58
- File.write(
59
- "#{destination_dir}/#{group}.yml",
60
- { group => data.fetch(group).sort.to_h }.to_yaml
61
- )
62
- end
63
- end
64
-
65
- private
66
-
67
- def source_files(source_dir)
68
- Pathname.glob("#{source_dir}/*.csv").select(&:file?)
69
- end
70
-
71
- def data_groups
72
- ADMINISTRATIVE_UNITS.values.map(&:group).uniq
73
- end
74
- end
75
- end