pumi 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/pumi/parser.rb CHANGED
@@ -3,9 +3,12 @@ require "pathname"
3
3
 
4
4
  module Pumi
5
5
  class Parser
6
- DATA_DIRECTORY = Pathname.new(File.expand_path("..", File.dirname(__dir__))).join("data")
6
+ DEFAULT_DATA_DIRECTORY = File.join(File.expand_path("..", File.dirname(__dir__)), "data")
7
+
8
+ AdministrativeDivision = Struct.new(
9
+ :type, :name, :data_key, :id_length, :parent_divisions, keyword_init: true
10
+ )
7
11
 
8
- AdministrativeDivision = Struct.new(:type, :name, :data_key, :id_length, :parent_divisions, keyword_init: true)
9
12
  PROVINCE = AdministrativeDivision.new(
10
13
  type: Province,
11
14
  name: :province,
@@ -48,19 +51,16 @@ module Pumi
48
51
  AddressType.new(locale: :en, default_delimiter: ", ")
49
52
  ].freeze
50
53
 
51
- attr_reader :type, :administrative_division
54
+ attr_reader :type, :data_directory, :administrative_division
52
55
 
53
- def initialize(type)
56
+ def initialize(type, data_directory: DEFAULT_DATA_DIRECTORY)
54
57
  @type = type
58
+ @data_directory = Pathname(data_directory)
55
59
  @administrative_division = ADMINISTRATIVE_DIVISIONS.fetch(type)
56
60
  end
57
61
 
58
62
  def load
59
- data = YAML.load_file(
60
- DATA_DIRECTORY.join("#{administrative_division.data_key}.yml")
61
- ).fetch(administrative_division.data_key.to_s)
62
-
63
- data.each_with_object({}) do |(id, attributes), result|
63
+ raw_data.each_with_object({}) do |(id, attributes), result|
64
64
  location_data = build_location_data(id, attributes)
65
65
  add_parent_divisions(location_data)
66
66
  add_addresses(location_data)
@@ -71,6 +71,12 @@ module Pumi
71
71
 
72
72
  private
73
73
 
74
+ def raw_data
75
+ @raw_data ||= YAML.load_file(
76
+ data_directory.join("#{administrative_division.data_key}.yml")
77
+ ).fetch(administrative_division.data_key.to_s)
78
+ end
79
+
74
80
  def build_location_data(id, attributes)
75
81
  name = attributes.fetch("name")
76
82
  name_km = name.fetch("km")
@@ -80,10 +86,11 @@ module Pumi
80
86
  )
81
87
 
82
88
  {
83
- id: id,
84
- administrative_unit: administrative_unit,
85
- name_km: name_km,
86
- name_latin: name_latin,
89
+ id:,
90
+ administrative_unit:,
91
+ name_km:,
92
+ name_latin:,
93
+ links: attributes.fetch("links", {}).transform_keys(&:to_sym),
87
94
  name_en: name_latin,
88
95
  full_name_km: [
89
96
  administrative_unit_name(name_km, administrative_unit.name_km),
@@ -0,0 +1,5 @@
1
+ module Pumi
2
+ module Scraper
3
+ Result = Struct.new(:code, :wikipedia, keyword_init: true)
4
+ end
5
+ end
data/lib/pumi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pumi
2
- VERSION = "0.17.0".freeze
2
+ VERSION = "0.18.0".freeze
3
3
  end
data/lib/pumi.rb CHANGED
@@ -23,4 +23,5 @@ require_relative "pumi/district"
23
23
  require_relative "pumi/commune"
24
24
  require_relative "pumi/village"
25
25
  require_relative "pumi/parser"
26
- require_relative "pumi/data_parser"
26
+ require_relative "pumi/data_file"
27
+ require_relative "pumi/data_source"
data/pumi.gemspec CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.add_development_dependency "capybara"
27
27
  spec.add_development_dependency "coffee-rails"
28
28
  spec.add_development_dependency "jquery-rails"
29
+ spec.add_development_dependency "nokogiri"
29
30
  spec.add_development_dependency "pry"
30
31
  spec.add_development_dependency "puma"
31
32
  spec.add_development_dependency "rails"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pumi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Wilkie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-26 00:00:00.000000000 Z
11
+ date: 2023-05-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: pry
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -262,7 +276,10 @@ files:
262
276
  - lib/pumi.rb
263
277
  - lib/pumi/administrative_unit.rb
264
278
  - lib/pumi/commune.rb
265
- - lib/pumi/data_parser.rb
279
+ - lib/pumi/data_file.rb
280
+ - lib/pumi/data_source.rb
281
+ - lib/pumi/data_source/ncdd.rb
282
+ - lib/pumi/data_source/wikipedia.rb
266
283
  - lib/pumi/data_store.rb
267
284
  - lib/pumi/district.rb
268
285
  - lib/pumi/location.rb
@@ -270,6 +287,7 @@ files:
270
287
  - lib/pumi/province.rb
271
288
  - lib/pumi/rails.rb
272
289
  - lib/pumi/rails/engine.rb
290
+ - lib/pumi/scraper/result.rb
273
291
  - lib/pumi/store_cache.rb
274
292
  - lib/pumi/version.rb
275
293
  - lib/pumi/village.rb
@@ -1,75 +0,0 @@
1
- require "pathname"
2
- require "csv"
3
- require "yaml"
4
-
5
- # https://en.wikipedia.org/wiki/Administrative_divisions_of_Cambodia
6
- # https://en.wikipedia.org/wiki/Romanization_of_Khmer
7
- # https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names
8
-
9
- module Pumi
10
- class DataParser
11
- CSV_HEADERS = %w[type code name_km name_latin reference note1 note2].freeze
12
-
13
- AdministrativeUnit = Struct.new(:en, :km, :latin, :code_length, :group, keyword_init: true)
14
- ADMINISTRATIVE_UNITS = {
15
- "ស្រុក" => AdministrativeUnit.new(en: "District", km: "ស្រុក", latin: "Srok", code_length: 4, group: "districts"),
16
- "ខណ្ឌ" => AdministrativeUnit.new(en: "Section", km: "ខណ្ឌ", latin: "Khan", code_length: 4, group: "districts"),
17
- "ក្រុង" => AdministrativeUnit.new(en: "Municipality", km: "ក្រុង", latin: "Krong", code_length: 4, group: "districts"),
18
- "ឃុំ" => AdministrativeUnit.new(en: "Commune", km: "ឃុំ", latin: "Khum", code_length: 6, group: "communes"),
19
- "សង្កាត់" => AdministrativeUnit.new(en: "Quarter", km: "សង្កាត់", latin: "Sangkat", code_length: 6, group: "communes"),
20
- "ភូមិ" => AdministrativeUnit.new(en: "Village", km: "ភូមិ", latin: "Phum", code_length: 8, group: "villages")
21
- }.freeze
22
-
23
- def load_data!(source_dir: "tmp")
24
- data = {}
25
-
26
- source_files(source_dir).each do |file|
27
- CSV.read(file, headers: CSV_HEADERS).each do |row|
28
- code = row.fetch("code")
29
- next if code.to_s.gsub(/\D/, "").empty?
30
-
31
- code = code.rjust(code.length + 1, "0") if code.length.odd?
32
- administrative_unit = ADMINISTRATIVE_UNITS.fetch(row.fetch("type"))
33
-
34
- next if administrative_unit.code_length != code.length
35
-
36
- data[administrative_unit.group] ||= {}
37
- data[administrative_unit.group][code] = {
38
- "name" => {
39
- "km" => row.fetch("name_km"),
40
- "latin" => row.fetch("name_latin")
41
- },
42
- "administrative_unit" => {
43
- "km" => administrative_unit.km,
44
- "latin" => administrative_unit.latin,
45
- "en" => administrative_unit.en
46
- }
47
- }
48
- end
49
- end
50
-
51
- data
52
- end
53
-
54
- def write_data!(data, destination_dir: "data")
55
- return if data.empty?
56
-
57
- data_groups.each do |group|
58
- File.write(
59
- "#{destination_dir}/#{group}.yml",
60
- { group => data.fetch(group).sort.to_h }.to_yaml
61
- )
62
- end
63
- end
64
-
65
- private
66
-
67
- def source_files(source_dir)
68
- Pathname.glob("#{source_dir}/*.csv").select(&:file?)
69
- end
70
-
71
- def data_groups
72
- ADMINISTRATIVE_UNITS.values.map(&:group).uniq
73
- end
74
- end
75
- end