worlddb-models 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +31 -13
  3. data/README.md +7 -7
  4. data/Rakefile +1 -1
  5. data/lib/worlddb/deleter.rb +6 -1
  6. data/lib/worlddb/helpers/value_helper.rb +117 -0
  7. data/lib/worlddb/matcher.rb +99 -135
  8. data/lib/worlddb/matcher_adm.rb +82 -0
  9. data/lib/worlddb/models/city.rb +30 -208
  10. data/lib/worlddb/models/city_base.rb +220 -0
  11. data/lib/worlddb/models/continent.rb +9 -0
  12. data/lib/worlddb/models/country.rb +21 -4
  13. data/lib/worlddb/models/forward.rb +25 -9
  14. data/lib/worlddb/models/lang.rb +6 -0
  15. data/lib/worlddb/models/place.rb +1 -1
  16. data/lib/worlddb/models/state.rb +83 -0
  17. data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
  18. data/lib/worlddb/models/tagdb/tag.rb +1 -1
  19. data/lib/worlddb/models.rb +11 -8
  20. data/lib/worlddb/patterns.rb +4 -4
  21. data/lib/worlddb/reader.rb +68 -39
  22. data/lib/worlddb/reader_file.rb +36 -3
  23. data/lib/worlddb/reader_zip.rb +33 -3
  24. data/lib/worlddb/readers/base.rb +149 -0
  25. data/lib/worlddb/readers/city.rb +2 -65
  26. data/lib/worlddb/readers/country.rb +2 -63
  27. data/lib/worlddb/readers/lang.rb +3 -68
  28. data/lib/worlddb/readers/state.rb +61 -0
  29. data/lib/worlddb/readers/state_tree.rb +118 -0
  30. data/lib/worlddb/readers/usage.rb +2 -65
  31. data/lib/worlddb/schema.rb +142 -43
  32. data/lib/worlddb/stats.rb +7 -4
  33. data/lib/worlddb/tree_reader.rb +97 -0
  34. data/lib/worlddb/version.rb +2 -2
  35. data/test/adm/test_fixture_matcher_adm2.rb +73 -0
  36. data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
  37. data/test/adm/test_fixture_matcher_tree.rb +52 -0
  38. data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
  39. data/test/adm/test_read_tree.rb +63 -0
  40. data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
  41. data/test/data/at-austria/orte.txt +23 -0
  42. data/test/data/at-austria/setups/tree.txt +9 -0
  43. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
  44. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
  45. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
  46. data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
  47. data/test/data/de-deutschland/orte.txt +12 -0
  48. data/test/data/de-deutschland/setups/adm.txt +1 -1
  49. data/test/data/de-deutschland/setups/tree.txt +9 -0
  50. data/test/helper.rb +8 -1
  51. data/test/test_fixture_matchers.rb +9 -10
  52. data/test/test_fixture_matchers_ii.rb +20 -19
  53. data/test/test_model_city.rb +26 -9
  54. data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
  55. data/test/test_model_country.rb +1 -1
  56. data/test/test_model_state.rb +54 -0
  57. data/test/test_model_states_at.rb +111 -0
  58. data/test/test_model_states_de.rb +147 -0
  59. data/test/test_models.rb +10 -3
  60. data/test/test_parse_city.rb +70 -0
  61. data/test/test_parse_country.rb +56 -0
  62. data/test/test_parse_state.rb +46 -0
  63. data/test/test_state_tree_reader_at.rb +54 -0
  64. data/test/test_state_tree_reader_de.rb +71 -0
  65. data/test/test_tree_reader.rb +39 -0
  66. metadata +50 -22
  67. data/lib/worlddb/models/city_compat.rb +0 -27
  68. data/lib/worlddb/models/continent_compat.rb +0 -24
  69. data/lib/worlddb/models/country_compat.rb +0 -35
  70. data/lib/worlddb/models/lang_compat.rb +0 -23
  71. data/lib/worlddb/models/region_compat.rb +0 -26
  72. data/lib/worlddb/readers/region.rb +0 -79
  73. data/test/test_fixture_matcher_adm2.rb +0 -62
  74. data/test/test_model_region.rb +0 -50
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e9b1bdb5484b5c74ef690b349dfd92dbb7fbd5d2
4
- data.tar.gz: ed1e496ec5d29624d915dd9e93559eda99ead12e
3
+ metadata.gz: fe2b1dd265c24760835eeefbd445c3a8302eb554
4
+ data.tar.gz: 35aeaaac8672937b8388825aa4c36cc11ce4651b
5
5
  SHA512:
6
- metadata.gz: 9ddd1c5c80416d77525a33cf1a54a79d953c2bc4303f7d305e0790b9d578990be7cfaf76576b5e2d9fc17ebc46f2bf3d353aa25d68771a8fb13a68f074a944e4
7
- data.tar.gz: 8098bfbbbdaf6e23f47fe26a02315747581130c97455b2521cf4264d352c0d95f435dab6d8fe128b410ec1d839e6d423e1f937effef8a42771f51d7a72cadf68
6
+ metadata.gz: 60c27ed1d0fceebca6edef19c0a50a542dafdbd66b71bdf9ee0e84aa641de7569d856b9fae697ffb844428ee24b72c505e663dae1bd9b4bc8d8b54f66e9bef9c
7
+ data.tar.gz: 6acf8fcfe52845533582c7dbd010f17feb834beb0110c278e2cc72607d7240be894bff150facf73749f9f5419685e8216d05db6732c351fb44df209af52aec8f
data/Manifest.txt CHANGED
@@ -3,22 +3,21 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/worlddb/deleter.rb
6
+ lib/worlddb/helpers/value_helper.rb
6
7
  lib/worlddb/matcher.rb
8
+ lib/worlddb/matcher_adm.rb
7
9
  lib/worlddb/models.rb
8
10
  lib/worlddb/models/city.rb
9
- lib/worlddb/models/city_compat.rb
11
+ lib/worlddb/models/city_base.rb
10
12
  lib/worlddb/models/continent.rb
11
- lib/worlddb/models/continent_compat.rb
12
13
  lib/worlddb/models/country.rb
13
14
  lib/worlddb/models/country_code.rb
14
- lib/worlddb/models/country_compat.rb
15
15
  lib/worlddb/models/forward.rb
16
16
  lib/worlddb/models/lang.rb
17
- lib/worlddb/models/lang_compat.rb
18
17
  lib/worlddb/models/name.rb
19
18
  lib/worlddb/models/place.rb
20
- lib/worlddb/models/region.rb
21
- lib/worlddb/models/region_compat.rb
19
+ lib/worlddb/models/state.rb
20
+ lib/worlddb/models/state_base.rb
22
21
  lib/worlddb/models/tagdb/tag.rb
23
22
  lib/worlddb/models/tagdb/tagging.rb
24
23
  lib/worlddb/models/usage.rb
@@ -26,32 +25,51 @@ lib/worlddb/patterns.rb
26
25
  lib/worlddb/reader.rb
27
26
  lib/worlddb/reader_file.rb
28
27
  lib/worlddb/reader_zip.rb
28
+ lib/worlddb/readers/base.rb
29
29
  lib/worlddb/readers/city.rb
30
30
  lib/worlddb/readers/country.rb
31
31
  lib/worlddb/readers/lang.rb
32
- lib/worlddb/readers/region.rb
32
+ lib/worlddb/readers/state.rb
33
+ lib/worlddb/readers/state_tree.rb
33
34
  lib/worlddb/readers/usage.rb
34
35
  lib/worlddb/schema.rb
35
36
  lib/worlddb/stats.rb
37
+ lib/worlddb/tree_reader.rb
36
38
  lib/worlddb/version.rb
39
+ test/adm/test_fixture_matcher_adm2.rb
40
+ test/adm/test_fixture_matcher_adm3.rb
41
+ test/adm/test_fixture_matcher_tree.rb
42
+ test/adm/test_read_adm.rb
43
+ test/adm/test_read_tree.rb
37
44
  test/data/at-austria/1--b-burgenland/counties.txt
38
45
  test/data/at-austria/2--n-niederoesterreich/counties.txt
39
46
  test/data/at-austria/3--w-wien/counties.txt
47
+ test/data/at-austria/orte.txt
40
48
  test/data/at-austria/setups/adm.txt
49
+ test/data/at-austria/setups/tree.txt
41
50
  test/data/at-austria/states.txt
42
51
  test/data/de-deutschland/3--by-bayern/1--oberbayern/counties.txt
43
52
  test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt
44
- test/data/de-deutschland/3--by-bayern/districts.txt
53
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
54
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
55
+ test/data/de-deutschland/3--by-bayern/parts.txt
56
+ test/data/de-deutschland/orte.txt
45
57
  test/data/de-deutschland/setups/adm.txt
58
+ test/data/de-deutschland/setups/tree.txt
46
59
  test/data/de-deutschland/states.txt
47
60
  test/helper.rb
48
- test/test_fixture_matcher_adm2.rb
49
- test/test_fixture_matcher_adm3.rb
50
61
  test/test_fixture_matchers.rb
51
62
  test/test_fixture_matchers_ii.rb
52
63
  test/test_model_city.rb
53
- test/test_model_comp.rb
64
+ test/test_model_compat.rb
54
65
  test/test_model_country.rb
55
- test/test_model_region.rb
66
+ test/test_model_state.rb
67
+ test/test_model_states_at.rb
68
+ test/test_model_states_de.rb
56
69
  test/test_models.rb
57
- test/test_read_adm.rb
70
+ test/test_parse_city.rb
71
+ test/test_parse_country.rb
72
+ test/test_parse_state.rb
73
+ test/test_state_tree_reader_at.rb
74
+ test/test_state_tree_reader_de.rb
75
+ test/test_tree_reader.rb
data/README.md CHANGED
@@ -30,9 +30,9 @@ Everything is a place.
30
30
  at.area
31
31
  # => 83_871
32
32
 
33
- at.regions.count
33
+ at.states.count
34
34
  # => 9
35
- at.regions
35
+ at.states
36
36
  # => [ 'Wien', 'Niederösterreich', 'Oberösterreich', ... ]
37
37
 
38
38
  at.cities.by_pop
@@ -52,9 +52,9 @@ Everything is a place.
52
52
  la = City.find_by! key: 'losangeles'
53
53
  la.name
54
54
  # => 'Los Angeles'
55
- la.region.name
55
+ la.state.name
56
56
  # => 'California'
57
- la.region.key
57
+ la.state.key
58
58
  # => 'ca'
59
59
  la.country.name
60
60
  # => 'United States'
@@ -73,11 +73,11 @@ Everything is a place.
73
73
  # => ['Austria', 'Belgium', 'Cyprus', ... ]
74
74
 
75
75
  flanders = Tag.find_by! key: 'flanders'
76
- flanders.regions.count
76
+ flanders.states.count
77
77
  # => 5
78
- flanders.regions
78
+ flanders.states
79
79
  # => ['Antwerpen', 'Brabant Wallon', 'Limburg', 'Oost-Vlaanderen', 'West-Vlaanderen']
80
- flanders.regions.first.country.name
80
+ flanders.states.first.country.name
81
81
  # => 'Belgium'
82
82
 
83
83
  and so on.
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ Hoe.spec 'worlddb-models' do
17
17
  self.extra_deps = [
18
18
  ['props', '>= 1.1.2'], # settings / prop(ertie)s / env / INI
19
19
  ['logutils', '>= 0.6.1'], # logging
20
- ['textutils', '>= 0.10.0'],
20
+ ['textutils', '>= 1.2.2'],
21
21
 
22
22
  ['tagutils', '>= 0.3.0'], # tags n categories for activerecord
23
23
  ['activerecord-utils', '>= 0.2.0'],
@@ -18,7 +18,12 @@ module WorldDb
18
18
  Name.delete_all
19
19
  Place.delete_all
20
20
  City.delete_all
21
- Region.delete_all
21
+ Metro.delete_all
22
+ District.delete_all
23
+ State.delete_all
24
+ Part.delete_all
25
+ County.delete_all
26
+ Muni.delete_all
22
27
  Country.delete_all
23
28
  Continent.delete_all
24
29
  Usage.delete_all
@@ -0,0 +1,117 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## NOTE:
5
+ ## add helpers to textutils(!!) valuehelper
6
+ ## do NOT create worlddb module
7
+
8
+ module TextUtils
9
+ module ValueHelper
10
+
11
+ ## todo/check: add to pair of matchers??
12
+ # e.g. match_country and match_country!
13
+ # - match_country will use find_by_key and match_country! will use find_by_key! - why? why not?
14
+
15
+ def match_country( value )
16
+ if value =~ /^country:/ # country:
17
+ country_key = value[8..-1] # cut off country: prefix
18
+ country = WorldDb::Model::Country.find_by_key!( country_key )
19
+ yield( country )
20
+ true # bingo - match found
21
+ else
22
+ false # no match found
23
+ end
24
+ end
25
+
26
+ def match_supra( value )
27
+ if value =~ /^supra:/ # supra:
28
+ country_key = value[6..-1] # cut off supra: prefix
29
+ country = WorldDb::Model::Country.find_by_key!( country_key )
30
+ yield( country )
31
+ true # bingo - match found
32
+ else
33
+ false # no match found
34
+ end
35
+ end
36
+
37
+ def match_supra_flag( value ) # supranational (country)
38
+ if value =~ /^supra$/ # supra(national)
39
+ yield( true )
40
+ true # bingo - match found
41
+ else
42
+ false # no match found
43
+ end
44
+ end
45
+
46
+
47
+ def is_state?( value ) # note: was is_region? (use new name only)
48
+ # assume state code e.g. TX or N
49
+ #
50
+ # fix: allow three-letter states too e.g. BRU (brussels) - the pattern below only accepts one or two letters for now
51
+ match_result = value =~ /^[A-Z]{1,2}$/
52
+ # match found if 0,1,2,3 etc or no match if nil
53
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
54
+ match_result != nil
55
+ end
56
+
57
+ ## fix/todo: use match_state_for_country! w/ !!! why? why not?
58
+ def match_state_for_country( value, country_id ) ## NB: required country_id
59
+ if value =~ /^state:/ ## state:
60
+ state_key = value[6..-1] ## cut off state: prefix
61
+ state = WorldDb::Model::State.find_by_key_and_country_id!( state_key, country_id )
62
+ yield( state )
63
+ true # bingo - match found
64
+ elsif is_state?( value ) ## assume state code e.g. TX or N
65
+ state = WorldDb::Model::State.find_by_key_and_country_id!( value.downcase, country_id )
66
+ yield( state )
67
+ true # bingo - match found
68
+ else
69
+ false # no match found
70
+ end
71
+ end
72
+
73
+
74
+ def match_city( value ) # NB: might be nil (city not found)
75
+ if value =~ /^city:/ ## city:
76
+ city_key = value[5..-1] ## cut off city: prefix
77
+ city = WorldDb::Model::City.find_by_key( city_key )
78
+ yield( city ) # NB: might be nil (city not found)
79
+ true # bingo - match found
80
+ else
81
+ false # no match found
82
+ end
83
+ end
84
+
85
+ def match_metro( value )
86
+ if value =~ /^metro:/ ## metro:
87
+ metro_key = value[6..-1] ## cut off metro: prefix
88
+ metro = WorldDb::Model::Metro.find_by_key!( metro_key )
89
+ yield( metro )
90
+ true # bingo - match found
91
+ else
92
+ false # no match found
93
+ end
94
+ end
95
+
96
+ def match_metro_flag( value )
97
+ if value =~ /^metro$/ # metro(politan area)
98
+ yield( true )
99
+ true # bingo - match found
100
+ else
101
+ false # no match found
102
+ end
103
+ end
104
+
105
+ def match_metro_pop( value )
106
+ if value =~ /^m:/ # m:
107
+ num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
108
+ yield( num )
109
+ true # bingo - match found
110
+ else
111
+ false # no match found
112
+ end
113
+ end
114
+
115
+ end # module ValueHelper
116
+ end # module TextUtils
117
+
@@ -4,26 +4,88 @@ module WorldDb
4
4
 
5
5
  module Matcher
6
6
 
7
+ def match_tree_for_country( name, &blk ) ## rename to state_tree ?? why? why not??
8
+ ## match state_tree (for now use orte.txt for austria, deutschland etc.)
9
+ ## todo/fix: add more "generic" names
10
+
11
+ simple_match_xxx_for_country( name, 'orte', &blk ) ## note: uses special **simple**_match_xxx_...
12
+ end
13
+
14
+
15
+ def match_cities_for_country( name, &blk )
16
+ ## todo: check if there's a better (more ruby way) to pass along code block ??
17
+ ## e.g. try
18
+ ## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
19
+
20
+ match_xxx_for_country( name, 'cities', &blk )
21
+ end
22
+
23
+
24
+
25
+ def match_states_for_country( name, &blk )
26
+ ## todo/fix: remove regions (obsolete) - no longer supported
27
+ ## also try synonyms e.g. old regions (if not match for states)
28
+ found = match_xxx_for_country( name, 'states', &blk )
29
+ found = match_xxx_for_country( name, 'regions', &blk ) unless found
30
+ found
31
+ end
32
+
33
+ def match_states_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
34
+ ## also try synonyms e.g. old regions (if not match for states)
35
+ found = match_xxx_for_country( name, 'states\.abbr', &blk )
36
+ found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
37
+ found
38
+ end
39
+
40
+ def match_states_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
41
+ ## also try synonyms e.g. old regions (if not match for states)
42
+ found = match_xxx_for_country( name, 'states\.iso', &blk )
43
+ found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
44
+ found
45
+ end
46
+
47
+ def match_states_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
48
+ ## also try synonyms e.g. old regions (if not match for states)
49
+ found = match_xxx_for_country( name, 'states\.nuts', &blk )
50
+ found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
51
+ found
52
+ end
53
+
54
+
55
+ def match_countries_for_continent( name )
56
+ if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
57
+ ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
58
+ ## auto-add continent (from folder structure) as tag
59
+ ## fix: allow dash/hyphen/minus in tag
60
+ continent = $1.dup
61
+ yield( continent )
62
+ true
63
+ else
64
+ false # no match found
65
+ end
66
+ end
67
+
68
+
69
+ private
70
+
7
71
  # note: returns code as capture
8
72
  WORLD_COUNTRY_CODE_PATTERN = '([a-z]{2,3})'
9
73
  WORLD_COUNTRY_CLASSIC_PATTERN = "#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
10
74
  WORLD_COUNTRY_MODERN_PATTERN = "[0-9]+--#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
11
75
 
12
76
  # note: returns code as capture
13
- WORLD_REGION_CODE_PATTERN = '([a-z]{1,3})'
14
- WORLD_REGION_CLASSIC_PATTERN = "#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
15
- WORLD_REGION_MODERN_PATTERN = "[0-9]+--#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
16
-
17
- # note: returns name as capture (no code required)
18
- WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
77
+ WORLD_STATE_CODE_PATTERN = '([a-z]{1,3})'
78
+ WORLD_STATE_CLASSIC_PATTERN = "#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
79
+ WORLD_STATE_MODERN_PATTERN = "[0-9]+--#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
19
80
 
20
- ## allow optional folders -- TODO: add restriction ?? e.g. must be 4+ alphas ???
81
+ ## allow optional folders -- TODO: add restriction ?? e.g. must be 4+ alphas ???
21
82
  WORLD_OPT_FOLDERS_PATTERN = "(?:\\/[^\\/]+)*" ## check: use double \\ or just \ ??
22
83
 
23
84
 
24
- def match_xxx_for_country( name, xxx ) # xxx e.g. cities|regions|beers|breweries
85
+
86
+ def match_xxx_for_country( name, xxx ) # xxx e.g. cities|states|beers|breweries
25
87
  # auto-add required country code (from folder structure)
26
- # note: always let match_xxx_for_country_n_region go first
88
+ # note: always let match_xxx_for_country_n_state go first
27
89
 
28
90
  # note: allow /cities and /1--hokkaido--cities
29
91
  xxx_pattern = "(?:#{xxx}|[0-9]+--[^\\/]+?--#{xxx})" # note: double escape \\ required for backslash
@@ -48,7 +110,7 @@ module Matcher
48
110
  #
49
111
  # (3) classic style: e.g. /at/beers (europe/at/cities)
50
112
  #
51
- # (4) new style w/ region w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
113
+ # (4) new style w/ state w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
52
114
  #
53
115
  # (5) compact style (country part of filename):
54
116
  # e.g. /at-austria--cities or /europe/at-austria--cities
@@ -57,12 +119,27 @@ module Matcher
57
119
  end
58
120
  end
59
121
 
122
+ def simple_match_xxx_for_country( name, xxx )
123
+ xxx_pattern = "(?:#{xxx})" ## just xxx for now
124
+
125
+ ## used for state tree (e.g. orte.txt)
126
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||
127
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/
128
+
129
+ country_key = $1.dup
130
+ yield( country_key )
131
+ true # bingo - match found
132
+ else
133
+ false # no match found
134
+ end
135
+ end
136
+
60
137
 
61
- def match_xxx_for_country_n_region( name, xxx ) # xxx e.g. wine|wineries
138
+ def match_xxx_for_country_n_state( name, xxx ) # xxx e.g. wine|wineries
62
139
 
63
- # auto-add required country n region code (from folder structure)
140
+ # auto-add required country n state code (from folder structure)
64
141
 
65
- ## -- allow opt_folders after long regions (e.g. additional subregion/zone)
142
+ ## -- allow opt_folders after long states (e.g. additional substate/zone)
66
143
  ## -- allow anything (prefixes) before -- for xxx
67
144
  # e.g. at-austria!/1--n-niederoesterreich--eastern/wagram--wines
68
145
  # at-austria!/1--n-niederoesterreich--eastern/wagram--wagram--wines
@@ -72,12 +149,12 @@ module Matcher
72
149
  oldoldold_xxx_pattern = "(?:#{xxx}|[^\\/]+--#{xxx})"
73
150
  xxx_pattern = "(?:#{xxx}|[^\\/]+#{xxx})" # note: double escape \\ required for backslash
74
151
 
75
- ## note: for now only (style #2) n (style #3) that is long region allow opt folders
152
+ ## note: for now styles (1), (2) and (3) allow opt folders after the state; style (4) does not
76
153
 
77
- if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (1)
78
- name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (2)
79
- name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (3)
80
- name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/ # (4)
154
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (1)
155
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (2)
156
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (3)
157
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/ # (4)
81
158
 
82
159
  #######
83
160
  # nb: country must start name (^) or coming after / e.g. europe/at-austria/...
@@ -89,134 +166,21 @@ module Matcher
89
166
  #
90
167
  # (3)
91
168
  # new new mixed style e.g. /at-austria/1--w-wien--eastern/cities
92
- # "classic" country plus new new region
169
+ # "classic" country plus new new state
93
170
  #
94
171
  # (4)
95
172
  # new new mixed style e.g. /1--at-austria--central/w-wien/cities
96
- # new new country plus "classic" region
97
-
98
- country_key = $1.dup
99
- region_key = $2.dup
100
- yield( country_key, region_key )
101
- true # bingo - match found
102
- else
103
- false # no match found
104
- end
105
- end
106
-
107
-
108
-
109
- def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. districts|counties|etc.
110
-
111
- # auto-add required country n regions (from folder structure)
112
- #
113
- # e.g. de-deutschland!/3--by-bayern/districts (regierungsbezirke)
114
- # europe/de-deutschland!/3--by-bayern/districts
115
- #
116
- # at-austria!/1--n-niederoesterreich/counties (bezirke)
117
-
118
- xxx_pattern = "#{xxx}"
119
-
120
- if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{xxx_pattern}/ ||
121
- name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/
122
-
123
- country_key = $1.dup
124
- region_key = $2.dup
125
- yield( country_key, region_key )
126
- true # bingo - match found
127
- else
128
- false # no match found
129
- end
130
- end
131
-
132
-
133
- def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. districts|counties|etc.
134
-
135
- # auto-add required country n regions (from folder structure)
136
- #
137
- # e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
138
- # europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
139
-
140
- xxx_pattern = "#{xxx}"
141
-
142
- if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
143
- name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
173
+ # new new country plus "classic" state
144
174
 
145
175
  country_key = $1.dup
146
- region_key = $2.dup
147
- adm2 = $3.dup # lowercase name e.g. oberfranken, oberbayern, etc.
148
- yield( country_key, region_key, adm2 )
176
+ state_key = $2.dup
177
+ yield( country_key, state_key )
149
178
  true # bingo - match found
150
179
  else
151
180
  false # no match found
152
181
  end
153
182
  end
154
183
 
155
-
156
- def match_adm2_for_country( name, &blk )
157
- ## note: also try synonyms e.g. districts|counties
158
- ## note: counties might also be an adm3 match
159
- found = match_xxx_for_country_n_adm1( name, 'districts', &blk )
160
- found = match_xxx_for_country_n_adm1( name, 'counties', &blk ) unless found
161
- found
162
- end
163
-
164
- def match_adm3_for_country( name, &blk )
165
- match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
166
- end
167
-
168
-
169
-
170
- def match_cities_for_country( name, &blk )
171
- ## todo: check if there's a better (more ruby way) to pass along code block ??
172
- ## e.g. try
173
- ## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
174
-
175
- match_xxx_for_country( name, 'cities', &blk )
176
- end
177
-
178
- def match_regions_for_country( name, &blk )
179
- ## also try synonyms e.g. old regions (if not match for states)
180
- found = match_xxx_for_country( name, 'states', &blk )
181
- found = match_xxx_for_country( name, 'regions', &blk ) unless found
182
- found
183
- end
184
-
185
- def match_regions_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
186
- ## also try synonyms e.g. old regions (if not match for states)
187
- found = match_xxx_for_country( name, 'states\.abbr', &blk )
188
- found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
189
- found
190
- end
191
-
192
- def match_regions_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
193
- ## also try synonyms e.g. old regions (if not match for states)
194
- found = match_xxx_for_country( name, 'states\.iso', &blk )
195
- found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
196
- found
197
- end
198
-
199
- def match_regions_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
200
- ## also try synonyms e.g. old regions (if not match for states)
201
- found = match_xxx_for_country( name, 'states\.nuts', &blk )
202
- found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
203
- found
204
- end
205
-
206
-
207
- def match_countries_for_continent( name )
208
- if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
209
- ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
210
- ## auto-add continent (from folder structure) as tag
211
- ## fix: allow dash/hyphen/minus in tag
212
- continent = $1.dup
213
- yield( continent )
214
- true
215
- else
216
- false # no match found
217
- end
218
- end
219
-
220
184
  end # module Matcher
221
185
 
222
186
  end # module WorldDb
@@ -0,0 +1,82 @@
1
+ # encoding: UTF-8
2
+
3
+ ####
4
+ ## matchers for adm2,adm3,etc
5
+ ## e.g. parts (regierungsbezirke)
6
+ ## counties (kreise,bezirke)
7
+
8
+ module WorldDb
9
+
10
+ module Matcher
11
+
12
+ def match_adm2_parts_for_country( name, &blk )
13
+ match_xxx_for_country_n_adm1( name, 'parts', &blk )
14
+ end
15
+
16
+ def match_adm2_counties_for_country( name, &blk )
17
+ ## note: counties might also be an adm3 match
18
+ match_xxx_for_country_n_adm1( name, 'counties', &blk )
19
+ end
20
+
21
+ def match_adm3_counties_for_country( name, &blk )
22
+ match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
23
+ end
24
+
25
+
26
+ private
27
+ # note: returns name as capture (no code required)
28
+ WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
29
+
30
+ ###
31
+ ## todo/check: adm1 => state
32
+ ## why use adm1 and not state ?? duplicate of match_xxx_for_country_n_state ??
33
+ def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. parts|counties|etc.
34
+
35
+ # auto-add required country n states (from folder structure)
36
+ #
37
+ # e.g. de-deutschland!/3--by-bayern/parts (regierungsbezirke)
38
+ # europe/de-deutschland!/3--by-bayern/parts
39
+ #
40
+ # at-austria!/1--n-niederoesterreich/counties (bezirke)
41
+
42
+ xxx_pattern = "#{xxx}"
43
+
44
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{xxx_pattern}/ ||
45
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/
46
+
47
+ country_key = $1.dup
48
+ state_key = $2.dup
49
+ yield( country_key, state_key )
50
+ true # bingo - match found
51
+ else
52
+ false # no match found
53
+ end
54
+ end
55
+
56
+
57
+ def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. parts|counties|etc.
58
+
59
+ # auto-add required country n states (from folder structure)
60
+ #
61
+ # e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
62
+ # europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
63
+
64
+ xxx_pattern = "#{xxx}"
65
+
66
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
67
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
68
+
69
+ country_key = $1.dup
70
+ state_key = $2.dup
71
+ adm2 = $3.dup # lowercase name e.g. oberfranken, oberbayern, etc.
72
+ yield( country_key, state_key, adm2 )
73
+ true # bingo - match found
74
+ else
75
+ false # no match found
76
+ end
77
+ end
78
+
79
+ end # module Matcher
80
+
81
+ end # module WorldDb
82
+