worlddb-models 2.2.2 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +31 -13
  3. data/README.md +7 -7
  4. data/Rakefile +1 -1
  5. data/lib/worlddb/deleter.rb +6 -1
  6. data/lib/worlddb/helpers/value_helper.rb +117 -0
  7. data/lib/worlddb/matcher.rb +99 -135
  8. data/lib/worlddb/matcher_adm.rb +82 -0
  9. data/lib/worlddb/models/city.rb +30 -208
  10. data/lib/worlddb/models/city_base.rb +220 -0
  11. data/lib/worlddb/models/continent.rb +9 -0
  12. data/lib/worlddb/models/country.rb +21 -4
  13. data/lib/worlddb/models/forward.rb +25 -9
  14. data/lib/worlddb/models/lang.rb +6 -0
  15. data/lib/worlddb/models/place.rb +1 -1
  16. data/lib/worlddb/models/state.rb +83 -0
  17. data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
  18. data/lib/worlddb/models/tagdb/tag.rb +1 -1
  19. data/lib/worlddb/models.rb +11 -8
  20. data/lib/worlddb/patterns.rb +4 -4
  21. data/lib/worlddb/reader.rb +68 -39
  22. data/lib/worlddb/reader_file.rb +36 -3
  23. data/lib/worlddb/reader_zip.rb +33 -3
  24. data/lib/worlddb/readers/base.rb +149 -0
  25. data/lib/worlddb/readers/city.rb +2 -65
  26. data/lib/worlddb/readers/country.rb +2 -63
  27. data/lib/worlddb/readers/lang.rb +3 -68
  28. data/lib/worlddb/readers/state.rb +61 -0
  29. data/lib/worlddb/readers/state_tree.rb +118 -0
  30. data/lib/worlddb/readers/usage.rb +2 -65
  31. data/lib/worlddb/schema.rb +142 -43
  32. data/lib/worlddb/stats.rb +7 -4
  33. data/lib/worlddb/tree_reader.rb +97 -0
  34. data/lib/worlddb/version.rb +2 -2
  35. data/test/adm/test_fixture_matcher_adm2.rb +73 -0
  36. data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
  37. data/test/adm/test_fixture_matcher_tree.rb +52 -0
  38. data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
  39. data/test/adm/test_read_tree.rb +63 -0
  40. data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
  41. data/test/data/at-austria/orte.txt +23 -0
  42. data/test/data/at-austria/setups/tree.txt +9 -0
  43. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
  44. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
  45. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
  46. data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
  47. data/test/data/de-deutschland/orte.txt +12 -0
  48. data/test/data/de-deutschland/setups/adm.txt +1 -1
  49. data/test/data/de-deutschland/setups/tree.txt +9 -0
  50. data/test/helper.rb +8 -1
  51. data/test/test_fixture_matchers.rb +9 -10
  52. data/test/test_fixture_matchers_ii.rb +20 -19
  53. data/test/test_model_city.rb +26 -9
  54. data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
  55. data/test/test_model_country.rb +1 -1
  56. data/test/test_model_state.rb +54 -0
  57. data/test/test_model_states_at.rb +111 -0
  58. data/test/test_model_states_de.rb +147 -0
  59. data/test/test_models.rb +10 -3
  60. data/test/test_parse_city.rb +70 -0
  61. data/test/test_parse_country.rb +56 -0
  62. data/test/test_parse_state.rb +46 -0
  63. data/test/test_state_tree_reader_at.rb +54 -0
  64. data/test/test_state_tree_reader_de.rb +71 -0
  65. data/test/test_tree_reader.rb +39 -0
  66. metadata +50 -22
  67. data/lib/worlddb/models/city_compat.rb +0 -27
  68. data/lib/worlddb/models/continent_compat.rb +0 -24
  69. data/lib/worlddb/models/country_compat.rb +0 -35
  70. data/lib/worlddb/models/lang_compat.rb +0 -23
  71. data/lib/worlddb/models/region_compat.rb +0 -26
  72. data/lib/worlddb/readers/region.rb +0 -79
  73. data/test/test_fixture_matcher_adm2.rb +0 -62
  74. data/test/test_model_region.rb +0 -50
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e9b1bdb5484b5c74ef690b349dfd92dbb7fbd5d2
4
- data.tar.gz: ed1e496ec5d29624d915dd9e93559eda99ead12e
3
+ metadata.gz: fe2b1dd265c24760835eeefbd445c3a8302eb554
4
+ data.tar.gz: 35aeaaac8672937b8388825aa4c36cc11ce4651b
5
5
  SHA512:
6
- metadata.gz: 9ddd1c5c80416d77525a33cf1a54a79d953c2bc4303f7d305e0790b9d578990be7cfaf76576b5e2d9fc17ebc46f2bf3d353aa25d68771a8fb13a68f074a944e4
7
- data.tar.gz: 8098bfbbbdaf6e23f47fe26a02315747581130c97455b2521cf4264d352c0d95f435dab6d8fe128b410ec1d839e6d423e1f937effef8a42771f51d7a72cadf68
6
+ metadata.gz: 60c27ed1d0fceebca6edef19c0a50a542dafdbd66b71bdf9ee0e84aa641de7569d856b9fae697ffb844428ee24b72c505e663dae1bd9b4bc8d8b54f66e9bef9c
7
+ data.tar.gz: 6acf8fcfe52845533582c7dbd010f17feb834beb0110c278e2cc72607d7240be894bff150facf73749f9f5419685e8216d05db6732c351fb44df209af52aec8f
data/Manifest.txt CHANGED
@@ -3,22 +3,21 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/worlddb/deleter.rb
6
+ lib/worlddb/helpers/value_helper.rb
6
7
  lib/worlddb/matcher.rb
8
+ lib/worlddb/matcher_adm.rb
7
9
  lib/worlddb/models.rb
8
10
  lib/worlddb/models/city.rb
9
- lib/worlddb/models/city_compat.rb
11
+ lib/worlddb/models/city_base.rb
10
12
  lib/worlddb/models/continent.rb
11
- lib/worlddb/models/continent_compat.rb
12
13
  lib/worlddb/models/country.rb
13
14
  lib/worlddb/models/country_code.rb
14
- lib/worlddb/models/country_compat.rb
15
15
  lib/worlddb/models/forward.rb
16
16
  lib/worlddb/models/lang.rb
17
- lib/worlddb/models/lang_compat.rb
18
17
  lib/worlddb/models/name.rb
19
18
  lib/worlddb/models/place.rb
20
- lib/worlddb/models/region.rb
21
- lib/worlddb/models/region_compat.rb
19
+ lib/worlddb/models/state.rb
20
+ lib/worlddb/models/state_base.rb
22
21
  lib/worlddb/models/tagdb/tag.rb
23
22
  lib/worlddb/models/tagdb/tagging.rb
24
23
  lib/worlddb/models/usage.rb
@@ -26,32 +25,51 @@ lib/worlddb/patterns.rb
26
25
  lib/worlddb/reader.rb
27
26
  lib/worlddb/reader_file.rb
28
27
  lib/worlddb/reader_zip.rb
28
+ lib/worlddb/readers/base.rb
29
29
  lib/worlddb/readers/city.rb
30
30
  lib/worlddb/readers/country.rb
31
31
  lib/worlddb/readers/lang.rb
32
- lib/worlddb/readers/region.rb
32
+ lib/worlddb/readers/state.rb
33
+ lib/worlddb/readers/state_tree.rb
33
34
  lib/worlddb/readers/usage.rb
34
35
  lib/worlddb/schema.rb
35
36
  lib/worlddb/stats.rb
37
+ lib/worlddb/tree_reader.rb
36
38
  lib/worlddb/version.rb
39
+ test/adm/test_fixture_matcher_adm2.rb
40
+ test/adm/test_fixture_matcher_adm3.rb
41
+ test/adm/test_fixture_matcher_tree.rb
42
+ test/adm/test_read_adm.rb
43
+ test/adm/test_read_tree.rb
37
44
  test/data/at-austria/1--b-burgenland/counties.txt
38
45
  test/data/at-austria/2--n-niederoesterreich/counties.txt
39
46
  test/data/at-austria/3--w-wien/counties.txt
47
+ test/data/at-austria/orte.txt
40
48
  test/data/at-austria/setups/adm.txt
49
+ test/data/at-austria/setups/tree.txt
41
50
  test/data/at-austria/states.txt
42
51
  test/data/de-deutschland/3--by-bayern/1--oberbayern/counties.txt
43
52
  test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt
44
- test/data/de-deutschland/3--by-bayern/districts.txt
53
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
54
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
55
+ test/data/de-deutschland/3--by-bayern/parts.txt
56
+ test/data/de-deutschland/orte.txt
45
57
  test/data/de-deutschland/setups/adm.txt
58
+ test/data/de-deutschland/setups/tree.txt
46
59
  test/data/de-deutschland/states.txt
47
60
  test/helper.rb
48
- test/test_fixture_matcher_adm2.rb
49
- test/test_fixture_matcher_adm3.rb
50
61
  test/test_fixture_matchers.rb
51
62
  test/test_fixture_matchers_ii.rb
52
63
  test/test_model_city.rb
53
- test/test_model_comp.rb
64
+ test/test_model_compat.rb
54
65
  test/test_model_country.rb
55
- test/test_model_region.rb
66
+ test/test_model_state.rb
67
+ test/test_model_states_at.rb
68
+ test/test_model_states_de.rb
56
69
  test/test_models.rb
57
- test/test_read_adm.rb
70
+ test/test_parse_city.rb
71
+ test/test_parse_country.rb
72
+ test/test_parse_state.rb
73
+ test/test_state_tree_reader_at.rb
74
+ test/test_state_tree_reader_de.rb
75
+ test/test_tree_reader.rb
data/README.md CHANGED
@@ -30,9 +30,9 @@ Everything is a place.
30
30
  at.area
31
31
  # => 83_871
32
32
 
33
- at.regions.count
33
+ at.states.count
34
34
  # => 9
35
- at.regions
35
+ at.states
36
36
  # => [ 'Wien', 'Niederösterreich', 'Oberösterreich', ... ]
37
37
 
38
38
  at.cities.by_pop
@@ -52,9 +52,9 @@ Everything is a place.
52
52
  la = City.find_by! key: 'losangeles'
53
53
  la.name
54
54
  # => 'Los Angeles'
55
- la.region.name
55
+ la.state.name
56
56
  # => 'California'
57
- la.region.key
57
+ la.state.key
58
58
  # => 'ca'
59
59
  la.country.name
60
60
  # => 'United States'
@@ -73,11 +73,11 @@ Everything is a place.
73
73
  # => ['Austria', 'Belgium', 'Cyprus', ... ]
74
74
 
75
75
  flanders = Tag.find_by! key: 'flanders'
76
- flanders.regions.count
76
+ flanders.states.count
77
77
  # => 5
78
- flanders.regions
78
+ flanders.states
79
79
  # => ['Antwerpen', 'Brabant Wallon', 'Limburg', 'Oost-Vlaanderen', 'West-Vlaanderen']
80
- flanders.regions.first.country.name
80
+ flanders.states.first.country.name
81
81
  # => 'Belgium'
82
82
 
83
83
  and so on.
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ Hoe.spec 'worlddb-models' do
17
17
  self.extra_deps = [
18
18
  ['props', '>= 1.1.2'], # settings / prop(ertie)s / env / INI
19
19
  ['logutils', '>= 0.6.1'], # logging
20
- ['textutils', '>= 0.10.0'],
20
+ ['textutils', '>= 1.2.2'],
21
21
 
22
22
  ['tagutils', '>= 0.3.0'], # tags n categories for activerecord
23
23
  ['activerecord-utils', '>= 0.2.0'],
@@ -18,7 +18,12 @@ module WorldDb
18
18
  Name.delete_all
19
19
  Place.delete_all
20
20
  City.delete_all
21
- Region.delete_all
21
+ Metro.delete_all
22
+ District.delete_all
23
+ State.delete_all
24
+ Part.delete_all
25
+ County.delete_all
26
+ Muni.delete_all
22
27
  Country.delete_all
23
28
  Continent.delete_all
24
29
  Usage.delete_all
@@ -0,0 +1,117 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## NOTE:
5
+ ## add helpers to textutils(!!) valuehelper
6
+ ## do NOT create worlddb module
7
+
8
+ module TextUtils
9
+ module ValueHelper
10
+
11
+ ## todo/check: add a pair of matchers??
12
+ # e.g. match_country and match_country!
13
+ # - match_country will use find_by_key and match_country! will use find_by_key! - why? why not?
14
+
15
+ def match_country( value )
16
+ if value =~ /^country:/ # country:
17
+ country_key = value[8..-1] # cut off country: prefix
18
+ country = WorldDb::Model::Country.find_by_key!( country_key )
19
+ yield( country )
20
+ true # bingo - match found
21
+ else
22
+ false # no match found
23
+ end
24
+ end
25
+
26
+ def match_supra( value )
27
+ if value =~ /^supra:/ # supra:
28
+ country_key = value[6..-1] # cut off supra: prefix
29
+ country = WorldDb::Model::Country.find_by_key!( country_key )
30
+ yield( country )
31
+ true # bingo - match found
32
+ else
33
+ false # no match found
34
+ end
35
+ end
36
+
37
+ def match_supra_flag( value ) # supranational (country)
38
+ if value =~ /^supra$/ # supra(national)
39
+ yield( true )
40
+ true # bingo - match found
41
+ else
42
+ false # no match found
43
+ end
44
+ end
45
+
46
+
47
+ def is_state?( value ) # note: was is_region? (use new name only)
48
+ # assume state code e.g. TX or N
49
+ #
50
+ # fix: allow three letter states too e.g. BRU (brussels)
51
+ match_result = value =~ /^[A-Z]{1,2}$/
52
+ # match found if 0,1,2,3 etc or no match if nil
53
+ # note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
54
+ match_result != nil
55
+ end
56
+
57
+ ## fix/todo: use match_state_for_country! w/ !!! why? why not?
58
+ def match_state_for_country( value, country_id ) ## NB: required country_id
59
+ if value =~ /^state:/ ## state:
60
+ state_key = value[6..-1] ## cut off state: prefix
61
+ state = WorldDb::Model::State.find_by_key_and_country_id!( state_key, country_id )
62
+ yield( state )
63
+ true # bingo - match found
64
+ elsif is_state?( value ) ## assume state code e.g. TX or N
65
+ state = WorldDb::Model::State.find_by_key_and_country_id!( value.downcase, country_id )
66
+ yield( state )
67
+ true # bingo - match found
68
+ else
69
+ false # no match found
70
+ end
71
+ end
72
+
73
+
74
+ def match_city( value ) # NB: might be nil (city not found)
75
+ if value =~ /^city:/ ## city:
76
+ city_key = value[5..-1] ## cut off city: prefix
77
+ city = WorldDb::Model::City.find_by_key( city_key )
78
+ yield( city ) # NB: might be nil (city not found)
79
+ true # bingo - match found
80
+ else
81
+ false # no match found
82
+ end
83
+ end
84
+
85
+ def match_metro( value )
86
+ if value =~ /^metro:/ ## metro:
87
+ metro_key = value[6..-1] ## cut off metro: prefix
88
+ metro = WorldDb::Model::Metro.find_by_key!( metro_key )
89
+ yield( metro )
90
+ true # bingo - match found
91
+ else
92
+ false # no match found
93
+ end
94
+ end
95
+
96
+ def match_metro_flag( value )
97
+ if value =~ /^metro$/ # metro(politan area)
98
+ yield( true )
99
+ true # bingo - match found
100
+ else
101
+ false # no match found
102
+ end
103
+ end
104
+
105
+ def match_metro_pop( value )
106
+ if value =~ /^m:/ # m:
107
+ num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
108
+ yield( num )
109
+ true # bingo - match found
110
+ else
111
+ false # no match found
112
+ end
113
+ end
114
+
115
+ end # module ValueHelper
116
+ end # module TextUtils
117
+
@@ -4,26 +4,88 @@ module WorldDb
4
4
 
5
5
  module Matcher
6
6
 
7
+ def match_tree_for_country( name, &blk ) ## rename to state_tree ?? why? why not??
8
+ ## match state_tree (for now use orte.txt for austria, deutschland etc.)
9
+ ## todo/fix: add more "generic" names
10
+
11
+ simple_match_xxx_for_country( name, 'orte', &blk ) ## note: uses special **simple**_match_xxx_...
12
+ end
13
+
14
+
15
+ def match_cities_for_country( name, &blk )
16
+ ## todo: check if there's a better (more ruby way) to pass along code block ??
17
+ ## e.g. try
18
+ ## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
19
+
20
+ match_xxx_for_country( name, 'cities', &blk )
21
+ end
22
+
23
+
24
+
25
+ def match_states_for_country( name, &blk )
26
+ ## todo/fix: remove regions (obsolete) - no longer supported
27
+ ## also try synonyms e.g. old regions (if not match for states)
28
+ found = match_xxx_for_country( name, 'states', &blk )
29
+ found = match_xxx_for_country( name, 'regions', &blk ) unless found
30
+ found
31
+ end
32
+
33
+ def match_states_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
34
+ ## also try synonyms e.g. old regions (if not match for states)
35
+ found = match_xxx_for_country( name, 'states\.abbr', &blk )
36
+ found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
37
+ found
38
+ end
39
+
40
+ def match_states_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
41
+ ## also try synonyms e.g. old regions (if not match for states)
42
+ found = match_xxx_for_country( name, 'states\.iso', &blk )
43
+ found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
44
+ found
45
+ end
46
+
47
+ def match_states_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
48
+ ## also try synonyms e.g. old regions (if not match for states)
49
+ found = match_xxx_for_country( name, 'states\.nuts', &blk )
50
+ found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
51
+ found
52
+ end
53
+
54
+
55
+ def match_countries_for_continent( name )
56
+ if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
57
+ ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
58
+ ## auto-add continent (from folder structure) as tag
59
+ ## fix: allow dash/hyphen/minus in tag
60
+ continent = $1.dup
61
+ yield( continent )
62
+ true
63
+ else
64
+ false # no match found
65
+ end
66
+ end
67
+
68
+
69
+ private
70
+
7
71
  # note: returns code as capture
8
72
  WORLD_COUNTRY_CODE_PATTERN = '([a-z]{2,3})'
9
73
  WORLD_COUNTRY_CLASSIC_PATTERN = "#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
10
74
  WORLD_COUNTRY_MODERN_PATTERN = "[0-9]+--#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
11
75
 
12
76
  # note: returns code as capture
13
- WORLD_REGION_CODE_PATTERN = '([a-z]{1,3})'
14
- WORLD_REGION_CLASSIC_PATTERN = "#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
15
- WORLD_REGION_MODERN_PATTERN = "[0-9]+--#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
16
-
17
- # note: returns name as capture (no code required)
18
- WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
77
+ WORLD_STATE_CODE_PATTERN = '([a-z]{1,3})'
78
+ WORLD_STATE_CLASSIC_PATTERN = "#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
79
+ WORLD_STATE_MODERN_PATTERN = "[0-9]+--#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
19
80
 
20
- ## allow optional folders -- TODO: add restriction ?? e.g. must be 4+ alphas ???
81
+ ## allow optional folders -- TODO: add restriction ?? e.g. must be 4+ alphas ???
21
82
  WORLD_OPT_FOLDERS_PATTERN = "(?:\\/[^\\/]+)*" ## check: use double \\ or just \ ??
22
83
 
23
84
 
24
- def match_xxx_for_country( name, xxx ) # xxx e.g. cities|regions|beers|breweries
85
+
86
+ def match_xxx_for_country( name, xxx ) # xxx e.g. cities|states|beers|breweries
25
87
  # auto-add required country code (from folder structure)
26
- # note: always let match_xxx_for_country_n_region go first
88
+ # note: always let match_xxx_for_country_n_state go first
27
89
 
28
90
  # note: allow /cities and /1--hokkaido--cities
29
91
  xxx_pattern = "(?:#{xxx}|[0-9]+--[^\\/]+?--#{xxx})" # note: double escape \\ required for backslash
@@ -48,7 +110,7 @@ module Matcher
48
110
  #
49
111
  # (3) classic style: e.g. /at/beers (europe/at/cities)
50
112
  #
51
- # (4) new style w/ region w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
113
+ # (4) new style w/ state w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
52
114
  #
53
115
  # (5) compact style (country part of filename):
54
116
  # e.g. /at-austria--cities or /europe/at-austria--cities
@@ -57,12 +119,27 @@ module Matcher
57
119
  end
58
120
  end
59
121
 
122
+ def simple_match_xxx_for_country( name, xxx )
123
+ xxx_pattern = "(?:#{xxx})" ## just xxx for now
124
+
125
+ ## used for state tree (e.g. orte.txt)
126
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||
127
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/
128
+
129
+ country_key = $1.dup
130
+ yield( country_key )
131
+ true # bingo - match found
132
+ else
133
+ false # no match found
134
+ end
135
+ end
136
+
60
137
 
61
- def match_xxx_for_country_n_region( name, xxx ) # xxx e.g. wine|wineries
138
+ def match_xxx_for_country_n_state( name, xxx ) # xxx e.g. wine|wineries
62
139
 
63
- # auto-add required country n region code (from folder structure)
140
+ # auto-add required country n state code (from folder structure)
64
141
 
65
- ## -- allow opt_folders after long regions (e.g. additional subregion/zone)
142
+ ## -- allow opt_folders after long states (e.g. additional substate/zone)
66
143
  ## -- allow anything (prefixes) before -- for xxx
67
144
  # e.g. at-austria!/1--n-niederoesterreich--eastern/wagram--wines
68
145
  # at-austria!/1--n-niederoesterreich--eastern/wagram--wagram--wines
@@ -72,12 +149,12 @@ module Matcher
72
149
  oldoldold_xxx_pattern = "(?:#{xxx}|[^\\/]+--#{xxx})"
73
150
  xxx_pattern = "(?:#{xxx}|[^\\/]+#{xxx})" # note: double escape \\ required for backslash
74
151
 
75
- ## note: for now only (style #2) n (style #3) that is long region allow opt folders
152
+ ## note: for now only (style #2) n (style #3) that is long state allow opt folders
76
153
 
77
- if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (1)
78
- name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (2)
79
- name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (3)
80
- name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/ # (4)
154
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (1)
155
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (2)
156
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (3)
157
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/ # (4)
81
158
 
82
159
  #######
83
160
  # nb: country must start name (^) or coming after / e.g. europe/at-austria/...
@@ -89,134 +166,21 @@ module Matcher
89
166
  #
90
167
  # (3)
91
168
  # new new mixed style e.g. /at-austria/1--w-wien--eastern/cities
92
- # "classic" country plus new new region
169
+ # "classic" country plus new new state
93
170
  #
94
171
  # (4)
95
172
  # new new mixed style e.g. /1--at-austria--central/w-wien/cities
96
- # new new country plus "classic" region
97
-
98
- country_key = $1.dup
99
- region_key = $2.dup
100
- yield( country_key, region_key )
101
- true # bingo - match found
102
- else
103
- false # no match found
104
- end
105
- end
106
-
107
-
108
-
109
- def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. districts|counties|etc.
110
-
111
- # auto-add required country n regions (from folder structure)
112
- #
113
- # e.g. de-deutschland!/3--by-bayern/districts (regierungsbezirke)
114
- # europe/de-deutschland!/3--by-bayern/districts
115
- #
116
- # at-austria!/1--n-niederoesterreich/counties (bezirke)
117
-
118
- xxx_pattern = "#{xxx}"
119
-
120
- if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{xxx_pattern}/ ||
121
- name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/
122
-
123
- country_key = $1.dup
124
- region_key = $2.dup
125
- yield( country_key, region_key )
126
- true # bingo - match found
127
- else
128
- false # no match found
129
- end
130
- end
131
-
132
-
133
- def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. districts|counties|etc.
134
-
135
- # auto-add required country n regions (from folder structure)
136
- #
137
- # e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
138
- # europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
139
-
140
- xxx_pattern = "#{xxx}"
141
-
142
- if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
143
- name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
173
+ # new new country plus "classic" state
144
174
 
145
175
  country_key = $1.dup
146
- region_key = $2.dup
147
- adm2 = $3.dup # lowercase name e.g. oberfranken, oberbayern, etc.
148
- yield( country_key, region_key, adm2 )
176
+ state_key = $2.dup
177
+ yield( country_key, state_key )
149
178
  true # bingo - match found
150
179
  else
151
180
  false # no match found
152
181
  end
153
182
  end
154
183
 
155
-
156
- def match_adm2_for_country( name, &blk )
157
- ## note: also try synonyms e.g. districts|counties
158
- ## note: counties might also be an adm3 match
159
- found = match_xxx_for_country_n_adm1( name, 'districts', &blk )
160
- found = match_xxx_for_country_n_adm1( name, 'counties', &blk ) unless found
161
- found
162
- end
163
-
164
- def match_adm3_for_country( name, &blk )
165
- match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
166
- end
167
-
168
-
169
-
170
- def match_cities_for_country( name, &blk )
171
- ## todo: check if there's a better (more ruby way) to pass along code block ??
172
- ## e.g. try
173
- ## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
174
-
175
- match_xxx_for_country( name, 'cities', &blk )
176
- end
177
-
178
- def match_regions_for_country( name, &blk )
179
- ## also try synonyms e.g. old regions (if not match for states)
180
- found = match_xxx_for_country( name, 'states', &blk )
181
- found = match_xxx_for_country( name, 'regions', &blk ) unless found
182
- found
183
- end
184
-
185
- def match_regions_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
186
- ## also try synonyms e.g. old regions (if not match for states)
187
- found = match_xxx_for_country( name, 'states\.abbr', &blk )
188
- found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
189
- found
190
- end
191
-
192
- def match_regions_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
193
- ## also try synonyms e.g. old regions (if not match for states)
194
- found = match_xxx_for_country( name, 'states\.iso', &blk )
195
- found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
196
- found
197
- end
198
-
199
- def match_regions_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
200
- ## also try synonyms e.g. old regions (if not match for states)
201
- found = match_xxx_for_country( name, 'states\.nuts', &blk )
202
- found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
203
- found
204
- end
205
-
206
-
207
- def match_countries_for_continent( name )
208
- if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
209
- ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
210
- ## auto-add continent (from folder structure) as tag
211
- ## fix: allow dash/hyphen/minus in tag
212
- continent = $1.dup
213
- yield( continent )
214
- true
215
- else
216
- false # no match found
217
- end
218
- end
219
-
220
184
  end # module Matcher
221
185
 
222
186
  end # module WorldDb
@@ -0,0 +1,82 @@
1
+ # encoding: UTF-8
2
+
3
+ ####
4
+ ## matchers for adm2,adm3,etc
5
+ ## e.g. parts (regierungsbezirke)
6
+ ## counties (kreise,bezirke)
7
+
8
+ module WorldDb
9
+
10
+ module Matcher
11
+
12
+ def match_adm2_parts_for_country( name, &blk )
13
+ match_xxx_for_country_n_adm1( name, 'parts', &blk )
14
+ end
15
+
16
+ def match_adm2_counties_for_country( name, &blk )
17
+ ## note: counties might also be an adm3 match
18
+ match_xxx_for_country_n_adm1( name, 'counties', &blk )
19
+ end
20
+
21
+ def match_adm3_counties_for_country( name, &blk )
22
+ match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
23
+ end
24
+
25
+
26
+ private
27
+ # note: returns name as capture (no code required)
28
+ WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
29
+
30
+ ###
31
+ ## todo/check: adm1 => state
32
+ ## why use adm1 and not state ?? duplicate of match_xxx_for_country_n_state ??
33
+ def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. parts|counties|etc.
34
+
35
+ # auto-add required country n states (from folder structure)
36
+ #
37
+ # e.g. de-deutschland!/3--by-bayern/parts (regierungsbezirke)
38
+ # europe/de-deutschland!/3--by-bayern/parts
39
+ #
40
+ # at-austria!/1--n-niederoesterreich/counties (bezirke)
41
+
42
+ xxx_pattern = "#{xxx}"
43
+
44
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{xxx_pattern}/ ||
45
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/
46
+
47
+ country_key = $1.dup
48
+ state_key = $2.dup
49
+ yield( country_key, state_key )
50
+ true # bingo - match found
51
+ else
52
+ false # no match found
53
+ end
54
+ end
55
+
56
+
57
+ def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. parts|counties|etc.
58
+
59
+ # auto-add required country n states (from folder structure)
60
+ #
61
+ # e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
62
+ # europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
63
+
64
+ xxx_pattern = "#{xxx}"
65
+
66
+ if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
67
+ name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
68
+
69
+ country_key = $1.dup
70
+ state_key = $2.dup
71
+ adm2 = $3.dup # lowercase name e.g. oberfranken, oberbayern, etc.
72
+ yield( country_key, state_key, adm2 )
73
+ true # bingo - match found
74
+ else
75
+ false # no match found
76
+ end
77
+ end
78
+
79
+ end # module Matcher
80
+
81
+ end # module WorldDb
82
+