worlddb-models 2.2.2 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +31 -13
- data/README.md +7 -7
- data/Rakefile +1 -1
- data/lib/worlddb/deleter.rb +6 -1
- data/lib/worlddb/helpers/value_helper.rb +117 -0
- data/lib/worlddb/matcher.rb +99 -135
- data/lib/worlddb/matcher_adm.rb +82 -0
- data/lib/worlddb/models/city.rb +30 -208
- data/lib/worlddb/models/city_base.rb +220 -0
- data/lib/worlddb/models/continent.rb +9 -0
- data/lib/worlddb/models/country.rb +21 -4
- data/lib/worlddb/models/forward.rb +25 -9
- data/lib/worlddb/models/lang.rb +6 -0
- data/lib/worlddb/models/place.rb +1 -1
- data/lib/worlddb/models/state.rb +83 -0
- data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
- data/lib/worlddb/models/tagdb/tag.rb +1 -1
- data/lib/worlddb/models.rb +11 -8
- data/lib/worlddb/patterns.rb +4 -4
- data/lib/worlddb/reader.rb +68 -39
- data/lib/worlddb/reader_file.rb +36 -3
- data/lib/worlddb/reader_zip.rb +33 -3
- data/lib/worlddb/readers/base.rb +149 -0
- data/lib/worlddb/readers/city.rb +2 -65
- data/lib/worlddb/readers/country.rb +2 -63
- data/lib/worlddb/readers/lang.rb +3 -68
- data/lib/worlddb/readers/state.rb +61 -0
- data/lib/worlddb/readers/state_tree.rb +118 -0
- data/lib/worlddb/readers/usage.rb +2 -65
- data/lib/worlddb/schema.rb +142 -43
- data/lib/worlddb/stats.rb +7 -4
- data/lib/worlddb/tree_reader.rb +97 -0
- data/lib/worlddb/version.rb +2 -2
- data/test/adm/test_fixture_matcher_adm2.rb +73 -0
- data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
- data/test/adm/test_fixture_matcher_tree.rb +52 -0
- data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
- data/test/adm/test_read_tree.rb +63 -0
- data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
- data/test/data/at-austria/orte.txt +23 -0
- data/test/data/at-austria/setups/tree.txt +9 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
- data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
- data/test/data/de-deutschland/orte.txt +12 -0
- data/test/data/de-deutschland/setups/adm.txt +1 -1
- data/test/data/de-deutschland/setups/tree.txt +9 -0
- data/test/helper.rb +8 -1
- data/test/test_fixture_matchers.rb +9 -10
- data/test/test_fixture_matchers_ii.rb +20 -19
- data/test/test_model_city.rb +26 -9
- data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
- data/test/test_model_country.rb +1 -1
- data/test/test_model_state.rb +54 -0
- data/test/test_model_states_at.rb +111 -0
- data/test/test_model_states_de.rb +147 -0
- data/test/test_models.rb +10 -3
- data/test/test_parse_city.rb +70 -0
- data/test/test_parse_country.rb +56 -0
- data/test/test_parse_state.rb +46 -0
- data/test/test_state_tree_reader_at.rb +54 -0
- data/test/test_state_tree_reader_de.rb +71 -0
- data/test/test_tree_reader.rb +39 -0
- metadata +50 -22
- data/lib/worlddb/models/city_compat.rb +0 -27
- data/lib/worlddb/models/continent_compat.rb +0 -24
- data/lib/worlddb/models/country_compat.rb +0 -35
- data/lib/worlddb/models/lang_compat.rb +0 -23
- data/lib/worlddb/models/region_compat.rb +0 -26
- data/lib/worlddb/readers/region.rb +0 -79
- data/test/test_fixture_matcher_adm2.rb +0 -62
- data/test/test_model_region.rb +0 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe2b1dd265c24760835eeefbd445c3a8302eb554
|
4
|
+
data.tar.gz: 35aeaaac8672937b8388825aa4c36cc11ce4651b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60c27ed1d0fceebca6edef19c0a50a542dafdbd66b71bdf9ee0e84aa641de7569d856b9fae697ffb844428ee24b72c505e663dae1bd9b4bc8d8b54f66e9bef9c
|
7
|
+
data.tar.gz: 6acf8fcfe52845533582c7dbd010f17feb834beb0110c278e2cc72607d7240be894bff150facf73749f9f5419685e8216d05db6732c351fb44df209af52aec8f
|
data/Manifest.txt
CHANGED
@@ -3,22 +3,21 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/worlddb/deleter.rb
|
6
|
+
lib/worlddb/helpers/value_helper.rb
|
6
7
|
lib/worlddb/matcher.rb
|
8
|
+
lib/worlddb/matcher_adm.rb
|
7
9
|
lib/worlddb/models.rb
|
8
10
|
lib/worlddb/models/city.rb
|
9
|
-
lib/worlddb/models/
|
11
|
+
lib/worlddb/models/city_base.rb
|
10
12
|
lib/worlddb/models/continent.rb
|
11
|
-
lib/worlddb/models/continent_compat.rb
|
12
13
|
lib/worlddb/models/country.rb
|
13
14
|
lib/worlddb/models/country_code.rb
|
14
|
-
lib/worlddb/models/country_compat.rb
|
15
15
|
lib/worlddb/models/forward.rb
|
16
16
|
lib/worlddb/models/lang.rb
|
17
|
-
lib/worlddb/models/lang_compat.rb
|
18
17
|
lib/worlddb/models/name.rb
|
19
18
|
lib/worlddb/models/place.rb
|
20
|
-
lib/worlddb/models/
|
21
|
-
lib/worlddb/models/
|
19
|
+
lib/worlddb/models/state.rb
|
20
|
+
lib/worlddb/models/state_base.rb
|
22
21
|
lib/worlddb/models/tagdb/tag.rb
|
23
22
|
lib/worlddb/models/tagdb/tagging.rb
|
24
23
|
lib/worlddb/models/usage.rb
|
@@ -26,32 +25,51 @@ lib/worlddb/patterns.rb
|
|
26
25
|
lib/worlddb/reader.rb
|
27
26
|
lib/worlddb/reader_file.rb
|
28
27
|
lib/worlddb/reader_zip.rb
|
28
|
+
lib/worlddb/readers/base.rb
|
29
29
|
lib/worlddb/readers/city.rb
|
30
30
|
lib/worlddb/readers/country.rb
|
31
31
|
lib/worlddb/readers/lang.rb
|
32
|
-
lib/worlddb/readers/
|
32
|
+
lib/worlddb/readers/state.rb
|
33
|
+
lib/worlddb/readers/state_tree.rb
|
33
34
|
lib/worlddb/readers/usage.rb
|
34
35
|
lib/worlddb/schema.rb
|
35
36
|
lib/worlddb/stats.rb
|
37
|
+
lib/worlddb/tree_reader.rb
|
36
38
|
lib/worlddb/version.rb
|
39
|
+
test/adm/test_fixture_matcher_adm2.rb
|
40
|
+
test/adm/test_fixture_matcher_adm3.rb
|
41
|
+
test/adm/test_fixture_matcher_tree.rb
|
42
|
+
test/adm/test_read_adm.rb
|
43
|
+
test/adm/test_read_tree.rb
|
37
44
|
test/data/at-austria/1--b-burgenland/counties.txt
|
38
45
|
test/data/at-austria/2--n-niederoesterreich/counties.txt
|
39
46
|
test/data/at-austria/3--w-wien/counties.txt
|
47
|
+
test/data/at-austria/orte.txt
|
40
48
|
test/data/at-austria/setups/adm.txt
|
49
|
+
test/data/at-austria/setups/tree.txt
|
41
50
|
test/data/at-austria/states.txt
|
42
51
|
test/data/de-deutschland/3--by-bayern/1--oberbayern/counties.txt
|
43
52
|
test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt
|
44
|
-
test/data/de-deutschland/3--by-bayern/
|
53
|
+
test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
|
54
|
+
test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
|
55
|
+
test/data/de-deutschland/3--by-bayern/parts.txt
|
56
|
+
test/data/de-deutschland/orte.txt
|
45
57
|
test/data/de-deutschland/setups/adm.txt
|
58
|
+
test/data/de-deutschland/setups/tree.txt
|
46
59
|
test/data/de-deutschland/states.txt
|
47
60
|
test/helper.rb
|
48
|
-
test/test_fixture_matcher_adm2.rb
|
49
|
-
test/test_fixture_matcher_adm3.rb
|
50
61
|
test/test_fixture_matchers.rb
|
51
62
|
test/test_fixture_matchers_ii.rb
|
52
63
|
test/test_model_city.rb
|
53
|
-
test/
|
64
|
+
test/test_model_compat.rb
|
54
65
|
test/test_model_country.rb
|
55
|
-
test/
|
66
|
+
test/test_model_state.rb
|
67
|
+
test/test_model_states_at.rb
|
68
|
+
test/test_model_states_de.rb
|
56
69
|
test/test_models.rb
|
57
|
-
test/
|
70
|
+
test/test_parse_city.rb
|
71
|
+
test/test_parse_country.rb
|
72
|
+
test/test_parse_state.rb
|
73
|
+
test/test_state_tree_reader_at.rb
|
74
|
+
test/test_state_tree_reader_de.rb
|
75
|
+
test/test_tree_reader.rb
|
data/README.md
CHANGED
@@ -30,9 +30,9 @@ Everything is a place.
|
|
30
30
|
at.area
|
31
31
|
# => 83_871
|
32
32
|
|
33
|
-
at.
|
33
|
+
at.states.count
|
34
34
|
# => 9
|
35
|
-
at.
|
35
|
+
at.states
|
36
36
|
# => [ 'Wien', 'Niederösterreich', 'Oberösterreich', ... ]
|
37
37
|
|
38
38
|
at.cities.by_pop
|
@@ -52,9 +52,9 @@ Everything is a place.
|
|
52
52
|
la = City.find_by! key: 'losangeles'
|
53
53
|
la.name
|
54
54
|
# => 'Los Angeles'
|
55
|
-
la.
|
55
|
+
la.state.name
|
56
56
|
# => 'California'
|
57
|
-
la.
|
57
|
+
la.state.key
|
58
58
|
# => 'ca'
|
59
59
|
la.country.name
|
60
60
|
# => 'United States'
|
@@ -73,11 +73,11 @@ Everything is a place.
|
|
73
73
|
# => ['Austria', 'Belgium', 'Cyprus', ... ]
|
74
74
|
|
75
75
|
flanders = Tag.find_by! key: 'flanders'
|
76
|
-
flanders.
|
76
|
+
flanders.states.count
|
77
77
|
# => 5
|
78
|
-
flanders.
|
78
|
+
flanders.states
|
79
79
|
# => ['Antwerpen', 'Brabant Wallon', 'Limburg', 'Oost-Vlaanderen', 'West-Vlaanderen']
|
80
|
-
flanders.
|
80
|
+
flanders.states.first.country.name
|
81
81
|
# => 'Belgium'
|
82
82
|
|
83
83
|
and so on.
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ Hoe.spec 'worlddb-models' do
|
|
17
17
|
self.extra_deps = [
|
18
18
|
['props', '>= 1.1.2'], # settings / prop(ertie)s / env / INI
|
19
19
|
['logutils', '>= 0.6.1'], # logging
|
20
|
-
['textutils', '>=
|
20
|
+
['textutils', '>= 1.2.2'],
|
21
21
|
|
22
22
|
['tagutils', '>= 0.3.0'], # tags n categories for activerecord
|
23
23
|
['activerecord-utils', '>= 0.2.0'],
|
data/lib/worlddb/deleter.rb
CHANGED
@@ -18,7 +18,12 @@ module WorldDb
|
|
18
18
|
Name.delete_all
|
19
19
|
Place.delete_all
|
20
20
|
City.delete_all
|
21
|
-
|
21
|
+
Metro.delete_all
|
22
|
+
District.delete_all
|
23
|
+
State.delete_all
|
24
|
+
Part.delete_all
|
25
|
+
County.delete_all
|
26
|
+
Muni.delete_all
|
22
27
|
Country.delete_all
|
23
28
|
Continent.delete_all
|
24
29
|
Usage.delete_all
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
## NOTE:
|
5
|
+
## add helpers to textutils(!!) valuehelper
|
6
|
+
## do NOT create worlddb module
|
7
|
+
|
8
|
+
module TextUtils
|
9
|
+
module ValueHelper
|
10
|
+
|
11
|
+
## todo/check: add to pair of matchers??
|
12
|
+
# e.g. match_country and match_country!
|
13
|
+
# - match_country will use find_by_key and match_country! will use find_by_key! - why? why not?
|
14
|
+
|
15
|
+
def match_country( value )
|
16
|
+
if value =~ /^country:/ # country:
|
17
|
+
country_key = value[8..-1] # cut off country: prefix
|
18
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
19
|
+
yield( country )
|
20
|
+
true # bingo - match found
|
21
|
+
else
|
22
|
+
false # no match found
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def match_supra( value )
|
27
|
+
if value =~ /^supra:/ # supra:
|
28
|
+
country_key = value[6..-1] # cut off supra: prefix
|
29
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
30
|
+
yield( country )
|
31
|
+
true # bingo - match found
|
32
|
+
else
|
33
|
+
false # no match found
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def match_supra_flag( value ) # supranational (country)
|
38
|
+
if value =~ /^supra$/ # supra(national)
|
39
|
+
yield( true )
|
40
|
+
true # bingo - match found
|
41
|
+
else
|
42
|
+
false # no match found
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
def is_state?( value ) # note: was is_region? (use new name only)
|
48
|
+
# assume state code e.g. TX or N
|
49
|
+
#
|
50
|
+
# fix: allow three letter states too e.g. BRU (brussels)
|
51
|
+
match_result = value =~ /^[A-Z]{1,2}$/
|
52
|
+
# match found if 0,1,2,3 etc or no match if nil
|
53
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
54
|
+
match_result != nil
|
55
|
+
end
|
56
|
+
|
57
|
+
## fix/todo: use match_state_for_country! w/ !!! why? why not?
|
58
|
+
def match_state_for_country( value, country_id ) ## NB: required country_id
|
59
|
+
if value =~ /^state:/ ## state:
|
60
|
+
state_key = value[6..-1] ## cut off state: prefix
|
61
|
+
state = WorldDb::Model::State.find_by_key_and_country_id!( state_key, country_id )
|
62
|
+
yield( state )
|
63
|
+
true # bingo - match found
|
64
|
+
elsif is_state?( value ) ## assume state code e.g. TX or N
|
65
|
+
state = WorldDb::Model::State.find_by_key_and_country_id!( value.downcase, country_id )
|
66
|
+
yield( state )
|
67
|
+
true # bingo - match found
|
68
|
+
else
|
69
|
+
false # no match found
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
def match_city( value ) # NB: might be nil (city not found)
|
75
|
+
if value =~ /^city:/ ## city:
|
76
|
+
city_key = value[5..-1] ## cut off city: prefix
|
77
|
+
city = WorldDb::Model::City.find_by_key( city_key )
|
78
|
+
yield( city ) # NB: might be nil (city not found)
|
79
|
+
true # bingo - match found
|
80
|
+
else
|
81
|
+
false # no match found
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def match_metro( value )
|
86
|
+
if value =~ /^metro:/ ## metro:
|
87
|
+
metro_key = value[6..-1] ## cut off metro: prefix
|
88
|
+
metro = WorldDb::Model::Metro.find_by_key!( metro_key )
|
89
|
+
yield( metro )
|
90
|
+
true # bingo - match found
|
91
|
+
else
|
92
|
+
false # no match found
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def match_metro_flag( value )
|
97
|
+
if value =~ /^metro$/ # metro(politan area)
|
98
|
+
yield( true )
|
99
|
+
true # bingo - match found
|
100
|
+
else
|
101
|
+
false # no match found
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def match_metro_pop( value )
|
106
|
+
if value =~ /^m:/ # m:
|
107
|
+
num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
|
108
|
+
yield( num )
|
109
|
+
true # bingo - match found
|
110
|
+
else
|
111
|
+
false # no match found
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
end # module ValueHelper
|
116
|
+
end # module TextUtils
|
117
|
+
|
data/lib/worlddb/matcher.rb
CHANGED
@@ -4,26 +4,88 @@ module WorldDb
|
|
4
4
|
|
5
5
|
module Matcher
|
6
6
|
|
7
|
+
def match_tree_for_country( name, &blk ) ## rename to state_tree ?? why? why not??
|
8
|
+
## match state_tree (for now use orte.txt for austria, deutschland etc.)
|
9
|
+
## todo/fix: add more "generic" names
|
10
|
+
|
11
|
+
simple_match_xxx_for_country( name, 'orte', &blk ) ## note: uses special **simple**_match_xxx_...
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
def match_cities_for_country( name, &blk )
|
16
|
+
## todo: check if there's a better (more ruby way) to pass along code block ??
|
17
|
+
## e.g. try
|
18
|
+
## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
|
19
|
+
|
20
|
+
match_xxx_for_country( name, 'cities', &blk )
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
def match_states_for_country( name, &blk )
|
26
|
+
## todo/fix: remove regions (obsolete) - no longer supported
|
27
|
+
## also try synonyms e.g. old regions (if not match for states)
|
28
|
+
found = match_xxx_for_country( name, 'states', &blk )
|
29
|
+
found = match_xxx_for_country( name, 'regions', &blk ) unless found
|
30
|
+
found
|
31
|
+
end
|
32
|
+
|
33
|
+
def match_states_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
34
|
+
## also try synonyms e.g. old regions (if not match for states)
|
35
|
+
found = match_xxx_for_country( name, 'states\.abbr', &blk )
|
36
|
+
found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
|
37
|
+
found
|
38
|
+
end
|
39
|
+
|
40
|
+
def match_states_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
41
|
+
## also try synonyms e.g. old regions (if not match for states)
|
42
|
+
found = match_xxx_for_country( name, 'states\.iso', &blk )
|
43
|
+
found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
|
44
|
+
found
|
45
|
+
end
|
46
|
+
|
47
|
+
def match_states_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
48
|
+
## also try synonyms e.g. old regions (if not match for states)
|
49
|
+
found = match_xxx_for_country( name, 'states\.nuts', &blk )
|
50
|
+
found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
|
51
|
+
found
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
def match_countries_for_continent( name )
|
56
|
+
if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
|
57
|
+
### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
|
58
|
+
## auto-add continent (from folder structure) as tag
|
59
|
+
## fix: allow dash/hyphen/minus in tag
|
60
|
+
continent = $1.dup
|
61
|
+
yield( continent )
|
62
|
+
true
|
63
|
+
else
|
64
|
+
false # no match found
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
private
|
70
|
+
|
7
71
|
# note: returns code as capture
|
8
72
|
WORLD_COUNTRY_CODE_PATTERN = '([a-z]{2,3})'
|
9
73
|
WORLD_COUNTRY_CLASSIC_PATTERN = "#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
|
10
74
|
WORLD_COUNTRY_MODERN_PATTERN = "[0-9]+--#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
|
11
75
|
|
12
76
|
# note: returns code as capture
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# note: returns name as capture (no code required)
|
18
|
-
WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
|
77
|
+
WORLD_STATE_CODE_PATTERN = '([a-z]{1,3})'
|
78
|
+
WORLD_STATE_CLASSIC_PATTERN = "#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
|
79
|
+
WORLD_STATE_MODERN_PATTERN = "[0-9]+--#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
|
19
80
|
|
20
|
-
|
81
|
+
## allow optional folders -- TODO: add restriction ?? e.g. must be 4+ alphas ???
|
21
82
|
WORLD_OPT_FOLDERS_PATTERN = "(?:\\/[^\\/]+)*" ## check: use double \\ or just \ ??
|
22
83
|
|
23
84
|
|
24
|
-
|
85
|
+
|
86
|
+
def match_xxx_for_country( name, xxx ) # xxx e.g. cities|states|beers|breweries
|
25
87
|
# auto-add required country code (from folder structure)
|
26
|
-
# note: always let
|
88
|
+
# note: always let match_xxx_for_country_n_state go first
|
27
89
|
|
28
90
|
# note: allow /cities and /1--hokkaido--cities
|
29
91
|
xxx_pattern = "(?:#{xxx}|[0-9]+--[^\\/]+?--#{xxx})" # note: double escape \\ required for backslash
|
@@ -48,7 +110,7 @@ module Matcher
|
|
48
110
|
#
|
49
111
|
# (3) classic style: e.g. /at/beers (europe/at/cities)
|
50
112
|
#
|
51
|
-
# (4) new style w/
|
113
|
+
# (4) new style w/ state w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
|
52
114
|
#
|
53
115
|
# (5) compact style (country part of filename):
|
54
116
|
# e.g. /at-austria--cities or /europe/at-austria--cities
|
@@ -57,12 +119,27 @@ module Matcher
|
|
57
119
|
end
|
58
120
|
end
|
59
121
|
|
122
|
+
def simple_match_xxx_for_country( name, xxx )
|
123
|
+
xxx_pattern = "(?:#{xxx})" ## just xxx for now
|
124
|
+
|
125
|
+
## used for state tree (e.g. orte.txt)
|
126
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||
|
127
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/
|
128
|
+
|
129
|
+
country_key = $1.dup
|
130
|
+
yield( country_key )
|
131
|
+
true # bingo - match found
|
132
|
+
else
|
133
|
+
false # no match found
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
60
137
|
|
61
|
-
def
|
138
|
+
def match_xxx_for_country_n_state( name, xxx ) # xxx e.g. wine|wineries
|
62
139
|
|
63
|
-
# auto-add required country n
|
140
|
+
# auto-add required country n state code (from folder structure)
|
64
141
|
|
65
|
-
## -- allow opt_folders after long
|
142
|
+
## -- allow opt_folders after long states (e.g. additional substate/zone)
|
66
143
|
## -- allow anything (prefixes) before -- for xxx
|
67
144
|
# e.g. at-austria!/1--n-niederoesterreich--eastern/wagram--wines
|
68
145
|
# at-austria!/1--n-niederoesterreich--eastern/wagram--wagram--wines
|
@@ -72,12 +149,12 @@ module Matcher
|
|
72
149
|
oldoldold_xxx_pattern = "(?:#{xxx}|[^\\/]+--#{xxx})"
|
73
150
|
xxx_pattern = "(?:#{xxx}|[^\\/]+#{xxx})" # note: double escape \\ required for backslash
|
74
151
|
|
75
|
-
## note: for now only (style #2) n (style #3) that is long
|
152
|
+
## note: for now (style #1), (style #2) n (style #3) allow opt folders after the state; (style #4) does not
|
76
153
|
|
77
|
-
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{
|
78
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{
|
79
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{
|
80
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{
|
154
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (1)
|
155
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (2)
|
156
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (3)
|
157
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/ # (4)
|
81
158
|
|
82
159
|
#######
|
83
160
|
# nb: country must start name (^) or coming after / e.g. europe/at-austria/...
|
@@ -89,134 +166,21 @@ module Matcher
|
|
89
166
|
#
|
90
167
|
# (3)
|
91
168
|
# new new mixed style e.g. /at-austria/1--w-wien--eastern/cities
|
92
|
-
# "classic" country plus new new
|
169
|
+
# "classic" country plus new new state
|
93
170
|
#
|
94
171
|
# (4)
|
95
172
|
# new new mixed style e.g. /1--at-austria--central/w-wien/cities
|
96
|
-
# new new country plus "classic"
|
97
|
-
|
98
|
-
country_key = $1.dup
|
99
|
-
region_key = $2.dup
|
100
|
-
yield( country_key, region_key )
|
101
|
-
true # bingo - match found
|
102
|
-
else
|
103
|
-
false # no match found
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. districts|counties|etc.
|
110
|
-
|
111
|
-
# auto-add required country n regions (from folder structure)
|
112
|
-
#
|
113
|
-
# e.g. de-deutschland!/3--by-bayern/districts (regierungsbezirke)
|
114
|
-
# europe/de-deutschland!/3--by-bayern/districts
|
115
|
-
#
|
116
|
-
# at-austria!/1--n-niederoesterreich/counties (bezirke)
|
117
|
-
|
118
|
-
xxx_pattern = "#{xxx}"
|
119
|
-
|
120
|
-
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
121
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/
|
122
|
-
|
123
|
-
country_key = $1.dup
|
124
|
-
region_key = $2.dup
|
125
|
-
yield( country_key, region_key )
|
126
|
-
true # bingo - match found
|
127
|
-
else
|
128
|
-
false # no match found
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
|
133
|
-
def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. districts|counties|etc.
|
134
|
-
|
135
|
-
# auto-add required country n regions (from folder structure)
|
136
|
-
#
|
137
|
-
# e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
|
138
|
-
# europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
|
139
|
-
|
140
|
-
xxx_pattern = "#{xxx}"
|
141
|
-
|
142
|
-
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
143
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
|
173
|
+
# new new country plus "classic" state
|
144
174
|
|
145
175
|
country_key = $1.dup
|
146
|
-
|
147
|
-
|
148
|
-
yield( country_key, region_key, adm2 )
|
176
|
+
state_key = $2.dup
|
177
|
+
yield( country_key, state_key )
|
149
178
|
true # bingo - match found
|
150
179
|
else
|
151
180
|
false # no match found
|
152
181
|
end
|
153
182
|
end
|
154
183
|
|
155
|
-
|
156
|
-
def match_adm2_for_country( name, &blk )
|
157
|
-
## note: also try synonyms e.g. districts|counties
|
158
|
-
## note: counties might also be an adm3 match
|
159
|
-
found = match_xxx_for_country_n_adm1( name, 'districts', &blk )
|
160
|
-
found = match_xxx_for_country_n_adm1( name, 'counties', &blk ) unless found
|
161
|
-
found
|
162
|
-
end
|
163
|
-
|
164
|
-
def match_adm3_for_country( name, &blk )
|
165
|
-
match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
|
166
|
-
end
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
def match_cities_for_country( name, &blk )
|
171
|
-
## todo: check if there's a better (more ruby way) to pass along code block ??
|
172
|
-
## e.g. try
|
173
|
-
## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
|
174
|
-
|
175
|
-
match_xxx_for_country( name, 'cities', &blk )
|
176
|
-
end
|
177
|
-
|
178
|
-
def match_regions_for_country( name, &blk )
|
179
|
-
## also try synonyms e.g. old regions (if not match for states)
|
180
|
-
found = match_xxx_for_country( name, 'states', &blk )
|
181
|
-
found = match_xxx_for_country( name, 'regions', &blk ) unless found
|
182
|
-
found
|
183
|
-
end
|
184
|
-
|
185
|
-
def match_regions_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
186
|
-
## also try synonyms e.g. old regions (if not match for states)
|
187
|
-
found = match_xxx_for_country( name, 'states\.abbr', &blk )
|
188
|
-
found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
|
189
|
-
found
|
190
|
-
end
|
191
|
-
|
192
|
-
def match_regions_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
193
|
-
## also try synonyms e.g. old regions (if not match for states)
|
194
|
-
found = match_xxx_for_country( name, 'states\.iso', &blk )
|
195
|
-
found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
|
196
|
-
found
|
197
|
-
end
|
198
|
-
|
199
|
-
def match_regions_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
200
|
-
## also try synonyms e.g. old regions (if not match for states)
|
201
|
-
found = match_xxx_for_country( name, 'states\.nuts', &blk )
|
202
|
-
found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
|
203
|
-
found
|
204
|
-
end
|
205
|
-
|
206
|
-
|
207
|
-
def match_countries_for_continent( name )
|
208
|
-
if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
|
209
|
-
### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
|
210
|
-
## auto-add continent (from folder structure) as tag
|
211
|
-
## fix: allow dash/hyphen/minus in tag
|
212
|
-
continent = $1.dup
|
213
|
-
yield( continent )
|
214
|
-
true
|
215
|
-
else
|
216
|
-
false # no match found
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
184
|
end # module Matcher
|
221
185
|
|
222
186
|
end # module WorldDb
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
####
|
4
|
+
## matchers for adm2,adm3,etc
|
5
|
+
## e.g. parts (regierungsbezirke)
|
6
|
+
## counties (kreise,bezirke)
|
7
|
+
|
8
|
+
module WorldDb
|
9
|
+
|
10
|
+
module Matcher
|
11
|
+
|
12
|
+
def match_adm2_parts_for_country( name, &blk )
|
13
|
+
match_xxx_for_country_n_adm1( name, 'parts', &blk )
|
14
|
+
end
|
15
|
+
|
16
|
+
def match_adm2_counties_for_country( name, &blk )
|
17
|
+
## note: counties might also be an adm3 match
|
18
|
+
match_xxx_for_country_n_adm1( name, 'counties', &blk )
|
19
|
+
end
|
20
|
+
|
21
|
+
def match_adm3_counties_for_country( name, &blk )
|
22
|
+
match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
private
|
27
|
+
# note: returns name as capture (no code required)
|
28
|
+
WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
|
29
|
+
|
30
|
+
###
|
31
|
+
## todo/check: adm1 => state
|
32
|
+
## why use adm1 and not state ?? duplicate of match_xxx_for_country_n_state ??
|
33
|
+
def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. parts|counties|etc.
|
34
|
+
|
35
|
+
# auto-add required country n states (from folder structure)
|
36
|
+
#
|
37
|
+
# e.g. de-deutschland!/3--by-bayern/parts (regierungsbezirke)
|
38
|
+
# europe/de-deutschland!/3--by-bayern/parts
|
39
|
+
#
|
40
|
+
# at-austria!/1--n-niederoesterreich/counties (bezirke)
|
41
|
+
|
42
|
+
xxx_pattern = "#{xxx}"
|
43
|
+
|
44
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
45
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/
|
46
|
+
|
47
|
+
country_key = $1.dup
|
48
|
+
state_key = $2.dup
|
49
|
+
yield( country_key, state_key )
|
50
|
+
true # bingo - match found
|
51
|
+
else
|
52
|
+
false # no match found
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. parts|counties|etc.
|
58
|
+
|
59
|
+
# auto-add required country n states (from folder structure)
|
60
|
+
#
|
61
|
+
# e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
|
62
|
+
# europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
|
63
|
+
|
64
|
+
xxx_pattern = "#{xxx}"
|
65
|
+
|
66
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
67
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
|
68
|
+
|
69
|
+
country_key = $1.dup
|
70
|
+
state_key = $2.dup
|
71
|
+
adm2 = $3.dup # lowercase name e.g. oberfranken, oberbayern, etc.
|
72
|
+
yield( country_key, state_key, adm2 )
|
73
|
+
true # bingo - match found
|
74
|
+
else
|
75
|
+
false # no match found
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
end # module Matcher
|
80
|
+
|
81
|
+
end # module WorldDb
|
82
|
+
|