worlddb-models 2.2.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +31 -13
- data/README.md +7 -7
- data/Rakefile +1 -1
- data/lib/worlddb/deleter.rb +6 -1
- data/lib/worlddb/helpers/value_helper.rb +117 -0
- data/lib/worlddb/matcher.rb +99 -135
- data/lib/worlddb/matcher_adm.rb +82 -0
- data/lib/worlddb/models/city.rb +30 -208
- data/lib/worlddb/models/city_base.rb +220 -0
- data/lib/worlddb/models/continent.rb +9 -0
- data/lib/worlddb/models/country.rb +21 -4
- data/lib/worlddb/models/forward.rb +25 -9
- data/lib/worlddb/models/lang.rb +6 -0
- data/lib/worlddb/models/place.rb +1 -1
- data/lib/worlddb/models/state.rb +83 -0
- data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
- data/lib/worlddb/models/tagdb/tag.rb +1 -1
- data/lib/worlddb/models.rb +11 -8
- data/lib/worlddb/patterns.rb +4 -4
- data/lib/worlddb/reader.rb +68 -39
- data/lib/worlddb/reader_file.rb +36 -3
- data/lib/worlddb/reader_zip.rb +33 -3
- data/lib/worlddb/readers/base.rb +149 -0
- data/lib/worlddb/readers/city.rb +2 -65
- data/lib/worlddb/readers/country.rb +2 -63
- data/lib/worlddb/readers/lang.rb +3 -68
- data/lib/worlddb/readers/state.rb +61 -0
- data/lib/worlddb/readers/state_tree.rb +118 -0
- data/lib/worlddb/readers/usage.rb +2 -65
- data/lib/worlddb/schema.rb +142 -43
- data/lib/worlddb/stats.rb +7 -4
- data/lib/worlddb/tree_reader.rb +97 -0
- data/lib/worlddb/version.rb +2 -2
- data/test/adm/test_fixture_matcher_adm2.rb +73 -0
- data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
- data/test/adm/test_fixture_matcher_tree.rb +52 -0
- data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
- data/test/adm/test_read_tree.rb +63 -0
- data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
- data/test/data/at-austria/orte.txt +23 -0
- data/test/data/at-austria/setups/tree.txt +9 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
- data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
- data/test/data/de-deutschland/orte.txt +12 -0
- data/test/data/de-deutschland/setups/adm.txt +1 -1
- data/test/data/de-deutschland/setups/tree.txt +9 -0
- data/test/helper.rb +8 -1
- data/test/test_fixture_matchers.rb +9 -10
- data/test/test_fixture_matchers_ii.rb +20 -19
- data/test/test_model_city.rb +26 -9
- data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
- data/test/test_model_country.rb +1 -1
- data/test/test_model_state.rb +54 -0
- data/test/test_model_states_at.rb +111 -0
- data/test/test_model_states_de.rb +147 -0
- data/test/test_models.rb +10 -3
- data/test/test_parse_city.rb +70 -0
- data/test/test_parse_country.rb +56 -0
- data/test/test_parse_state.rb +46 -0
- data/test/test_state_tree_reader_at.rb +54 -0
- data/test/test_state_tree_reader_de.rb +71 -0
- data/test/test_tree_reader.rb +39 -0
- metadata +50 -22
- data/lib/worlddb/models/city_compat.rb +0 -27
- data/lib/worlddb/models/continent_compat.rb +0 -24
- data/lib/worlddb/models/country_compat.rb +0 -35
- data/lib/worlddb/models/lang_compat.rb +0 -23
- data/lib/worlddb/models/region_compat.rb +0 -26
- data/lib/worlddb/readers/region.rb +0 -79
- data/test/test_fixture_matcher_adm2.rb +0 -62
- data/test/test_model_region.rb +0 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe2b1dd265c24760835eeefbd445c3a8302eb554
|
4
|
+
data.tar.gz: 35aeaaac8672937b8388825aa4c36cc11ce4651b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60c27ed1d0fceebca6edef19c0a50a542dafdbd66b71bdf9ee0e84aa641de7569d856b9fae697ffb844428ee24b72c505e663dae1bd9b4bc8d8b54f66e9bef9c
|
7
|
+
data.tar.gz: 6acf8fcfe52845533582c7dbd010f17feb834beb0110c278e2cc72607d7240be894bff150facf73749f9f5419685e8216d05db6732c351fb44df209af52aec8f
|
data/Manifest.txt
CHANGED
@@ -3,22 +3,21 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/worlddb/deleter.rb
|
6
|
+
lib/worlddb/helpers/value_helper.rb
|
6
7
|
lib/worlddb/matcher.rb
|
8
|
+
lib/worlddb/matcher_adm.rb
|
7
9
|
lib/worlddb/models.rb
|
8
10
|
lib/worlddb/models/city.rb
|
9
|
-
lib/worlddb/models/
|
11
|
+
lib/worlddb/models/city_base.rb
|
10
12
|
lib/worlddb/models/continent.rb
|
11
|
-
lib/worlddb/models/continent_compat.rb
|
12
13
|
lib/worlddb/models/country.rb
|
13
14
|
lib/worlddb/models/country_code.rb
|
14
|
-
lib/worlddb/models/country_compat.rb
|
15
15
|
lib/worlddb/models/forward.rb
|
16
16
|
lib/worlddb/models/lang.rb
|
17
|
-
lib/worlddb/models/lang_compat.rb
|
18
17
|
lib/worlddb/models/name.rb
|
19
18
|
lib/worlddb/models/place.rb
|
20
|
-
lib/worlddb/models/
|
21
|
-
lib/worlddb/models/
|
19
|
+
lib/worlddb/models/state.rb
|
20
|
+
lib/worlddb/models/state_base.rb
|
22
21
|
lib/worlddb/models/tagdb/tag.rb
|
23
22
|
lib/worlddb/models/tagdb/tagging.rb
|
24
23
|
lib/worlddb/models/usage.rb
|
@@ -26,32 +25,51 @@ lib/worlddb/patterns.rb
|
|
26
25
|
lib/worlddb/reader.rb
|
27
26
|
lib/worlddb/reader_file.rb
|
28
27
|
lib/worlddb/reader_zip.rb
|
28
|
+
lib/worlddb/readers/base.rb
|
29
29
|
lib/worlddb/readers/city.rb
|
30
30
|
lib/worlddb/readers/country.rb
|
31
31
|
lib/worlddb/readers/lang.rb
|
32
|
-
lib/worlddb/readers/
|
32
|
+
lib/worlddb/readers/state.rb
|
33
|
+
lib/worlddb/readers/state_tree.rb
|
33
34
|
lib/worlddb/readers/usage.rb
|
34
35
|
lib/worlddb/schema.rb
|
35
36
|
lib/worlddb/stats.rb
|
37
|
+
lib/worlddb/tree_reader.rb
|
36
38
|
lib/worlddb/version.rb
|
39
|
+
test/adm/test_fixture_matcher_adm2.rb
|
40
|
+
test/adm/test_fixture_matcher_adm3.rb
|
41
|
+
test/adm/test_fixture_matcher_tree.rb
|
42
|
+
test/adm/test_read_adm.rb
|
43
|
+
test/adm/test_read_tree.rb
|
37
44
|
test/data/at-austria/1--b-burgenland/counties.txt
|
38
45
|
test/data/at-austria/2--n-niederoesterreich/counties.txt
|
39
46
|
test/data/at-austria/3--w-wien/counties.txt
|
47
|
+
test/data/at-austria/orte.txt
|
40
48
|
test/data/at-austria/setups/adm.txt
|
49
|
+
test/data/at-austria/setups/tree.txt
|
41
50
|
test/data/at-austria/states.txt
|
42
51
|
test/data/de-deutschland/3--by-bayern/1--oberbayern/counties.txt
|
43
52
|
test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt
|
44
|
-
test/data/de-deutschland/3--by-bayern/
|
53
|
+
test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
|
54
|
+
test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
|
55
|
+
test/data/de-deutschland/3--by-bayern/parts.txt
|
56
|
+
test/data/de-deutschland/orte.txt
|
45
57
|
test/data/de-deutschland/setups/adm.txt
|
58
|
+
test/data/de-deutschland/setups/tree.txt
|
46
59
|
test/data/de-deutschland/states.txt
|
47
60
|
test/helper.rb
|
48
|
-
test/test_fixture_matcher_adm2.rb
|
49
|
-
test/test_fixture_matcher_adm3.rb
|
50
61
|
test/test_fixture_matchers.rb
|
51
62
|
test/test_fixture_matchers_ii.rb
|
52
63
|
test/test_model_city.rb
|
53
|
-
test/
|
64
|
+
test/test_model_compat.rb
|
54
65
|
test/test_model_country.rb
|
55
|
-
test/
|
66
|
+
test/test_model_state.rb
|
67
|
+
test/test_model_states_at.rb
|
68
|
+
test/test_model_states_de.rb
|
56
69
|
test/test_models.rb
|
57
|
-
test/
|
70
|
+
test/test_parse_city.rb
|
71
|
+
test/test_parse_country.rb
|
72
|
+
test/test_parse_state.rb
|
73
|
+
test/test_state_tree_reader_at.rb
|
74
|
+
test/test_state_tree_reader_de.rb
|
75
|
+
test/test_tree_reader.rb
|
data/README.md
CHANGED
@@ -30,9 +30,9 @@ Everything is a place.
|
|
30
30
|
at.area
|
31
31
|
# => 83_871
|
32
32
|
|
33
|
-
at.
|
33
|
+
at.states.count
|
34
34
|
# => 9
|
35
|
-
at.
|
35
|
+
at.states
|
36
36
|
# => [ 'Wien', 'Niederösterreich', 'Oberösterreich', ... ]
|
37
37
|
|
38
38
|
at.cities.by_pop
|
@@ -52,9 +52,9 @@ Everything is a place.
|
|
52
52
|
la = City.find_by! key: 'losangeles'
|
53
53
|
la.name
|
54
54
|
# => 'Los Angeles'
|
55
|
-
la.
|
55
|
+
la.state.name
|
56
56
|
# => 'California'
|
57
|
-
la.
|
57
|
+
la.state.key
|
58
58
|
# => 'ca'
|
59
59
|
la.country.name
|
60
60
|
# => 'United States'
|
@@ -73,11 +73,11 @@ Everything is a place.
|
|
73
73
|
# => ['Austria', 'Belgium', 'Cyprus', ... ]
|
74
74
|
|
75
75
|
flanders = Tag.find_by! key: 'flanders'
|
76
|
-
flanders.
|
76
|
+
flanders.states.count
|
77
77
|
# => 5
|
78
|
-
flanders.
|
78
|
+
flanders.states
|
79
79
|
# => ['Antwerpen', 'Brabant Wallon', 'Limburg', 'Oost-Vlaanderen', 'West-Vlaanderen']
|
80
|
-
flanders.
|
80
|
+
flanders.states.first.country.name
|
81
81
|
# => 'Belgium'
|
82
82
|
|
83
83
|
and so on.
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ Hoe.spec 'worlddb-models' do
|
|
17
17
|
self.extra_deps = [
|
18
18
|
['props', '>= 1.1.2'], # settings / prop(ertie)s / env / INI
|
19
19
|
['logutils', '>= 0.6.1'], # logging
|
20
|
-
['textutils', '>=
|
20
|
+
['textutils', '>= 1.2.2'],
|
21
21
|
|
22
22
|
['tagutils', '>= 0.3.0'], # tags n categories for activerecord
|
23
23
|
['activerecord-utils', '>= 0.2.0'],
|
data/lib/worlddb/deleter.rb
CHANGED
@@ -18,7 +18,12 @@ module WorldDb
|
|
18
18
|
Name.delete_all
|
19
19
|
Place.delete_all
|
20
20
|
City.delete_all
|
21
|
-
|
21
|
+
Metro.delete_all
|
22
|
+
District.delete_all
|
23
|
+
State.delete_all
|
24
|
+
Part.delete_all
|
25
|
+
County.delete_all
|
26
|
+
Muni.delete_all
|
22
27
|
Country.delete_all
|
23
28
|
Continent.delete_all
|
24
29
|
Usage.delete_all
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
## NOTE:
|
5
|
+
## add helpers to textutils(!!) valuehelper
|
6
|
+
## do NOT create worlddb module
|
7
|
+
|
8
|
+
module TextUtils
|
9
|
+
module ValueHelper
|
10
|
+
|
11
|
+
## todo/check: add to pair of matchers??
|
12
|
+
# e.g. match_country and match_country!
|
13
|
+
# - match_country will use find_by_key and match_country! will use find_by_key! - why? why not?
|
14
|
+
|
15
|
+
def match_country( value )
|
16
|
+
if value =~ /^country:/ # country:
|
17
|
+
country_key = value[8..-1] # cut off country: prefix
|
18
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
19
|
+
yield( country )
|
20
|
+
true # bingo - match found
|
21
|
+
else
|
22
|
+
false # no match found
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def match_supra( value )
|
27
|
+
if value =~ /^supra:/ # supra:
|
28
|
+
country_key = value[6..-1] # cut off supra: prefix
|
29
|
+
country = WorldDb::Model::Country.find_by_key!( country_key )
|
30
|
+
yield( country )
|
31
|
+
true # bingo - match found
|
32
|
+
else
|
33
|
+
false # no match found
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def match_supra_flag( value ) # supranational (country)
|
38
|
+
if value =~ /^supra$/ # supra(national)
|
39
|
+
yield( true )
|
40
|
+
true # bingo - match found
|
41
|
+
else
|
42
|
+
false # no match found
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
def is_state?( value ) # note: was is_region? (use new name only)
|
48
|
+
# assume state code e.g. TX or N
|
49
|
+
#
|
50
|
+
# todo/fix: allow three letter states too e.g. BRU (brussels) - not yet matched by the pattern below (1-2 letters only)
|
51
|
+
match_result = value =~ /^[A-Z]{1,2}$/
|
52
|
+
# match found if 0,1,2,3 etc or no match if nil
|
53
|
+
# note: return bool e.g. false|true (not 0,1,2,3 etc. and nil)
|
54
|
+
match_result != nil
|
55
|
+
end
|
56
|
+
|
57
|
+
## fix/todo: use match_state_for_country! w/ !!! why? why not?
|
58
|
+
def match_state_for_country( value, country_id ) ## NB: required country_id
|
59
|
+
if value =~ /^state:/ ## state:
|
60
|
+
state_key = value[6..-1] ## cut off state: prefix
|
61
|
+
state = WorldDb::Model::State.find_by_key_and_country_id!( state_key, country_id )
|
62
|
+
yield( state )
|
63
|
+
true # bingo - match found
|
64
|
+
elsif is_state?( value ) ## assume state code e.g. TX or N
|
65
|
+
state = WorldDb::Model::State.find_by_key_and_country_id!( value.downcase, country_id )
|
66
|
+
yield( state )
|
67
|
+
true # bingo - match found
|
68
|
+
else
|
69
|
+
false # no match found
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
def match_city( value ) # NB: might be nil (city not found)
|
75
|
+
if value =~ /^city:/ ## city:
|
76
|
+
city_key = value[5..-1] ## cut off city: prefix
|
77
|
+
city = WorldDb::Model::City.find_by_key( city_key )
|
78
|
+
yield( city ) # NB: might be nil (city not found)
|
79
|
+
true # bingo - match found
|
80
|
+
else
|
81
|
+
false # no match found
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def match_metro( value )
|
86
|
+
if value =~ /^metro:/ ## metro:
|
87
|
+
metro_key = value[6..-1] ## cut off metro: prefix
|
88
|
+
metro = WorldDb::Model::Metro.find_by_key!( metro_key )
|
89
|
+
yield( metro )
|
90
|
+
true # bingo - match found
|
91
|
+
else
|
92
|
+
false # no match found
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def match_metro_flag( value )
|
97
|
+
if value =~ /^metro$/ # metro(politan area)
|
98
|
+
yield( true )
|
99
|
+
true # bingo - match found
|
100
|
+
else
|
101
|
+
false # no match found
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def match_metro_pop( value )
|
106
|
+
if value =~ /^m:/ # m:
|
107
|
+
num = value[2..-1].gsub(/[ _]/, '').to_i # cut off m: prefix; allow space and _ in number
|
108
|
+
yield( num )
|
109
|
+
true # bingo - match found
|
110
|
+
else
|
111
|
+
false # no match found
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
end # module ValueHelper
|
116
|
+
end # module TextUtils
|
117
|
+
|
data/lib/worlddb/matcher.rb
CHANGED
@@ -4,26 +4,88 @@ module WorldDb
|
|
4
4
|
|
5
5
|
module Matcher
|
6
6
|
|
7
|
+
def match_tree_for_country( name, &blk ) ## rename to state_tree ?? why? why not??
|
8
|
+
## match state_tree (for now use orte.txt for austria, deutschland etc.)
|
9
|
+
## todo/fix: add more "generic" names
|
10
|
+
|
11
|
+
simple_match_xxx_for_country( name, 'orte', &blk ) ## note: uses special **simple**_match_xxx_...
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
def match_cities_for_country( name, &blk )
|
16
|
+
## todo: check if there's a better (more ruby way) to pass along code block ??
|
17
|
+
## e.g. try
|
18
|
+
## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
|
19
|
+
|
20
|
+
match_xxx_for_country( name, 'cities', &blk )
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
def match_states_for_country( name, &blk )
|
26
|
+
## todo/fix: remove regions (obsolete) - no longer supported
|
27
|
+
## also try synonyms e.g. old regions (if not match for states)
|
28
|
+
found = match_xxx_for_country( name, 'states', &blk )
|
29
|
+
found = match_xxx_for_country( name, 'regions', &blk ) unless found
|
30
|
+
found
|
31
|
+
end
|
32
|
+
|
33
|
+
def match_states_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
34
|
+
## also try synonyms e.g. old regions (if not match for states)
|
35
|
+
found = match_xxx_for_country( name, 'states\.abbr', &blk )
|
36
|
+
found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
|
37
|
+
found
|
38
|
+
end
|
39
|
+
|
40
|
+
def match_states_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
41
|
+
## also try synonyms e.g. old regions (if not match for states)
|
42
|
+
found = match_xxx_for_country( name, 'states\.iso', &blk )
|
43
|
+
found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
|
44
|
+
found
|
45
|
+
end
|
46
|
+
|
47
|
+
def match_states_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
48
|
+
## also try synonyms e.g. old regions (if not match for states)
|
49
|
+
found = match_xxx_for_country( name, 'states\.nuts', &blk )
|
50
|
+
found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
|
51
|
+
found
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
def match_countries_for_continent( name )
|
56
|
+
if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
|
57
|
+
### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
|
58
|
+
## auto-add continent (from folder structure) as tag
|
59
|
+
## fix: allow dash/hyphen/minus in tag
|
60
|
+
continent = $1.dup
|
61
|
+
yield( continent )
|
62
|
+
true
|
63
|
+
else
|
64
|
+
false # no match found
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
private
|
70
|
+
|
7
71
|
# note: returns code as capture
|
8
72
|
WORLD_COUNTRY_CODE_PATTERN = '([a-z]{2,3})'
|
9
73
|
WORLD_COUNTRY_CLASSIC_PATTERN = "#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
|
10
74
|
WORLD_COUNTRY_MODERN_PATTERN = "[0-9]+--#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+" ## note: if you use "" need to double escape backslash!!!
|
11
75
|
|
12
76
|
# note: returns code as capture
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# note: returns name as capture (no code required)
|
18
|
-
WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
|
77
|
+
WORLD_STATE_CODE_PATTERN = '([a-z]{1,3})'
|
78
|
+
WORLD_STATE_CLASSIC_PATTERN = "#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
|
79
|
+
WORLD_STATE_MODERN_PATTERN = "[0-9]+--#{WORLD_STATE_CODE_PATTERN}-[^\\/]+"
|
19
80
|
|
20
|
-
|
81
|
+
## allow optional folders -- TODO: add restriction ?? e.g. must be 4+ alphas ???
|
21
82
|
WORLD_OPT_FOLDERS_PATTERN = "(?:\\/[^\\/]+)*" ## check: use double \\ or just \ ??
|
22
83
|
|
23
84
|
|
24
|
-
|
85
|
+
|
86
|
+
def match_xxx_for_country( name, xxx ) # xxx e.g. cities|states|beers|breweries
|
25
87
|
# auto-add required country code (from folder structure)
|
26
|
-
# note: always let
|
88
|
+
# note: always let match_xxx_for_country_n_state go first
|
27
89
|
|
28
90
|
# note: allow /cities and /1--hokkaido--cities
|
29
91
|
xxx_pattern = "(?:#{xxx}|[0-9]+--[^\\/]+?--#{xxx})" # note: double escape \\ required for backslash
|
@@ -48,7 +110,7 @@ module Matcher
|
|
48
110
|
#
|
49
111
|
# (3) classic style: e.g. /at/beers (europe/at/cities)
|
50
112
|
#
|
51
|
-
# (4) new style w/
|
113
|
+
# (4) new style w/ state w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
|
52
114
|
#
|
53
115
|
# (5) compact style (country part of filename):
|
54
116
|
# e.g. /at-austria--cities or /europe/at-austria--cities
|
@@ -57,12 +119,27 @@ module Matcher
|
|
57
119
|
end
|
58
120
|
end
|
59
121
|
|
122
|
+
def simple_match_xxx_for_country( name, xxx )
|
123
|
+
xxx_pattern = "(?:#{xxx})" ## just xxx for now
|
124
|
+
|
125
|
+
## used for state tree (e.g. orte.txt)
|
126
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||
|
127
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/
|
128
|
+
|
129
|
+
country_key = $1.dup
|
130
|
+
yield( country_key )
|
131
|
+
true # bingo - match found
|
132
|
+
else
|
133
|
+
false # no match found
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
60
137
|
|
61
|
-
def
|
138
|
+
def match_xxx_for_country_n_state( name, xxx ) # xxx e.g. wine|wineries
|
62
139
|
|
63
|
-
# auto-add required country n
|
140
|
+
# auto-add required country n state code (from folder structure)
|
64
141
|
|
65
|
-
## -- allow opt_folders after long
|
142
|
+
## -- allow opt_folders after long states (e.g. additional substate/zone)
|
66
143
|
## -- allow anything (prefixes) before -- for xxx
|
67
144
|
# e.g. at-austria!/1--n-niederoesterreich--eastern/wagram--wines
|
68
145
|
# at-austria!/1--n-niederoesterreich--eastern/wagram--wagram--wines
|
@@ -72,12 +149,12 @@ module Matcher
|
|
72
149
|
oldoldold_xxx_pattern = "(?:#{xxx}|[^\\/]+--#{xxx})"
|
73
150
|
xxx_pattern = "(?:#{xxx}|[^\\/]+#{xxx})" # note: double escape \\ required for backslash
|
74
151
|
|
75
|
-
## note: for now only (style #2) n (style #3) that is long
|
152
|
+
## note: for now only (style #2) n (style #3) that is long state allow opt folders
|
76
153
|
|
77
|
-
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{
|
78
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{
|
79
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{
|
80
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{
|
154
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (1)
|
155
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (2)
|
156
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ || # (3)
|
157
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/ # (4)
|
81
158
|
|
82
159
|
#######
|
83
160
|
# nb: country must start name (^) or coming after / e.g. europe/at-austria/...
|
@@ -89,134 +166,21 @@ module Matcher
|
|
89
166
|
#
|
90
167
|
# (3)
|
91
168
|
# new new mixed style e.g. /at-austria/1--w-wien--eastern/cities
|
92
|
-
# "classic" country plus new new
|
169
|
+
# "classic" country plus new new state
|
93
170
|
#
|
94
171
|
# (4)
|
95
172
|
# new new mixed style e.g. /1--at-austria--central/w-wien/cities
|
96
|
-
# new new country plus "classic"
|
97
|
-
|
98
|
-
country_key = $1.dup
|
99
|
-
region_key = $2.dup
|
100
|
-
yield( country_key, region_key )
|
101
|
-
true # bingo - match found
|
102
|
-
else
|
103
|
-
false # no match found
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. districts|counties|etc.
|
110
|
-
|
111
|
-
# auto-add required country n regions (from folder structure)
|
112
|
-
#
|
113
|
-
# e.g. de-deutschland!/3--by-bayern/districts (regierungsbezirke)
|
114
|
-
# europe/de-deutschland!/3--by-bayern/districts
|
115
|
-
#
|
116
|
-
# at-austria!/1--n-niederoesterreich/counties (bezirke)
|
117
|
-
|
118
|
-
xxx_pattern = "#{xxx}"
|
119
|
-
|
120
|
-
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
121
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/
|
122
|
-
|
123
|
-
country_key = $1.dup
|
124
|
-
region_key = $2.dup
|
125
|
-
yield( country_key, region_key )
|
126
|
-
true # bingo - match found
|
127
|
-
else
|
128
|
-
false # no match found
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
|
133
|
-
def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. districts|counties|etc.
|
134
|
-
|
135
|
-
# auto-add required country n regions (from folder structure)
|
136
|
-
#
|
137
|
-
# e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
|
138
|
-
# europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
|
139
|
-
|
140
|
-
xxx_pattern = "#{xxx}"
|
141
|
-
|
142
|
-
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
143
|
-
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
|
173
|
+
# new new country plus "classic" state
|
144
174
|
|
145
175
|
country_key = $1.dup
|
146
|
-
|
147
|
-
|
148
|
-
yield( country_key, region_key, adm2 )
|
176
|
+
state_key = $2.dup
|
177
|
+
yield( country_key, state_key )
|
149
178
|
true # bingo - match found
|
150
179
|
else
|
151
180
|
false # no match found
|
152
181
|
end
|
153
182
|
end
|
154
183
|
|
155
|
-
|
156
|
-
def match_adm2_for_country( name, &blk )
|
157
|
-
## note: also try synonyms e.g. districts|counties
|
158
|
-
## note: counties might also be an adm3 match
|
159
|
-
found = match_xxx_for_country_n_adm1( name, 'districts', &blk )
|
160
|
-
found = match_xxx_for_country_n_adm1( name, 'counties', &blk ) unless found
|
161
|
-
found
|
162
|
-
end
|
163
|
-
|
164
|
-
def match_adm3_for_country( name, &blk )
|
165
|
-
match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
|
166
|
-
end
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
def match_cities_for_country( name, &blk )
|
171
|
-
## todo: check if there's a better (more ruby way) to pass along code block ??
|
172
|
-
## e.g. try
|
173
|
-
## match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
|
174
|
-
|
175
|
-
match_xxx_for_country( name, 'cities', &blk )
|
176
|
-
end
|
177
|
-
|
178
|
-
def match_regions_for_country( name, &blk )
|
179
|
-
## also try synonyms e.g. old regions (if not match for states)
|
180
|
-
found = match_xxx_for_country( name, 'states', &blk )
|
181
|
-
found = match_xxx_for_country( name, 'regions', &blk ) unless found
|
182
|
-
found
|
183
|
-
end
|
184
|
-
|
185
|
-
def match_regions_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
186
|
-
## also try synonyms e.g. old regions (if not match for states)
|
187
|
-
found = match_xxx_for_country( name, 'states\.abbr', &blk )
|
188
|
-
found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
|
189
|
-
found
|
190
|
-
end
|
191
|
-
|
192
|
-
def match_regions_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
193
|
-
## also try synonyms e.g. old regions (if not match for states)
|
194
|
-
found = match_xxx_for_country( name, 'states\.iso', &blk )
|
195
|
-
found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
|
196
|
-
found
|
197
|
-
end
|
198
|
-
|
199
|
-
def match_regions_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
|
200
|
-
## also try synonyms e.g. old regions (if not match for states)
|
201
|
-
found = match_xxx_for_country( name, 'states\.nuts', &blk )
|
202
|
-
found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
|
203
|
-
found
|
204
|
-
end
|
205
|
-
|
206
|
-
|
207
|
-
def match_countries_for_continent( name )
|
208
|
-
if name =~ /^([a-z][a-z\-_]+[a-z])\/countries/ # e.g. africa/countries or america/countries
|
209
|
-
### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
|
210
|
-
## auto-add continent (from folder structure) as tag
|
211
|
-
## fix: allow dash/hyphen/minus in tag
|
212
|
-
continent = $1.dup
|
213
|
-
yield( continent )
|
214
|
-
true
|
215
|
-
else
|
216
|
-
false # no match found
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
184
|
end # module Matcher
|
221
185
|
|
222
186
|
end # module WorldDb
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
####
|
4
|
+
## matchers for adm2,adm3,etc
|
5
|
+
## e.g. parts (regierungsbezirke)
|
6
|
+
## counties (kreise,bezirke)
|
7
|
+
|
8
|
+
module WorldDb
|
9
|
+
|
10
|
+
module Matcher
|
11
|
+
|
12
|
+
def match_adm2_parts_for_country( name, &blk )
|
13
|
+
match_xxx_for_country_n_adm1( name, 'parts', &blk )
|
14
|
+
end
|
15
|
+
|
16
|
+
def match_adm2_counties_for_country( name, &blk )
|
17
|
+
## note: counties might also be an adm3 match
|
18
|
+
match_xxx_for_country_n_adm1( name, 'counties', &blk )
|
19
|
+
end
|
20
|
+
|
21
|
+
def match_adm3_counties_for_country( name, &blk )
|
22
|
+
match_xxx_for_country_n_adm1_n_adm2( name, 'counties', &blk )
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
private
|
27
|
+
# note: returns name as capture (no code required)
|
28
|
+
WORLD_ADMIN_MODERN_PATTERN = "[0-9]+--([^\\/]+)"
|
29
|
+
|
30
|
+
###
|
31
|
+
## todo/check: adm1 => state
|
32
|
+
## why use adm1 and not state ?? duplicate of match_xxx_for_country_n_state ??
|
33
|
+
def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. parts|counties|etc.
|
34
|
+
|
35
|
+
# auto-add required country n states (from folder structure)
|
36
|
+
#
|
37
|
+
# e.g. de-deutschland!/3--by-bayern/parts (regierungsbezirke)
|
38
|
+
# europe/de-deutschland!/3--by-bayern/parts
|
39
|
+
#
|
40
|
+
# at-austria!/1--n-niederoesterreich/counties (bezirke)
|
41
|
+
|
42
|
+
xxx_pattern = "#{xxx}"
|
43
|
+
|
44
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
45
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{xxx_pattern}/
|
46
|
+
|
47
|
+
country_key = $1.dup
|
48
|
+
state_key = $2.dup
|
49
|
+
yield( country_key, state_key )
|
50
|
+
true # bingo - match found
|
51
|
+
else
|
52
|
+
false # no match found
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. parts|counties|etc.
|
58
|
+
|
59
|
+
# auto-add required country n states (from folder structure)
|
60
|
+
#
|
61
|
+
# e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties (landkreise)
|
62
|
+
# europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
|
63
|
+
|
64
|
+
xxx_pattern = "#{xxx}"
|
65
|
+
|
66
|
+
if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/ ||
|
67
|
+
name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_STATE_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx_pattern}/
|
68
|
+
|
69
|
+
country_key = $1.dup
|
70
|
+
state_key = $2.dup
|
71
|
+
adm2 = $3.dup # lowercase name e.g. oberfranken, oberbayern, etc.
|
72
|
+
yield( country_key, state_key, adm2 )
|
73
|
+
true # bingo - match found
|
74
|
+
else
|
75
|
+
false # no match found
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
end # module Matcher
|
80
|
+
|
81
|
+
end # module WorldDb
|
82
|
+
|