worlddb-models 2.2.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +31 -13
- data/README.md +7 -7
- data/Rakefile +1 -1
- data/lib/worlddb/deleter.rb +6 -1
- data/lib/worlddb/helpers/value_helper.rb +117 -0
- data/lib/worlddb/matcher.rb +99 -135
- data/lib/worlddb/matcher_adm.rb +82 -0
- data/lib/worlddb/models/city.rb +30 -208
- data/lib/worlddb/models/city_base.rb +220 -0
- data/lib/worlddb/models/continent.rb +9 -0
- data/lib/worlddb/models/country.rb +21 -4
- data/lib/worlddb/models/forward.rb +25 -9
- data/lib/worlddb/models/lang.rb +6 -0
- data/lib/worlddb/models/place.rb +1 -1
- data/lib/worlddb/models/state.rb +83 -0
- data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
- data/lib/worlddb/models/tagdb/tag.rb +1 -1
- data/lib/worlddb/models.rb +11 -8
- data/lib/worlddb/patterns.rb +4 -4
- data/lib/worlddb/reader.rb +68 -39
- data/lib/worlddb/reader_file.rb +36 -3
- data/lib/worlddb/reader_zip.rb +33 -3
- data/lib/worlddb/readers/base.rb +149 -0
- data/lib/worlddb/readers/city.rb +2 -65
- data/lib/worlddb/readers/country.rb +2 -63
- data/lib/worlddb/readers/lang.rb +3 -68
- data/lib/worlddb/readers/state.rb +61 -0
- data/lib/worlddb/readers/state_tree.rb +118 -0
- data/lib/worlddb/readers/usage.rb +2 -65
- data/lib/worlddb/schema.rb +142 -43
- data/lib/worlddb/stats.rb +7 -4
- data/lib/worlddb/tree_reader.rb +97 -0
- data/lib/worlddb/version.rb +2 -2
- data/test/adm/test_fixture_matcher_adm2.rb +73 -0
- data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
- data/test/adm/test_fixture_matcher_tree.rb +52 -0
- data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
- data/test/adm/test_read_tree.rb +63 -0
- data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
- data/test/data/at-austria/orte.txt +23 -0
- data/test/data/at-austria/setups/tree.txt +9 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
- data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
- data/test/data/de-deutschland/orte.txt +12 -0
- data/test/data/de-deutschland/setups/adm.txt +1 -1
- data/test/data/de-deutschland/setups/tree.txt +9 -0
- data/test/helper.rb +8 -1
- data/test/test_fixture_matchers.rb +9 -10
- data/test/test_fixture_matchers_ii.rb +20 -19
- data/test/test_model_city.rb +26 -9
- data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
- data/test/test_model_country.rb +1 -1
- data/test/test_model_state.rb +54 -0
- data/test/test_model_states_at.rb +111 -0
- data/test/test_model_states_de.rb +147 -0
- data/test/test_models.rb +10 -3
- data/test/test_parse_city.rb +70 -0
- data/test/test_parse_country.rb +56 -0
- data/test/test_parse_state.rb +46 -0
- data/test/test_state_tree_reader_at.rb +54 -0
- data/test/test_state_tree_reader_de.rb +71 -0
- data/test/test_tree_reader.rb +39 -0
- metadata +50 -22
- data/lib/worlddb/models/city_compat.rb +0 -27
- data/lib/worlddb/models/continent_compat.rb +0 -24
- data/lib/worlddb/models/country_compat.rb +0 -35
- data/lib/worlddb/models/lang_compat.rb +0 -23
- data/lib/worlddb/models/region_compat.rb +0 -26
- data/lib/worlddb/readers/region.rb +0 -79
- data/test/test_fixture_matcher_adm2.rb +0 -62
- data/test/test_model_region.rb +0 -50
@@ -0,0 +1,149 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
|
6
|
+
###
|
7
|
+
# todo/fix:
|
8
|
+
# try to merge ReaderBaseWithMoreAttribs and ReaderBaseWithOpts into one base - why? why not?
|
9
|
+
|
10
|
+
|
11
|
+
# Shared base class for readers that are constructed from a text plus a
# hash of extra attributes (more_attribs). It provides the three factory
# methods (from_zip, from_file, from_string) and stores the text and the
# attributes for use by the subclass.
class ReaderBaseWithMoreAttribs

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  # Builds a reader from the text content of an entry inside a zip archive.
  #
  # zip_file     - archive object; must respond to find_entry( path )
  #                (presumably a rubyzip Zip::File - TODO confirm)
  # entry_path   - path of the entry inside the archive
  # more_attribs - extra attributes handed on to the constructor
  #
  # Raises ArgumentError if the archive has no entry for entry_path
  # (before, a missing entry ended in a NoMethodError on nil).
  def self.from_zip( zip_file, entry_path, more_attribs={} )
    ## get text content from zip
    entry = zip_file.find_entry( entry_path )
    raise ArgumentError, "zip entry not found: #{entry_path}" if entry.nil?

    ## todo/fix: clean/preprocess lines
    ##   e.g. CR/LF (\r\n) to LF (e.g. \n)
    text = entry.get_input_stream().read()

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    #  for now "hardcoded" to utf8 - what else can we do?
    #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    self.from_string( text, more_attribs )
  end

  # Builds a reader from the file at path.
  def self.from_file( path, more_attribs={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text, more_attribs )
  end

  # Builds a reader from an in-memory string.
  def self.from_string( text, more_attribs={} )
    puts "[debug] ReaderBase.from_string calling #{self.name}.new"    # note: assume self is derived class (object)
    self.new( text, more_attribs )
  end


  # note: the constructor below does not set @skip_tags or @strict,
  #  so both predicates return false unless a subclass sets them
  def skip_tags?()   @skip_tags == true;  end
  def strict?()      @strict == true;     end

  # Stores the text to read and the extra attributes.
  def initialize( text, more_attribs={} )
    ## todo/fix: how to add opts={} ???

    @text         = text
    @more_attribs = more_attribs
  end

end # class ReaderBaseWithMoreAttribs
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
# Shared base class for readers that are constructed from a text plus an
# options hash. Recognized options (see initialize): :skip_tags and :strict.
class ReaderBaseWithOpts

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  # Builds a reader from the text content of an entry inside a zip archive.
  #
  # zip_file   - archive object; must respond to find_entry( path )
  #              (presumably a rubyzip Zip::File - TODO confirm)
  # entry_path - path of the entry inside the archive
  # opts       - options handed on to the constructor; optional, so
  #              existing two-argument calls keep working. Before, options
  #              could not be passed at all when reading from a zip.
  #
  # Raises ArgumentError if the archive has no entry for entry_path
  # (before, a missing entry ended in a NoMethodError on nil).
  def self.from_zip( zip_file, entry_path, opts={} )
    ## get text content from zip
    entry = zip_file.find_entry( entry_path )
    raise ArgumentError, "zip entry not found: #{entry_path}" if entry.nil?

    ## todo/fix: clean/preprocess lines
    ##   e.g. CR/LF (\r\n) to LF (e.g. \n)
    text = entry.get_input_stream().read()

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    #  for now "hardcoded" to utf8 - what else can we do?
    #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    self.from_string( text, opts )
  end

  # Builds a reader from the file at path.
  def self.from_file( path, opts={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text, opts )
  end

  # Builds a reader from an in-memory string.
  def self.from_string( text, opts={} )
    puts "[debug] ReaderBase.from_string calling #{self.name}.new"    # note: assume self is derived class (object)
    self.new( text, opts )
  end


  def skip_tags?()   @skip_tags == true;  end
  def strict?()      @strict == true;     end

  # Stores the text to read and normalizes the options to true/false flags.
  def initialize( text, opts={} )
    @text = text

    ## option: do NOT generate/add any tags for countries/regions/cities
    @skip_tags = opts[:skip_tags].present? ? true : false
    ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
    @strict    = opts[:strict].present? ? true : false
  end

end # class ReaderBaseWithOpts
|
147
|
+
|
148
|
+
|
149
|
+
end # module WorldDb
|
data/lib/worlddb/readers/city.rb
CHANGED
@@ -2,72 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class CityReader
|
5
|
+
class CityReader < ReaderBaseWithMoreAttribs
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
|
17
|
-
def self.from_zip( zip_file, entry_path, more_attribs={} )
|
18
|
-
## get text content from zip
|
19
|
-
|
20
|
-
entry = zip_file.find_entry( entry_path )
|
21
|
-
|
22
|
-
## todo/fix: add force encoding to utf-8 ??
|
23
|
-
## check!!!
|
24
|
-
## clean/prepprocess lines
|
25
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
26
|
-
text = entry.get_input_stream().read()
|
27
|
-
|
28
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
29
|
-
logger = LogUtils::Logger.root
|
30
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
31
|
-
#####
|
32
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
33
|
-
## NB:
|
34
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
35
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
36
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
37
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
38
|
-
|
39
|
-
## todo:
|
40
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
41
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
42
|
-
|
43
|
-
self.from_string( text, more_attribs )
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
def self.from_file( path, more_attribs={} )
|
48
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
49
|
-
## - see textutils/utils.rb
|
50
|
-
text = File.read_utf8( path )
|
51
|
-
self.from_string( text, more_attribs )
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.from_string( text, more_attribs={} )
|
55
|
-
CityReader.new( text, more_attribs )
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
def skip_tags?() @skip_tags == true; end
|
60
|
-
def strict?() @strict == true; end
|
61
|
-
|
62
|
-
def initialize( text, more_attribs={} )
|
63
|
-
## todo/fix: how to add opts={} ???
|
64
|
-
|
65
|
-
@text = text
|
66
|
-
@more_attribs = more_attribs
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
def read()
|
7
|
+
def read
|
71
8
|
reader = ValuesReader.from_string( @text, @more_attribs )
|
72
9
|
|
73
10
|
reader.each_line do |attribs, values|
|
@@ -2,70 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class CountryReader
|
5
|
+
class CountryReader < ReaderBaseWithMoreAttribs
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
|
17
|
-
def self.from_zip( zip_file, entry_path, more_attribs={} )
|
18
|
-
## get text content from zip
|
19
|
-
|
20
|
-
entry = zip_file.find_entry( entry_path )
|
21
|
-
|
22
|
-
## todo/fix: add force encoding to utf-8 ??
|
23
|
-
## check!!!
|
24
|
-
## clean/prepprocess lines
|
25
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
26
|
-
text = entry.get_input_stream().read()
|
27
|
-
|
28
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
29
|
-
logger = LogUtils::Logger.root
|
30
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
31
|
-
#####
|
32
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
33
|
-
## NB:
|
34
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
35
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
36
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
37
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
38
|
-
|
39
|
-
## todo:
|
40
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
41
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
42
|
-
|
43
|
-
self.from_string( text, more_attribs )
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.from_file( path, more_attribs={} )
|
47
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
48
|
-
## - see textutils/utils.rb
|
49
|
-
text = File.read_utf8( path )
|
50
|
-
self.from_string( text, more_attribs )
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.from_string( text, more_attribs={} )
|
54
|
-
CountryReader.new( text, more_attribs )
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
def skip_tags?() @skip_tags == true; end
|
59
|
-
def strict?() @strict == true; end
|
60
|
-
|
61
|
-
def initialize( text, more_attribs={} )
|
62
|
-
## todo/fix: how to add opts={} ???
|
63
|
-
|
64
|
-
@text = text
|
65
|
-
@more_attribs = more_attribs
|
66
|
-
end
|
67
|
-
|
68
|
-
def read()
|
7
|
+
def read
|
69
8
|
reader = ValuesReader.from_string( @text, @more_attribs )
|
70
9
|
|
71
10
|
reader.each_line do |attribs, values|
|
data/lib/worlddb/readers/lang.rb
CHANGED
@@ -2,73 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class LangReader
|
5
|
+
class LangReader < ReaderBaseWithOpts
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
|
17
|
-
## todo: add opts={} etc.
|
18
|
-
def self.from_zip( zip_file, entry_path )
|
19
|
-
## get text content from zip
|
20
|
-
|
21
|
-
entry = zip_file.find_entry( entry_path )
|
22
|
-
|
23
|
-
## todo/fix: add force encoding to utf-8 ??
|
24
|
-
## check!!!
|
25
|
-
## clean/prepprocess lines
|
26
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
27
|
-
text = entry.get_input_stream().read()
|
28
|
-
|
29
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
30
|
-
logger = LogUtils::Logger.root
|
31
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
32
|
-
#####
|
33
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
34
|
-
## NB:
|
35
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
36
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
37
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
38
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
39
|
-
|
40
|
-
## todo:
|
41
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
42
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
43
|
-
|
44
|
-
self.from_string( text )
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.from_file( path, opts={} )
|
48
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
49
|
-
## - see textutils/utils.rb
|
50
|
-
text = File.read_utf8( path )
|
51
|
-
self.from_string( text, opts )
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.from_string( text, opts={} )
|
55
|
-
LangReader.new( text, opts )
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
def skip_tags?() @skip_tags == true; end
|
60
|
-
def strict?() @strict == true; end
|
61
|
-
|
62
|
-
def initialize( text, opts={} )
|
63
|
-
@text = text
|
64
|
-
|
65
|
-
## option: do NOT generate/add any tags for countries/regions/cities
|
66
|
-
@skip_tags = opts[:skip_tags].present? ? true : false
|
67
|
-
## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
|
68
|
-
@strict = opts[:strict].present? ? true : false
|
69
|
-
end
|
70
|
-
|
71
|
-
def read()
|
7
|
+
def read
|
72
8
|
reader = HashReader.from_string( @text )
|
73
9
|
|
74
10
|
reader.each do |key, value|
|
@@ -100,8 +36,7 @@ class LangReader
|
|
100
36
|
lang.update_attributes!( lang_attribs )
|
101
37
|
end # each key,value
|
102
38
|
|
103
|
-
end # method
|
104
|
-
|
39
|
+
end # method read
|
105
40
|
|
106
41
|
end # class LangReader
|
107
42
|
end # module WorldDb
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
# Reads state records from a values text; every parsed line is handed to
# State.create_or_update_from_attribs.
class StateReader < ReaderBaseWithMoreAttribs

  # Parses @text (with @more_attribs) line by line and creates or updates
  # one State per line. The only option passed on is skip_tags.
  def read
    ValuesReader.from_string( @text, @more_attribs ).each_line do |attribs, values|
      State.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class StateReader
|
17
|
+
|
18
|
+
|
19
|
+
# Reads part records from a values text; every parsed line is handed to
# Part.create_or_update_from_attribs.
class PartReader < ReaderBaseWithMoreAttribs

  # Parses @text (with @more_attribs) line by line and creates or updates
  # one Part per line. The only option passed on is skip_tags.
  def read
    ValuesReader.from_string( @text, @more_attribs ).each_line do |attribs, values|
      Part.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class PartReader
|
31
|
+
|
32
|
+
|
33
|
+
# Reads county records from a values text; every parsed line is handed to
# County.create_or_update_from_attribs.
class CountyReader < ReaderBaseWithMoreAttribs

  # Parses @text (with @more_attribs) line by line and creates or updates
  # one County per line. The only option passed on is skip_tags.
  def read
    ValuesReader.from_string( @text, @more_attribs ).each_line do |attribs, values|
      County.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class CountyReader
|
45
|
+
|
46
|
+
|
47
|
+
# Reads muni records from a values text; every parsed line is handed to
# Muni.create_or_update_from_attribs.
class MuniReader < ReaderBaseWithMoreAttribs

  # Parses @text (with @more_attribs) line by line and creates or updates
  # one Muni per line. The only option passed on is skip_tags.
  def read
    ValuesReader.from_string( @text, @more_attribs ).each_line do |attribs, values|
      Muni.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class MuniReader
|
59
|
+
|
60
|
+
|
61
|
+
end # module WorldDb
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
# Reads a tree text (state > [part >] county > muni > city) for one country
# and matches every tree node to a database record by name prefix.
# Munis and cities that cannot be matched get created on the fly.
class StateTreeReader < ReaderBaseWithMoreAttribs

  # Walks all nodes of the tree in @text.
  #
  # Requires @more_attribs[:country_id]; Country.find is used for the lookup.
  #
  # A stack mirrors the path from the state (stack[0]) down to the node
  # handled last; its size always equals the level of that node.
  #
  # Note: name lookups use bind placeholders (?), so that names containing
  #  quotes (e.g. an apostrophe) can no longer break the SQL statement.
  #  LIKE wildcards (% and _) inside a name are still not escaped.
  def read
    ## for now requires country_id !!
    country = Country.find( @more_attribs[ :country_id ] )
    puts "[StateTreeReader] country key: #{country.key}, name: #{country.name}"

    reader = TreeReader.from_string( @text )

    stack = []    # note: last_level => stack.size; starts w/ 0

    if country.key == 'de'
      ## use state (1) > part (2) > county (3) > muni (4) > city (5)
      state_level  = 1   # Land
      part_level   = 2   # Regierungsbezirk
      county_level = 3   # Landkreis, Kreisfreie Stadt
      muni_level   = 4   # Gemeinde
      city_level   = 5   # Stadt, Ort, etc.
    else
      ## use state (1) > county (2) > muni (3) > city (4)
      state_level  = 1
      part_level   = -1  ## note: not in use (-1)
      county_level = 2
      muni_level   = 3
      city_level   = 4
    end

    reader.each_line do |nodes|
      names = nodes.map { |item| "(#{item.level}) #{item.value}" }
      node  = nodes.last

      puts " #{names.join( ' › ' )}:"
      puts " key: >#{node.key}<, level: >#{node.level}<, value: >#{node.value}<"

      ## match records whose name starts with the node value
      name_prefix = "#{node.value}%"

      if node.level == state_level      # 1
        ## NOTE(review): state lookup is not scoped by country - confirm that is intended
        rec = State.where( 'name like ?', name_prefix ).first
      elsif node.level == part_level    # 2
        state = stack[0]
        rec = Part.where( 'name like ? AND state_id = ?', name_prefix, state.id ).first
      elsif node.level == county_level  # 2 or 3
        state = stack[0]
        rec = County.where( 'name like ? AND state_id = ?', name_prefix, state.id ).first
      elsif node.level == muni_level    # 3 or 4
        state = stack[0]
        rec = Muni.where( 'name like ? AND state_id = ?', name_prefix, state.id ).first
      elsif node.level == city_level    # 4 or 5
        ## note: city requires country scope for lookup
        ## todo/fix: how to deal with cities with the same name
        ##   in the same country (and same state and same county etc.) ??? - add some examples here
        rec = City.where( 'name like ? AND country_id = ?', name_prefix, country.id ).first
      else
        puts "*** (fatal) error: unknown level for tree node: #{node.inspect}"
        ## todo/fix: exit here
        ## note: rec stays nil here; it gets pushed on the stack as nil below
      end


      if rec.present?
        puts "ok - record match found: #{rec.inspect}"
      else
        ## note: for now only auto-adds munis n cities
        if node.level == muni_level     # 3 or 4
          ## add muni
          key    = TextUtils.title_to_key( node.value )
          name   = node.value
          level  = node.level
          state  = stack[0]
          county = stack[county_level-1]  # note: stack is zero-based (thus, -1)
          puts "*** adding muni record:"
          rec = Muni.create!( key:       key,
                              name:      name,
                              level:     level,
                              state_id:  state.id,
                              county_id: county.id )
        elsif node.level == city_level  # 4 or 5
          ## add city
          key   = TextUtils.title_to_key( node.value )
          name  = node.value
          state = stack[0]
          muni  = stack[muni_level-1]     # note: stack is zero-based (thus, -1)
          puts "*** adding city record:"
          rec = City.create!( key:        key,
                              name:       name,
                              state_id:   state.id,
                              ## add county_id too ???  (county would be stack[county_level-1])
                              muni_id:    muni.id,
                              country_id: country.id )
        else
          puts "*** (fatal) error: record not found for tree node: #{node.inspect}"
          ## todo/fix: exit here
        end
      end

      ## bring the stack back to the parent of the current node
      ##  (that is, to size node.level-1) before pushing the current record
      level_diff = node.level - stack.size

      if level_diff > 0
        logger.debug "[StateTreeReader] up +#{level_diff}"
        ## FIX!!! todo/check/verify/assert: always must be +1
      elsif level_diff < 0
        logger.debug "[StateTreeReader] down #{level_diff}"
        level_diff.abs.times { stack.pop }
        stack.pop   # plus one more pop for the sibling on the node's own level
      else
        ## same level
        stack.pop
      end
      stack.push( rec )   ## hierarchy of records (mirrors hierarchy of read-in text)

    end # each_line
  end # method read

end # class StateTreeReader
|
117
|
+
end # module WorldDb
|
118
|
+
|
@@ -2,72 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class UsageReader
|
5
|
+
class UsageReader < ReaderBaseWithOpts
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
## todo: add opts
|
17
|
-
def self.from_zip( zip_file, entry_path )
|
18
|
-
## get text content from zip
|
19
|
-
|
20
|
-
entry = zip_file.find_entry( entry_path )
|
21
|
-
|
22
|
-
## todo/fix: add force encoding to utf-8 ??
|
23
|
-
## check!!!
|
24
|
-
## clean/prepprocess lines
|
25
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
26
|
-
text = entry.get_input_stream().read()
|
27
|
-
|
28
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
29
|
-
logger = LogUtils::Logger.root
|
30
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
31
|
-
#####
|
32
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
33
|
-
## NB:
|
34
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
35
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
36
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
37
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
38
|
-
|
39
|
-
## todo:
|
40
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
41
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
42
|
-
|
43
|
-
self.from_string( text )
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.from_file( path, opts={} )
|
47
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
48
|
-
## - see textutils/utils.rb
|
49
|
-
text = File.read_utf8( path )
|
50
|
-
self.from_string( text, opts )
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.from_string( text, opts={} )
|
54
|
-
UsageReader.new( text, opts )
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
def skip_tags?() @skip_tags == true; end
|
59
|
-
def strict?() @strict == true; end
|
60
|
-
|
61
|
-
def initialize( text, opts={} )
|
62
|
-
@text = text
|
63
|
-
|
64
|
-
## option: do NOT generate/add any tags for countries/regions/cities
|
65
|
-
@skip_tags = opts[:skip_tags].present? ? true : false
|
66
|
-
## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
|
67
|
-
@strict = opts[:strict].present? ? true : false
|
68
|
-
end
|
69
|
-
|
70
|
-
def read()
|
7
|
+
def read
|
71
8
|
reader = HashReader.from_string( @text )
|
72
9
|
|
73
10
|
reader.each do |key, value|
|