worlddb-models 2.2.2 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +31 -13
- data/README.md +7 -7
- data/Rakefile +1 -1
- data/lib/worlddb/deleter.rb +6 -1
- data/lib/worlddb/helpers/value_helper.rb +117 -0
- data/lib/worlddb/matcher.rb +99 -135
- data/lib/worlddb/matcher_adm.rb +82 -0
- data/lib/worlddb/models/city.rb +30 -208
- data/lib/worlddb/models/city_base.rb +220 -0
- data/lib/worlddb/models/continent.rb +9 -0
- data/lib/worlddb/models/country.rb +21 -4
- data/lib/worlddb/models/forward.rb +25 -9
- data/lib/worlddb/models/lang.rb +6 -0
- data/lib/worlddb/models/place.rb +1 -1
- data/lib/worlddb/models/state.rb +83 -0
- data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
- data/lib/worlddb/models/tagdb/tag.rb +1 -1
- data/lib/worlddb/models.rb +11 -8
- data/lib/worlddb/patterns.rb +4 -4
- data/lib/worlddb/reader.rb +68 -39
- data/lib/worlddb/reader_file.rb +36 -3
- data/lib/worlddb/reader_zip.rb +33 -3
- data/lib/worlddb/readers/base.rb +149 -0
- data/lib/worlddb/readers/city.rb +2 -65
- data/lib/worlddb/readers/country.rb +2 -63
- data/lib/worlddb/readers/lang.rb +3 -68
- data/lib/worlddb/readers/state.rb +61 -0
- data/lib/worlddb/readers/state_tree.rb +118 -0
- data/lib/worlddb/readers/usage.rb +2 -65
- data/lib/worlddb/schema.rb +142 -43
- data/lib/worlddb/stats.rb +7 -4
- data/lib/worlddb/tree_reader.rb +97 -0
- data/lib/worlddb/version.rb +2 -2
- data/test/adm/test_fixture_matcher_adm2.rb +73 -0
- data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
- data/test/adm/test_fixture_matcher_tree.rb +52 -0
- data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
- data/test/adm/test_read_tree.rb +63 -0
- data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
- data/test/data/at-austria/orte.txt +23 -0
- data/test/data/at-austria/setups/tree.txt +9 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
- data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
- data/test/data/de-deutschland/orte.txt +12 -0
- data/test/data/de-deutschland/setups/adm.txt +1 -1
- data/test/data/de-deutschland/setups/tree.txt +9 -0
- data/test/helper.rb +8 -1
- data/test/test_fixture_matchers.rb +9 -10
- data/test/test_fixture_matchers_ii.rb +20 -19
- data/test/test_model_city.rb +26 -9
- data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
- data/test/test_model_country.rb +1 -1
- data/test/test_model_state.rb +54 -0
- data/test/test_model_states_at.rb +111 -0
- data/test/test_model_states_de.rb +147 -0
- data/test/test_models.rb +10 -3
- data/test/test_parse_city.rb +70 -0
- data/test/test_parse_country.rb +56 -0
- data/test/test_parse_state.rb +46 -0
- data/test/test_state_tree_reader_at.rb +54 -0
- data/test/test_state_tree_reader_de.rb +71 -0
- data/test/test_tree_reader.rb +39 -0
- metadata +50 -22
- data/lib/worlddb/models/city_compat.rb +0 -27
- data/lib/worlddb/models/continent_compat.rb +0 -24
- data/lib/worlddb/models/country_compat.rb +0 -35
- data/lib/worlddb/models/lang_compat.rb +0 -23
- data/lib/worlddb/models/region_compat.rb +0 -26
- data/lib/worlddb/readers/region.rb +0 -79
- data/test/test_fixture_matcher_adm2.rb +0 -62
- data/test/test_model_region.rb +0 -50
@@ -0,0 +1,149 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
|
6
|
+
###
|
7
|
+
# todo/fix:
|
8
|
+
# try to merge ReaderBaseWithMoreAttribs and ReaderBaseWithOpts into one base - why? why not?
|
9
|
+
|
10
|
+
|
11
|
+
class ReaderBaseWithMoreAttribs

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  ## Builds a reader from a text entry stored inside a zip archive.
  ##
  ##  zip_file     - opened archive; must respond to find_entry( path )
  ##                 (presumably a rubyzip archive - confirm against callers)
  ##  entry_path   - path of the entry inside the archive
  ##  more_attribs - hash handed through unchanged to from_string (and new)
  ##
  ## Raises ArgumentError if the archive has no entry for entry_path.
  def self.from_zip( zip_file, entry_path, more_attribs={} )
    entry = zip_file.find_entry( entry_path )
    raise ArgumentError, "zip entry >#{entry_path}< not found" if entry.nil?

    ## todo/fix: clean/preprocess lines e.g. CR/LF (\r\n) to LF (\n) - check!!!
    ## note: block form of get_input_stream closes the stream after the read
    text = entry.get_input_stream { |stream| stream.read }

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    # for now "hardcoded" to utf8 - what else can we do?
    # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    self.from_string( text, more_attribs )
  end

  ## Builds a reader from the text file at path.
  def self.from_file( path, more_attribs={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text, more_attribs )
  end

  ## Builds a reader from an in-memory string.
  ## note: assume self is the derived class (object), thus, self.new
  ##  creates an instance of the concrete reader
  def self.from_string( text, more_attribs={} )
    ## note: debug trace goes to the (global) logger instead of stdout
    LogUtils::Logger.root.debug "ReaderBase.from_string calling #{self.name}.new"
    self.new( text, more_attribs )
  end


  def skip_tags?() @skip_tags == true; end
  def strict?()    @strict == true;    end

  ## Stores the text and the extra attributes for use by read (in derived classes).
  def initialize( text, more_attribs={} )
    ## todo/fix: how to add opts={} ???

    @text         = text
    @more_attribs = more_attribs

    ## note: no opts (yet), thus, skip_tags? and strict? always return false;
    ##  set explicitly to avoid reading uninitialized instance variables
    @skip_tags = false
    @strict    = false
  end

end # class ReaderBaseWithMoreAttribs
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
class ReaderBaseWithOpts

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  ## Builds a reader from a text entry stored inside a zip archive.
  ##
  ##  zip_file   - opened archive; must respond to find_entry( path )
  ##               (presumably a rubyzip archive - confirm against callers)
  ##  entry_path - path of the entry inside the archive
  ##  opts       - options hash handed through to from_string (and new);
  ##               optional - same options as for from_file / from_string
  ##
  ## Raises ArgumentError if the archive has no entry for entry_path.
  def self.from_zip( zip_file, entry_path, opts={} )
    entry = zip_file.find_entry( entry_path )
    raise ArgumentError, "zip entry >#{entry_path}< not found" if entry.nil?

    ## todo/fix: clean/preprocess lines e.g. CR/LF (\r\n) to LF (\n) - check!!!
    ## note: block form of get_input_stream closes the stream after the read
    text = entry.get_input_stream { |stream| stream.read }

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    # for now "hardcoded" to utf8 - what else can we do?
    # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    self.from_string( text, opts )
  end

  ## Builds a reader from the text file at path.
  def self.from_file( path, opts={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text, opts )
  end

  ## Builds a reader from an in-memory string.
  ## note: assume self is the derived class (object), thus, self.new
  ##  creates an instance of the concrete reader
  def self.from_string( text, opts={} )
    ## note: debug trace goes to the (global) logger instead of stdout
    LogUtils::Logger.root.debug "ReaderBase.from_string calling #{self.name}.new"
    self.new( text, opts )
  end


  def skip_tags?() @skip_tags == true; end
  def strict?()    @strict == true;    end

  ## Stores the text and turns the :skip_tags and :strict options into flags.
  def initialize( text, opts={} )
    @text = text

    ## option: do NOT generate/add any tags for countries/regions/cities
    @skip_tags = opts[:skip_tags].present? ? true : false
    ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
    @strict    = opts[:strict].present? ? true : false
  end

end # class ReaderBaseWithOpts
|
147
|
+
|
148
|
+
|
149
|
+
end # module WorldDb
|
data/lib/worlddb/readers/city.rb
CHANGED
@@ -2,72 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class CityReader
|
5
|
+
class CityReader < ReaderBaseWithMoreAttribs
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
|
17
|
-
def self.from_zip( zip_file, entry_path, more_attribs={} )
|
18
|
-
## get text content from zip
|
19
|
-
|
20
|
-
entry = zip_file.find_entry( entry_path )
|
21
|
-
|
22
|
-
## todo/fix: add force encoding to utf-8 ??
|
23
|
-
## check!!!
|
24
|
-
## clean/prepprocess lines
|
25
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
26
|
-
text = entry.get_input_stream().read()
|
27
|
-
|
28
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
29
|
-
logger = LogUtils::Logger.root
|
30
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
31
|
-
#####
|
32
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
33
|
-
## NB:
|
34
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
35
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
36
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
37
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
38
|
-
|
39
|
-
## todo:
|
40
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
41
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
42
|
-
|
43
|
-
self.from_string( text, more_attribs )
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
def self.from_file( path, more_attribs={} )
|
48
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
49
|
-
## - see textutils/utils.rb
|
50
|
-
text = File.read_utf8( path )
|
51
|
-
self.from_string( text, more_attribs )
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.from_string( text, more_attribs={} )
|
55
|
-
CityReader.new( text, more_attribs )
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
def skip_tags?() @skip_tags == true; end
|
60
|
-
def strict?() @strict == true; end
|
61
|
-
|
62
|
-
def initialize( text, more_attribs={} )
|
63
|
-
## todo/fix: how to add opts={} ???
|
64
|
-
|
65
|
-
@text = text
|
66
|
-
@more_attribs = more_attribs
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
def read()
|
7
|
+
def read
|
71
8
|
reader = ValuesReader.from_string( @text, @more_attribs )
|
72
9
|
|
73
10
|
reader.each_line do |attribs, values|
|
@@ -2,70 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class CountryReader
|
5
|
+
class CountryReader < ReaderBaseWithMoreAttribs
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
|
17
|
-
def self.from_zip( zip_file, entry_path, more_attribs={} )
|
18
|
-
## get text content from zip
|
19
|
-
|
20
|
-
entry = zip_file.find_entry( entry_path )
|
21
|
-
|
22
|
-
## todo/fix: add force encoding to utf-8 ??
|
23
|
-
## check!!!
|
24
|
-
## clean/prepprocess lines
|
25
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
26
|
-
text = entry.get_input_stream().read()
|
27
|
-
|
28
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
29
|
-
logger = LogUtils::Logger.root
|
30
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
31
|
-
#####
|
32
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
33
|
-
## NB:
|
34
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
35
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
36
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
37
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
38
|
-
|
39
|
-
## todo:
|
40
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
41
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
42
|
-
|
43
|
-
self.from_string( text, more_attribs )
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.from_file( path, more_attribs={} )
|
47
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
48
|
-
## - see textutils/utils.rb
|
49
|
-
text = File.read_utf8( path )
|
50
|
-
self.from_string( text, more_attribs )
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.from_string( text, more_attribs={} )
|
54
|
-
CountryReader.new( text, more_attribs )
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
def skip_tags?() @skip_tags == true; end
|
59
|
-
def strict?() @strict == true; end
|
60
|
-
|
61
|
-
def initialize( text, more_attribs={} )
|
62
|
-
## todo/fix: how to add opts={} ???
|
63
|
-
|
64
|
-
@text = text
|
65
|
-
@more_attribs = more_attribs
|
66
|
-
end
|
67
|
-
|
68
|
-
def read()
|
7
|
+
def read
|
69
8
|
reader = ValuesReader.from_string( @text, @more_attribs )
|
70
9
|
|
71
10
|
reader.each_line do |attribs, values|
|
data/lib/worlddb/readers/lang.rb
CHANGED
@@ -2,73 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class LangReader
|
5
|
+
class LangReader < ReaderBaseWithOpts
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
|
17
|
-
## todo: add opts={} etc.
|
18
|
-
def self.from_zip( zip_file, entry_path )
|
19
|
-
## get text content from zip
|
20
|
-
|
21
|
-
entry = zip_file.find_entry( entry_path )
|
22
|
-
|
23
|
-
## todo/fix: add force encoding to utf-8 ??
|
24
|
-
## check!!!
|
25
|
-
## clean/prepprocess lines
|
26
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
27
|
-
text = entry.get_input_stream().read()
|
28
|
-
|
29
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
30
|
-
logger = LogUtils::Logger.root
|
31
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
32
|
-
#####
|
33
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
34
|
-
## NB:
|
35
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
36
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
37
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
38
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
39
|
-
|
40
|
-
## todo:
|
41
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
42
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
43
|
-
|
44
|
-
self.from_string( text )
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.from_file( path, opts={} )
|
48
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
49
|
-
## - see textutils/utils.rb
|
50
|
-
text = File.read_utf8( path )
|
51
|
-
self.from_string( text, opts )
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.from_string( text, opts={} )
|
55
|
-
LangReader.new( text, opts )
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
def skip_tags?() @skip_tags == true; end
|
60
|
-
def strict?() @strict == true; end
|
61
|
-
|
62
|
-
def initialize( text, opts={} )
|
63
|
-
@text = text
|
64
|
-
|
65
|
-
## option: do NOT generate/add any tags for countries/regions/cities
|
66
|
-
@skip_tags = opts[:skip_tags].present? ? true : false
|
67
|
-
## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
|
68
|
-
@strict = opts[:strict].present? ? true : false
|
69
|
-
end
|
70
|
-
|
71
|
-
def read()
|
7
|
+
def read
|
72
8
|
reader = HashReader.from_string( @text )
|
73
9
|
|
74
10
|
reader.each do |key, value|
|
@@ -100,8 +36,7 @@ class LangReader
|
|
100
36
|
lang.update_attributes!( lang_attribs )
|
101
37
|
end # each key,value
|
102
38
|
|
103
|
-
end # method
|
104
|
-
|
39
|
+
end # method read
|
105
40
|
|
106
41
|
end # class LangReader
|
107
42
|
end # module WorldDb
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
class StateReader < ReaderBaseWithMoreAttribs

  ## Runs the text through a ValuesReader and passes every attribs/values
  ##  pair on to State.create_or_update_from_attribs.
  def read
    values_reader = ValuesReader.from_string( @text, @more_attribs )
    values_reader.each_line do |attribs, values|
      State.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class StateReader
|
17
|
+
|
18
|
+
|
19
|
+
class PartReader < ReaderBaseWithMoreAttribs

  ## Runs the text through a ValuesReader and passes every attribs/values
  ##  pair on to Part.create_or_update_from_attribs.
  def read
    values_reader = ValuesReader.from_string( @text, @more_attribs )
    values_reader.each_line do |attribs, values|
      Part.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class PartReader
|
31
|
+
|
32
|
+
|
33
|
+
class CountyReader < ReaderBaseWithMoreAttribs

  ## Runs the text through a ValuesReader and passes every attribs/values
  ##  pair on to County.create_or_update_from_attribs.
  def read
    values_reader = ValuesReader.from_string( @text, @more_attribs )
    values_reader.each_line do |attribs, values|
      County.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class CountyReader
|
45
|
+
|
46
|
+
|
47
|
+
class MuniReader < ReaderBaseWithMoreAttribs

  ## Runs the text through a ValuesReader and passes every attribs/values
  ##  pair on to Muni.create_or_update_from_attribs.
  def read
    values_reader = ValuesReader.from_string( @text, @more_attribs )
    values_reader.each_line do |attribs, values|
      Muni.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class MuniReader
|
59
|
+
|
60
|
+
|
61
|
+
end # module WorldDb
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
class StateTreeReader < ReaderBaseWithMoreAttribs

  ## Reads the tree text node by node and looks up a matching record
  ##  (state, part, county, muni or city - depending on the node's level)
  ##  by name prefix. Munis and cities that cannot be found get created;
  ##  for all other levels a missing record is only reported.
  ##
  ## Requires @more_attribs[ :country_id ]; Country.find is called with it.
  ##
  ## note: all lookups use bind placeholders (?) instead of string
  ##  interpolation, thus, names with an apostrophe no longer
  ##  break (or inject into) the generated sql
  def read
    ## for now requires country_id !!
    country = Country.find( @more_attribs[ :country_id ] )
    puts "[StateTreeReader] country key: #{country.key}, name: #{country.name}"

    reader = TreeReader.from_string( @text )

    stack = []    # note: last_level => stack.size; starts w/ 0

    if country.key == 'de'
      ## use state (1) > part (2) > county (3) > muni (4) > city (5)
      state_level  = 1   # Land
      part_level   = 2   # Regierungsbezirk
      county_level = 3   # Landkreis, Kreisfreie Stadt
      muni_level   = 4   # Gemeinde
      city_level   = 5   # Stadt, Ort, etc.
    else
      ## use state (1) > county (2) > muni (3) > city (4)
      state_level  = 1
      part_level   = -1   ## note: not in use (-1)
      county_level = 2
      muni_level   = 3
      city_level   = 4
    end

    reader.each_line do |nodes|
      names = nodes.map { |item| "(#{item.level}) #{item.value}" }
      node  = nodes.last

      puts "  #{names.join( ' › ' )}:"
      puts "    key: >#{node.key}<, level: >#{node.level}<, value: >#{node.value}<"

      ## prefix match; passed in as a bind value (never interpolated into the sql)
      name_like = "#{node.value}%"

      if node.level == state_level       # 1
        rec = State.where( 'name like ?', name_like ).first
      elsif node.level == part_level     # 2
        state = stack[0]
        rec = Part.where( 'name like ? AND state_id = ?', name_like, state.id ).first
      elsif node.level == county_level   # 2 or 3
        state = stack[0]
        rec = County.where( 'name like ? AND state_id = ?', name_like, state.id ).first
      elsif node.level == muni_level     # 3 or 4
        state = stack[0]
        rec = Muni.where( 'name like ? AND state_id = ?', name_like, state.id ).first
      elsif node.level == city_level     # 4 or 5
        ## note: city requires country scope for lookup
        ## todo/fix: how to deal with cities with the same name
        ##   in the same country (and same state and same county etc.) ??? - add some examples here
        rec = City.where( 'name like ? AND country_id = ?', name_like, country.id ).first
      else
        puts "*** (fatal) error: unknown level for tree node: #{node.inspect}"
        ## todo/fix: exit here
      end


      if rec.present?
        puts "ok - record match found: #{rec.inspect}"
      else
        ## note: for now only auto-adds munis n cities
        if node.level == muni_level     # 3 or 4
          ## add muni
          key    = TextUtils.title_to_key( node.value )
          name   = node.value
          level  = node.level
          state  = stack[0]
          county = stack[county_level-1]   # note: stack is zero-based (thus, -1)
          puts "*** adding muni record:"
          rec = Muni.create!( key:       key,
                              name:      name,
                              level:     level,
                              state_id:  state.id,
                              county_id: county.id )
        elsif node.level == city_level  # 4 or 5
          ## add city
          key   = TextUtils.title_to_key( node.value )
          name  = node.value
          state = stack[0]
          muni  = stack[muni_level-1]   # note: stack is zero-based (thus, -1)
          puts "*** adding city record:"
          rec = City.create!( key:        key,
                              name:       name,
                              state_id:   state.id,
                              ## add county_id too ???
                              muni_id:    muni.id,
                              country_id: country.id )
        else
          puts "*** (fatal) error: record not found for tree node: #{node.inspect}"
          ## todo/fix: exit here
        end
      end

      ## keep the stack in sync with the node's level:
      ##  after the push below the stack holds one record per level (1..node.level)
      level_diff = node.level - stack.size

      if level_diff > 0
        logger.debug "[StateTreeReader]    up  +#{level_diff}"
        ## FIX!!! todo/check/verify/assert: always must be +1
      elsif level_diff < 0
        logger.debug "[StateTreeReader]    down #{level_diff}"
        level_diff.abs.times { stack.pop }
        stack.pop   # note: also drop the previous record on the node's own level
      else
        ## same level
        stack.pop
      end
      stack.push( rec )   ## hierarchy of records (mirrors hierarchy of read-in text)

    end # each_line
  end # method read

end # class StateTreeReader
|
117
|
+
end # module WorldDb
|
118
|
+
|
@@ -2,72 +2,9 @@
|
|
2
2
|
|
3
3
|
module WorldDb
|
4
4
|
|
5
|
-
class UsageReader
|
5
|
+
class UsageReader < ReaderBaseWithOpts
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
## make models available by default with namespace
|
10
|
-
# e.g. lets you use Usage instead of Model::Usage
|
11
|
-
include Models
|
12
|
-
|
13
|
-
## value helpers e.g. is_year?, is_taglist? etc.
|
14
|
-
include TextUtils::ValueHelper
|
15
|
-
|
16
|
-
## todo: add opts
|
17
|
-
def self.from_zip( zip_file, entry_path )
|
18
|
-
## get text content from zip
|
19
|
-
|
20
|
-
entry = zip_file.find_entry( entry_path )
|
21
|
-
|
22
|
-
## todo/fix: add force encoding to utf-8 ??
|
23
|
-
## check!!!
|
24
|
-
## clean/prepprocess lines
|
25
|
-
## e.g. CR/LF (/r/n) to LF (e.g. /n)
|
26
|
-
text = entry.get_input_stream().read()
|
27
|
-
|
28
|
-
## NOTE: needs logger ref; only available in instance methods; use global logger for now
|
29
|
-
logger = LogUtils::Logger.root
|
30
|
-
logger.debug "text.encoding.name (before): #{text.encoding.name}"
|
31
|
-
#####
|
32
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
33
|
-
## NB:
|
34
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
35
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
36
|
-
text = text.force_encoding( Encoding::UTF_8 )
|
37
|
-
logger.debug "text.encoding.name (after): #{text.encoding.name}"
|
38
|
-
|
39
|
-
## todo:
|
40
|
-
# NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
|
41
|
-
## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
|
42
|
-
|
43
|
-
self.from_string( text )
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.from_file( path, opts={} )
|
47
|
-
## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
|
48
|
-
## - see textutils/utils.rb
|
49
|
-
text = File.read_utf8( path )
|
50
|
-
self.from_string( text, opts )
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.from_string( text, opts={} )
|
54
|
-
UsageReader.new( text, opts )
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
def skip_tags?() @skip_tags == true; end
|
59
|
-
def strict?() @strict == true; end
|
60
|
-
|
61
|
-
def initialize( text, opts={} )
|
62
|
-
@text = text
|
63
|
-
|
64
|
-
## option: do NOT generate/add any tags for countries/regions/cities
|
65
|
-
@skip_tags = opts[:skip_tags].present? ? true : false
|
66
|
-
## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
|
67
|
-
@strict = opts[:strict].present? ? true : false
|
68
|
-
end
|
69
|
-
|
70
|
-
def read()
|
7
|
+
def read
|
71
8
|
reader = HashReader.from_string( @text )
|
72
9
|
|
73
10
|
reader.each do |key, value|
|