worlddb-models 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/HISTORY.md +4 -0
- data/Manifest.txt +43 -0
- data/README.md +85 -0
- data/Rakefile +44 -0
- data/lib/worlddb/deleter.rb +32 -0
- data/lib/worlddb/matcher.rb +143 -0
- data/lib/worlddb/models/city.rb +240 -0
- data/lib/worlddb/models/city_comp.rb +27 -0
- data/lib/worlddb/models/continent.rb +41 -0
- data/lib/worlddb/models/continent_comp.rb +24 -0
- data/lib/worlddb/models/country.rb +328 -0
- data/lib/worlddb/models/country_code.rb +41 -0
- data/lib/worlddb/models/country_comp.rb +35 -0
- data/lib/worlddb/models/forward.rb +57 -0
- data/lib/worlddb/models/lang.rb +18 -0
- data/lib/worlddb/models/lang_comp.rb +23 -0
- data/lib/worlddb/models/name.rb +13 -0
- data/lib/worlddb/models/place.rb +16 -0
- data/lib/worlddb/models/region.rb +176 -0
- data/lib/worlddb/models/region_comp.rb +26 -0
- data/lib/worlddb/models/tagdb/tag.rb +16 -0
- data/lib/worlddb/models/tagdb/tagging.rb +15 -0
- data/lib/worlddb/models/usage.rb +17 -0
- data/lib/worlddb/models.rb +200 -0
- data/lib/worlddb/patterns.rb +54 -0
- data/lib/worlddb/reader.rb +224 -0
- data/lib/worlddb/reader_file.rb +86 -0
- data/lib/worlddb/reader_zip.rb +160 -0
- data/lib/worlddb/readers/city.rb +81 -0
- data/lib/worlddb/readers/country.rb +78 -0
- data/lib/worlddb/readers/lang.rb +107 -0
- data/lib/worlddb/readers/region.rb +79 -0
- data/lib/worlddb/readers/usage.rb +98 -0
- data/lib/worlddb/schema.rb +202 -0
- data/lib/worlddb/stats.rb +31 -0
- data/lib/worlddb/version.rb +23 -0
- data/test/helper.rb +26 -0
- data/test/test_fixture_matchers.rb +112 -0
- data/test/test_model_city.rb +60 -0
- data/test/test_model_comp.rb +48 -0
- data/test/test_model_country.rb +53 -0
- data/test/test_model_region.rb +50 -0
- data/test/test_models.rb +35 -0
- metadata +252 -0
@@ -0,0 +1,224 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
|
6
|
+
class ReaderBase
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
## make models available in sportdb module by default with namespace
|
11
|
+
# e.g. lets you use City instead of Models::City
|
12
|
+
include Models
|
13
|
+
include Matcher # e.g. match_cities_for_country, match_regions_for_country, etc.
|
14
|
+
|
15
|
+
## value helpers e.g. is_year?, is_taglist? etc.
|
16
|
+
include TextUtils::ValueHelper
|
17
|
+
|
18
|
+
|
19
|
+
# Tells whether tag generation is switched off.
# Returns true only when @skip_tags is exactly true; false otherwise.
def skip_tags?
  @skip_tags == true
end
|
20
|
+
# Tells whether strict mode is switched on.
# Returns true only when @strict is exactly true; false otherwise.
def strict?
  @strict == true
end
|
21
|
+
|
22
|
+
|
23
|
+
# Sets up the reader flags from an options hash.
#
#  opts[:skip_tags] - when present, do NOT generate/add any tags
#                     for countries/regions/cities
#  opts[:strict]    - when present, (for now) issue a warning on update,
#                     that is, if the key/record (country, region, city)
#                     already exists
#
# Both flags are normalized to plain true/false.
def initialize( opts={} )
  @skip_tags = if opts[:skip_tags].present? then true else false end
  @strict    = if opts[:strict].present?    then true else false end
end
|
29
|
+
|
30
|
+
|
31
|
+
# Loads every fixture listed in a setup.
#
#  name - setup name handed to create_fixture_reader
#
# Each entry yielded by the fixture reader is passed on to load.
def load_setup( name )
  create_fixture_reader( name ).each { |fixture| load( fixture ) }
end # method load_setup
|
38
|
+
|
39
|
+
|
40
|
+
# Dispatches a single fixture (by name) to the loader/reader that handles it.
#
# The checks run in a fixed order and the first match wins; note that
# the more specific region matchers (abbr, iso, nuts) are tried before
# the generic regions matcher. Unknown names are logged as an error.
def load( name )
  return load_continent_defs( name )        if name =~ /^continents/
  return load_continent_refs( name )        if name =~ /\/continents/

  ## todo: pass along opts too
  ## use match_usage( name ) - why? why not?? ???
  return create_lang_reader( name ).read()  if name =~ /^lang/
  return create_usage_reader( name ).read() if name =~ /\/lang/

  code_patterns = [ /\/fifa/, /\/fips/, /\/internet/, /\/ioc/, /\/iso/, /\/motor/ ]
  return load_codes( name ) if code_patterns.any? { |pattern| name =~ pattern }

  ## todo: pass along opts too
  ## use match_tags( name ) - why? why not?? ???
  ######## FIX: add back again
  ### fix: use read() only, that is, w/o name
  ## r = create_tag_reader( name )
  ## r.read()
  return nil if name =~ /^tag.*\.\d$/     # tag fixtures are matched but (for now) not loaded

  # e.g. africa/countries or america/countries
  ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
  ## auto-add continent (from folder structure) as tag
  ## fix: allow dash/hyphen/minus in tag
  ### todo/fix: add opts - how??
  countries_matched = match_countries_for_continent( name ) do |continent|
    create_country_reader( name, tags: continent.tr('-', '_') ).read()
  end
  return nil if countries_matched

  # name =~ /\/([a-z]{2})\/cities/
  cities_matched = match_cities_for_country( name ) do |country_key|
    ## auto-add required country code (from folder structure)
    country = Country.find_by_key!( country_key )
    logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"

    create_city_reader( name, country_id: country.id ).read()
  end
  return nil if cities_matched

  # name =~ /\/([a-z]{2})\/regions\.abbr/
  return nil if match_regions_abbr_for_country( name ) { |country_key| load_regions_xxx( country_key, 'abbr', name ) }

  # name =~ /\/([a-z]{2})\/regions\.iso/
  return nil if match_regions_iso_for_country( name ) { |country_key| load_regions_xxx( country_key, 'iso', name ) }

  # name =~ /\/([a-z]{2})\/regions\.nuts/
  return nil if match_regions_nuts_for_country( name ) { |country_key| load_regions_xxx( country_key, 'nuts', name ) }

  # name =~ /\/([a-z]{2})\/regions/
  regions_matched = match_regions_for_country( name ) do |country_key|
    ## auto-add required country code (from folder structure)
    country = Country.find_by_key!( country_key )
    logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"

    create_region_reader( name, country_id: country.id ).read()
  end
  return nil if regions_matched

  logger.error "unknown world.db fixture type >#{name}<"
  # todo/fix: exit w/ error
end
|
110
|
+
|
111
|
+
|
112
|
+
### use RegionAttrReader
|
113
|
+
# Sets one attribute on the regions of a country from a key/value fixture.
#
#  country_key - key used to look up the country (raises if not found)
#  xxx         - attribute name; the setter "<xxx>=" gets called on each region
#  name        - fixture name handed to create_hash_reader
#
# Every region is looked up by country id and key (raises if not found)
# and saved after the attribute is assigned.
def load_regions_xxx( country_key, xxx, name )
  country = Country.find_by_key!( country_key )
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"

  setter = "#{xxx}="

  create_hash_reader( name ).each do |region_key, region_value|
    region = Region.find_by_country_id_and_key!( country.id, region_key )
    region.send( setter, region_value )
    region.save!
  end
end
|
125
|
+
|
126
|
+
|
127
|
+
### use ContinentRefReader
|
128
|
+
# Links countries to continents from a key/value fixture.
#
#  name - fixture name handed to create_hash_reader
#
# Each pair is (country key, continent key); both records are looked up
# with the raising finders, then the country's continent_id is set and saved.
def load_continent_refs( name )
  create_hash_reader( name ).each do |country_key, continent_key|
    country   = Country.find_by_key!( country_key )
    continent = Continent.find_by_key!( continent_key )

    country.continent_id = continent.id
    country.save!
  end
end
|
138
|
+
|
139
|
+
### use ContinentDef Reader
|
140
|
+
# Creates or updates Continent records from a values fixture.
#
#  name         - fixture name handed to create_values_reader
#  more_attribs - extra attributes passed along to the values reader
#
# For every line the record is looked up by attribs[:key]; an existing
# record gets updated, otherwise a new one is created.
def load_continent_defs( name, more_attribs={} )
  create_values_reader( name, more_attribs ).each_line do |attribs, values|

    ## check optional values - none are supported here; every one gets a warning
    values.each { |value| logger.warn "unknown type for value >#{value}<" }

    continent = Continent.find_by_key( attribs[ :key ] )
    if continent.present?
      logger.debug "update Continent #{continent.id}-#{continent.key}:"
    else
      logger.debug "create Continent:"
      continent = Continent.new
    end

    logger.debug attribs.to_json

    continent.update_attributes!( attribs )
  end # each lines
end # load_continent_defs
|
164
|
+
|
165
|
+
|
166
|
+
# Assigns country codes (fifa, fips, internet, ioc, motor or iso) from a
# comma-separated line fixture.
#
#  name - fixture name handed to create_line_reader; the code type is
#         derived from the path segment in the name (e.g. ".../fifa")
#
# Line formats:
#   iso    - country ref, alpha2, alpha3, num
#   others - code, country ref
#
# The code type is determined once, and the same result decides both the
# line format and the attribute assigned (previously the format check used
# /iso/ while the assignment used /\/iso/, so a name such as "iso-data/fifa"
# read the country from the wrong column).
#
# Lines with too few values, with no matching country or with an unknown
# code type get skipped with a warning.
def load_codes( name )
  # NOTE: order matches the precedence of the assignments below
  code_type  = [:fifa, :fips, :internet, :ioc, :motor, :iso].find { |type| name =~ /\/#{type}/ }
  min_values = code_type == :iso ? 4 : 2

  reader = create_line_reader( name )

  reader.each_line do |line|

    values = line.split(',')

    ## logger.debug '[>' + values.join( '<|>' ) + '<]'

    if values.size < min_values
      logger.warn "too few values (#{values.size} of #{min_values}) in line >#{line.strip}<; skipping line; in [#{name}]"
      next
    end

    country_name = if code_type == :iso
                     values[0].strip    # country ref, alpha2, alpha3, num
                   else
                     values[1].strip    # code, country ref
                   end

    ## try to find country
    cty = Country.search_by_name( country_name )

    if cty.nil?
      logger.warn "no country match found for >#{country_name}<; skipping line; in [#{name}]"
      next
    end

    case code_type
    when :fifa
      cty.fifa = values[0].strip
    when :fips
      cty.fips = values[0].strip
    when :internet
      # NOTE: remove (optional) leading . e.g. .at becomes at
      cty.net = values[0].sub( /^\s*\./,'' ).strip
    when :ioc
      cty.ioc = values[0].strip
    when :motor
      cty.motor = values[0].strip
    when :iso
      cty.alpha2 = values[1].strip
      cty.alpha3 = values[2].strip
      # NOTE: num is a string!!! use (rename to) num_str - why? why not?
      cty.num    = values[3].strip
    else
      logger.warn "warn: unknown country code type; skipping line; in [#{name}]"
      next
    end

    cty.save!
  end
end # method load_codes
|
216
|
+
|
217
|
+
|
218
|
+
####
|
219
|
+
# helper methods
|
220
|
+
## todo: also add City.search_by_name etc. !!!
|
221
|
+
|
222
|
+
|
223
|
+
end # class ReaderBase
|
224
|
+
end # module WorldDb
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
# Reader that resolves fixture names to plain files below an include path.
#
# Every create_*_reader method builds the file path from @include_path,
# the given name and a fixed extension (.txt or .yml), logs it and
# hands the path to the matching reader's from_file constructor.
class Reader < ReaderBase

  #  include_path - folder all fixture names are relative to
  #  opts         - options passed on to ReaderBase
  def initialize( include_path, opts={} )
    super( opts )

    @include_path = include_path
  end


  # Reader for a setup (fixture list) - uses the .txt extension.
  def create_fixture_reader( name )
    setup_path = "#{@include_path}/#{name}.txt"
    logger.info( "parsing data (setup) '#{name}' (#{setup_path})..." )

    FixtureReader.from_file( setup_path )
  end

  # Reader for languages - uses the .yml extension (hash reader - use .yml??).
  def create_lang_reader( name )
    lang_path = "#{@include_path}/#{name}.yml"
    logger.info( "parsing data (lang) '#{name}' (#{lang_path})..." )

    LangReader.from_file( lang_path )
  end

  # Reader for language usage - uses the .yml extension (hash reader - use .yml??).
  def create_usage_reader( name )
    usage_path = "#{@include_path}/#{name}.yml"
    logger.info( "parsing data (usage) '#{name}' (#{usage_path})..." )

    UsageReader.from_file( usage_path )
  end


  # Reader for countries - uses the .txt extension; more_attribs get passed along.
  def create_country_reader( name, more_attribs={} )
    country_path = "#{@include_path}/#{name}.txt"
    logger.info( "parsing data (country) '#{name}' (#{country_path})..." )

    CountryReader.from_file( country_path, more_attribs )
  end

  # Reader for regions - uses the .txt extension; more_attribs get passed along.
  def create_region_reader( name, more_attribs={} )
    region_path = "#{@include_path}/#{name}.txt"
    logger.info( "parsing data (region) '#{name}' (#{region_path})..." )

    RegionReader.from_file( region_path, more_attribs )
  end

  # Reader for cities - uses the .txt extension; more_attribs get passed along.
  def create_city_reader( name, more_attribs={} )
    city_path = "#{@include_path}/#{name}.txt"
    logger.info( "parsing data (city) '#{name}' (#{city_path})..." )

    CityReader.from_file( city_path, more_attribs )
  end


  # Generic key/value reader - uses the .yml extension.
  def create_hash_reader( name )
    hash_path = "#{@include_path}/#{name}.yml"
    logger.info( "parsing data (hash) '#{name}' (#{hash_path})..." )

    HashReader.from_file( hash_path )
  end

  # Generic values reader - uses the .txt extension; more_attribs get passed along.
  def create_values_reader( name, more_attribs={} )
    values_path = "#{@include_path}/#{name}.txt"
    logger.info( "parsing data (values) '#{name}' (#{values_path})..." )

    ValuesReader.from_file( values_path, more_attribs )
  end

  # Generic line reader - uses the .txt extension.
  def create_line_reader( name )
    line_path = "#{@include_path}/#{name}.txt"
    logger.info( "parsing data (line) '#{name}' (#{line_path})..." )

    LineReader.from_file( line_path )
  end

  # def create_tag_reader( name )
  #   ## fix: change to new from_file() style
  #   TagDb::TagReader.new( @include_path )
  # end

end # class Reader
|
86
|
+
end # module WorldDb
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
class ZipReader < ReaderBase
|
6
|
+
|
7
|
+
|
8
|
+
# Opens the zip archive "<include_path>/<name>.zip" for reading.
#
#  name         - archive name (w/o the .zip extension)
#  include_path - folder holding the archive
#  opts         - options passed on to ReaderBase
#
# todo/fix: make include_path an opts (included in opts?) - why? why not??
def initialize( name, include_path, opts = {} )
  super( opts )

  zip_path = "#{include_path}/#{name}.zip"

  ## todo: check if zip exists
  ## NOTE: do NOT create if file is missing; let it crash
  @zip_file = Zip::File.open( zip_path )

  ### allow prefix (path) in name
  ###   e.g. assume all files relative to setup manifest
  ##     e.g. at-austria-master/setups/all.txt or
  ##          be-belgium-master/setups/all.txt
  ##    for
  ##      setups/all.txt
  ###
  ##  will get (re)set w/ fixture/setup reader
  ##
  ## todo/fix: change/rename to @relative_path ?? - why? why not?
  @zip_prefix = ''
end
|
29
|
+
|
30
|
+
# Closes the underlying zip archive (the handle opened in initialize).
## todo/check: add a close method - why? why not ???
def close
  @zip_file.close
end
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
# Locates a setup (fixture list) inside the zip archive and returns a
# FixtureReader for it.
#
#  name - setup name w/o the .txt extension e.g. setups/all or setups/test
#
# The entry is searched anywhere in the archive (**/<name>.txt); the first
# match wins and a warning is logged if there is more than one match.
# As a side effect @zip_prefix gets (re)set to the leading part of the
# matched entry path (e.g. at-austria-master/), so all other entries
# can be resolved relative to the setup.
#
# Raises ArgumentError if no entry matches.
def create_fixture_reader( name )
  query = "**/#{name}.txt"

  ## note: returns an array of Zip::Entry
  candidates = @zip_file.glob( query )
  logger.debug "  zip entry candidates for >>#{query}<<: #{candidates.map { |entry| entry.name }.inspect}"

  if candidates.empty?
    raise ArgumentError, "no zip entry found for setup '#{name}' (query >>#{query}<<)"
  end

  if candidates.size > 1
    logger.warn "#{candidates.size} zip entries found for setup '#{name}' (query >>#{query}<<); using first match"
  end

  ## get fullpath e.g. at-austria-master/setups/all.txt
  path = candidates[0].name
  logger.debug "  zip entry path >>#{path}<<"

  ## cut-off at-austria-master/    NOTE: includes trailing slash (if present)
  ##  note: add +4 for extension (.txt)
  @zip_prefix = path[ 0...(path.size-(name.size+4)) ]
  logger.debug "  zip entry prefix >>#{@zip_prefix}<<"

  logger.info "parsing data (setup) in zip '#{name}' (#{path})..."

  FixtureReader.from_zip( @zip_file, path )
end
|
64
|
+
|
65
|
+
|
66
|
+
# Returns a LangReader for the named zip entry
# (hash reader - use .yml?? - entry path built with the .yml extension).
def create_lang_reader( name )
  entry_path = name_to_zip_entry_path( name, '.yml' )
  logger.info( "parsing data (lang) in zip '#{name}' (#{entry_path})..." )

  LangReader.from_zip( @zip_file, entry_path )
end
|
72
|
+
|
73
|
+
# Returns a UsageReader for the named zip entry
# (hash reader - use .yml?? - entry path built with the .yml extension).
def create_usage_reader( name )
  entry_path = name_to_zip_entry_path( name, '.yml' )
  logger.info( "parsing data (usage) in zip '#{name}' (#{entry_path})..." )

  UsageReader.from_zip( @zip_file, entry_path )
end
|
79
|
+
|
80
|
+
|
81
|
+
# Returns a CountryReader for the named zip entry (default .txt extension);
# more_attribs get passed along to the reader.
def create_country_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info( "parsing data (country) in zip '#{name}' (#{entry_path})..." )

  CountryReader.from_zip( @zip_file, entry_path, more_attribs )
end
|
87
|
+
|
88
|
+
# Returns a RegionReader for the named zip entry (default .txt extension);
# more_attribs get passed along to the reader.
def create_region_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info( "parsing data (region) in zip '#{name}' (#{entry_path})..." )

  RegionReader.from_zip( @zip_file, entry_path, more_attribs )
end
|
94
|
+
|
95
|
+
# Returns a CityReader for the named zip entry (default .txt extension);
# more_attribs get passed along to the reader.
def create_city_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info( "parsing data (city) in zip '#{name}' (#{entry_path})..." )

  CityReader.from_zip( @zip_file, entry_path, more_attribs )
end
|
101
|
+
|
102
|
+
|
103
|
+
# Returns a HashReader for the named zip entry
# (hash reader - use .yml?? - entry path built with the .yml extension).
def create_hash_reader( name )
  entry_path = name_to_zip_entry_path( name, '.yml' )
  logger.info( "parsing data (hash) in zip '#{name}' (#{entry_path})..." )

  HashReader.from_zip( @zip_file, entry_path )
end
|
109
|
+
|
110
|
+
# Returns a ValuesReader for the named zip entry (default .txt extension);
# more_attribs get passed along to the reader.
def create_values_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info( "parsing data (values) in zip '#{name}' (#{entry_path})..." )

  ValuesReader.from_zip( @zip_file, entry_path, more_attribs )
end
|
116
|
+
|
117
|
+
# Returns a LineReader for the named zip entry (default .txt extension).
def create_line_reader( name )
  entry_path = name_to_zip_entry_path( name )
  logger.info( "parsing data (line) in zip '#{name}' (#{entry_path})..." )

  LineReader.from_zip( @zip_file, entry_path )
end
|
123
|
+
|
124
|
+
# def create_tag_reader( name )
|
125
|
+
# ## fix: change to new from_file() style
|
126
|
+
# TagDb::TagReader.new( @include_path )
|
127
|
+
# end
|
128
|
+
|
129
|
+
private
|
130
|
+
|
131
|
+
# Maps a (possibly virtual) path to its real path.
#
# A path may include !/ to mark a virtual prefix; everything up to and
# including the first !/ gets cut off, e.g.
#   at-austria!/w-wien/beers  becomes  w-wien/beers
# A path without !/ is returned as is.
def path_to_real_path( path )
  marker_pos = path.index( '!/' )
  return path if marker_pos.nil?    # no marker; real path is the same as path

  path[ (marker_pos+2)..-1 ]
end
|
145
|
+
|
146
|
+
# Builds the zip entry path for a fixture name.
#
#  name      - fixture name (may include a virtual !/ prefix)
#  extension - file extension appended to the name (default: .txt)
#
# The name plus extension is mapped to its real path and then prefixed
# with @zip_prefix (if present, the prefix includes the trailing slash).
def name_to_zip_entry_path( name, extension='.txt' )
  relative_path = path_to_real_path( "#{name}#{extension}" )

  "#{@zip_prefix}#{relative_path}"
end
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
end # class ZipReader
|
160
|
+
end # module WorldDb
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
# Reads city records from a values text (plain string, file or zip entry)
# and creates or updates City records line by line.
class CityReader

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use City instead of Models::City
  include Models

  ## value helpers  e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  class << self

    # Builds a reader from a zip entry.
    #
    #  zip_file     - open zip archive (must respond to find_entry)
    #  entry_path   - path of the entry inside the archive
    #  more_attribs - extra attributes passed along to the reader
    #
    # The entry content gets tagged as UTF-8 via force_encoding; note that
    # force_encoding will NOT change the chars, only the assumed encoding
    # (w/o translation). For now "hardcoded" to utf8 - what else can we do?
    def from_zip( zip_file, entry_path, more_attribs={} )
      zip_entry = zip_file.find_entry( entry_path )

      ## todo/fix: clean/preprocess lines  e.g. CR/LF (\r\n) to LF (\n) - check!!!
      raw_text = zip_entry.get_input_stream().read()

      ## NOTE: needs logger ref; only available in instance methods; use global logger for now
      logger = LogUtils::Logger.root
      logger.debug "text.encoding.name (before): #{raw_text.encoding.name}"

      # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
      utf8_text = raw_text.force_encoding( Encoding::UTF_8 )
      logger.debug "text.encoding.name (after): #{utf8_text.encoding.name}"

      ## todo:
      # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
      ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

      from_string( utf8_text, more_attribs )
    end

    # Builds a reader from a file.
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ##   - see textutils/utils.rb
    def from_file( path, more_attribs={} )
      from_string( File.read_utf8( path ), more_attribs )
    end

    # Builds a reader from a text string.
    def from_string( text, more_attribs={} )
      CityReader.new( text, more_attribs )
    end

  end # class << self


  def skip_tags?
    @skip_tags == true
  end

  def strict?
    @strict == true
  end

  #  text         - values text to parse
  #  more_attribs - extra attributes passed along to the values reader
  def initialize( text, more_attribs={} )
    ## todo/fix: how to add opts={} ???

    @text         = text
    @more_attribs = more_attribs
  end


  # Parses the text and creates or updates a City for every line.
  def read
    ValuesReader.from_string( @text, @more_attribs ).each_line do |attribs, values|
      City.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class CityReader
|
80
|
+
end # module WorldDb
|
81
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module WorldDb
|
4
|
+
|
5
|
+
# Reads country records from a values text (plain string, file or zip entry)
# and creates or updates Country records line by line.
class CountryReader

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Country instead of Models::Country
  include Models

  ## value helpers  e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  class << self

    # Builds a reader from a zip entry.
    #
    #  zip_file     - open zip archive (must respond to find_entry)
    #  entry_path   - path of the entry inside the archive
    #  more_attribs - extra attributes passed along to the reader
    #
    # The entry content gets tagged as UTF-8 via force_encoding; note that
    # force_encoding will NOT change the chars, only the assumed encoding
    # (w/o translation). For now "hardcoded" to utf8 - what else can we do?
    def from_zip( zip_file, entry_path, more_attribs={} )
      zip_entry = zip_file.find_entry( entry_path )

      ## todo/fix: clean/preprocess lines  e.g. CR/LF (\r\n) to LF (\n) - check!!!
      raw_text = zip_entry.get_input_stream().read()

      ## NOTE: needs logger ref; only available in instance methods; use global logger for now
      logger = LogUtils::Logger.root
      logger.debug "text.encoding.name (before): #{raw_text.encoding.name}"

      # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
      utf8_text = raw_text.force_encoding( Encoding::UTF_8 )
      logger.debug "text.encoding.name (after): #{utf8_text.encoding.name}"

      ## todo:
      # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
      ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

      from_string( utf8_text, more_attribs )
    end

    # Builds a reader from a file.
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ##   - see textutils/utils.rb
    def from_file( path, more_attribs={} )
      from_string( File.read_utf8( path ), more_attribs )
    end

    # Builds a reader from a text string.
    def from_string( text, more_attribs={} )
      CountryReader.new( text, more_attribs )
    end

  end # class << self


  def skip_tags?
    @skip_tags == true
  end

  def strict?
    @strict == true
  end

  #  text         - values text to parse
  #  more_attribs - extra attributes passed along to the values reader
  def initialize( text, more_attribs={} )
    ## todo/fix: how to add opts={} ???

    @text         = text
    @more_attribs = more_attribs
  end

  # Parses the text and creates or updates a Country for every line.
  def read
    ValuesReader.from_string( @text, @more_attribs ).each_line do |attribs, values|
      Country.create_or_update_from_attribs( attribs, values, { skip_tags: skip_tags? } )
    end
  end

end # class CountryReader
|
78
|
+
end # module WorldDb
|