worlddb-models 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gemtest +0 -0
  3. data/HISTORY.md +4 -0
  4. data/Manifest.txt +43 -0
  5. data/README.md +85 -0
  6. data/Rakefile +44 -0
  7. data/lib/worlddb/deleter.rb +32 -0
  8. data/lib/worlddb/matcher.rb +143 -0
  9. data/lib/worlddb/models/city.rb +240 -0
  10. data/lib/worlddb/models/city_comp.rb +27 -0
  11. data/lib/worlddb/models/continent.rb +41 -0
  12. data/lib/worlddb/models/continent_comp.rb +24 -0
  13. data/lib/worlddb/models/country.rb +328 -0
  14. data/lib/worlddb/models/country_code.rb +41 -0
  15. data/lib/worlddb/models/country_comp.rb +35 -0
  16. data/lib/worlddb/models/forward.rb +57 -0
  17. data/lib/worlddb/models/lang.rb +18 -0
  18. data/lib/worlddb/models/lang_comp.rb +23 -0
  19. data/lib/worlddb/models/name.rb +13 -0
  20. data/lib/worlddb/models/place.rb +16 -0
  21. data/lib/worlddb/models/region.rb +176 -0
  22. data/lib/worlddb/models/region_comp.rb +26 -0
  23. data/lib/worlddb/models/tagdb/tag.rb +16 -0
  24. data/lib/worlddb/models/tagdb/tagging.rb +15 -0
  25. data/lib/worlddb/models/usage.rb +17 -0
  26. data/lib/worlddb/models.rb +200 -0
  27. data/lib/worlddb/patterns.rb +54 -0
  28. data/lib/worlddb/reader.rb +224 -0
  29. data/lib/worlddb/reader_file.rb +86 -0
  30. data/lib/worlddb/reader_zip.rb +160 -0
  31. data/lib/worlddb/readers/city.rb +81 -0
  32. data/lib/worlddb/readers/country.rb +78 -0
  33. data/lib/worlddb/readers/lang.rb +107 -0
  34. data/lib/worlddb/readers/region.rb +79 -0
  35. data/lib/worlddb/readers/usage.rb +98 -0
  36. data/lib/worlddb/schema.rb +202 -0
  37. data/lib/worlddb/stats.rb +31 -0
  38. data/lib/worlddb/version.rb +23 -0
  39. data/test/helper.rb +26 -0
  40. data/test/test_fixture_matchers.rb +112 -0
  41. data/test/test_model_city.rb +60 -0
  42. data/test/test_model_comp.rb +48 -0
  43. data/test/test_model_country.rb +53 -0
  44. data/test/test_model_region.rb +50 -0
  45. data/test/test_models.rb +35 -0
  46. metadata +252 -0
@@ -0,0 +1,224 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+
6
+ class ReaderBase
7
+
8
+ include LogUtils::Logging
9
+
10
+ ## make models available in worlddb module by default with namespace
11
+ # e.g. lets you use City instead of Models::City
12
+ include Models
13
+ include Matcher # e.g. match_cities_for_country, match_regions_for_country, etc.
14
+
15
+ ## value helpers e.g. is_year?, is_taglist? etc.
16
+ include TextUtils::ValueHelper
17
+
18
+
19
## True only when @skip_tags is exactly true (any other value, including nil, counts as false).
def skip_tags?
  @skip_tags == true
end
20
## True only when @strict is exactly true (any other value, including nil, counts as false).
def strict?
  @strict == true
end
21
+
22
+
23
## Reads the two boolean reader options from +opts+:
#   :skip_tags - do NOT generate/add any tags for countries/regions/cities
#   :strict    - for now issue warning on update, that is, if key/record
#                (country,region,city) already exists
# Both are normalized to plain true/false via present?.
def initialize( opts={} )
  @skip_tags, @strict = [:skip_tags, :strict].map do |flag|
    opts[flag].present? ? true : false
  end
end
29
+
30
+
31
## Load every fixture listed in the setup manifest +name+.
# The manifest is opened via create_fixture_reader (supplied by the concrete
# reader class) and each entry it yields is passed to load.
def load_setup( name )
  create_fixture_reader( name ).each { |fixture_name| load( fixture_name ) }
end # method load_setup
38
+
39
+
40
## Dispatch one fixture to the loader/reader responsible for it.
#
# The fixture type is derived from +name+ alone: first through the regex
# checks, then through the match_* helpers (from Matcher), which are handed a
# block that receives the continent / country key taken from the name.
# The first rule that applies wins; a name no rule recognizes is only logged
# as an error.
def load( name )
  return load_continent_defs( name )  if name =~ /^continents/
  return load_continent_refs( name )  if name =~ /\/continents/

  ## todo: pass along opts too
  ##  use match_usage( name ) - why? why not?? ???
  return create_lang_reader( name ).read()   if name =~ /^lang/
  return create_usage_reader( name ).read()  if name =~ /\/lang/

  code_patterns = [ /\/fifa/, /\/fips/, /\/internet/, /\/ioc/, /\/iso/, /\/motor/ ]
  return load_codes( name )  if code_patterns.any? { |pattern| name =~ pattern }

  ## todo: pass along opts too
  ##  use match_tags( name ) - why? why not?? ???
  ######## FIX: add back again
  ### fix: use read() only, that is, w/o name
  ## r = create_tag_reader( name )
  ## r.read()
  ## (tag fixtures are recognized here but currently load nothing)
  return nil  if name =~ /^tag.*\.\d$/

  # e.g. africa/countries or america/countries
  handled = match_countries_for_continent( name ) do |continent|
    ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
    ## auto-add continent (from folder structure) as tag
    ## fix: allow dash/hyphen/minus in tag
    ### todo/fix: add opts - how??
    country_reader = create_country_reader( name, tags: continent.tr('-', '_') )
    country_reader.read()
  end
  return nil  if handled

  # name =~ /\/([a-z]{2})\/cities/
  handled = match_cities_for_country( name ) do |country_key|
    ## auto-add required country code (from folder structure)
    country = Country.find_by_key!( country_key )
    logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"

    create_city_reader( name, country_id: country.id ).read()
  end
  return nil  if handled

  # name =~ /\/([a-z]{2})\/regions\.abbr/
  handled = match_regions_abbr_for_country( name ) { |country_key| load_regions_xxx( country_key, 'abbr', name ) }
  return nil  if handled

  # name =~ /\/([a-z]{2})\/regions\.iso/
  handled = match_regions_iso_for_country( name ) { |country_key| load_regions_xxx( country_key, 'iso', name ) }
  return nil  if handled

  # name =~ /\/([a-z]{2})\/regions\.nuts/
  handled = match_regions_nuts_for_country( name ) { |country_key| load_regions_xxx( country_key, 'nuts', name ) }
  return nil  if handled

  # name =~ /\/([a-z]{2})\/regions/
  handled = match_regions_for_country( name ) do |country_key|
    ## auto-add required country code (from folder structure)
    country = Country.find_by_key!( country_key )
    logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"

    create_region_reader( name, country_id: country.id ).read()
  end
  return nil  if handled

  # todo/fix: exit w/ error
  logger.error "unknown world.db fixture type >#{name}<"
end
110
+
111
+
112
+ ### use RegionAttrReader
113
## Set one attribute on the regions of a country from a key => value fixture.
#
# country_key - key of the country owning the regions (must exist)
# xxx         - attribute name; the value is assigned through the "#{xxx}=" writer
#               (load passes 'abbr', 'iso' or 'nuts')
# name        - fixture name handed to create_hash_reader
#
# Country and regions are looked up with the bang finders and every touched
# region is stored with save!.
def load_regions_xxx( country_key, xxx, name )
  country = Country.find_by_key!( country_key )
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"

  writer = "#{xxx}="

  create_hash_reader( name ).each do |region_key, attr_value|
    region = Region.find_by_country_id_and_key!( country.id, region_key )
    region.send( writer, attr_value )
    region.save!
  end
end
125
+
126
+
127
+ ### use ContinentRefReader
128
## Attach countries to continents from a key => value fixture, where each key
# is looked up as a country key and each value as a continent key.
# Both lookups use the bang finders; the country is stored with save!.
def load_continent_refs( name )
  create_hash_reader( name ).each do |country_key, continent_key|
    country   = Country.find_by_key!( country_key )
    continent = Continent.find_by_key!( continent_key )

    country.continent_id = continent.id
    country.save!
  end
end
138
+
139
+ ### use ContinentDef Reader
140
## Create or update Continent records from a values fixture.
#
# name         - fixture name handed to create_values_reader
# more_attribs - extra attributes passed through to the values reader
#
# Each record is looked up by attribs[:key]; an existing record is updated,
# otherwise a new Continent is built. Either way the attributes are written
# with update_attributes!.
def load_continent_defs( name, more_attribs={} )
  create_values_reader( name, more_attribs ).each_line do |attribs, values|

    ## check optional values (none are supported yet - every one gets reported)
    values.each { |extra| logger.warn "unknown type for value >#{extra}<" }

    continent = Continent.find_by_key( attribs[ :key ] )
    if continent.present?
      logger.debug "update Continent #{continent.id}-#{continent.key}:"
    else
      logger.debug "create Continent:"
      continent = Continent.new
    end

    logger.debug attribs.to_json

    continent.update_attributes!( attribs )
  end # each lines
end # load_continent_defs
164
+
165
+
166
## Load country codes (fifa, fips, internet, ioc, motor or iso) from a
# comma-separated line fixture and store them on the matching countries.
#
# Line layout (values are stripped of surrounding whitespace):
#   iso fixtures:       country ref, alpha2, alpha3, num
#   all other fixtures: code, country ref
#
# The code type is taken from +name+ once (first of /fifa, /fips, /internet,
# /ioc, /motor, /iso found in it) and that single decision drives both the
# column layout and the attribute that gets set.
#
# Lines that cannot be used are skipped with a warning instead of aborting the
# whole fixture: too few values, no country found for the country ref, or an
# unknown code type. Countries that get a code are stored with save!.
def load_codes( name )
  reader = create_line_reader( name )

  # same precedence as the former if/elsif chain
  code_type  = %w[fifa fips internet ioc motor iso].find { |type| name.include?( "/#{type}" ) }
  iso        = (code_type == 'iso')
  min_values = iso ? 4 : 2

  reader.each_line do |line|

    values = line.split(',').map { |value| value.strip }

    ## logger.debug '[>' + values.join( '<|>' ) + '<]'

    if values.size < min_values
      logger.warn "expected at least #{min_values} values but got #{values.size} in line >#{line}<; skipping line; in [#{name}]"
      next
    end

    # iso: country ref, alpha2, alpha3, num  -  others: code, country ref
    country_name = iso ? values[0] : values[1]

    ## try to find country
    cty = Country.search_by_name( country_name )

    if cty.nil?
      logger.warn "no country match found for >#{country_name}<; skipping line; in [#{name}]"
      next
    end

    case code_type
    when 'fifa'     then cty.fifa  = values[0]
    when 'fips'     then cty.fips  = values[0]
    when 'internet'
      # NOTE: remove (optional) leading . e.g. .at becomes at
      cty.net = values[0].sub( /^\s*\./,'' ).strip
    when 'ioc'      then cty.ioc   = values[0]
    when 'motor'    then cty.motor = values[0]
    when 'iso'
      cty.alpha2 = values[1]
      cty.alpha3 = values[2]
      # NOTE: num is a string!!! use (rename to) num_str - why? why not?
      cty.num    = values[3]
    else
      logger.warn "warn: unknown country code type; skipping line; in [#{name}]"
      next
    end

    cty.save!
  end
end # method load_codes
216
+
217
+
218
+ ####
219
+ # helper methods
220
+ ## todo: also add City.search_by_name etc. !!!
221
+
222
+
223
+ end # class ReaderBase
224
+ end # module WorldDb
@@ -0,0 +1,86 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
## File-system backed reader: every fixture name is resolved to a file below
# the include path handed to the constructor and opened with the matching
# reader's from_file factory.
class Reader < ReaderBase

  ## include_path - directory prefix prepended to every fixture name
  ## opts         - passed on unchanged to ReaderBase
  def initialize( include_path, opts={} )
    super( opts )

    @include_path = include_path
  end


  ## Setup manifest (.txt) listing the fixtures to load.
  def create_fixture_reader( name )
    setup_file = "#{@include_path}/#{name}.txt"
    logger.info "parsing data (setup) '#{name}' (#{setup_file})..."

    FixtureReader.from_file( setup_file )
  end

  ## Language fixture (.yml).
  def create_lang_reader( name )
    lang_file = "#{@include_path}/#{name}.yml"   ## hash reader - use .yml??
    logger.info "parsing data (lang) '#{name}' (#{lang_file})..."

    LangReader.from_file( lang_file )
  end

  ## Language usage fixture (.yml).
  def create_usage_reader( name )
    usage_file = "#{@include_path}/#{name}.yml"   ## hash reader - use .yml??
    logger.info "parsing data (usage) '#{name}' (#{usage_file})..."

    UsageReader.from_file( usage_file )
  end


  ## Country fixture (.txt); more_attribs is handed on to the reader.
  def create_country_reader( name, more_attribs={} )
    country_file = "#{@include_path}/#{name}.txt"
    logger.info "parsing data (country) '#{name}' (#{country_file})..."

    CountryReader.from_file( country_file, more_attribs )
  end

  ## Region fixture (.txt); more_attribs is handed on to the reader.
  def create_region_reader( name, more_attribs={} )
    region_file = "#{@include_path}/#{name}.txt"
    logger.info "parsing data (region) '#{name}' (#{region_file})..."

    RegionReader.from_file( region_file, more_attribs )
  end

  ## City fixture (.txt); more_attribs is handed on to the reader.
  def create_city_reader( name, more_attribs={} )
    city_file = "#{@include_path}/#{name}.txt"
    logger.info "parsing data (city) '#{name}' (#{city_file})..."

    CityReader.from_file( city_file, more_attribs )
  end


  ## Generic key => value fixture (.yml).
  def create_hash_reader( name )
    hash_file = "#{@include_path}/#{name}.yml"
    logger.info "parsing data (hash) '#{name}' (#{hash_file})..."

    HashReader.from_file( hash_file )
  end

  ## Generic values fixture (.txt); more_attribs is handed on to the reader.
  def create_values_reader( name, more_attribs={} )
    values_file = "#{@include_path}/#{name}.txt"
    logger.info "parsing data (values) '#{name}' (#{values_file})..."

    ValuesReader.from_file( values_file, more_attribs )
  end

  ## Generic line-by-line fixture (.txt).
  def create_line_reader( name )
    line_file = "#{@include_path}/#{name}.txt"
    logger.info "parsing data (line) '#{name}' (#{line_file})..."

    LineReader.from_file( line_file )
  end

  # def create_tag_reader( name )
  #   ## fix: change to new from_file() style
  #   TagDb::TagReader.new( @include_path )
  # end


end # class Reader
86
+ end # module WorldDb
@@ -0,0 +1,160 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class ZipReader < ReaderBase
6
+
7
+
8
## Opens the archive "#{include_path}/#{name}.zip" and keeps the handle for
# the create_* reader factories; +opts+ is passed on unchanged to ReaderBase.
def initialize( name, include_path, opts = {} )
  super( opts )

  ## todo/fix: make include_path an opts (included in opts?) - why? why not??
  ## todo: check if zip exists
  ## NOTE: do NOT create if file is missing; let it crash
  @zip_file = Zip::File.open( "#{include_path}/#{name}.zip" )

  ### allow prefix (path) in name
  ###  e.g. assume all files relative to setup manifest
  ##   e.g. at-austria-master/setups/all.txt or
  ##        be-belgium-master/setups/all.txt
  ##   for
  ##        setups/all.txt
  ###
  ##  will get (re)set w/ fixture/setup reader
  ##
  ## todo/fix: change/rename to @relative_path ?? - why? why not?
  @zip_prefix = ''
end
29
+
30
## Closes the zip archive opened in the constructor.
## todo/check: add a close method - why? why not ???
def close
  archive = @zip_file
  archive.close
end
34
+
35
+
36
+
37
## Locate the setup manifest +name+ (given w/o .txt extension, e.g. setups/all
# or setups/test) anywhere inside the zip archive and return a FixtureReader
# for it.
#
# Side effect: (re)sets @zip_prefix to the part of the matched entry path in
# front of "#{name}.txt" (e.g. at-austria-master/ - includes the trailing
# slash if present); name_to_zip_entry_path prepends it to all other entries.
#
# Raises Errno::ENOENT if no entry matches. If more than one entry matches,
# the first is used and a warning is logged.
def create_fixture_reader( name )
  query = "**/#{name}.txt"

  ## note: returns an array of Zip::Entry
  candidates = @zip_file.glob( query )
  logger.debug " zip entry candidates >>#{candidates.map { |entry| entry.name }.inspect}<<"

  if candidates.empty?
    raise Errno::ENOENT, "no zip entry matching >>#{query}<< found"
  end

  if candidates.size > 1
    logger.warn "#{candidates.size} zip entries match >>#{query}<<; using first >>#{candidates[0].name}<<"
  end

  ## get fullpath e.g. at-austria-master/setups/all.txt
  path = candidates[0].name
  logger.debug " zip entry path >>#{path}<<"

  ## cut-off at-austria-master/   NOTE: includes trailing slash (if present)
  ## note: add +4 for extension (.txt)
  @zip_prefix = path[ 0...(path.size-(name.size+4)) ]
  logger.debug " zip entry prefix >>#{@zip_prefix}<<"

  logger.info "parsing data (setup) in zip '#{name}' (#{path})..."

  FixtureReader.from_zip( @zip_file, path )
end
64
+
65
+
66
## LangReader for the .yml entry belonging to fixture +name+ inside the zip archive.
def create_lang_reader( name )
  entry_path = name_to_zip_entry_path( name, '.yml' )   ## hash reader - use .yml??
  logger.info "parsing data (lang) in zip '#{name}' (#{entry_path})..."

  LangReader.from_zip( @zip_file, entry_path )
end
72
+
73
## UsageReader for the .yml entry belonging to fixture +name+ inside the zip archive.
def create_usage_reader( name )
  entry_path = name_to_zip_entry_path( name, '.yml' )   ## hash reader - use .yml??
  logger.info "parsing data (usage) in zip '#{name}' (#{entry_path})..."

  UsageReader.from_zip( @zip_file, entry_path )
end
79
+
80
+
81
## CountryReader for the (default .txt) entry of fixture +name+ inside the zip
# archive; more_attribs is handed on to the reader.
def create_country_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info "parsing data (country) in zip '#{name}' (#{entry_path})..."

  CountryReader.from_zip( @zip_file, entry_path, more_attribs )
end
87
+
88
## RegionReader for the (default .txt) entry of fixture +name+ inside the zip
# archive; more_attribs is handed on to the reader.
def create_region_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info "parsing data (region) in zip '#{name}' (#{entry_path})..."

  RegionReader.from_zip( @zip_file, entry_path, more_attribs )
end
94
+
95
## CityReader for the (default .txt) entry of fixture +name+ inside the zip
# archive; more_attribs is handed on to the reader.
def create_city_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info "parsing data (city) in zip '#{name}' (#{entry_path})..."

  CityReader.from_zip( @zip_file, entry_path, more_attribs )
end
101
+
102
+
103
## HashReader for the .yml entry belonging to fixture +name+ inside the zip archive.
def create_hash_reader( name )
  entry_path = name_to_zip_entry_path( name, '.yml' )   ## hash reader - use .yml??
  logger.info "parsing data (hash) in zip '#{name}' (#{entry_path})..."

  HashReader.from_zip( @zip_file, entry_path )
end
109
+
110
## ValuesReader for the (default .txt) entry of fixture +name+ inside the zip
# archive; more_attribs is handed on to the reader.
def create_values_reader( name, more_attribs={} )
  entry_path = name_to_zip_entry_path( name )
  logger.info "parsing data (values) in zip '#{name}' (#{entry_path})..."

  ValuesReader.from_zip( @zip_file, entry_path, more_attribs )
end
116
+
117
## LineReader for the (default .txt) entry of fixture +name+ inside the zip archive.
def create_line_reader( name )
  entry_path = name_to_zip_entry_path( name )
  logger.info "parsing data (line) in zip '#{name}' (#{entry_path})..."

  LineReader.from_zip( @zip_file, entry_path )
end
123
+
124
+ # def create_tag_reader( name )
125
+ # ## fix: change to new from_file() style
126
+ # TagDb::TagReader.new( @include_path )
127
+ # end
128
+
129
+ private
130
+
131
## Map a (possibly virtual) path to the real path.
#
# A path may carry a virtual prefix terminated by !/ ; everything up to and
# including the first !/ gets cut off, e.g.
#   at-austria!/w-wien/beers  becomes  w-wien/beers
# A path without !/ is returned as is.
def path_to_real_path( path )
  _virtual, marker, real = path.partition( '!/' )

  # empty marker => no !/ found; real path is the same as path
  marker.empty? ? path : real
end
145
+
146
## Build the zip entry path for fixture +name+: append +extension+ (.txt by
# default), reduce the result to its real path via path_to_real_path and put
# the current zip entry prefix in front.
def name_to_zip_entry_path( name, extension='.txt' )
  real_path = path_to_real_path( "#{name}#{extension}" )

  # NOTE: add possible zip entry prefix path
  #       (if present includes trailing slash e.g. /)
  "#{@zip_prefix}#{real_path}"
end
156
+
157
+
158
+
159
+ end # class ZipReader
160
+ end # module WorldDb
@@ -0,0 +1,81 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
## Reads a city values fixture (given as text) and creates or updates one
# City record per line.
#
# Build instances through the from_zip / from_file / from_string factories.
# The optional trailing +opts+ hash understands :skip_tags and :strict (same
# meaning as in ReaderBase); both default to false when left out.
class CityReader

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use City instead of Models::City
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  ## Build a reader from the entry +entry_path+ of an opened zip archive.
  # Raises Errno::ENOENT if the archive has no such entry.
  def self.from_zip( zip_file, entry_path, more_attribs={}, opts={} )
    ## get text content from zip
    entry = zip_file.find_entry( entry_path )
    raise Errno::ENOENT, "zip entry >#{entry_path}< not found" if entry.nil?

    ## todo/fix: clean/preprocess lines e.g. CR/LF (\r\n) to LF (\n)
    ## block form: the input stream gets closed again once the text is read
    text = entry.get_input_stream { |stream| stream.read() }

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    #  for now "hardcoded" to utf8 - what else can we do?
    #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    from_string( text, more_attribs, opts )
  end


  ## Build a reader from the file at +path+.
  def self.from_file( path, more_attribs={}, opts={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    from_string( text, more_attribs, opts )
  end

  ## Build a reader from fixture text that is already in memory.
  def self.from_string( text, more_attribs={}, opts={} )
    new( text, more_attribs, opts )
  end


  def skip_tags?() @skip_tags == true; end
  def strict?()    @strict == true;    end

  ## text         - fixture content
  ## more_attribs - extra attributes handed to the values reader
  ## opts         - :skip_tags / :strict flags (normalized to true/false)
  def initialize( text, more_attribs={}, opts={} )
    @text         = text
    @more_attribs = more_attribs

    @skip_tags = opts[:skip_tags].present? ? true : false
    @strict    = opts[:strict].present?    ? true : false
  end


  ## Parse the text and create/update a City for every line.
  def read()
    reader = ValuesReader.from_string( @text, @more_attribs )

    reader.each_line do |attribs, values|
      opts = { skip_tags: skip_tags? }
      City.create_or_update_from_attribs( attribs, values, opts )
    end
  end

end # class CityReader
80
+ end # module WorldDb
81
+
@@ -0,0 +1,78 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
## Reads a country values fixture (given as text) and creates or updates one
# Country record per line.
#
# Build instances through the from_zip / from_file / from_string factories.
# The optional trailing +opts+ hash understands :skip_tags and :strict (same
# meaning as in ReaderBase); both default to false when left out.
class CountryReader

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Country instead of Models::Country
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  ## Build a reader from the entry +entry_path+ of an opened zip archive.
  # Raises Errno::ENOENT if the archive has no such entry.
  def self.from_zip( zip_file, entry_path, more_attribs={}, opts={} )
    ## get text content from zip
    entry = zip_file.find_entry( entry_path )
    raise Errno::ENOENT, "zip entry >#{entry_path}< not found" if entry.nil?

    ## todo/fix: clean/preprocess lines e.g. CR/LF (\r\n) to LF (\n)
    ## block form: the input stream gets closed again once the text is read
    text = entry.get_input_stream { |stream| stream.read() }

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    #  for now "hardcoded" to utf8 - what else can we do?
    #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    from_string( text, more_attribs, opts )
  end

  ## Build a reader from the file at +path+.
  def self.from_file( path, more_attribs={}, opts={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    from_string( text, more_attribs, opts )
  end

  ## Build a reader from fixture text that is already in memory.
  def self.from_string( text, more_attribs={}, opts={} )
    new( text, more_attribs, opts )
  end


  def skip_tags?() @skip_tags == true; end
  def strict?()    @strict == true;    end

  ## text         - fixture content
  ## more_attribs - extra attributes handed to the values reader
  ## opts         - :skip_tags / :strict flags (normalized to true/false)
  def initialize( text, more_attribs={}, opts={} )
    @text         = text
    @more_attribs = more_attribs

    @skip_tags = opts[:skip_tags].present? ? true : false
    @strict    = opts[:strict].present?    ? true : false
  end

  ## Parse the text and create/update a Country for every line.
  def read()
    reader = ValuesReader.from_string( @text, @more_attribs )

    reader.each_line do |attribs, values|
      opts = { skip_tags: skip_tags? }
      Country.create_or_update_from_attribs( attribs, values, opts )
    end
  end

end # class CountryReader
78
+ end # module WorldDb