worlddb-models 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gemtest +0 -0
  3. data/HISTORY.md +4 -0
  4. data/Manifest.txt +43 -0
  5. data/README.md +85 -0
  6. data/Rakefile +44 -0
  7. data/lib/worlddb/deleter.rb +32 -0
  8. data/lib/worlddb/matcher.rb +143 -0
  9. data/lib/worlddb/models/city.rb +240 -0
  10. data/lib/worlddb/models/city_comp.rb +27 -0
  11. data/lib/worlddb/models/continent.rb +41 -0
  12. data/lib/worlddb/models/continent_comp.rb +24 -0
  13. data/lib/worlddb/models/country.rb +328 -0
  14. data/lib/worlddb/models/country_code.rb +41 -0
  15. data/lib/worlddb/models/country_comp.rb +35 -0
  16. data/lib/worlddb/models/forward.rb +57 -0
  17. data/lib/worlddb/models/lang.rb +18 -0
  18. data/lib/worlddb/models/lang_comp.rb +23 -0
  19. data/lib/worlddb/models/name.rb +13 -0
  20. data/lib/worlddb/models/place.rb +16 -0
  21. data/lib/worlddb/models/region.rb +176 -0
  22. data/lib/worlddb/models/region_comp.rb +26 -0
  23. data/lib/worlddb/models/tagdb/tag.rb +16 -0
  24. data/lib/worlddb/models/tagdb/tagging.rb +15 -0
  25. data/lib/worlddb/models/usage.rb +17 -0
  26. data/lib/worlddb/models.rb +200 -0
  27. data/lib/worlddb/patterns.rb +54 -0
  28. data/lib/worlddb/reader.rb +224 -0
  29. data/lib/worlddb/reader_file.rb +86 -0
  30. data/lib/worlddb/reader_zip.rb +160 -0
  31. data/lib/worlddb/readers/city.rb +81 -0
  32. data/lib/worlddb/readers/country.rb +78 -0
  33. data/lib/worlddb/readers/lang.rb +107 -0
  34. data/lib/worlddb/readers/region.rb +79 -0
  35. data/lib/worlddb/readers/usage.rb +98 -0
  36. data/lib/worlddb/schema.rb +202 -0
  37. data/lib/worlddb/stats.rb +31 -0
  38. data/lib/worlddb/version.rb +23 -0
  39. data/test/helper.rb +26 -0
  40. data/test/test_fixture_matchers.rb +112 -0
  41. data/test/test_model_city.rb +60 -0
  42. data/test/test_model_comp.rb +48 -0
  43. data/test/test_model_country.rb +53 -0
  44. data/test/test_model_region.rb +50 -0
  45. data/test/test_models.rb +35 -0
  46. metadata +252 -0
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ #############################################################
7
+ # collect deprecated methods (or methods marked for future removal) here
8
+ # - keep for now for compatibility (for old code)
9
+
10
class Continent
  # Backward-compatibility shims: older code talks about "title" and
  # "synonyms"; both simply forward to name and alt_names.

  # Legacy reader for name.
  def title
    name
  end

  # Legacy writer for name.
  def title=(value)
    self.name = value
  end

  # Legacy scope name; sorts on the name column (a-z).
  scope :by_title, -> { order('name asc') }

  # Legacy reader for alt_names.
  def synonyms
    alt_names
  end

  # Legacy writer for alt_names.
  def synonyms=(value)
    self.alt_names = value
  end
end # class Continent
21
+
22
+
23
+ end # module Model
24
+ end # module WorldDb
@@ -0,0 +1,328 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ ########
7
+ # Country / Supra (e.g. European Union) / Territory (e.g. Puerto Rico) or Dependency (e.g. Dependent territory)
8
+
9
+ class Country < ActiveRecord::Base
10
+
11
# --- class-level helpers ---------------------------------------------------
# NB: use extend (not include) - the helpers become class methods, e.g.
#     self.is_<type>?, for use in self.create_or_update_from_values
extend TextUtils::TagHelper    # will add self.find_tags, self.find_tags_in_attribs!, etc.
extend TextUtils::ValueHelper  # e.g. self.is_year?, self.is_region?, self.is_address?, is_taglist? etc.

self.table_name = 'countries'

# --- associations ----------------------------------------------------------
belongs_to :place,     class_name: 'Place',     foreign_key: 'place_id'
belongs_to :continent, class_name: 'Continent', foreign_key: 'continent_id'

has_many :usages
has_many :langs, :through => :usages   # lang(uage)s through usages (that is, countries_langs) join table

has_many :regions, class_name: 'Region', foreign_key: 'country_id'
has_many :cities,  class_name: 'City',   foreign_key: 'country_id'

## self-referencing hierarchy within countries e.g. EU > GB > EN
##  (both sides use the country_id column of the countries table)
belongs_to :parent,    class_name: 'Country', foreign_key: 'country_id'
has_many   :countries, class_name: 'Country', foreign_key: 'country_id'

has_many_tags

# --- validations -----------------------------------------------------------
validates :key,  format: { with: /#{COUNTRY_KEY_PATTERN}/,  message: COUNTRY_KEY_PATTERN_MESSAGE }
validates :code, format: { with: /#{COUNTRY_CODE_PATTERN}/, message: COUNTRY_CODE_PATTERN_MESSAGE }

# --- sort scopes -----------------------------------------------------------
scope :by_key,    ->{ order( 'key asc' ) }    # order by key (a-z)
scope :by_name,   ->{ order( 'name asc' ) }   # order by name (a-z)
scope :by_code,   ->{ order( 'code asc' ) }   # order by code (a-z)
scope :by_pop,    ->{ order( 'pop desc' ) }   # order by pop(ulation)
scope :by_area,   ->{ order( 'area desc') }   # order by area (in square km)

scope :by_num,    ->{ order( 'num asc' ) }    # order by numeric country code
scope :by_alpha2, ->{ order( 'alpha2 asc' ) }
scope :by_alpha3, ->{ order( 'alpha3 asc' ) } # fixed: used to sort on alpha2 (copy-paste slip)
scope :by_fifa,   ->{ order( 'fifa asc' ) }   # football
scope :by_ioc,    ->{ order( 'ioc asc' ) }    # olympics
scope :by_motor,  ->{ order( 'motor asc' ) }  # designated signs; motor vehicle license plate
scope :by_net,    ->{ order( 'net asc' ) }    # internet cc top level domain; ccTLD

# --- callbacks -------------------------------------------------------------
before_create :on_create
before_update :on_update
56
+
57
+
58
# before_create hook: creates the companion Place record, stores its id on
# this country and derives a slug from the name when none was given.
def on_create
  self.place_id = Place.create!( name: name, kind: place_kind ).id

  ## todo: change and to n (if en/english) ?? - why? why not?
  ## remove subtitles/subnames e.g. () -- why? why not?

  # an explicitly set slug is kept as is; otherwise slugify the name with
  # bracketed translations removed, e.g. México [Mexico] -> México etc.
  self.slug = TextUtils.slugify( name.gsub( /\[[^\]]+\]/, '' ) ) if slug.blank?
end
70
+
71
# before_update hook: pushes the current name and place kind to the linked
# Place record.
def on_update
  ## fix/todo: check - if name or kind changed - only update if changed ?? why? why not??
  place_attribs = { name: name, kind: place_kind }
  place.update_attributes!( place_attribs )

  ## check if name changed -- possible?
  ## update slug too??
end
78
+
79
# Maps the type flags to the four-letter kind code written to the Place
# record; the first matching flag wins (supra, then dependency, then misc).
def place_kind   # use place_kind_of_code ??
  return 'SUPR' if is_supra?
  return 'TERR' if is_dependency?

  ## misc(ellaneous) country or dependent territory
  #   todo: use different marker?
  #   territory w/ shared or disputes claims e.g Antartica/Western Sahara/Paracel Islands pg Spratly Islands/etc.
  return 'MISC' if is_misc?

  'CNTY'   # plain country (default)
end
92
+
93
+
94
+ ###
95
+ # NB: use is_ for flags to avoid conflict w/ assocs
96
+
97
# Type-flag predicates; each wraps the one-letter flag query (s?, c?, d?, m?)
# and always answers with a strict true/false.
# NB: the is_ prefix avoids name clashes w/ assocs.

def is_supra?
  s? == true
end

def is_country?
  c? == true
end

def is_dependency?
  d? == true
end

def is_misc?
  m? == true
end
101
+
102
+
103
# Returns the name followed by all alternative names in one string.
#
# opts - optional settings:
#          :sep (or :separator) - string placed between the names;
#                                 defaults to ' | '
#
# alt_names is split on '|'. When there are no alternative names the plain
# name is returned unchanged.
def all_names( opts={} )
  return name if alt_names.blank?

  sep = opts[:sep] || opts[:separator] || ' | '

  "#{name}#{sep}#{alt_names.split('|').join( sep )}"
end
115
+
116
+
117
# Builds the relative path <continent slug>/<key>-<slug>,
# e.g. europe/at-austria. opts is accepted but not used.
def to_path( opts={} )
  continent_part = continent.slug
  country_part   = "#{key}-#{slug}"

  "#{continent_part}/#{country_part}"
end
121
+
122
+
123
# Looks up a country by (partial) name, trying increasingly fuzzy matches.
#
# q - the name to search for; surrounding whitespace is ignored
#
# Lookup order:
#   1) exact match on name
#   2) exact match after removing (..) parts and collapsing extra spaces
#   3) SQL LIKE match on name
#   4) SQL LIKE match on alt_names
#   5) SQL LIKE match on hist_names (e.g. Burma for Myanmar etc.)
#
# Returns the first country found; nil if nothing matches or the query is
# blank (a blank query would otherwise turn into LIKE '%%' and return an
# arbitrary first row).
def self.search_by_name( q )   ## todo/check: just use search (rename)? why? why not?

  ## fix: add/configure logger for ActiveRecord!!!
  ## logger = LogKernel::Logger.root

  name = q.to_s.strip
  return nil if name.empty?

  ## 1) first try 1:1 (exact) match
  cty = Country.find_by_name( name )
  return cty unless cty.nil?

  ## 2) retry: a) remove all (..) enclosed
  ##           b) remove all extra spaces (e.g. Cocos (Keeling) Islands => Cocos__Islands => Cocos_Islands)
  name = name.gsub( /\([^)]+\)/, '' ).strip.gsub( /[ \t]{2,}/, ' ' )
  return nil if name.empty?   # query was nothing but a (..) part

  cty = Country.find_by_name( name )
  return cty unless cty.nil?

  ## 3) - 5) retry: use SQL like match on name, alternative names, historic names
  ##   % is used to match *zero* or more occurrences of any characters
  ##   NOTE: the value is passed as a bind parameter, so quotes need no manual
  ##     escaping; LIKE wildcards (% and _) inside the query are not escaped
  ##   todo/check: make historic names optional (pass in opts hash to configure) - why? why not???
  pattern = "%#{name}%"

  %w[name alt_names hist_names].each do |column|   # fixed list - safe to interpolate
    cty = Country.where( "#{column} LIKE ?", pattern ).first
    return cty unless cty.nil?
  end

  nil   # not found
end
165
+
166
+
167
# Splits key & title off the front of values (via find_key_n_title), merges
# in more_attribs and hands everything to create_or_update_from_attribs.
#
# values       - value list of one record
# more_attribs - extra attributes; win over the ones found in values
#
# Returns whatever create_or_update_from_attribs returns.
def self.create_or_update_from_values( values, more_attribs={} )
  ## NB: three-letter code (.e.g AUT) required - enforce in values? why? why not?
  base_attribs, remaining_values = find_key_n_title( values )

  Country.create_or_update_from_attribs( base_attribs.merge( more_attribs ), remaining_values )
end
176
+
177
+
178
# Creates the country with key new_attributes[:key], or updates it if it
# already exists, then auto-adds capital cities and taggings.
#
# new_attributes - attribute hash; NB: changed in place (a :tags entry is
#                  taken out by find_tags_in_attribs!, type flags, code,
#                  area, pop etc. get added)
# values         - optional values; each one is checked in this order:
#                  supra flag, supra: ref, country: ref, km² number, number,
#                  three-letter code, tag list (last entry only);
#                  anything else is taken as a (capital) city name
# opts           - :skip_tags true|false - if set, no taggings get added
#
# The first number found becomes area, the second pop.
#
# Returns the saved record; raises if saving fails (update_attributes!).
def self.create_or_update_from_attribs( new_attributes, values, opts={} )

  ## fix: add/configure logger for ActiveRecord!!!
  logger = LogKernel::Logger.root

  value_numbers  = []
  value_tag_keys = []
  value_cities   = []

  ### check for "default" tags - that is, if present new_attributes[:tags] remove from hash
  value_tag_keys += find_tags_in_attribs!( new_attributes )

  new_attributes[ :c ] = true   # assume country type by default (use supra,depend to change)

  ## check for optional values
  values.each_with_index do |value,index|
    if match_supra_flag( value ) do |_|  # supra(national)
         new_attributes[ :c ] = false   # turn off default c|country flag; make it s|supra only
         new_attributes[ :s ] = true
         ## auto-add tag supra
         value_tag_keys << 'supra'
       end
    elsif match_supra( value ) do |country|  # supra:
            new_attributes[ :country_id ] = country.id
          end
    elsif match_country( value ) do |country|  # country:
            new_attributes[ :country_id ] = country.id
            new_attributes[ :c ] = false   # turn off default c|country flag; make it d|depend only
            new_attributes[ :d ] = true
            ## auto-add tag territory
            value_tag_keys << 'territory'   # rename tag to dependency? why? why not?
          end
    elsif match_km_squared( value ) do |num|  # allow numbers like 453 km²
            value_numbers << num
          end
    elsif match_number( value ) do |num|  # numeric (nb: can use any _ or spaces inside digits e.g. 1_000_000 or 1 000 000)
            value_numbers << num
          end
    elsif value =~ /#{COUNTRY_CODE_PATTERN}/  ## three letter code
      new_attributes[ :code ] = value
    elsif (values.size==(index+1)) && is_taglist?( value )   # tags must be last entry
      logger.debug " found tags: >>#{value}<<"
      value_tag_keys += find_tags( value )
    else
      ### assume it is the capital city - mark it for auto add
      value_cities << value

      # issue warning: unknown type for value
      # logger.warn "unknown type for value >#{value}<"
    end
  end # each value

  if value_numbers.size > 0
    new_attributes[ :area ] = value_numbers[0]
    new_attributes[ :pop  ] = value_numbers[1]   # NB: nil if only one number was given
  end

  ## NB: auto-add of area/pop bracket tags is switched off; kept for reference:
  #
  #   # auto-add tags
  #   area = value_numbers[0]
  #   pop  = value_numbers[1]
  #
  #   # categorize into brackets
  #   if area >= 1_000_000
  #     value_tag_keys << 'area_1_000_000_n_up'
  #   elsif area >= 100_000
  #     value_tag_keys << 'area_100_000_to_1_000_000'
  #   elsif area >= 1000
  #     value_tag_keys << 'area_1_000_to_100_000'
  #   else
  #     value_tag_keys << 'area_1_000_n_less' # microstate
  #   end
  #
  #   # include all
  #   value_tag_keys << 'area_100_000_n_up' if area >= 100_000
  #   value_tag_keys << 'area_1_000_n_up' if area >= 1_000
  #
  #   # categorize into brackets
  #   if pop >= 100_000_000
  #     value_tag_keys << 'pop_100m_n_up'
  #   elsif pop >= 10_000_000
  #     value_tag_keys << 'pop_10m_to_100m'
  #   elsif pop >= 1_000_000
  #     value_tag_keys << 'pop_1m_to_10m'
  #   else
  #     value_tag_keys << 'pop_1m_n_less'
  #   end
  #
  #   # include all
  #   value_tag_keys << 'pop_10m_n_up' if pop >= 10_000_000
  #   value_tag_keys << 'pop_1m_n_up' if pop >= 1_000_000

  rec = Country.find_by_key( new_attributes[ :key ] )

  if rec.present?
    logger.debug "update Country #{rec.id}-#{rec.key}:"
  else
    logger.debug "create Country:"
    rec = Country.new
  end

  logger.debug new_attributes.to_json

  rec.update_attributes!( new_attributes )

  #################
  ## auto add capital cities

  City.create_or_update_from_titles( value_cities, country_id: rec.id )

  ##################
  ## add taggings

  if value_tag_keys.size > 0

    if opts[:skip_tags].present?
      logger.debug " skipping add taggings (flag skip_tag)"
    else
      value_tag_keys.uniq!   # remove duplicates
      logger.debug " adding #{value_tag_keys.size} taggings: >>#{value_tag_keys.join('|')}<<..."

      ### fix/todo: check tag_ids and only update diff (add/remove ids)

      value_tag_keys.each do |key|
        tag = Tag.find_by_key( key )
        if tag.nil?   # create tag if it doesn't exist
          logger.debug " creating tag >#{key}<"
          tag = Tag.create!( key: key )
        end
        # only attach tags not attached yet - otherwise every update run
        # of an existing record would add the same taggings again
        rec.tags << tag  unless rec.tags.include?( tag )
      end
    end
  end

  rec

end # method create_or_update_from_attribs
323
+
324
+
325
+ end # class Country
326
+
327
+ end # module Model
328
+ end # module WorldDb
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# One row per (country, code kind) pair - a lookup table rebuilt from the
# code columns of the countries table (see update!).
class CountryCode < ActiveRecord::Base

  self.table_name = 'country_codes'

  belongs_to :country, class_name: 'Country', foreign_key: 'country_id'

  scope :by_name, -> { order('name asc') }   # order by name (a-z)

  # Rebuilds all country codes from the countries in the db: deletes every
  # existing row, then adds one row per country and non-nil code column.
  def self.update!
    ## fix: add/configure logger for ActiveRecord!!!
    logger = LogKernel::Logger.root

    logger.debug( "delete all (old) country codes" )
    CountryCode.delete_all

    # [kind marker stored in the row, country attribute holding the code];
    #  rows get created in exactly this order
    code_sources = [
      ['NET',  :net],
      ['NUM',  :num],
      ['A2',   :alpha2],
      ['A3',   :alpha3],
      ['FIFA', :fifa],
      ['IOC',  :ioc],
      ['FIPS', :fips],
      ['M',    :motor]
    ]

    Country.order(:id).each do |cty|
      logger.debug( "add country #{cty.key} #{cty.name}" )

      code_sources.each do |kind, attrib|
        code = cty.public_send( attrib )
        next if code.nil?   # country has no code of this kind

        CountryCode.create!( country_id: cty.id, kind: kind, name: code )
      end
    end
  end

end # class CountryCode
39
+
40
+ end # module Model
41
+ end # module WorldDb
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ #############################################################
7
+ # collect deprecated methods (or methods marked for future removal) here
8
+ # - keep for now for compatibility (for old code)
9
+
10
+
11
class Country
  # Backward-compatibility shims: old attribute names forward to the
  # current ones (title -> name, iso2 -> alpha2, iso3 -> alpha3,
  # synonyms -> alt_names).

  def title
    name
  end

  def title=(value)
    self.name = value
  end

  # Legacy scope name; sorts on the name column (a-z).
  scope :by_title, -> { order('name asc') }

  def iso2
    alpha2
  end

  def iso2=(value)
    self.alpha2 = value
  end

  def iso3
    alpha3
  end

  def iso3=(value)
    self.alpha3 = value
  end

  def synonyms
    alt_names
  end

  def synonyms=(value)
    self.alt_names = value
  end

  # deprecated: use all_names instead
  def title_w_synonyms( opts={} )
    all_names( opts )
  end
end # class Country
32
+
33
+ end # module Model
34
+ end # module WorldDb
35
+
@@ -0,0 +1,57 @@
1
+
2
+ ### forward references
3
+ ## require first to resolve circular references
4
+
5
module WorldDb
  module Model

    # --- models borrowed from other gems, made available under WorldDb::Model ---

    # ConfDb
    Prop = ConfDb::Model::Prop

    # TagDb
    Tagging = TagDb::Model::Tagging
    Tag     = TagDb::Model::Tag

    # --- forward declarations ---
    # Empty class shells only; the real bodies get added when the model
    # files reopen these classes.

    class Name < ActiveRecord::Base
    end

    class Place < ActiveRecord::Base
    end

    class Continent < ActiveRecord::Base
    end

    class Country < ActiveRecord::Base
    end

    class Region < ActiveRecord::Base
    end

    class City < ActiveRecord::Base
    end

    class Lang < ActiveRecord::Base
    end

    class Usage < ActiveRecord::Base
    end

    class CountryCode < ActiveRecord::Base
    end

  end # module Model

  # note: convenience alias for Model
  #  lets you use include WorldDb::Models
  Models = Model
end # module WorldDb
36
+
37
+
38
module TagDb
  module Model

    # add alias? why? why not? # is there a better way?
    #  - just include WorldDb::Models - why? why not?

    # Make every WorldDb model reachable under TagDb::Model using the
    # same constant name.
    %w[
      Name Place Continent Country Region City
      Lang Usage
      CountryCode
    ].each do |model_name|
      const_set( model_name, WorldDb::Model.const_get( model_name ) )
    end

  end # module Model
end # module TagDb
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# A lang(uage); linked to the countries using it through usages.
class Lang < ActiveRecord::Base

  # usages is the join table for countries_langs
  has_many :usages
  has_many :countries, through: :usages

  validates :key, format: { with: /#{LANG_KEY_PATTERN}/, message: LANG_KEY_PATTERN_MESSAGE }

end # class Lang
15
+
16
+ end # module Model
17
+ end # module WorldDb
18
+
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ #############################################################
7
+ # collect deprecated methods (or methods marked for future removal) here
8
+ # - keep for now for compatibility (for old code)
9
+
10
+
11
class Lang
  #####################################################
  # alias for name (remove! add deprecated api call ???)

  # Legacy reader for name.
  def title
    name
  end

  # Legacy writer for name.
  def title=(value)
    self.name = value
  end

  # Legacy scope name; sorts on the name column (a-z).
  scope :by_title, -> { order('name asc') }
end # class Lang
21
+
22
+ end # module Model
23
+ end # module WorldDb
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# Plain ActiveRecord model; no associations, validations or methods of its
# own are declared here.
class Name < ActiveRecord::Base; end # class Name
10
+
11
+
12
+ end # module Model
13
+ end # module WorldDb
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# Plain ActiveRecord model; nothing beyond the defaults is declared yet.
class Place < ActiveRecord::Base
  ## todo: depending on type
  ##   has_one continent, country, region, city etc.
end # class Place
12
+
13
+
14
+ end # module Model
15
+ end # module WorldDb
16
+