worlddb-models 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gemtest +0 -0
  3. data/HISTORY.md +4 -0
  4. data/Manifest.txt +43 -0
  5. data/README.md +85 -0
  6. data/Rakefile +44 -0
  7. data/lib/worlddb/deleter.rb +32 -0
  8. data/lib/worlddb/matcher.rb +143 -0
  9. data/lib/worlddb/models/city.rb +240 -0
  10. data/lib/worlddb/models/city_comp.rb +27 -0
  11. data/lib/worlddb/models/continent.rb +41 -0
  12. data/lib/worlddb/models/continent_comp.rb +24 -0
  13. data/lib/worlddb/models/country.rb +328 -0
  14. data/lib/worlddb/models/country_code.rb +41 -0
  15. data/lib/worlddb/models/country_comp.rb +35 -0
  16. data/lib/worlddb/models/forward.rb +57 -0
  17. data/lib/worlddb/models/lang.rb +18 -0
  18. data/lib/worlddb/models/lang_comp.rb +23 -0
  19. data/lib/worlddb/models/name.rb +13 -0
  20. data/lib/worlddb/models/place.rb +16 -0
  21. data/lib/worlddb/models/region.rb +176 -0
  22. data/lib/worlddb/models/region_comp.rb +26 -0
  23. data/lib/worlddb/models/tagdb/tag.rb +16 -0
  24. data/lib/worlddb/models/tagdb/tagging.rb +15 -0
  25. data/lib/worlddb/models/usage.rb +17 -0
  26. data/lib/worlddb/models.rb +200 -0
  27. data/lib/worlddb/patterns.rb +54 -0
  28. data/lib/worlddb/reader.rb +224 -0
  29. data/lib/worlddb/reader_file.rb +86 -0
  30. data/lib/worlddb/reader_zip.rb +160 -0
  31. data/lib/worlddb/readers/city.rb +81 -0
  32. data/lib/worlddb/readers/country.rb +78 -0
  33. data/lib/worlddb/readers/lang.rb +107 -0
  34. data/lib/worlddb/readers/region.rb +79 -0
  35. data/lib/worlddb/readers/usage.rb +98 -0
  36. data/lib/worlddb/schema.rb +202 -0
  37. data/lib/worlddb/stats.rb +31 -0
  38. data/lib/worlddb/version.rb +23 -0
  39. data/test/helper.rb +26 -0
  40. data/test/test_fixture_matchers.rb +112 -0
  41. data/test/test_model_city.rb +60 -0
  42. data/test/test_model_comp.rb +48 -0
  43. data/test/test_model_country.rb +53 -0
  44. data/test/test_model_region.rb +50 -0
  45. data/test/test_models.rb +35 -0
  46. metadata +252 -0
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ #############################################################
7
+ # collect deprecated methods (or methods slated for future removal) here
8
+ # - keep for now for compatibility (for old code)
9
+
10
# Backward-compatibility shims for Continent: the legacy +title+ and
# +synonyms+ accessors simply forward to +name+ and +alt_names+.
class Continent

  # Legacy reader; returns +name+.
  def title
    name
  end

  # Legacy writer; assigns +name+.
  def title=( value )
    self.name = value
  end

  # Legacy scope name; orders by the name column (a-z).
  scope :by_title, -> { order( 'name asc' ) }

  # Legacy reader; returns +alt_names+.
  def synonyms
    alt_names
  end

  # Legacy writer; assigns +alt_names+.
  def synonyms=( value )
    self.alt_names = value
  end

end # class Continent
21
+
22
+
23
+ end # module Model
24
+ end # module WorldDb
@@ -0,0 +1,328 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ ########
7
+ # Country / Supra (e.g. European Union) / Territory (e.g. Puerto Rico) or Dependency (e.g. Dependent territory)
8
+
9
+ class Country < ActiveRecord::Base
10
+
11
# NB: the helpers are mixed in with extend (not include), so their methods
#  (e.g. is_<type>?) become class methods usable from
#  self.create_or_update_from_attribs.
extend TextUtils::TagHelper     # will add self.find_tags, self.find_tags_in_attribs!, etc.
extend TextUtils::ValueHelper   # e.g. self.is_year?, self.is_region?, self.is_address?, is_taglist? etc.

self.table_name = 'countries'

belongs_to :place,     foreign_key: 'place_id',     class_name: 'Place'
belongs_to :continent, foreign_key: 'continent_id', class_name: 'Continent'

has_many :usages
has_many :langs, through: :usages   # lang(uage)s through usages (that is, countries_langs) join table

has_many :regions, foreign_key: 'country_id', class_name: 'Region'
has_many :cities,  foreign_key: 'country_id', class_name: 'City'

## self-referencing hierarchy within countries e.g. EU > GB > EN
belongs_to :parent,    foreign_key: 'country_id', class_name: 'Country'
has_many   :countries, foreign_key: 'country_id', class_name: 'Country'

has_many_tags

validates :key,  format: { with: /#{COUNTRY_KEY_PATTERN}/,  message: COUNTRY_KEY_PATTERN_MESSAGE  }
validates :code, format: { with: /#{COUNTRY_CODE_PATTERN}/, message: COUNTRY_CODE_PATTERN_MESSAGE }
38
+
39
# Ordering scopes; each one sorts on the single column named in its SQL fragment.
scope :by_key,    -> { order( 'key asc' ) }      # order by key (a-z)
scope :by_name,   -> { order( 'name asc' ) }     # order by name (a-z)
scope :by_code,   -> { order( 'code asc' ) }     # order by code (a-z)
scope :by_pop,    -> { order( 'pop desc' ) }     # order by pop(ulation)
scope :by_area,   -> { order( 'area desc' ) }    # order by area (in square km)

scope :by_num,    -> { order( 'num asc' ) }      # order by numeric country code
scope :by_alpha2, -> { order( 'alpha2 asc' ) }
scope :by_alpha3, -> { order( 'alpha3 asc' ) }   # fix: used to sort on alpha2 (copy-and-paste slip)
scope :by_fifa,   -> { order( 'fifa asc' ) }     # football
scope :by_ioc,    -> { order( 'ioc asc' ) }      # olympics
scope :by_motor,  -> { order( 'motor asc' ) }    # designated signs; motor vehicle license plate
scope :by_net,    -> { order( 'net asc' ) }      # internet cc top level domain; ccTLD


# Lifecycle hooks; see on_create / on_update.
before_create :on_create
before_update :on_update
56
+
57
+
58
# before_create hook: creates the companion Place record (same name, kind
# taken from place_kind), links it via place_id and fills in a slug when
# none has been set.
def on_create
  self.place_id = Place.create!( name: name, kind: place_kind ).id

  return unless slug.blank?

  ## todo: change and to n (if en/english) ?? - why? why not?
  ## remove subtitles/subnames e.g. () -- why? why not?

  ## drop translations in [] before slugifying e.g. México [Mexico] -> México etc.
  name_without_translations = name.gsub( /\[[^\]]+\]/, '' )
  self.slug = TextUtils.slugify( name_without_translations )
end
70
+
71
# before_update hook: pushes the current name and place kind to the
# associated Place record.
def on_update
  ## fix/todo: check - if name or kind changed - only update if changed ?? why? why not??
  place_attribs = { name: name, kind: place_kind }
  place.update_attributes!( place_attribs )

  ## check if name changed -- possible?
  ## update slug too??
end
78
+
79
# Returns the four-letter place kind for this record, derived from its
# type flags; the first flag that is set wins, 'CNTY' is the fallback.
def place_kind   # use place_kind_of_code ??
  return 'SUPR' if is_supra?
  return 'TERR' if is_dependency?

  ## misc(ellaneous) country or dependent territory
  # todo: use different marker?
  #  territory w/ shared or disputes claims e.g Antartica/Western Sahara/Paracel Islands pg Spratly Islands/etc.
  return 'MISC' if is_misc?

  'CNTY'
end
92
+
93
+
94
+ ###
95
+ # NB: use is_ for flags to avoid conflict w/ assocs
96
+
97
# Type-flag predicates. NB: the is_ prefix avoids clashes w/ associations.
# Each one compares the matching one-letter flag query (s?, c?, d?, m?)
# against true.

def is_supra?
  s? == true
end

def is_country?
  c? == true
end

def is_dependency?
  d? == true
end

def is_misc?
  m? == true
end
101
+
102
+
103
# Returns the name followed by all alternative names in one string.
#
# alt_names is expected to hold the alternatives separated by '|'; the
# parts are re-joined with the separator. When alt_names is blank, name
# itself is returned.
#
# opts:
#   :sep (or :separator) - string placed between the names; defaults to ' | '
def all_names( opts={} )
  return name if alt_names.blank?

  sep = opts[:sep] || opts[:separator] || ' | '

  buf = String.new( name )   # work on a copy; never append to the attribute itself
  buf << sep
  buf << alt_names.split( '|' ).join( sep )
  buf
end
115
+
116
+
117
# Builds the path "<continent slug>/<key>-<slug>" e.g. europe/at-austria.
# NB: opts is accepted but not used.
def to_path( opts={} )
  country_part = "#{key}-#{slug}"
  "#{continent.slug}/#{country_part}"
end
121
+
122
+
123
# Looks up a country by name, trying progressively looser matches:
#
#  1) exact match on name
#  2) exact match on name after removing all (..) enclosed parts and
#     collapsing extra spaces (e.g. Cocos (Keeling) Islands => Cocos Islands)
#  3) SQL LIKE (substring) match on name
#  4) SQL LIKE (substring) match on alt_names
#  5) SQL LIKE (substring) match on hist_names (e.g. Burma for Myanmar etc.)
#
# Returns the first Country found; nil if nothing matches or if the
# query is nil/blank (also when blank once the (..) parts are removed).
def self.search_by_name( q )   ## todo/check: just use search (rename)? why? why not?

  ## fix: add/configure logger for ActiveRecord!!!
  ## logger = LogKernel::Logger.root

  name = q.to_s.strip
  ## an empty name would end up as LIKE '%%' below and match any record
  return nil if name.empty?

  ## 1) first try 1:1 (exact) match
  cty = Country.find_by_name( name )
  return cty if cty

  ## 2) retry: a) remove all (..) enclosed
  ##           b) remove all extra spaces
  name = name.gsub( /\([^)]+\)/, '' ).strip.gsub( /[ \t]{2,}/, ' ' )
  return nil if name.empty?

  cty = Country.find_by_name( name )
  return cty if cty

  ## 3-5) retry: use SQL like match; % matches zero or more characters
  ##  NB: the pattern is passed as a bind value - quoting is left to
  ##      ActiveRecord instead of doubling single quotes by hand.
  ##  todo/check: % and _ inside the name still act as wildcards
  ##  todo/check: make hist_names optional (pass in opts hash to configure) - why? why not???
  pattern = "%#{name}%"
  %w[name alt_names hist_names].each do |column|
    cty = Country.where( "#{column} LIKE ?", pattern ).first
    return cty if cty
  end

  nil   # not found
end
165
+
166
+
167
# Splits values into key/title attributes plus the remaining values (via
# find_key_n_title), merges in more_attribs and hands everything over to
# create_or_update_from_attribs.
def self.create_or_update_from_values( values, more_attribs={} )
  ## key & title
  ## NB: three-letter code (.e.g AUT) required - enforce in values? why? why not?
  key_n_title_attribs, remaining_values = find_key_n_title( values )

  Country.create_or_update_from_attribs( key_n_title_attribs.merge( more_attribs ),
                                         remaining_values )
end
176
+
177
+
178
# Creates the country with the key new_attributes[:key] or updates it if
# it already exists.
#
# The optional values are scanned one by one; the first matcher that
# accepts a value decides what it means (supra flag, supra/country
# reference, number, three-letter code, tag list). Anything left over
# is collected as a city title and passed to
# City.create_or_update_from_titles.
#
# NB: new_attributes gets modified in place.
#
# opts e.g. :skip_tags true|false
#
# Returns the created/updated record.
def self.create_or_update_from_attribs( new_attributes, values, opts={} )

  ## fix: add/configure logger for ActiveRecord!!!
  logger = LogKernel::Logger.root

  numbers  = []
  tag_keys = []
  capitals = []

  ### check for "default" tags - that is, if present new_attributes[:tags] remove from hash
  tag_keys += find_tags_in_attribs!( new_attributes )

  new_attributes[ :c ] = true   # assume country type by default (use supra,depend to change)

  ## check for optional values
  values.each_with_index do |value,index|
    if match_supra_flag( value ) do |_|   # supra(national)
         new_attributes[ :c ] = false   # turn off default c|country flag; make it s|supra only
         new_attributes[ :s ] = true
         ## auto-add tag supra
         tag_keys << 'supra'
       end
    elsif match_supra( value ) do |country|   # supra:
            new_attributes[ :country_id ] = country.id
          end
    elsif match_country( value ) do |country|   # country:
            new_attributes[ :country_id ] = country.id
            new_attributes[ :c ] = false   # turn off default c|country flag; make it d|depend only
            new_attributes[ :d ] = true
            ## auto-add tag territory
            tag_keys << 'territory'   # rename tag to dependency? why? why not?
          end
    elsif match_km_squared( value ) do |num|   # allow numbers like 453 km²
            numbers << num
          end
    elsif match_number( value ) do |num|   # numeric (nb: can use any _ or spaces inside digits e.g. 1_000_000 or 1 000 000)
            numbers << num
          end
    elsif value =~ /#{COUNTRY_CODE_PATTERN}/   ## three letter code
      new_attributes[ :code ] = value
    elsif index == values.size - 1 && is_taglist?( value )   # tags must be last entry
      logger.debug " found tags: >>#{value}<<"
      tag_keys += find_tags( value )
    else
      ### assume it is the capital city - mark it for auto add
      capitals << value

      # issue warning: unknown type for value
      # logger.warn "unknown type for value >#{value}<"
    end
  end # each value

  ## first number is the area, second the pop(ulation)
  ## NOTE(review): with a single number :pop gets assigned nil - intended?
  unless numbers.empty?
    new_attributes[ :area ], new_attributes[ :pop ] = numbers[0], numbers[1]
  end

  ## disabled (was wrapped in =begin/=end):
  #  # auto-add tags
  #  area = value_numbers[0]
  #  pop = value_numbers[1]
  #
  #  # categorize into brackets
  #  if area >= 1_000_000
  #    value_tag_keys << 'area_1_000_000_n_up'
  #  elsif area >= 100_000
  #    value_tag_keys << 'area_100_000_to_1_000_000'
  #  elsif area >= 1000
  #    value_tag_keys << 'area_1_000_to_100_000'
  #  else
  #    value_tag_keys << 'area_1_000_n_less' # microstate
  #  end
  #
  #  # include all
  #  value_tag_keys << 'area_100_000_n_up' if area >= 100_000
  #  value_tag_keys << 'area_1_000_n_up' if area >= 1_000
  #
  #  # categorize into brackets
  #  if pop >= 100_000_000
  #    value_tag_keys << 'pop_100m_n_up'
  #  elsif pop >= 10_000_000
  #    value_tag_keys << 'pop_10m_to_100m'
  #  elsif pop >= 1_000_000
  #    value_tag_keys << 'pop_1m_to_10m'
  #  else
  #    value_tag_keys << 'pop_1m_n_less'
  #  end
  #
  #  # include all
  #  value_tag_keys << 'pop_10m_n_up' if pop >= 10_000_000
  #  value_tag_keys << 'pop_1m_n_up' if pop >= 1_000_000

  rec = Country.find_by_key( new_attributes[ :key ] )

  if rec.nil?
    logger.debug "create Country:"
    rec = Country.new
  else
    logger.debug "update Country #{rec.id}-#{rec.key}:"
  end

  logger.debug new_attributes.to_json

  rec.update_attributes!( new_attributes )

  #################
  ## auto add capital cities

  City.create_or_update_from_titles( capitals, country_id: rec.id )

  ##################
  ## add taggings

  unless tag_keys.empty?
    if opts[:skip_tags].present?
      logger.debug " skipping add taggings (flag skip_tag)"
    else
      tag_keys.uniq!   # remove duplicates
      logger.debug " adding #{tag_keys.size} taggings: >>#{tag_keys.join('|')}<<..."

      ### fix/todo: check tag_ids and only update diff (add/remove ids)

      tag_keys.each do |key|
        tag = Tag.find_by_key( key )
        unless tag   # create tag if it doesn't exist
          logger.debug " creating tag >#{key}<"
          tag = Tag.create!( key: key )
        end
        rec.tags << tag
      end
    end
  end

  rec

end # method create_or_update_from_attribs
323
+
324
+
325
+ end # class Country
326
+
327
+ end # module Model
328
+ end # module WorldDb
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# One record per (country, kind of code) pair, stored in the
# country_codes table.
class CountryCode < ActiveRecord::Base

  self.table_name = 'country_codes'

  belongs_to :country, foreign_key: 'country_id', class_name: 'Country'

  scope :by_name, -> { order( 'name asc' ) }   # order by name (a-z)

  # Rebuilds the table from the countries in the db: deletes all
  # existing country codes, then adds one record for every code a
  # country has (nil codes are skipped).
  def self.update!
    ## fix: add/configure logger for ActiveRecord!!!
    logger = LogKernel::Logger.root

    logger.debug( "delete all (old) country codes" )
    CountryCode.delete_all

    ## [kind, country attribute] pairs - in the order the records get created
    kinds = [
      [ 'NET',  :net    ],
      [ 'NUM',  :num    ],
      [ 'A2',   :alpha2 ],
      [ 'A3',   :alpha3 ],
      [ 'FIFA', :fifa   ],
      [ 'IOC',  :ioc    ],
      [ 'FIPS', :fips   ],
      [ 'M',    :motor  ],
    ]

    Country.order(:id).each do |cty|
      logger.debug( "add country #{cty.key} #{cty.name}" )

      kinds.each do |kind, attrib|
        code = cty.public_send( attrib )
        next if code.nil?

        CountryCode.create!( country_id: cty.id, kind: kind, name: code )
      end
    end
  end

end # class CountryCode
39
+
40
+ end # module Model
41
+ end # module WorldDb
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ #############################################################
7
+ # collect deprecated methods (or methods slated for future removal) here
8
+ # - keep for now for compatibility (for old code)
9
+
10
+
11
# Backward-compatibility shims for Country: legacy accessor names that
# simply forward to the current ones
# (title -> name, iso2 -> alpha2, iso3 -> alpha3, synonyms -> alt_names).
class Country

  def title
    name
  end

  def title=( value )
    self.name = value
  end

  scope :by_title, -> { order( 'name asc' ) }   # order by title (a-z)

  def iso2
    alpha2
  end

  def iso2=( value )
    self.alpha2 = value
  end

  def iso3
    alpha3
  end

  def iso3=( value )
    self.alpha3 = value
  end

  def synonyms
    alt_names
  end

  def synonyms=( value )
    self.alt_names = value
  end

  # deprecated: use all_names instead
  def title_w_synonyms( opts={} )
    all_names( opts )
  end

end # class Country
32
+
33
+ end # module Model
34
+ end # module WorldDb
35
+
@@ -0,0 +1,57 @@
1
+
2
+ ### forward references
3
+ ## require first to resolve circular references
4
+
5
module WorldDb
  module Model

    # Models borrowed from ConfDb
    Prop = ConfDb::Model::Prop

    # Models borrowed from TagDb
    Tagging = TagDb::Model::Tagging
    Tag     = TagDb::Model::Tag

    # Empty forward declarations; lets the model files reference each
    # other no matter in what order they get required.
    class Name < ActiveRecord::Base
    end

    class Place < ActiveRecord::Base
    end

    class Continent < ActiveRecord::Base
    end

    class Country < ActiveRecord::Base
    end

    class Region < ActiveRecord::Base
    end

    class City < ActiveRecord::Base
    end

    class Lang < ActiveRecord::Base
    end

    class Usage < ActiveRecord::Base
    end

    class CountryCode < ActiveRecord::Base
    end

  end

  # note: convenience alias for Model
  #  lets you use include WorldDb::Models
  Models = Model
end # module WorldDb
36
+
37
+
38
module TagDb
  module Model

    # Makes the WorldDb models available under TagDb::Model too.
    #   add alias? why? why not? # is there a better way?
    #   - just include WorldDb::Models - why? why not?

    world = WorldDb::Model   # local shorthand (a variable, not a new constant)

    Name        = world::Name
    Place       = world::Place
    Continent   = world::Continent
    Country     = world::Country
    Region      = world::Region
    City        = world::City

    Lang        = world::Lang
    Usage       = world::Usage

    CountryCode = world::CountryCode

  end
end
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# Lang(uage); linked to countries through the usages join model.
class Lang < ActiveRecord::Base

  has_many :usages                         # join table for countries_langs
  has_many :countries, through: :usages

  validates :key, format: { with:    /#{LANG_KEY_PATTERN}/,
                            message: LANG_KEY_PATTERN_MESSAGE }

end # class Lang
15
+
16
+ end # module Model
17
+ end # module WorldDb
18
+
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
+ #############################################################
7
+ # collect deprecated methods (or methods slated for future removal) here
8
+ # - keep for now for compatibility (for old code)
9
+
10
+
11
# Backward-compatibility shims for Lang: +title+ is the legacy name
# for +name+ (remove! add deprecated api call ???).
class Lang

  # Legacy reader; returns +name+.
  def title
    name
  end

  # Legacy writer; assigns +name+.
  def title=( value )
    self.name = value
  end

  scope :by_title, -> { order( 'name asc' ) }   # order by title (a-z)

end # class Lang
21
+
22
+ end # module Model
23
+ end # module WorldDb
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# Plain ActiveRecord model; nothing is added on top of ActiveRecord::Base here.
class Name < ActiveRecord::Base
end # class Name
10
+
11
+
12
+ end # module Model
13
+ end # module WorldDb
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
+
3
+ module WorldDb
4
+ module Model
5
+
6
# Plain ActiveRecord model; no associations are declared yet.
class Place < ActiveRecord::Base
  ## todo: depending on type
  ##   has_one continent, country, region, city etc.
end # class Place
12
+
13
+
14
+ end # module Model
15
+ end # module WorldDb
16
+