has_geo_lookup 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE.txt +21 -0
- data/README.md +389 -0
- data/Rakefile +8 -0
- data/lib/generators/has_geo_lookup/install_generator.rb +66 -0
- data/lib/generators/has_geo_lookup/templates/INSTALL.md +60 -0
- data/lib/generators/has_geo_lookup/templates/create_feature_codes.rb.erb +17 -0
- data/lib/generators/has_geo_lookup/templates/create_geoboundaries.rb.erb +24 -0
- data/lib/generators/has_geo_lookup/templates/create_geoboundaries_metros.rb.erb +10 -0
- data/lib/generators/has_geo_lookup/templates/create_geonames.rb.erb +40 -0
- data/lib/generators/has_geo_lookup/templates/create_geonames_metros.rb.erb +10 -0
- data/lib/generators/has_geo_lookup/templates/create_metros.rb.erb +17 -0
- data/lib/has_geo_lookup/concern.rb +813 -0
- data/lib/has_geo_lookup/index_checker.rb +360 -0
- data/lib/has_geo_lookup/models/feature_code.rb +194 -0
- data/lib/has_geo_lookup/models/geoboundary.rb +220 -0
- data/lib/has_geo_lookup/models/geoname.rb +152 -0
- data/lib/has_geo_lookup/models/metro.rb +247 -0
- data/lib/has_geo_lookup/railtie.rb +11 -0
- data/lib/has_geo_lookup/version.rb +5 -0
- data/lib/has_geo_lookup.rb +28 -0
- data/lib/tasks/has_geo_lookup.rake +111 -0
- data/sig/has_geo_lookup.rbs +4 -0
- metadata +183 -0
@@ -0,0 +1,813 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# app/models/concerns/has_geo_lookup.rb
|
4
|
+
#
|
5
|
+
# Comprehensive geographic lookup functionality using GeoBoundaries.org and Geonames.org data
|
6
|
+
#
|
7
|
+
# This concern provides both distance-based lookups (using Geonames.org) and precise
|
8
|
+
# geometric containment queries (using GeoBoundaries.org) for models with latitude/longitude
|
9
|
+
# coordinates. It includes data coverage utilities, municipal name cleaning, and comparison
|
10
|
+
# tools for geographic data quality analysis.
|
11
|
+
#
|
12
|
+
# @example Basic usage
|
13
|
+
# class Listing < ApplicationRecord
|
14
|
+
# include HasGeoLookup
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# listing = Listing.first
|
18
|
+
# listing.nearest_geonames(feature_class: "P", limit: 3)
|
19
|
+
# listing.containing_boundaries
|
20
|
+
# listing.compare_geo_sources
|
21
|
+
#
|
22
|
+
# @see HasGeoLookup::DataCoverage For geographic data coverage analysis utilities
|
23
|
+
module HasGeoLookup
|
24
|
+
extend ActiveSupport::Concern
|
25
|
+
|
26
|
+
class_methods do
|
27
|
+
# Efficiently select a random record that has coordinate data
|
28
|
+
#
|
29
|
+
# This method uses offset-based selection rather than ORDER BY RAND() for better
|
30
|
+
# performance on large datasets. Only considers records with both latitude and
|
31
|
+
# longitude values.
|
32
|
+
#
|
33
|
+
# @return [ActiveRecord::Base, nil] Random record with coordinates, or nil if none exist
|
34
|
+
#
|
35
|
+
# @example
|
36
|
+
# Listing.random_with_coords
|
37
|
+
# # => #<Listing id: 123, latitude: 40.7128, longitude: -74.0060, ...>
|
38
|
+
def random_with_coords
  # Chain two separate where.not calls: on Rails 6.1+ a single
  # where.not(latitude: nil, longitude: nil) is a NAND and would still
  # select rows that are missing only one of the two coordinates.
  coord_scope = where.not(latitude: nil).where.not(longitude: nil)
  total = coord_scope.count
  return nil if total.zero?

  # Pick a random offset into the scope instead of sorting randomly.
  coord_scope.offset(rand(total)).first
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Struct to wrap a Geoname result with distance in kilometers
|
47
|
+
# Value object pairing a matched record with its distance (km) and the
# feature class/code that were used for the lookup.
Result = Struct.new(:record, :distance_km, :feature_class, :feature_code)

# Value object for a boundary match. It answers feature_class and
# feature_code with the boundary level so it can be read like a Result.
GeoboundaryResult = Struct.new(:record, :distance_km, :level, :name) do
  def feature_class
    level
  end

  def feature_code
    level
  end
end
|
54
|
+
|
55
|
+
# Find nearby Geonames of a given type within a specified radius
|
56
|
+
#
|
57
|
+
# This method performs distance-based lookup of geographic features from the Geonames.org
|
58
|
+
# dataset. It supports filtering by feature type and uses bounding box optimization for
|
59
|
+
# better performance on large datasets.
|
60
|
+
#
|
61
|
+
# @param feature_class [String, nil] Geoname feature class (e.g., "P" for populated places)
|
62
|
+
# @param feature_code [String, nil] Geoname feature code (e.g., "PPL" for populated place)
|
63
|
+
# @param keyword [String, nil] Search feature names/descriptions to auto-determine criteria
|
64
|
+
# @param radius_km [Integer] Search radius in kilometers (default: 100)
|
65
|
+
# @param limit [Integer] Maximum results to return (default: 5)
|
66
|
+
#
|
67
|
+
# @return [Array<Result>] Array of Result structs with :record, :distance_km, :feature_class, :feature_code
|
68
|
+
#
|
69
|
+
# @example Find closest populated places
|
70
|
+
# listing.nearest_geonames(feature_class: "P", radius_km: 50)
|
71
|
+
#
|
72
|
+
# @example Find administrative divisions by keyword
|
73
|
+
# listing.nearest_geonames(keyword: "county", limit: 1)
|
74
|
+
def nearest_geonames(feature_class: nil, feature_code: nil, keyword: nil, radius_km: 100, limit: 5)
  return [] unless latitude && longitude

  # A keyword only fills in whichever of class/code the caller left out.
  if keyword && (feature_class.nil? || feature_code.nil?)
    fc = find_feature_class_and_code_by_keyword(keyword)
    feature_class ||= fc&.first
    feature_code ||= fc&.last
  end

  return [] unless feature_code || feature_class

  # Coerce to Float before building SQL so that only plain numeric literals
  # are ever interpolated, whatever type the host object's attributes have.
  lat = latitude.to_f
  lng = longitude.to_f
  return [] unless lat.finite? && lng.finite?

  # Rough bounding box for fast filtering before the distance calculation.
  # 1 degree is about 111 km; the longitude span widens with latitude, so it
  # is capped at 180 degrees to stay bounded near the poles.
  lat_offset = radius_km / 111.0
  cos_lat = [Math.cos(lat * Math::PI / 180.0).abs, 1e-9].max
  lng_offset = [radius_km / (111.0 * cos_lat), 180.0].min

  # Separate where.not calls: a combined hash is a NAND on Rails 6.1+.
  query = Geoname.where.not(latitude: nil).where.not(longitude: nil)
  query = query.where(feature_code: feature_code) if feature_code
  query = query.where(feature_class: feature_class) if feature_class
  query = query.where(
    latitude: (lat - lat_offset)..(lat + lat_offset),
    longitude: (lng - lng_offset)..(lng + lng_offset)
  )

  # Spherical law of cosines. The acos argument is clamped to [-1, 1] because
  # floating-point rounding can push it just past 1 for (near-)identical
  # points, which would otherwise yield no usable distance for the closest match.
  query = query.select(<<~SQL.squish)
    geonames.*,
    (6371 * acos(LEAST(1.0, GREATEST(-1.0,
      cos(radians(#{lat}))
      * cos(radians(latitude))
      * cos(radians(longitude) - radians(#{lng}))
      + sin(radians(#{lat}))
      * sin(radians(latitude))
    )))) AS distance_km
  SQL

  query = query.having("distance_km <= ?", radius_km)
               .order("distance_km ASC")
               .limit(limit)

  query.map do |record|
    Result.new(record, record.try(:distance_km).to_f, feature_class, feature_code)
  end
end
|
120
|
+
|
121
|
+
# Looks up the closest county or parish-level area using feature_code = ADM2.
|
122
|
+
#
|
123
|
+
# Options:
|
124
|
+
# :radius_km — limit search radius (default: 50)
|
125
|
+
# :country_code — accepted for API compatibility but currently not applied to the query
|
126
|
+
#
|
127
|
+
# Returns a Result struct with distance and matched geoname.
|
128
|
+
def closest_county_or_parish(radius_km: 50, country_code: nil)
  # NOTE: country_code is accepted but is not forwarded to nearest_geonames,
  # so it does not currently restrict the search.
  matches = nearest_geonames(feature_class: "A", feature_code: "ADM2", radius_km: radius_km, limit: 1)
  matches.first
end
|
137
|
+
|
138
|
+
# Looks up the closest subdivision (e.g., neighborhood or district)
|
139
|
+
# Defaults to feature_code = "PPLX" and 1 km radius
|
140
|
+
#
|
141
|
+
# Options:
|
142
|
+
# :radius_km — limit search radius (default: 1)
|
143
|
+
# :country_code — accepted for API compatibility but currently not applied to the query
|
144
|
+
#
|
145
|
+
# Returns a Result struct with distance and matched geoname.
|
146
|
+
def closest_subdivision(radius_km: 1, country_code: nil)
  # NOTE: country_code is accepted but is not forwarded to nearest_geonames,
  # so it does not currently restrict the search.
  matches = nearest_geonames(feature_class: "P", feature_code: "PPLX", radius_km: radius_km, limit: 1)
  matches.first
end
|
155
|
+
|
156
|
+
# Looks up the closest township-level area using feature_code = ADM3.
|
157
|
+
# Falls back to ADM4 if no ADM3 match is found.
|
158
|
+
#
|
159
|
+
# Options:
|
160
|
+
# :radius_km — limit search radius (default: 25)
|
161
|
+
# :country_code — accepted for API compatibility but currently not applied to the query
|
162
|
+
#
|
163
|
+
# Returns a Result struct with distance and matched geoname.
|
164
|
+
def closest_township(radius_km: 25, country_code: nil)
  # NOTE: country_code is accepted but is not forwarded to nearest_geonames.
  # ADM3 is tried first; ADM4 is only queried when ADM3 yields nothing.
  %w[ADM3 ADM4].lazy.filter_map do |code|
    nearest_geonames(feature_class: "A", feature_code: code, radius_km: radius_km, limit: 1).first
  end.first
end
|
178
|
+
|
179
|
+
# GeoBoundaries-based equivalents using precise point-in-polygon
|
180
|
+
|
181
|
+
# Find administrative boundaries that contain this point using precise geometric containment
|
182
|
+
#
|
183
|
+
# This method uses the database's spatial functions (ST_Contains) to determine which GeoBoundaries.org
|
184
|
+
# administrative boundaries contain the current coordinate point. It performs automatic
|
185
|
+
# coordinate validation and swapping for common data issues.
|
186
|
+
#
|
187
|
+
# @param levels [Array<String>, nil] Specific ADM levels to search (e.g., ["ADM1", "ADM2"])
|
188
|
+
# If nil, searches all available levels
|
189
|
+
#
|
190
|
+
# @return [Array<Geoboundary>] Array of boundary records that contain this point,
|
191
|
+
# ordered by administrative level
|
192
|
+
#
|
193
|
+
# @example Find all containing boundaries
|
194
|
+
# listing.containing_boundaries
|
195
|
+
#
|
196
|
+
# @example Find only state and county level
|
197
|
+
# listing.containing_boundaries(levels: ["ADM1", "ADM2"])
|
198
|
+
def containing_boundaries(levels: nil)
  return [] unless latitude && longitude

  lat = latitude.to_f
  lng = longitude.to_f

  # Validate the ranges; if only the latitude is out of range and the pair
  # would be valid the other way round, treat the coordinates as swapped.
  if lat.between?(-90, 90)
    return [] unless lng.between?(-180, 180)
  elsif lng.between?(-90, 90) && lat.between?(-180, 180)
    lat, lng = lng, lat
  else
    return []
  end

  # The point is written latitude-first, the order MySQL's spatial functions
  # expect for this SRID (the opposite of PostGIS).
  scope = Geoboundary.where(
    "ST_Contains(boundary, ST_GeomFromText(?, 4326))",
    "POINT(#{lat} #{lng})"
  )
  scope = scope.where(level: levels) if levels
  scope.order(:level)
end
|
230
|
+
|
231
|
+
# Returns the boundary at a specific level that contains this point
|
232
|
+
def containing_boundary(level)
  # Restrict the containment search to one level and take the first match.
  matches = containing_boundaries(levels: level)
  matches.first
end
|
235
|
+
|
236
|
+
# GeoBoundary equivalent of closest_county_or_parish
|
237
|
+
# Returns the ADM2 boundary that contains this point, with fallback to closest geoname
|
238
|
+
def county_or_parish_boundary
  adm2 = containing_boundary('ADM2')

  if adm2
    # Exact containment: report a distance of zero.
    GeoboundaryResult.new(adm2, 0.0, 'ADM2', adm2.name)
  else
    # No containing polygon: fall back to the nearest ADM2 geoname, if any.
    closest_county_or_parish || nil
  end
end
|
249
|
+
|
250
|
+
# GeoBoundary equivalent of closest_township
|
251
|
+
# Returns ADM3 or ADM4 boundary that contains this point, with fallback
|
252
|
+
def township_boundary
  # Find the first level (ADM3, then ADM4) with a containing boundary,
  # remembering the boundary that matched.
  hit = nil
  matched_level = %w[ADM3 ADM4].find { |lvl| hit = containing_boundary(lvl) }
  return GeoboundaryResult.new(hit, 0.0, matched_level, hit.name) if matched_level

  # No containing polygon at either level: fall back to the nearest geoname.
  closest_township || nil
end
|
265
|
+
|
266
|
+
# GeoBoundary equivalent for state/province
|
267
|
+
# Returns the ADM1 boundary that contains this point
|
268
|
+
def state_or_province_boundary
  adm1 = containing_boundary('ADM1')
  # There is no geoname fallback at this level: either a polygon contains
  # the point or the answer is nil.
  adm1 ? GeoboundaryResult.new(adm1, 0.0, 'ADM1', adm1.name) : nil
end
|
275
|
+
|
276
|
+
# For subdivision, we still use geonames since geoBoundaries doesn't have neighborhood-level data
|
277
|
+
# but we can add boundary context
|
278
|
+
def subdivision_with_boundary_context
  nearest = closest_subdivision

  if nearest
    # Attach the ADM2-ADM4 boundaries containing this point to the matched
    # record as a singleton reader, so callers can sanity-check the match.
    context = containing_boundaries(levels: %w[ADM2 ADM3 ADM4])
    nearest.record.define_singleton_method(:containing_boundaries) { context }
  end

  nearest
end
|
288
|
+
|
289
|
+
# Find the metro area that contains this point using precise geometric containment
|
290
|
+
#
|
291
|
+
# This method uses the database's spatial functions (ST_Contains) to determine which metropolitan area
|
292
|
+
# contains the current coordinate point. Metros are defined as collections of
|
293
|
+
# geoboundaries (administrative boundaries) that together form a cohesive region.
|
294
|
+
#
|
295
|
+
# @return [Metro, nil] Metro area that contains this point, or nil if not within any metro
|
296
|
+
#
|
297
|
+
# @example
|
298
|
+
# listing.within_metro
|
299
|
+
# # => #<Metro id: 1, name: "Bay Area", details: "San Francisco Bay Area">
|
300
|
+
#
|
301
|
+
# @example
|
302
|
+
# listing.within_metro
|
303
|
+
# # => nil (if coordinates are not within any defined metro area)
|
304
|
+
def within_metro
  return nil unless latitude && longitude

  # Coerce and validate the same way containing_boundaries does, so that an
  # out-of-range or swapped pair is handled here instead of being sent to
  # the database as-is.
  lat = latitude.to_f
  lng = longitude.to_f

  unless lat.between?(-90, 90)
    # Latitude is invalid: only recoverable if the pair is valid when swapped.
    return nil unless lng.between?(-90, 90) && lat.between?(-180, 180)

    lat, lng = lng, lat
  end
  return nil unless lng.between?(-180, 180)

  # Direct spatial containment query against every metro's boundaries.
  # MySQL spatial functions expect POINT(latitude longitude) format.
  Metro.joins(:geoboundaries)
       .where("ST_Contains(geoboundaries.boundary, ST_GeomFromText(?, 4326))", "POINT(#{lat} #{lng})")
       .first
end
|
313
|
+
|
314
|
+
# Legacy method for backward compatibility - will be removed after migration
|
315
|
+
# @deprecated Use {#within_metro} instead
|
316
|
+
def closest_metro
  # Deprecated alias kept for existing callers: it performs the containment
  # lookup of within_metro, not a nearest-distance search.
  within_metro
end
|
319
|
+
|
320
|
+
# Compare geographic data from multiple sources for maintenance and debugging
#
# Prints a side-by-side table of geographic attribute values from the current
# stored values, GeoBoundaries.org data, and Geonames.org data to standard
# output. Apps can add their own sources (e.g., original API data) by defining
# additional_source_columns(attr), returning a hash of column name => value,
# and optionally additional_source_legend, returning column name => description.
#
# @return [String, nil] the message "No coordinates available for comparison"
#   when latitude or longitude is missing; otherwise nil after printing the table
#
# @example
#   listing.compare_geo_sources
#   # => prints a table comparing all geographic attributes across sources
def compare_geo_sources
  return "No coordinates available for comparison" unless latitude && longitude

  geo_attributes = %w[city county_or_parish state_or_province township subdivision_name country postal_code]
  extra_sources = respond_to?(:additional_source_columns, true)

  # Collect all data first to avoid jumbled SQL output
  print "Collecting geographic data..."
  data_rows = geo_attributes.map do |attr|
    print "."
    # Not every host model defines every attribute; show "(nil)" rather than raising.
    current_val = respond_to?(attr, true) ? send(attr) : nil
    boundary_val = get_boundary_value(attr)
    geoname_val = get_geoname_value(attr)
    additional_sources = (extra_sources && additional_source_columns(attr)) || {}

    # Flag the row when the stored value matches none of the sources.
    all_source_vals = [boundary_val, geoname_val] + additional_sources.values
    marker = all_source_vals.none? { |val| current_val == val } ? " *" : ""

    # Keys are the downcased column headers, because that is how the
    # rendering loop below looks each cell up.
    row = {
      attribute: attr.upcase,
      current: truncate_value(current_val) || "(nil)",
      boundary: truncate_value(boundary_val) || "(nil)",
      geonames: truncate_value(geoname_val) || "(nil)",
      marker: marker
    }
    additional_sources.each do |column_name, value|
      row[column_name.to_s.downcase.to_sym] = truncate_value(value) || "(nil)"
    end
    row
  end

  puts " done!\n"

  # Build header columns
  base_columns = %w[ATTRIBUTE CURRENT BOUNDARY GEONAMES]
  additional_columns = extra_sources ? (additional_source_columns(geo_attributes.first)&.keys || []) : []
  all_columns = base_columns + additional_columns.map { |col| col.to_s.upcase }

  total_width = all_columns.length * 20 + 4
  row_format = (["%-20s"] * all_columns.length).join(" ")
  rule = "-" * (total_width + all_columns.length - 1)

  # Display results
  puts "\n" + "=" * total_width
  puts "GEO SOURCES COMPARISON"
  puts "Coordinates: #{latitude}, #{longitude}"
  puts "=" * total_width
  puts sprintf(row_format, *all_columns)
  puts rule

  data_rows.each do |row|
    values = all_columns.map { |col| row[col.downcase.to_sym] }
    puts sprintf(row_format, *values) + row[:marker]
  end

  puts rule
  puts "* = Current value differs from all sources"
  puts "\nLegend:"
  puts " CURRENT - Value currently stored in database"
  puts " BOUNDARY - Value from GeoBoundaries.org (precise polygon containment)"
  puts " GEONAMES - Value from Geonames.org (nearest feature lookup)"

  # Add legend entries for additional sources (keys may be Strings or Symbols)
  if respond_to?(:additional_source_legend, true)
    additional_source_legend.each do |column, description|
      puts " #{column.to_s.upcase.ljust(8)} - #{description}"
    end
  end

  puts "=" * total_width

  nil
end
|
417
|
+
|
418
|
+
# Validate coordinates and convert them from radians to degrees if needed
#
# Detection is a multi-step process:
#
# 1. **Range check**: values that are not finite, or that fall outside the
#    degree ranges (|lat| > 90 or |lng| > 180), are rejected. Such values
#    cannot be radians either (radians never exceed pi in magnitude), and
#    converting them would only produce coordinates that are even further
#    out of range.
# 2. **Country validation**: when an expected country is given and the values
#    do not fall inside it as degrees, but are small enough to be radians
#    (both magnitudes <= pi), the radians-to-degrees conversion is tested
#    against the country and returned if it matches.
# 3. **Fallback**: otherwise the coordinates are returned unchanged, i.e.
#    they are assumed to already be in degrees.
#
# @param lat [Float, Integer] Latitude coordinate in degrees or radians
# @param lng [Float, Integer] Longitude coordinate in degrees or radians
# @param expected_country [String, nil] Optional 2-letter ISO country code (e.g., "US", "FR")
#   used for boundary validation when coordinates are ambiguous
#
# @return [Array<(Float, Float)>] Array containing [latitude, longitude] in degrees,
#   or [nil, nil] if either input is missing, non-finite, or outside the degree ranges
#
# @example Converting radians to degrees (requires a matching country boundary)
#   validate_and_convert_coordinates(0.7128, -1.2915, "US")
#   # => [40.84..., -73.99...]
#
# @example Preserving valid degrees
#   validate_and_convert_coordinates(40.7128, -74.0060, "US")
#   # => [40.7128, -74.0060]
#
# @example Rejecting out-of-range coordinates
#   validate_and_convert_coordinates(95.0, 1.5, "US")
#   # => [nil, nil]
#   validate_and_convert_coordinates(2000.0, 3000.0, "US")
#   # => [nil, nil]
#
# @see #coordinates_match_country? for details on boundary validation logic
# @since 1.0.0
def validate_and_convert_coordinates(lat, lng, expected_country = nil)
  return [nil, nil] unless lat && lng

  lat = lat.to_f
  lng = lng.to_f

  # Out-of-range or non-finite input is invalid in degrees AND in radians.
  return [nil, nil] unless lat.finite? && lng.finite?
  return [nil, nil] if lat.abs > 90 || lng.abs > 180

  # Within degree ranges the values could still be radians; only a country
  # hint can disambiguate. The cheap magnitude test runs first so no boundary
  # lookup is made for values that cannot be radians.
  country_given = expected_country && !expected_country.to_s.strip.empty?
  if country_given &&
     lat.abs <= Math::PI && lng.abs <= Math::PI &&
     !coordinates_match_country?(lat, lng, expected_country)
    lat_from_radians = lat * 180.0 / Math::PI
    lng_from_radians = lng * 180.0 / Math::PI

    if coordinates_match_country?(lat_from_radians, lng_from_radians, expected_country)
      return [lat_from_radians, lng_from_radians]
    end
  end

  # Default: assume coordinates are already in degrees
  [lat, lng]
end
|
507
|
+
|
508
|
+
private
|
509
|
+
|
510
|
+
# Check if coordinates fall within the expected country's boundaries
|
511
|
+
#
|
512
|
+
# This private method performs spatial validation by checking if the given coordinates
|
513
|
+
# fall within the administrative boundaries of the expected country. It uses PostGIS
|
514
|
+
# spatial containment queries against the GeoBoundaries.org dataset.
|
515
|
+
#
|
516
|
+
# The validation process:
|
517
|
+
# 1. Validates coordinate ranges (lat: -90 to 90, lng: -180 to 180)
|
518
|
+
# 2. Checks PostGIS table availability (gracefully handles test environments)
|
519
|
+
# 3. Creates a temporary object with HasGeoLookup functionality for boundary lookup
|
520
|
+
# 4. Converts country code from ISO2 to ISO3 format for geoboundary matching
|
521
|
+
# 5. Checks if any containing boundary matches the expected country
|
522
|
+
#
|
523
|
+
# @param lat [Float] Latitude coordinate in degrees
|
524
|
+
# @param lng [Float] Longitude coordinate in degrees
|
525
|
+
# @param country_code [String] 2-letter ISO country code (e.g., "US", "FR", "CA")
|
526
|
+
#
|
527
|
+
# @return [Boolean] true if coordinates fall within the expected country's boundaries,
|
528
|
+
# false otherwise or if validation fails
|
529
|
+
#
|
530
|
+
# @example Coordinates within expected country
|
531
|
+
# coordinates_match_country?(40.7128, -74.0060, "US")
|
532
|
+
# # => true (NYC coordinates are within USA boundaries)
|
533
|
+
#
|
534
|
+
# @example Coordinates outside expected country
|
535
|
+
# coordinates_match_country?(48.8566, 2.3522, "US")
|
536
|
+
# # => false (Paris coordinates are not within USA boundaries)
|
537
|
+
#
|
538
|
+
# @example Invalid coordinate ranges
|
539
|
+
# coordinates_match_country?(200.0, -74.0, "US")
|
540
|
+
# # => false (latitude outside valid range)
|
541
|
+
#
|
542
|
+
# @note This method gracefully handles missing PostGIS tables by returning false,
|
543
|
+
# making it safe to use in test environments without spatial databases
|
544
|
+
#
|
545
|
+
# @note Requires the iso_3166 gem for country code conversion and GeoBoundaries
|
546
|
+
# data to be imported for the target country
|
547
|
+
#
|
548
|
+
# @see #containing_boundaries for the spatial containment logic
|
549
|
+
# @see #postgis_tables_available? for PostGIS availability checking
|
550
|
+
# @api private
|
551
|
+
# @since 1.0.0
|
552
|
+
def coordinates_match_country?(lat, lng, country_code)
  return false unless lat && lng && country_code
  return false unless lat.between?(-90, 90) && lng.between?(-180, 180)

  # Without the lookup tables, or in the test environment, the check is
  # skipped and reported as "no match"; callers then default to degrees.
  return false unless postgis_tables_available? && !Rails.env.test?

  # containing_boundaries reads latitude/longitude from its receiver, so wrap
  # the candidate pair in a throwaway object that exposes them.
  probe = Object.new
  probe.define_singleton_method(:latitude) { lat }
  probe.define_singleton_method(:longitude) { lng }
  probe.extend(HasGeoLookup)

  begin
    boundaries = probe.containing_boundaries
    return false if boundaries.empty?
  rescue StandardError => e
    # A failing spatial query is treated as "cannot verify", which counts as a match.
    Rails.logger&.warn "Spatial validation failed: #{e.message}"
    return true
  end

  begin
    # Boundaries are matched on the ISO3 form of the country code.
    iso3 = Iso3166.for_code(country_code)&.code3
    return false unless iso3

    boundaries.any? do |boundary|
      boundary.shape_iso&.include?(iso3) || boundary.shape_group&.include?(iso3)
    end
  rescue
    # If the country-code or boundary lookup fails, assume no match.
    false
  end
end
|
596
|
+
|
597
|
+
# Check if PostGIS tables are available (for test environment handling)
|
598
|
+
#
|
599
|
+
# This helper method determines whether the required PostGIS tables (geoboundaries
|
600
|
+
# and geonames) are available in the current database. It's primarily used to
|
601
|
+
# gracefully handle test environments or deployments where spatial data hasn't
|
602
|
+
# been imported yet.
|
603
|
+
#
|
604
|
+
# The method includes caching to avoid repeated database queries within the same
|
605
|
+
# object instance.
|
606
|
+
#
|
607
|
+
# @return [Boolean] true if both 'geoboundaries' and 'geonames' tables exist
|
608
|
+
# and are accessible, false otherwise
|
609
|
+
#
|
610
|
+
# @example In a production environment with PostGIS data
|
611
|
+
# postgis_tables_available?
|
612
|
+
# # => true
|
613
|
+
#
|
614
|
+
# @example In a test environment without spatial tables
|
615
|
+
# postgis_tables_available?
|
616
|
+
# # => false
|
617
|
+
#
|
618
|
+
# @note This method catches and handles any database connection errors,
|
619
|
+
# returning false if the tables cannot be accessed for any reason
|
620
|
+
#
|
621
|
+
# @note The result is cached in @postgis_available to avoid repeated
|
622
|
+
# database queries during coordinate validation
|
623
|
+
#
|
624
|
+
# @api private
|
625
|
+
# @since 1.0.0
|
626
|
+
def postgis_tables_available?
  # Memoized per instance. instance_variable_defined? is used rather than ||=
  # so that a cached false is not recomputed.
  unless instance_variable_defined?(:@postgis_available)
    @postgis_available =
      begin
        # Only table existence is checked here, nothing about spatial support.
        ActiveRecord::Base.connection.table_exists?('geoboundaries') &&
          ActiveRecord::Base.connection.table_exists?('geonames')
      rescue StandardError
        false
      end
  end

  @postgis_available
end
|
635
|
+
|
636
|
+
# Get value from boundary sources
|
637
|
+
def get_boundary_value(attr)
  # Extracts a display name from a lookup result. Prefers the wrapped
  # record's name; when the record has no such method it can fall back to a
  # name carried by the result itself. This replaces inline `rescue`
  # modifiers, which silently swallowed every StandardError.
  name_of = lambda do |result, fallback_to_result|
    record = result&.record
    next nil if record.nil?
    next record.name if record.respond_to?(:name)

    result.name if fallback_to_result && result.respond_to?(:name)
  end

  case attr
  when 'county_or_parish'
    name_of.call(county_or_parish_boundary, true)
  when 'state_or_province'
    name_of.call(state_or_province_boundary, true)
  when 'township'
    clean_municipal_name(name_of.call(township_boundary, true))
  when 'subdivision_name'
    name_of.call(subdivision_with_boundary_context, false)
  end
  # Any other attribute (city, country, postal_code, ...) has no boundary
  # source and yields nil.
end
|
656
|
+
|
657
|
+
# Get value from geoname sources
|
658
|
+
def get_geoname_value(attr)
  # Map each supported attribute to the lookup that serves it; every other
  # attribute has no geoname source and yields nil.
  finder = {
    'county_or_parish' => :closest_county_or_parish,
    'township' => :closest_township,
    'subdivision_name' => :closest_subdivision
  }[attr]
  return nil unless finder

  send(finder)&.record&.name
end
|
673
|
+
|
674
|
+
# Truncate long string values for display formatting
|
675
|
+
#
|
676
|
+
# @param value [Object] Value to truncate (will be converted to string)
|
677
|
+
# @return [String, nil] Truncated string with "..." suffix if over 18 characters, or nil if input is nil
|
678
|
+
#
|
679
|
+
# @example
|
680
|
+
# truncate_value("This is a very long string")
|
681
|
+
# # => "This is a very l..."
|
682
|
+
#
|
683
|
+
# @example
|
684
|
+
# truncate_value("Short")
|
685
|
+
# # => "Short"
|
686
|
+
def truncate_value(value)
  # nil and false have nothing to display.
  return unless value

  text = value.to_s
  # Up to 18 characters is shown as-is.
  return text if text.length <= 18

  # Longer values keep their first 16 characters plus an ellipsis marker.
  text[0, 16] + "..."
end
|
691
|
+
|
692
|
+
# Clean municipal names by removing common prefixes and suffixes
|
693
|
+
# Strip one leading "<Type> of " prefix and one trailing " <Type>" suffix
# from a municipal name, where <Type> is City, Borough, Township, Town,
# Village, Municipality, County or District (case-insensitive).
#
# A suffix is only removed when whitespace precedes it, so a name that
# consists solely of one of these words is returned unchanged.
#
# NOTE(review): the suffix rule also shortens names such as "Kansas City"
# to "Kansas" - confirm that is acceptable for every caller.
#
# @param name [String, nil] raw name
# @return [String, nil] cleaned name, or nil when +name+ is blank
def clean_municipal_name(name)
  return nil unless name.present?

  # Two literal regexes replace the former per-call arrays and per-iteration
  # interpolated patterns. Both are anchored, and no listed word is a
  # suffix of another, so at most one alternative can match.
  name.strip
      .sub(/\A(?:City|Borough|Township|Town|Village|Municipality|County|District) of\s+/i, '')
      .sub(/\s+(?:City|Borough|Township|Town|Village|Municipality|County|District)\z/i, '')
      .strip
end
|
739
|
+
|
740
|
+
# Find a feature class/code pair whose name or description contains the
# given keyword, compared case-insensitively.
#
# No ORDER BY is applied, so when several rows match, the one returned is
# whichever the database yields first.
#
# @param keyword [String] text to search for
# @return [Array, nil] the [feature_class, feature_code] pair, or nil when
#   nothing matches
def find_feature_class_and_code_by_keyword(keyword)
  # Escape LIKE metacharacters so "%" and "_" in the keyword are matched
  # literally instead of acting as wildcards. sanitize_sql_like escapes
  # with a backslash by default.
  pattern = "%#{FeatureCode.sanitize_sql_like(keyword.downcase)}%"

  FeatureCode
    .where("LOWER(name) LIKE :kw OR LOWER(description) LIKE :kw", kw: pattern)
    .limit(1)
    .pluck(:feature_class, :feature_code)
    .first
end
|
747
|
+
|
748
|
+
# Utilities for analyzing geographic data coverage and availability
|
749
|
+
#
|
750
|
+
# This module provides methods to check the availability of GeoBoundaries.org and
|
751
|
+
# Geonames.org data for different countries. These utilities are gem-ready and
|
752
|
+
# don't depend on any specific application models or business logic.
|
753
|
+
#
|
754
|
+
# @example Check coverage for a country
|
755
|
+
# HasGeoLookup::DataCoverage.coverage_status('US')
|
756
|
+
# # => {boundaries: true, geonames: true, complete: true}
|
757
|
+
#
|
758
|
+
# @example Check individual data sources
|
759
|
+
# HasGeoLookup::DataCoverage.has_boundary_data?('FR') # => true
|
760
|
+
# HasGeoLookup::DataCoverage.has_geonames_data?('FR') # => false
|
761
|
+
module DataCoverage
  extend self

  # ISO2 codes reported as covered without querying boundary data
  # (US territories + others).
  TERRITORIES_WITHOUT_BOUNDARIES = %w[PR VI GU AS MP TC].freeze

  # ISO2 codes reported as covered without querying geonames data
  # (US territories).
  TERRITORIES_WITHOUT_SEPARATE_GEONAMES = %w[PR VI GU AS MP].freeze

  # Check if boundary data exists for a country
  #
  # @param iso2 [String] 2-letter country code (case-insensitive, surrounding
  #   whitespace ignored)
  # @return [Boolean] true if boundary data exists, if the code is in the
  #   special-case territory list, or if the code cannot be converted to ISO3
  def has_boundary_data?(iso2)
    code = normalize_country_code(iso2)

    # Special cases for territories that don't have separate boundary data
    return true if TERRITORIES_WITHOUT_BOUNDARIES.include?(code)

    # Convert ISO2 to ISO3 to check boundaries
    country = Iso3166.for_code(code)
    # If we can't convert, assume it exists. The original rationale was
    # avoiding infinite loops; the looping caller is not visible here.
    return true unless country

    iso3 = country.code3

    # Boundaries carry the country's ISO3 code in shape_iso or shape_group;
    # a substring match on either column counts as coverage.
    Geoboundary.where("shape_iso LIKE ? OR shape_group LIKE ?", "%#{iso3}%", "%#{iso3}%").exists?
  end

  # Check if geonames data exists for a country
  #
  # @param iso2 [String] 2-letter country code (case-insensitive, surrounding
  #   whitespace ignored)
  # @return [Boolean] true if geonames data exists or the code is in the
  #   special-case territory list
  def has_geonames_data?(iso2)
    code = normalize_country_code(iso2)

    # Special cases for territories that might not have separate geonames data
    return true if TERRITORIES_WITHOUT_SEPARATE_GEONAMES.include?(code)

    Geoname.where(country_code: code).exists?
  end

  # Get comprehensive coverage status for a country
  #
  # @param iso2 [String] 2-letter country code
  # @return [Hash] Coverage status with :boundaries, :geonames, :complete keys;
  #   :complete is true only when both sources report data
  def coverage_status(iso2)
    boundaries = has_boundary_data?(iso2)
    geonames = has_geonames_data?(iso2)

    {
      boundaries: boundaries,
      geonames: geonames,
      complete: boundaries && geonames
    }
  end

  private

  # Canonical upper-case form of a country code so 'us', :us and ' US '
  # behave like 'US'. nil is passed through untouched.
  def normalize_country_code(iso2)
    iso2.nil? ? nil : iso2.to_s.strip.upcase
  end
end
|
813
|
+
end
|