has_geo_lookup 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,813 @@
1
+ # frozen_string_literal: true
2
+
3
+ # app/models/concerns/has_geo_lookup.rb
4
+ #
5
+ # Comprehensive geographic lookup functionality using GeoBoundaries.org and Geonames.org data
6
+ #
7
+ # This concern provides both distance-based lookups (using Geonames.org) and precise
8
+ # geometric containment queries (using GeoBoundaries.org) for models with latitude/longitude
9
+ # coordinates. It includes data coverage utilities, municipal name cleaning, and comparison
10
+ # tools for geographic data quality analysis.
11
+ #
12
+ # @example Basic usage
13
+ # class Listing < ApplicationRecord
14
+ # include HasGeoLookup
15
+ # end
16
+ #
17
+ # listing = Listing.first
18
+ # listing.nearest_geonames(feature_class: "P", limit: 3)
19
+ # listing.containing_boundaries
20
+ # listing.compare_geo_sources
21
+ #
22
+ # @see HasGeoLookup::DataCoverage For geographic data coverage analysis utilities
23
+ module HasGeoLookup
24
+ extend ActiveSupport::Concern
25
+
26
+ class_methods do
# Efficiently select a random record that has coordinate data.
#
# Uses COUNT + OFFSET rather than ORDER BY RAND(), which avoids sorting the
# whole table on large datasets. Only records with BOTH latitude and longitude
# present are considered.
#
# The two NOT NULL conditions are chained separately on purpose: since Rails 6.1
# `where.not(a: nil, b: nil)` builds `NOT (a IS NULL AND b IS NULL)`, which
# still admits rows where only one of the two coordinates is set.
#
# @return [ActiveRecord::Base, nil] Random record with coordinates, or nil if none exist
#
# @example
#   Listing.random_with_coords
#   # => #<Listing id: 123, latitude: 40.7128, longitude: -74.0060, ...>
def random_with_coords
  coord_scope = where.not(latitude: nil).where.not(longitude: nil)
  total = coord_scope.count
  return nil if total.zero?

  coord_scope.offset(rand(total)).first
end
44
+ end
45
+
# Value object wrapping a Geoname record together with its distance in
# kilometers and the feature class/code it was looked up with.
Result = Struct.new(:record, :distance_km, :feature_class, :feature_code)

# Value object wrapping a Geoboundary record. It answers #feature_class and
# #feature_code (both return the boundary level) so callers can treat it like
# a Result.
GeoboundaryResult = Struct.new(:record, :distance_km, :level, :name) do
  alias_method :feature_class, :level
  alias_method :feature_code, :level
end
54
+
55
+ # Find nearby Geonames of a given type within a specified radius
56
+ #
57
+ # This method performs distance-based lookup of geographic features from the Geonames.org
58
+ # dataset. It supports filtering by feature type and uses bounding box optimization for
59
+ # better performance on large datasets.
60
+ #
61
+ # @param feature_class [String, nil] Geoname feature class (e.g., "P" for populated places)
62
+ # @param feature_code [String, nil] Geoname feature code (e.g., "PPL" for populated place)
63
+ # @param keyword [String, nil] Search feature names/descriptions to auto-determine criteria
64
+ # @param radius_km [Integer] Search radius in kilometers (default: 100)
65
+ # @param limit [Integer] Maximum results to return (default: 5)
66
+ #
67
+ # @return [Array<Result>] Array of Result structs with :record, :distance_km, :feature_class, :feature_code
68
+ #
69
+ # @example Find closest populated places
70
+ # listing.nearest_geonames(feature_class: "P", radius_km: 50)
71
+ #
72
+ # @example Find administrative divisions by keyword
73
+ # listing.nearest_geonames(keyword: "county", limit: 1)
74
+ def nearest_geonames(feature_class: nil, feature_code: nil, keyword: nil, radius_km: 100, limit: 5)
75
+ return [] unless latitude && longitude
76
+
77
+ if keyword && (feature_class.nil? || feature_code.nil?)
78
+ fc = find_feature_class_and_code_by_keyword(keyword)
79
+ feature_class ||= fc&.first
80
+ feature_code ||= fc&.last
81
+ end
82
+
83
+ return [] unless feature_code || feature_class
84
+
85
+ # Calculate rough bounding box for fast filtering before expensive distance calculations
86
+ # 1 degree ≈ 111km, so calculate degree offset for the radius
87
+ lat_offset = radius_km / 111.0
88
+ lng_offset = radius_km / (111.0 * Math.cos(Math::PI * latitude / 180.0))
89
+
90
+ query = Geoname.where.not(latitude: nil, longitude: nil)
91
+
92
+ query = query.where(feature_code: feature_code) if feature_code
93
+ query = query.where(feature_class: feature_class) if feature_class
94
+
95
+ # Add bounding box filter first (uses indexes, very fast)
96
+ query = query.where(
97
+ latitude: (latitude - lat_offset)..(latitude + lat_offset),
98
+ longitude: (longitude - lng_offset)..(longitude + lng_offset)
99
+ )
100
+
101
+ query = query.select(<<~SQL.squish)
102
+ geonames.*,
103
+ (6371 * acos(
104
+ cos(radians(#{latitude}))
105
+ * cos(radians(latitude))
106
+ * cos(radians(longitude) - radians(#{longitude}))
107
+ + sin(radians(#{latitude}))
108
+ * sin(radians(latitude))
109
+ )) AS distance_km
110
+ SQL
111
+
112
+ query = query.having("distance_km <= ?", radius_km)
113
+ .order("distance_km ASC")
114
+ .limit(limit)
115
+
116
+ query.map do |record|
117
+ Result.new(record, record.try(:distance_km).to_f, feature_class, feature_code)
118
+ end
119
+ end
120
+
# Looks up the closest county or parish-level area using feature_code = ADM2.
#
# @param radius_km [Numeric] limit search radius (default: 50)
# @param country_code [String, nil] optionally restrict matches to this country code
#   (compared case-insensitively against the geoname's country_code)
# @return [Result, nil] Result struct with distance and matched geoname, or nil
def closest_county_or_parish(radius_km: 50, country_code: nil)
  closest_geoname_for("A", "ADM2", radius_km: radius_km, country_code: country_code)
end

# Looks up the closest subdivision (e.g., neighborhood or district).
# Uses feature_code = "PPLX" and a 1 km default radius.
#
# @param radius_km [Numeric] limit search radius (default: 1)
# @param country_code [String, nil] optionally restrict matches to this country code
# @return [Result, nil] Result struct with distance and matched geoname, or nil
def closest_subdivision(radius_km: 1, country_code: nil)
  closest_geoname_for("P", "PPLX", radius_km: radius_km, country_code: country_code)
end

# Looks up the closest township-level area using feature_code = ADM3.
# Falls back to ADM4 if no ADM3 match is found.
#
# @param radius_km [Numeric] limit search radius (default: 25)
# @param country_code [String, nil] optionally restrict matches to this country code
# @return [Result, nil] Result struct with distance and matched geoname, or nil
def closest_township(radius_km: 25, country_code: nil)
  %w[ADM3 ADM4].each do |code|
    result = closest_geoname_for("A", code, radius_km: radius_km, country_code: country_code)
    return result if result
  end

  nil
end

# Shared lookup behind the closest_* helpers.
#
# Without a country filter this is exactly `nearest_geonames(..., limit: 1).first`.
# With a filter, a small window of the nearest candidates is fetched and the
# first one whose record reports the requested country_code is returned. The
# filter is best-effort: a match further down than `candidates` rows is not seen.
#
# @param feature_class [String] Geoname feature class
# @param feature_code [String] Geoname feature code
# @param radius_km [Numeric] search radius in kilometers
# @param country_code [String, nil] country code to restrict to; blank means no filter
# @param candidates [Integer] how many nearest rows to inspect when filtering
# @return [Result, nil]
def closest_geoname_for(feature_class, feature_code, radius_km:, country_code: nil, candidates: 25)
  wanted = country_code.to_s.strip

  if wanted.empty?
    return nearest_geonames(
      feature_class: feature_class,
      feature_code: feature_code,
      radius_km: radius_km,
      limit: 1
    ).first
  end

  nearest_geonames(
    feature_class: feature_class,
    feature_code: feature_code,
    radius_km: radius_km,
    limit: candidates
  ).find { |result| result.record.country_code.to_s.casecmp?(wanted) }
end
private :closest_geoname_for
178
+
179
+ # GeoBoundaries-based equivalents using precise point-in-polygon
180
+
# Find the administrative boundaries whose geometry contains this point.
#
# Runs an ST_Contains query (SRID 4326) against the Geoboundary model. Before
# querying, the coordinates are sanity-checked:
# * latitude valid but longitude outside -180..180 -> no lookup, returns []
# * latitude outside -90..90 but the pair would be valid when exchanged
#   -> the two values are swapped (common data-entry mistake)
# * anything else out of range -> returns []
#
# The point is written as "POINT(<latitude> <longitude>)"; the original author
# notes that this latitude-first order is what the MySQL spatial functions
# expect for this SRID (different from PostGIS).
#
# @param levels [Array<String>, String, nil] Specific ADM levels to search (e.g., ["ADM1", "ADM2"]).
#   If nil, all levels are searched.
#
# @return [ActiveRecord::Relation<Geoboundary>, Array] Boundaries containing this point,
#   ordered by level; an empty Array when coordinates are missing or unusable
#
# @example Find all containing boundaries
#   listing.containing_boundaries
#
# @example Find only state and county level
#   listing.containing_boundaries(levels: ["ADM1", "ADM2"])
def containing_boundaries(levels: nil)
  return [] unless latitude && longitude

  lat = latitude.to_f
  lng = longitude.to_f

  if lat.between?(-90, 90)
    # Latitude is fine; an invalid longitude on its own cannot be repaired.
    return [] unless lng.between?(-180, 180)
  elsif lng.between?(-90, 90) && lat.between?(-180, 180)
    # The values only make sense the other way round - treat them as swapped.
    lat, lng = lng, lat
  else
    return []
  end

  scope = Geoboundary.where(
    "ST_Contains(boundary, ST_GeomFromText(?, 4326))",
    "POINT(#{lat} #{lng})"
  )
  scope = scope.where(level: levels) if levels
  scope.order(:level)
end

# Returns the first boundary at the given level that contains this point.
#
# @param level [String] ADM level, e.g. "ADM2"
# @return [Geoboundary, nil]
def containing_boundary(level)
  matches = containing_boundaries(levels: level)
  matches.first
end
235
+
# GeoBoundary equivalent of closest_county_or_parish.
#
# Prefers the ADM2 boundary that actually contains this point (reported with a
# distance of 0.0); when none does, falls back to the nearest-geoname lookup.
#
# @return [GeoboundaryResult, Result, nil]
def county_or_parish_boundary
  adm2 = containing_boundary('ADM2')

  if adm2
    GeoboundaryResult.new(adm2, 0.0, 'ADM2', adm2.name)
  else
    # Any falsy fallback is reported as nil.
    closest_county_or_parish || nil
  end
end
249
+
# GeoBoundary equivalent of closest_township.
#
# Checks for a containing ADM3 boundary first, then ADM4. The first hit is
# returned with a distance of 0.0 and the level it was found at. If neither
# level contains the point, the nearest-geoname lookup is used instead.
#
# @return [GeoboundaryResult, Result, nil]
def township_boundary
  hit = nil
  matched_level = %w[ADM3 ADM4].find { |adm| hit = containing_boundary(adm) }
  return GeoboundaryResult.new(hit, 0.0, matched_level, hit.name) if matched_level

  # Any falsy fallback is reported as nil.
  closest_township || nil
end
265
+
# GeoBoundary lookup for the state/province level.
#
# Returns the ADM1 boundary containing this point, wrapped with a distance of
# 0.0. There is no geoname fallback here because none of the closest_* helpers
# covers ADM1.
#
# @return [GeoboundaryResult, nil]
def state_or_province_boundary
  adm1 = containing_boundary('ADM1')
  adm1 ? GeoboundaryResult.new(adm1, 0.0, 'ADM1', adm1.name) : nil
end
275
+
# Closest subdivision (from geonames) enriched with boundary context.
#
# Subdivisions still come from the geoname lookup (the original author notes
# that geoBoundaries has no neighborhood-level data). When one is found, the
# ADM2/ADM3/ADM4 boundaries containing this point are attached to the matched
# record as a singleton #containing_boundaries method, to help validate it.
#
# @return [Result, nil] the closest_subdivision result (record decorated), or nil
def subdivision_with_boundary_context
  closest_subdivision.tap do |match|
    next unless match

    context = containing_boundaries(levels: %w[ADM2 ADM3 ADM4])
    match.record.define_singleton_method(:containing_boundaries) { context }
  end
end
288
+
# Find the metro area that contains this point using geometric containment.
#
# Joins Metro to its geoboundaries and runs an ST_Contains query (SRID 4326)
# for this point. Metros are defined as collections of geoboundaries that
# together form a cohesive region.
#
# Coordinates get the same treatment as in #containing_boundaries so both
# lookups agree on which inputs are usable:
# * a pair that is only valid when exchanged is treated as swapped
# * any other out-of-range pair short-circuits to nil instead of being sent
#   to the spatial query
# Values are coerced to Float before being written into the WKT text, which is
# latitude-first ("POINT(<latitude> <longitude>)") as elsewhere in this module.
#
# @return [Metro, nil] Metro area that contains this point, or nil if not within
#   any metro or the coordinates are missing/unusable
#
# @example
#   listing.within_metro
#   # => #<Metro id: 1, name: "Bay Area", details: "San Francisco Bay Area">
#
# @example
#   listing.within_metro
#   # => nil (if coordinates are not within any defined metro area)
def within_metro
  return nil unless latitude && longitude

  lat = latitude.to_f
  lng = longitude.to_f

  unless lat.between?(-90, 90)
    return nil unless lng.between?(-90, 90) && lat.between?(-180, 180)

    lat, lng = lng, lat
  end
  return nil unless lng.between?(-180, 180)

  Metro.joins(:geoboundaries)
       .where("ST_Contains(geoboundaries.boundary, ST_GeomFromText(?, 4326))", "POINT(#{lat} #{lng})")
       .first
end

# Legacy name kept for backward compatibility; simply delegates.
#
# @deprecated Use {#within_metro} instead
# @return [Metro, nil]
def closest_metro
  within_metro
end
319
+
# Compare geographic data from multiple sources for maintenance and debugging.
#
# Prints a side-by-side table of geographic attribute values from the record
# itself (CURRENT), from boundary lookups (BOUNDARY) and from geoname lookups
# (GEONAMES). Including classes may add columns by defining
# `additional_source_columns(attr)` (returning a Hash of column name => value)
# and describe them with `additional_source_legend` (Hash of column => text).
#
# Row values are stored under the lower-cased header name, so every header
# column - including ATTRIBUTE, GEONAMES and any additional column regardless
# of the case or type (String/Symbol) of its key - finds its value.
# Attributes the record does not respond to are shown as "(nil)".
#
# @return [String, nil] an explanatory message when coordinates are missing;
#   otherwise nil (the table is written to standard output)
#
# @example
#   listing.compare_geo_sources
#   # => prints a formatted table comparing all geographic attributes across sources
def compare_geo_sources
  return "No coordinates available for comparison" unless latitude && longitude

  geo_attributes = %w[city county_or_parish state_or_province township subdivision_name country postal_code]
  extra_sources = respond_to?(:additional_source_columns, true)

  # Collect all data first to avoid jumbled SQL output
  print "Collecting geographic data..."
  data_rows = geo_attributes.map do |attr|
    print "."
    current_val = respond_to?(attr, true) ? send(attr) : nil
    boundary_val = get_boundary_value(attr)
    geoname_val = get_geoname_value(attr)
    additional_sources = (extra_sources && additional_source_columns(attr)) || {}

    # Flag the row when the stored value agrees with none of the sources.
    all_source_vals = [boundary_val, geoname_val] + additional_sources.values

    row = {
      attribute: attr.upcase,
      current: truncate_value(current_val) || "(nil)",
      boundary: truncate_value(boundary_val) || "(nil)",
      geonames: truncate_value(geoname_val) || "(nil)",
      marker: all_source_vals.none? { |val| current_val == val } ? " *" : ""
    }

    additional_sources.each do |column_name, value|
      row[column_name.to_s.downcase.to_sym] = truncate_value(value) || "(nil)"
    end

    row
  end
  puts " done!\n"

  # Header: fixed columns plus whatever the model contributes for the first attribute.
  header_sources = (extra_sources && additional_source_columns(geo_attributes.first)) || {}
  all_columns = %w[ATTRIBUTE CURRENT BOUNDARY GEONAMES] + header_sources.keys.map { |key| key.to_s.upcase }

  row_format = (["%-20s"] * all_columns.length).join(" ")
  total_width = all_columns.length * 20 + 4
  rule_width = total_width + all_columns.length - 1

  puts "\n" + "=" * total_width
  puts "GEO SOURCES COMPARISON"
  puts "Coordinates: #{latitude}, #{longitude}"
  puts "=" * total_width
  puts sprintf(row_format, *all_columns)
  puts "-" * rule_width

  data_rows.each do |row|
    values = all_columns.map { |col| row[col.downcase.to_sym] }
    puts sprintf(row_format, *values) + row[:marker]
  end

  puts "-" * rule_width
  puts "* = Current value differs from all sources"
  puts "\nLegend:"
  puts "  CURRENT  - Value currently stored in database"
  puts "  BOUNDARY - Value from GeoBoundaries.org (precise polygon containment)"
  puts "  GEONAMES - Value from Geonames.org (nearest feature lookup)"

  if respond_to?(:additional_source_legend, true)
    additional_source_legend.each do |column, description|
      puts "  #{column.to_s.upcase.ljust(8)} - #{description}"
    end
  end

  puts "=" * total_width

  nil
end
417
+
# Validate coordinates and convert them from radians to degrees if needed.
#
# Decision process:
#
# 1. **Sanity check**: missing, non-finite, or out-of-range values
#    (|lat| > 90 or |lng| > 180) are rejected with [nil, nil]. Such values
#    cannot be radians either - valid radian coordinates are far smaller than
#    the degree limits - so "converting" them would only produce impossible
#    coordinates.
# 2. **Country validation**: when an expected country is given and the values
#    do not match it as degrees, values small enough to be radians
#    (|value| <= PI) are converted and re-checked; the converted pair is
#    returned if it matches the country.
# 3. **Fallback**: otherwise the values are returned unchanged, i.e. treated as
#    degrees.
#
# @param lat [Float, Integer] Latitude coordinate in degrees or radians
# @param lng [Float, Integer] Longitude coordinate in degrees or radians
# @param expected_country [String, nil] Optional 2-letter ISO country code (e.g., "US", "FR")
#   used for boundary validation when coordinates are ambiguous
#
# @return [Array<(Float, Float)>] [latitude, longitude] in degrees,
#   or [nil, nil] if coordinates are missing, non-finite or out of range
#
# @example Converting radians to degrees (confirmed by country validation)
#   validate_and_convert_coordinates(0.7128, -1.2915, "US")
#   # => [40.84..., -73.99...]
#
# @example Preserving valid degrees
#   validate_and_convert_coordinates(40.7128, -74.0060, "US")
#   # => [40.7128, -74.0060]
#
# @example Rejecting values outside degree ranges
#   validate_and_convert_coordinates(95.0, 1.5, "US")
#   # => [nil, nil]
#   validate_and_convert_coordinates(2000.0, 3000.0, "US")
#   # => [nil, nil]
#
# @note Country validation is delegated to #coordinates_match_country?; when that
#   check cannot confirm a match the input is treated as degrees.
#
# @see #coordinates_match_country? for details on boundary validation logic
def validate_and_convert_coordinates(lat, lng, expected_country = nil)
  return [nil, nil] unless lat && lng

  lat = lat.to_f
  lng = lng.to_f

  return [nil, nil] unless lat.finite? && lng.finite?
  return [nil, nil] if lat.abs > 90 || lng.abs > 180

  # Within degree ranges the values could still be radians; only the expected
  # country can disambiguate.
  if expected_country.present? &&
     !coordinates_match_country?(lat, lng, expected_country) &&
     lat.abs <= Math::PI && lng.abs <= Math::PI
    lat_from_radians = lat * 180.0 / Math::PI
    lng_from_radians = lng * 180.0 / Math::PI

    if coordinates_match_country?(lat_from_radians, lng_from_radians, expected_country)
      return [lat_from_radians, lng_from_radians]
    end
  end

  # Default: assume coordinates are already in degrees
  [lat, lng]
end
507
+
508
+ private
509
+
# Check if coordinates fall within the expected country's boundaries.
#
# Steps:
# 1. Reject missing arguments and values outside lat -90..90 / lng -180..180.
# 2. Skip validation (return false) when the spatial tables are unavailable or
#    when running in the Rails test environment.
# 3. Build a throwaway object that exposes the given latitude/longitude and is
#    extended with HasGeoLookup, so #containing_boundaries can be reused for
#    coordinates other than this record's own.
# 4. Load the containing boundaries ONCE (a lazy relation would otherwise be
#    queried separately by the emptiness check and by the matching step).
# 5. Convert the ISO2 country code to ISO3 via Iso3166 and check whether any
#    boundary's shape_iso or shape_group includes it.
#
# Failure handling is intentionally asymmetric, as in the original design:
# * a failing spatial query is logged and treated as "cannot verify" -> true
# * a failing country-code lookup/matching step -> false
#
# @param lat [Float] Latitude coordinate in degrees
# @param lng [Float] Longitude coordinate in degrees
# @param country_code [String] 2-letter ISO country code (e.g., "US", "FR", "CA")
#
# @return [Boolean] true if a containing boundary belongs to the expected country
#   (or the spatial query failed), false otherwise
#
# @example Coordinates within expected country
#   coordinates_match_country?(40.7128, -74.0060, "US")
#   # => true
#
# @example Invalid coordinate ranges
#   coordinates_match_country?(200.0, -74.0, "US")
#   # => false (latitude outside valid range)
#
# @see #containing_boundaries for the spatial containment logic
# @see #postgis_tables_available? for table availability checking
# @api private
def coordinates_match_country?(lat, lng, country_code)
  return false unless lat && lng && country_code

  # Quick range check first
  return false unless lat.between?(-90, 90) && lng.between?(-180, 180)

  # No spatial tables or test environment: skip country validation, which makes
  # callers fall back to treating coordinates as degrees.
  return false unless postgis_tables_available? && !Rails.env.test?

  temp_checker = Object.new
  temp_checker.define_singleton_method(:latitude) { lat }
  temp_checker.define_singleton_method(:longitude) { lng }
  temp_checker.extend(HasGeoLookup)

  begin
    boundaries = temp_checker.containing_boundaries.to_a
  rescue StandardError => e
    Rails.logger&.warn "Spatial validation failed: #{e.message}"
    return true # Assume coordinates are valid if we can't verify
  end
  return false if boundaries.empty?

  begin
    country_iso3 = Iso3166.for_code(country_code)&.code3
    return false unless country_iso3

    boundaries.any? do |boundary|
      boundary.shape_iso&.include?(country_iso3) ||
        boundary.shape_group&.include?(country_iso3)
    end
  rescue StandardError
    # If the country lookup fails, assume coordinates don't match
    false
  end
end
596
+
# Report whether the spatial lookup tables exist in the current database.
#
# Asks the ActiveRecord connection whether both the 'geoboundaries' and the
# 'geonames' table exist. Any error raised while checking (for example a
# missing database connection) counts as "not available".
#
# The answer - true or false - is memoized in @postgis_available, so the
# database is consulted at most once per object.
#
# @return [Boolean] true if both tables exist, false otherwise
#
# @example With spatial data imported
#   postgis_tables_available?
#   # => true
#
# @example Without the spatial tables
#   postgis_tables_available?
#   # => false
#
# @api private
def postgis_tables_available?
  return @postgis_available if defined?(@postgis_available)

  @postgis_available =
    begin
      db = ActiveRecord::Base.connection
      %w[geoboundaries geonames].all? { |table| db.table_exists?(table) }
    rescue StandardError
      false
    end
end
635
+
# Get the display value for an attribute from the boundary-based lookups.
#
# Only administrative levels are covered; other attributes (city, country,
# postal_code, ...) yield nil. Township names are passed through
# clean_municipal_name.
#
# Names are read with explicit respond_to? checks instead of inline `rescue`
# modifiers, so genuine failures (e.g. database errors) are no longer silently
# swallowed.
#
# @param attr [String] attribute name, e.g. 'county_or_parish'
# @return [String, nil]
def get_boundary_value(attr)
  case attr
  when 'county_or_parish'
    boundary_result_name(county_or_parish_boundary)
  when 'state_or_province'
    boundary_result_name(state_or_province_boundary)
  when 'township'
    clean_municipal_name(boundary_result_name(township_boundary))
  when 'subdivision_name'
    record = subdivision_with_boundary_context&.record
    record.name if record.respond_to?(:name)
  end
end

# Extract a name from a lookup result: the wrapped record's name when it has
# one, otherwise the result's own name (if any).
#
# @param result [#record, nil] lookup result struct or nil
# @return [String, nil]
def boundary_result_name(result)
  return nil unless result

  record = result.record
  return record.name if record.respond_to?(:name)

  result.name if result.respond_to?(:name)
end
656
+
# Get the display value for an attribute from the geoname-based lookups.
#
# Only the attributes backed by a closest_* lookup are supported; any other
# attribute yields nil.
#
# @param attr [String] attribute name, e.g. 'township'
# @return [String, nil] name of the matched geoname record, if any
def get_geoname_value(attr)
  finder = {
    'county_or_parish' => :closest_county_or_parish,
    'township' => :closest_township,
    'subdivision_name' => :closest_subdivision
  }[attr]
  return nil unless finder

  send(finder)&.record&.name
end
673
+
# Shorten a value for table display.
#
# The value is converted to a String; anything longer than 18 characters is cut
# to its first 16 characters followed by "...". nil/false input yields nil.
#
# @param value [Object] Value to truncate (will be converted to string)
# @return [String, nil] Display string, or nil if input is nil
#
# @example
#   truncate_value("This is a very long string")
#   # => "This is a very l..."
#
# @example
#   truncate_value("Short")
#   # => "Short"
def truncate_value(value)
  return nil unless value

  text = value.to_s
  return text if text.length <= 18

  "#{text[0, 16]}..."
end
691
+
# Clean municipal names by removing one common prefix and one common suffix.
#
# Matching is case-insensitive. A prefix ("City of", "Town of", ...) is only
# removed when followed by whitespace; a suffix ("Township", "County", ...) is
# only removed when preceded by whitespace, so a name consisting solely of such
# a word is left alone.
#
# @param name [String, nil] raw municipal name
# @return [String, nil] cleaned name, or nil when the input is blank
def clean_municipal_name(name)
  return nil unless name.present?

  leading = /\A(?:City of|Borough of|Township of|Town of|Village of|Municipality of|County of|District of)\s+/i
  trailing = /\s+(?:City|Borough|Township|Town|Village|Municipality|County|District)\z/i

  name.strip.sub(leading, '').sub(trailing, '').strip
end
739
+
# Resolve a free-text keyword to a [feature_class, feature_code] pair.
#
# Looks for the keyword (case-insensitive substring match) in the name or
# description of FeatureCode rows and returns the pair of the first row found.
# LIKE wildcards contained in the keyword ("%", "_") are escaped so they are
# matched literally, and a blank keyword short-circuits to nil instead of
# matching every row.
#
# @param keyword [String] search term, e.g. "county"
# @return [Array(String, String), nil] [feature_class, feature_code] or nil if nothing matches
def find_feature_class_and_code_by_keyword(keyword)
  term = keyword.to_s.strip.downcase
  return nil if term.empty?

  pattern = "%#{FeatureCode.sanitize_sql_like(term)}%"

  FeatureCode
    .where("LOWER(name) LIKE :kw OR LOWER(description) LIKE :kw", kw: pattern)
    .limit(1)
    .pluck(:feature_class, :feature_code)
    .first
end
747
+
# Utilities for analyzing geographic data coverage and availability.
#
# Answers whether boundary (Geoboundary) and geoname (Geoname) rows exist for a
# country. The methods only rely on those two models and on Iso3166 for
# ISO2 -> ISO3 conversion, not on any application-specific model.
#
# Country codes are normalized (trimmed, upper-cased) before use, so "us",
# " US " and "US" are treated alike.
#
# @example Check coverage for a country
#   HasGeoLookup::DataCoverage.coverage_status('US')
#   # => {boundaries: true, geonames: true, complete: true}
#
# @example Check individual data sources
#   HasGeoLookup::DataCoverage.has_boundary_data?('FR')  # => true
#   HasGeoLookup::DataCoverage.has_geonames_data?('FR')  # => false
module DataCoverage
  extend self

  # Territories that don't have separate boundary data (US territories + others);
  # they are always reported as covered.
  TERRITORIES_WITHOUT_BOUNDARIES = %w[PR VI GU AS MP TC].freeze

  # US territories that might not have separate geonames data; always reported as covered.
  TERRITORIES_WITHOUT_SEPARATE_GEONAMES = %w[PR VI GU AS MP].freeze

  # Check if boundary data exists for a country.
  #
  # Codes that cannot be converted to ISO3 are reported as covered; the original
  # author chose this so callers that retry on "missing" do not loop forever.
  #
  # @param iso2 [String] 2-letter country code
  # @return [Boolean] true if boundary data exists
  def has_boundary_data?(iso2)
    code = normalize_iso2(iso2)
    return true if TERRITORIES_WITHOUT_BOUNDARIES.include?(code)

    country = Iso3166.for_code(code)
    return true unless country

    iso3 = country.code3

    # Country information may live in either shape_iso or shape_group.
    Geoboundary.where("shape_iso LIKE ? OR shape_group LIKE ?", "%#{iso3}%", "%#{iso3}%").exists?
  end

  # Check if geonames data exists for a country.
  #
  # @param iso2 [String] 2-letter country code
  # @return [Boolean] true if geonames data exists
  def has_geonames_data?(iso2)
    code = normalize_iso2(iso2)
    return true if TERRITORIES_WITHOUT_SEPARATE_GEONAMES.include?(code)

    Geoname.where(country_code: code).exists?
  end

  # Get comprehensive coverage status for a country.
  #
  # @param iso2 [String] 2-letter country code
  # @return [Hash] Coverage status with :boundaries, :geonames, :complete keys
  def coverage_status(iso2)
    boundaries = has_boundary_data?(iso2)
    geonames = has_geonames_data?(iso2)

    {
      boundaries: boundaries,
      geonames: geonames,
      complete: boundaries && geonames
    }
  end

  private

  # Canonical form of a country code: trimmed and upper-cased String.
  def normalize_iso2(iso2)
    iso2.to_s.strip.upcase
  end
end
813
+ end