geo-intel-offline 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geo_intel_offline/__init__.py +15 -0
- geo_intel_offline/api.py +114 -0
- geo_intel_offline/compression.py +238 -0
- geo_intel_offline/confidence.py +89 -0
- geo_intel_offline/data/geohash_index.json.gz +0 -0
- geo_intel_offline/data/metadata.json.gz +0 -0
- geo_intel_offline/data/polygons.json.gz +0 -0
- geo_intel_offline/data_builder.py +528 -0
- geo_intel_offline/data_builder_minimal.py +173 -0
- geo_intel_offline/data_builder_modular.py +474 -0
- geo_intel_offline/data_loader.py +170 -0
- geo_intel_offline/geohash.py +150 -0
- geo_intel_offline/hierarchical_resolver.py +136 -0
- geo_intel_offline/migrate_to_modular.py +159 -0
- geo_intel_offline/modular_data_loader.py +212 -0
- geo_intel_offline/pip.py +150 -0
- geo_intel_offline/polygon_utils.py +104 -0
- geo_intel_offline/resolver.py +306 -0
- geo_intel_offline-1.0.1.dist-info/LICENSE +21 -0
- geo_intel_offline-1.0.1.dist-info/METADATA +784 -0
- geo_intel_offline-1.0.1.dist-info/RECORD +24 -0
- geo_intel_offline-1.0.1.dist-info/WHEEL +5 -0
- geo_intel_offline-1.0.1.dist-info/entry_points.txt +2 -0
- geo_intel_offline-1.0.1.dist-info/top_level.txt +1 -0
geo_intel_offline/pip.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Point-in-Polygon (PIP) algorithm using Ray Casting.
|
|
3
|
+
|
|
4
|
+
Ray Casting Algorithm:
|
|
5
|
+
- Cast a ray from the point to infinity (we use East, +X direction)
|
|
6
|
+
- Count intersections with polygon edges
|
|
7
|
+
- Odd intersections = inside, even = outside
|
|
8
|
+
|
|
9
|
+
Design Decisions:
|
|
10
|
+
1. Ray Casting chosen over Winding Number for:
|
|
11
|
+
- Simpler implementation
|
|
12
|
+
- Better performance
|
|
13
|
+
- Deterministic results
|
|
14
|
+
|
|
15
|
+
2. Handle polygon rings (exterior + holes):
|
|
16
|
+
- Exterior ring: inside = true
|
|
17
|
+
- Interior rings (holes): inside = false
|
|
18
|
+
|
|
19
|
+
3. Edge cases handled:
|
|
20
|
+
- Points on vertices
|
|
21
|
+
- Points on edges
|
|
22
|
+
- Horizontal rays (collinear with edges)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from typing import List, Optional, Tuple
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def point_in_polygon(
    point: Tuple[float, float],
    polygon: List[Tuple[float, float]]
) -> bool:
    """
    Check if a point is inside a polygon using ray casting.

    A horizontal ray is cast from the point toward +lon (East); each time it
    crosses a polygon edge the inside/outside state toggles. Odd crossings
    mean inside, even mean outside.

    Args:
        point: (lat, lon) tuple
        polygon: List of (lat, lon) tuples forming polygon ring

    Returns:
        True if point is inside polygon, False otherwise
    """
    # A valid ring needs at least 3 vertices; anything less has no interior.
    if not polygon or len(polygon) < 3:
        return False

    lat, lon = point
    inside = False

    # Ray casting: check intersections with horizontal ray going East.
    # Walk edges (polygon[j] -> polygon[i]) with j trailing i (wraps around).
    j = len(polygon) - 1
    for i in range(len(polygon)):
        lat_i, lon_i = polygon[i]
        lat_j, lon_j = polygon[j]

        # The edge straddles the ray's latitude only when exactly one endpoint
        # is above it. This test also skips horizontal edges (both endpoints at
        # the same latitude) and guarantees lat_j != lat_i below, so the
        # division can never be by zero. (A previous version special-cased
        # lon_j == lon_i "to avoid division by zero", but the divisor is the
        # latitude delta, not the longitude delta; that branch was dead code
        # and, for lon_j == lon_i, the formula reduces to lon_i anyway.)
        if (lat_i > lat) != (lat_j > lat):
            # Longitude at which the edge crosses the ray's latitude.
            intersect_lon = (lat - lat_i) * (lon_j - lon_i) / (lat_j - lat_i) + lon_i

            # Count the crossing only if it lies East of the point.
            if lon < intersect_lon:
                inside = not inside

        j = i

    return inside
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def point_in_polygon_with_holes(
    point: Tuple[float, float],
    exterior: List[Tuple[float, float]],
    holes: Optional[List[List[Tuple[float, float]]]] = None
) -> bool:
    """
    Check if point is in polygon with holes (interior rings).

    Design Decision: Exterior ring defines inclusion, holes define exclusion.
    This handles countries with lakes, islands with lakes, etc.

    Args:
        point: (lat, lon) tuple
        exterior: Exterior polygon ring
        holes: List of interior rings (holes), or None for a solid polygon.
            (Annotation fixed to Optional[...] — the default is None.)

    Returns:
        True if point is inside exterior but not in any hole
    """
    # Outside the exterior ring means outside, full stop.
    if not point_in_polygon(point, exterior):
        return False

    # Inside any hole means excluded from the result.
    if holes:
        for hole in holes:
            if point_in_polygon(point, hole):
                return False

    return True
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def distance_to_polygon_edge(
    point: Tuple[float, float],
    polygon: List[Tuple[float, float]]
) -> float:
    """
    Calculate minimum distance from point to polygon edge.

    Used for confidence scoring: closer to edge = lower confidence.

    Args:
        point: (lat, lon) tuple
        polygon: Polygon ring

    Returns:
        Distance in degrees (approximate, for confidence scoring)
    """
    if not polygon:
        return float('inf')

    p_lat, p_lon = point
    best = float('inf')

    # Pair each vertex with its predecessor (wrapping around the ring) so
    # every edge of the ring is visited exactly once.
    other_lat, other_lon = polygon[-1]
    for anchor_lat, anchor_lon in polygon:
        run = other_lon - anchor_lon
        rise = other_lat - anchor_lat

        if run == 0 and rise == 0:
            # Degenerate segment: both endpoints coincide, so the distance is
            # simply point-to-vertex.
            candidate = ((p_lat - anchor_lat) ** 2 + (p_lon - anchor_lon) ** 2) ** 0.5
        else:
            # Project the point onto the segment and clamp the parameter to
            # [0, 1] so the projection stays on the segment.
            frac = ((p_lat - anchor_lat) * rise + (p_lon - anchor_lon) * run) / (run * run + rise * rise)
            frac = max(0, min(1, frac))

            near_lat = anchor_lat + frac * rise
            near_lon = anchor_lon + frac * run
            candidate = ((p_lat - near_lat) ** 2 + (p_lon - near_lon) ** 2) ** 0.5

        if candidate < best:
            best = candidate
        other_lat, other_lon = anchor_lat, anchor_lon

    return best
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared polygon processing utilities.
|
|
3
|
+
Consolidates duplicate code from data builders.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Tuple, Dict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def calculate_bounding_box(polygon: List[Tuple[float, float]]) -> Tuple[float, float, float, float]:
    """Calculate bounding box for a polygon.

    Returns (min_lat, max_lat, min_lon, max_lon); all zeros for an empty ring.
    """
    if not polygon:
        return 0.0, 0.0, 0.0, 0.0

    south = min(v[0] for v in polygon)
    north = max(v[0] for v in polygon)
    west = min(v[1] for v in polygon)
    east = max(v[1] for v in polygon)
    return south, north, west, east
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def calculate_adaptive_step_size(lat_range: float, lon_range: float) -> float:
    """
    Calculate adaptive step size for geohash sampling based on polygon size.

    Geohash precision 6 covers ~1.2km x 0.6km cells. We need step size small enough
    to ensure we sample multiple points within each geohash cell for reliable coverage.

    Args:
        lat_range: Bounding-box extent in degrees of latitude
        lon_range: Bounding-box extent in degrees of longitude

    Returns:
        Step size in degrees
    """
    # Only the larger extent drives the sampling density. (A previous version
    # also computed min(lat_range, lon_range) but never used it — dead code,
    # removed.)
    max_range = max(lat_range, lon_range)

    # (upper bound on max_range, step) in ascending order; first match wins.
    # Geohash precision 6 = ~0.01° latitude, ~0.02° longitude.
    thresholds = (
        (0.001, 0.001),  # ~111m - very fine for tiny islands
        (0.01, 0.002),   # ~222m - fine sampling for small islands
        (0.05, 0.005),   # ~555m - good for small countries
        (0.1, 0.008),    # ~888m - ensure multiple samples per geohash
        (0.5, 0.015),    # ~1.6km - medium countries
        (2.0, 0.025),    # ~2.7km - large countries
    )
    for upper_bound, step in thresholds:
        if max_range < upper_bound:
            return step
    return 0.05  # ~5.5km - very large countries
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def calculate_safe_iteration_limits(
    min_lat: float,
    max_lat: float,
    min_lon: float,
    max_lon: float,
    step: float
) -> Tuple[int, int, int, float]:
    """
    Calculate safe iteration limits to prevent infinite loops.

    These limits are only used as a safety net for truly infinite loops,
    not to reduce coverage. Returns very generous limits.

    Returns:
        Tuple of (max_lat_iterations, max_lon_iterations, max_total_iterations, adjusted_step)
    """
    # Non-positive steps cannot be divided by; fall back to a generous fixed
    # per-axis bound in that case.
    if step > 0:
        lat_iterations = int((max_lat - min_lat) / step) + 10  # +10 buffer
        lon_iterations = int((max_lon - min_lon) / step) + 10  # +10 buffer
    else:
        lat_iterations = 10000
        lon_iterations = 10000

    # Hard ceiling of 10 million total iterations — a safety cap that should
    # never be hit in normal operation.
    total_cap = min(lat_iterations * lon_iterations, 10000000)

    # The step is returned unchanged to preserve full coverage.
    return lat_iterations, lon_iterations, total_cap, step
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_polygon_centroid(polygon: List[Tuple[float, float]]) -> Tuple[float, float]:
    """Calculate polygon centroid (arithmetic mean of the vertices).

    Returns (0.0, 0.0) for an empty ring.
    """
    if not polygon:
        return 0.0, 0.0

    vertex_count = len(polygon)
    lat_total = 0.0
    lon_total = 0.0
    for vertex_lat, vertex_lon in polygon:
        lat_total += vertex_lat
        lon_total += vertex_lon
    return lat_total / vertex_count, lon_total / vertex_count
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def convert_geojson_coords_to_latlon(coords_list: List) -> List[Tuple[float, float]]:
    """
    Convert GeoJSON coordinates [lon, lat] to internal format [(lat, lon), ...].

    Args:
        coords_list: GeoJSON coordinate list (each element is [lon, lat])

    Returns:
        List of (lat, lon) tuples
    """
    converted = []
    for coord in coords_list:
        # GeoJSON stores longitude first; swap to (lat, lon).
        converted.append((coord[1], coord[0]))
    return converted
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Resolver orchestration - coordinates the resolution pipeline.
|
|
3
|
+
|
|
4
|
+
Resolution Pipeline:
|
|
5
|
+
1. Encode lat/lon to geohash
|
|
6
|
+
2. Query geohash index for candidate countries
|
|
7
|
+
3. For each candidate:
|
|
8
|
+
a. Load polygon
|
|
9
|
+
b. Test point-in-polygon
|
|
10
|
+
c. If match, calculate confidence
|
|
11
|
+
4. Return best match or handle ambiguity
|
|
12
|
+
|
|
13
|
+
Edge Cases Handled:
|
|
14
|
+
- Points in oceans (no country match)
|
|
15
|
+
- Border points (multiple candidates)
|
|
16
|
+
- Geohash boundary cases (check neighbors)
|
|
17
|
+
- Countries with holes (islands, lakes)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from typing import List, Tuple, Optional, Dict
|
|
21
|
+
from .geohash import encode, get_neighbors
|
|
22
|
+
from .pip import point_in_polygon_with_holes
|
|
23
|
+
from .confidence import calculate_confidence
|
|
24
|
+
from .data_loader import get_loader
|
|
25
|
+
from .modular_data_loader import ModularDataLoader
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ResolutionResult:
    """Result of a geo-intelligence resolution.

    Carries the matched country's identifiers plus a confidence score.
    A default-constructed instance (all fields None, confidence 0.0)
    represents "no match found".
    """

    def __init__(
        self,
        country_id: Optional[int] = None,
        country_name: Optional[str] = None,
        iso2: Optional[str] = None,
        iso3: Optional[str] = None,
        continent: Optional[str] = None,
        timezone: Optional[str] = None,
        confidence: float = 0.0
    ):
        self.country_id = country_id
        self.country_name = country_name
        self.iso2 = iso2
        self.iso3 = iso3
        self.continent = continent
        self.timezone = timezone
        self.confidence = confidence

    def to_dict(self) -> Dict:
        """Serialize the public fields to a plain dictionary."""
        fields = (
            ("country", self.country_name),
            ("iso2", self.iso2),
            ("iso3", self.iso3),
            ("continent", self.continent),
            ("timezone", self.timezone),
            ("confidence", self.confidence),
        )
        return dict(fields)

    def is_valid(self) -> bool:
        """A result is valid when it identifies a country."""
        return self.country_id is not None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _evaluate_polygon(
    point: Tuple[float, float],
    polygon_data: Dict,
    candidate_count: int
):
    """
    Test a country's polygon record against a point.

    Consolidates the four near-identical MultiPolygon / single-polygon
    matching branches that were previously duplicated inside resolve().
    Handles both single-polygon records ('exterior') and MultiPolygon
    records ('multi' flag + 'exteriors' list); the stored 'holes' list is
    shared across all exteriors, matching the data layout.

    Args:
        point: (lat, lon) tuple
        polygon_data: Polygon record from the data loader
        candidate_count: Number of candidate countries (feeds confidence)

    Returns:
        (exterior_tuples, holes_tuples, confidence) for the first exterior
        containing the point, or None if no exterior contains it.
    """
    holes = polygon_data.get('holes', [])
    holes_tuples = [[(p[0], p[1]) for p in hole] for hole in holes] if holes else None

    if polygon_data.get('multi', False) and polygon_data.get('exteriors'):
        exteriors = polygon_data['exteriors']
    else:
        # Single polygon (also the fallback when a 'multi' record has an
        # empty 'exteriors' list).
        exteriors = [polygon_data.get('exterior', [])]

    for exterior in exteriors:
        exterior_tuples = [(p[0], p[1]) for p in exterior]
        if point_in_polygon_with_holes(point, exterior_tuples, holes_tuples):
            confidence = calculate_confidence(
                point,
                exterior_tuples,
                holes_tuples,
                candidate_count=candidate_count
            )
            # First containing exterior wins; no need to test the rest.
            return exterior_tuples, holes_tuples, confidence

    return None


def resolve(
    lat: float,
    lon: float,
    data_dir: Optional[str] = None,
    countries: Optional[List[str]] = None,
    continents: Optional[List[str]] = None,
    exclude_countries: Optional[List[str]] = None,
    loader: Optional[ModularDataLoader] = None
) -> ResolutionResult:
    """
    Resolve latitude/longitude to geo-intelligence.

    Main resolution function that orchestrates the entire pipeline:
    geohash lookup -> neighbor expansion -> point-in-polygon -> confidence.

    Args:
        lat: Latitude (-90 to 90)
        lon: Longitude (-180 to 180)
        data_dir: Optional custom data directory
        countries: Optional list of ISO2 codes to load (modular format)
        continents: Optional list of continent names to load (modular format)
        exclude_countries: Optional list of ISO2 codes to exclude (modular format)
        loader: Optional pre-configured loader instance

    Returns:
        ResolutionResult with country information and confidence
        (an empty ResolutionResult when no country matches, e.g. oceans)
    """
    if loader is None:
        # Use modular loader if filters specified, otherwise use default
        if countries or continents or exclude_countries:
            loader = ModularDataLoader(
                data_dir=data_dir,
                countries=countries,
                continents=continents,
                exclude_countries=exclude_countries
            )
        else:
            loader = get_loader(data_dir)
    point = (lat, lon)

    # Step 1: Encode to geohash
    geohash = encode(lat, lon)

    # Step 2: Get candidate countries from the geohash index
    candidates = loader.get_candidate_countries(geohash)

    # Step 3: If no candidates from the primary geohash, try its neighbors.
    # This handles edge cases where the point sits on a geohash boundary.
    # (The extended search is nested here because it reuses `neighbors`.)
    if not candidates:
        neighbors = get_neighbors(geohash)
        for neighbor_hash in neighbors:
            candidates.extend(loader.get_candidate_countries(neighbor_hash))
        candidates = list(set(candidates))  # Deduplicate

        # Step 3b: Still nothing — widen to neighbors-of-neighbors. This
        # improves accuracy for small countries/islands with sparse coverage.
        if not candidates:
            extended_neighbors = set(neighbors)
            for neighbor_hash in neighbors:
                extended_neighbors.update(get_neighbors(neighbor_hash))

            for extended_hash in extended_neighbors:
                if extended_hash != geohash:  # Skip primary (already checked)
                    candidates.extend(loader.get_candidate_countries(extended_hash))
            candidates = list(set(candidates))  # Deduplicate

    # Step 3c: Final fallback — consider every loaded country. This catches
    # cases where geohash indexing missed coverage for small countries.
    if not candidates:
        try:
            # Monolithic loader exposes all countries through its metadata;
            # the modular loader tracks what it has loaded.
            if hasattr(loader, 'metadata') and loader.metadata:
                candidates = list(loader.metadata.keys())
            elif hasattr(loader, '_loaded_countries'):
                candidates = list(loader._loaded_countries.keys())
        except Exception:
            # Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; narrowed. Fallback failed — continue with
            # empty candidates.
            pass

    if not candidates:
        # No country found (likely ocean or unsupported area)
        return ResolutionResult()

    # Step 4: Test point-in-polygon for each candidate
    matches = []
    for country_id in candidates:
        polygon_data = loader.get_polygon(country_id)
        if not polygon_data:
            continue

        hit = _evaluate_polygon(point, polygon_data, len(candidates))
        if hit is None:
            continue

        # A match only counts when we can attach metadata to it.
        metadata = loader.get_metadata(country_id)
        if metadata:
            exterior_tuples, holes_tuples, confidence = hit
            matches.append({
                'country_id': country_id,
                'metadata': metadata,
                'confidence': confidence,
                'polygon': (exterior_tuples, holes_tuples)
            })

    # Step 5: No matches among geohash candidates — brute-force scan of all
    # countries (expensive last resort when the index missed the country).
    if not matches:
        metadata_dict = loader.metadata
        polygons_dict = loader.polygons

        for country_id, meta in metadata_dict.items():
            # Skip countries already tested as candidates
            if country_id in candidates:
                continue

            polygon_data = polygons_dict.get(country_id)
            if not polygon_data:
                continue

            hit = _evaluate_polygon(point, polygon_data, 1)  # Single match in fallback
            if hit is None:
                continue

            exterior_tuples, holes_tuples, confidence = hit
            matches.append({
                'country_id': country_id,
                'metadata': meta,
                'confidence': confidence * 0.95,  # Slightly lower confidence for fallback match
                'polygon': (exterior_tuples, holes_tuples)
            })

    # Step 6: Return best match (highest confidence)
    if not matches:
        # No valid PIP matches found
        return ResolutionResult()

    matches.sort(key=lambda m: m['confidence'], reverse=True)
    best_match = matches[0]

    metadata = best_match['metadata']
    return ResolutionResult(
        country_id=best_match['country_id'],
        country_name=metadata['name'],
        iso2=metadata.get('iso2'),
        iso3=metadata.get('iso3'),
        continent=metadata.get('continent'),
        timezone=metadata.get('timezone'),
        confidence=best_match['confidence']
    )
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Geo Intelligence Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|