geo-intel-offline 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ """
2
+ Point-in-Polygon (PIP) algorithm using Ray Casting.
3
+
4
+ Ray Casting Algorithm:
5
+ - Cast a ray from the point to infinity (we use East, +X direction)
6
+ - Count intersections with polygon edges
7
+ - Odd intersections = inside, even = outside
8
+
9
+ Design Decisions:
10
+ 1. Ray Casting chosen over Winding Number for:
11
+ - Simpler implementation
12
+ - Better performance
13
+ - Deterministic results
14
+
15
+ 2. Handle polygon rings (exterior + holes):
16
+ - Exterior ring: inside = true
17
+ - Interior rings (holes): inside = false
18
+
19
+ 3. Edge cases handled:
20
+ - Points on vertices
21
+ - Points on edges
22
+ - Horizontal edges (collinear with the ray)
23
+ """
24
+
25
+ from typing import List, Tuple
26
+
27
+
28
def point_in_polygon(
    point: Tuple[float, float],
    polygon: List[Tuple[float, float]]
) -> bool:
    """
    Decide whether a point lies inside a single polygon ring (ray casting).

    A ray is cast from the point towards increasing longitude and the number
    of ring edges it crosses is counted; an odd count means "inside".
    Points lying exactly on the boundary are not special-cased: their
    classification simply follows the strict comparisons used below.

    Args:
        point: (lat, lon) tuple
        polygon: List of (lat, lon) tuples forming the ring. The ring is
            closed implicitly (last vertex connects back to the first).

    Returns:
        True if the point is inside the ring, False otherwise (including
        when the ring is empty or has fewer than 3 vertices).
    """
    if not polygon or len(polygon) < 3:
        return False

    lat, lon = point
    crossings = 0

    # Walk the ring edge by edge; each edge joins the previous vertex to the
    # current one, starting with the closing edge (last vertex -> first).
    prev_lat, prev_lon = polygon[-1]
    for cur_lat, cur_lon in polygon:
        # The edge straddles the ray's latitude only when its endpoints fall
        # on different sides of it. This also guarantees
        # prev_lat != cur_lat, so the division below cannot be by zero and
        # edges parallel to the ray are skipped.
        if (cur_lat > lat) != (prev_lat > lat):
            if prev_lon == cur_lon:
                # Edge at constant longitude: the crossing is at that longitude.
                crossing_lon = cur_lon
            else:
                # Interpolate the longitude where the edge meets the ray.
                crossing_lon = (
                    (lat - cur_lat) * (prev_lon - cur_lon) / (prev_lat - cur_lat)
                    + cur_lon
                )

            # Only crossings strictly to the East of the point count.
            if lon < crossing_lon:
                crossings += 1

        prev_lat, prev_lon = cur_lat, cur_lon

    return crossings % 2 == 1
70
+
71
+
72
def point_in_polygon_with_holes(
    point: Tuple[float, float],
    exterior: List[Tuple[float, float]],
    holes: List[List[Tuple[float, float]]] = None
) -> bool:
    """
    Test a point against a polygon made of an exterior ring and optional holes.

    The exterior ring decides inclusion; every interior ring (hole) carves
    an excluded area out of it (e.g. a lake inside a country).

    Args:
        point: (lat, lon) tuple
        exterior: Exterior polygon ring
        holes: List of interior rings (holes); None or empty means no holes

    Returns:
        True if point is inside exterior but not in any hole
    """
    # Outside the outer boundary: the holes are irrelevant.
    if not point_in_polygon(point, exterior):
        return False

    # Inside the exterior: the point counts only if no hole contains it.
    return not any(point_in_polygon(point, ring) for ring in (holes or ()))
101
+
102
+
103
def distance_to_polygon_edge(
    point: Tuple[float, float],
    polygon: List[Tuple[float, float]]
) -> float:
    """
    Return the smallest distance from a point to any edge of a polygon ring.

    Used for confidence scoring: closer to edge = lower confidence.
    The distance is a plain Euclidean distance computed directly on the
    (lat, lon) values, so it is expressed in degrees and is only approximate.

    Args:
        point: (lat, lon) tuple
        polygon: Polygon ring (closed implicitly, last vertex -> first)

    Returns:
        Distance in degrees (approximate, for confidence scoring);
        float('inf') when the ring is empty.
    """
    if not polygon:
        return float('inf')

    lat, lon = point
    closest = float('inf')

    # Each segment runs from the current vertex to the previous one; the
    # first iteration covers the closing segment (first vertex <-> last).
    end_lat, end_lon = polygon[-1]
    for start_lat, start_lon in polygon:
        d_lon = end_lon - start_lon
        d_lat = end_lat - start_lat

        if d_lon == 0 and d_lat == 0:
            # Zero-length segment: the nearest point is the vertex itself.
            near_lat, near_lon = start_lat, start_lon
        else:
            # Parameter of the orthogonal projection onto the segment's
            # supporting line, clamped to [0, 1] so it stays on the segment.
            t = max(0, min(1, ((lat - start_lat) * d_lat + (lon - start_lon) * d_lon) / (d_lon * d_lon + d_lat * d_lat)))
            near_lat = start_lat + t * d_lat
            near_lon = start_lon + t * d_lon

        gap = ((lat - near_lat) ** 2 + (lon - near_lon) ** 2) ** 0.5
        if gap < closest:
            closest = gap

        end_lat, end_lon = start_lat, start_lon

    return closest
@@ -0,0 +1,104 @@
1
+ """
2
+ Shared polygon processing utilities.
3
+ Consolidates duplicate code from data builders.
4
+ """
5
+
6
+ from typing import List, Tuple, Dict
7
+
8
+
9
def calculate_bounding_box(polygon: List[Tuple[float, float]]) -> Tuple[float, float, float, float]:
    """
    Compute the axis-aligned bounding box of a polygon ring.

    Args:
        polygon: List of (lat, lon) vertices

    Returns:
        Tuple of (min_lat, max_lat, min_lon, max_lon); all zeros when the
        polygon is empty.
    """
    if not polygon:
        return 0.0, 0.0, 0.0, 0.0

    # Split the vertices into one latitude column and one longitude column.
    lat_column, lon_column = zip(*[(vertex[0], vertex[1]) for vertex in polygon])

    south, north = min(lat_column), max(lat_column)
    west, east = min(lon_column), max(lon_column)
    return south, north, west, east
17
+
18
+
19
def calculate_adaptive_step_size(lat_range: float, lon_range: float) -> float:
    """
    Calculate adaptive step size for geohash sampling based on polygon size.

    The step is chosen from the larger of the two bounding-box extents:
    smaller polygons get a finer sampling grid so that tiny islands are not
    missed, larger polygons get a coarser grid to keep the sample count down.

    For reference, a precision-6 geohash cell is roughly 1.2km x 0.6km
    (about 0.011 degrees of longitude by 0.0055 degrees of latitude).
    NOTE(review): the steps used for extents of 0.05 degrees and above are
    larger than one such cell, so they do not by themselves guarantee a
    sample in every cell - confirm this is acceptable for the index builder.

    Args:
        lat_range: Latitude extent of the polygon's bounding box, in degrees
        lon_range: Longitude extent of the polygon's bounding box, in degrees

    Returns:
        Step size in degrees
    """
    max_range = max(lat_range, lon_range)

    # (exclusive upper bound on the extent, step) - both in degrees, ordered
    # from smallest to largest extent. Distances are for 1 degree ~ 111 km.
    step_table = (
        (0.001, 0.001),  # ~111m - very fine for tiny islands
        (0.01, 0.002),   # ~222m - fine sampling for small islands
        (0.05, 0.005),   # ~555m - good for small countries
        (0.1, 0.008),    # ~888m
        (0.5, 0.015),    # ~1.6km - medium countries
        (2.0, 0.025),    # ~2.7km - large countries
    )
    for upper_bound, step in step_table:
        if max_range < upper_bound:
            return step

    return 0.05  # ~5.5km - very large countries
48
+
49
+
50
def calculate_safe_iteration_limits(
    min_lat: float,
    max_lat: float,
    min_lon: float,
    max_lon: float,
    step: float
) -> Tuple[int, int, int, float]:
    """
    Derive generous loop bounds for sampling a bounding box on a grid.

    The limits act purely as a safety net against runaway loops; they are
    not meant to reduce coverage, and the step is passed through unchanged.

    Args:
        min_lat: Southern edge of the bounding box
        max_lat: Northern edge of the bounding box
        min_lon: Western edge of the bounding box
        max_lon: Eastern edge of the bounding box
        step: Sampling step in degrees

    Returns:
        Tuple of (max_lat_iterations, max_lon_iterations, max_total_iterations, adjusted_step)
    """
    lat_span = max_lat - min_lat
    lon_span = max_lon - min_lon

    if step > 0:
        # Number of steps that fit into each extent, plus a buffer of 10.
        lat_cap = int(lat_span / step) + 10
        lon_cap = int(lon_span / step) + 10
    else:
        # A non-positive step cannot be divided by; use fixed per-axis limits.
        lat_cap = lon_cap = 10000

    # Overall ceiling of 10 million iterations across the whole grid.
    total_cap = min(lat_cap * lon_cap, 10000000)

    # The step itself is returned as given so coverage is preserved.
    return lat_cap, lon_cap, total_cap, step
82
+
83
+
84
def get_polygon_centroid(polygon: List[Tuple[float, float]]) -> Tuple[float, float]:
    """
    Return the average position of a polygon's vertices.

    This is the arithmetic mean of the vertex coordinates, not an
    area-weighted centroid; every listed vertex contributes equally.

    Args:
        polygon: List of (lat, lon) vertices

    Returns:
        (lat, lon) of the vertex average, or (0.0, 0.0) for an empty polygon
    """
    if not polygon:
        return 0.0, 0.0

    vertex_count = len(polygon)
    lat_total = sum(vertex[0] for vertex in polygon)
    lon_total = sum(vertex[1] for vertex in polygon)
    return lat_total / vertex_count, lon_total / vertex_count
92
+
93
+
94
def convert_geojson_coords_to_latlon(coords_list: List) -> List[Tuple[float, float]]:
    """
    Swap GeoJSON [lon, lat] positions into the internal (lat, lon) order.

    Only the first two components of each position are used; any further
    components are dropped.

    Args:
        coords_list: GeoJSON coordinate list (each element is [lon, lat])

    Returns:
        List of (lat, lon) tuples
    """
    converted = []
    for position in coords_list:
        lat_value = position[1]
        lon_value = position[0]
        converted.append((lat_value, lon_value))
    return converted
@@ -0,0 +1,306 @@
1
+ """
2
+ Resolver orchestration - coordinates the resolution pipeline.
3
+
4
+ Resolution Pipeline:
5
+ 1. Encode lat/lon to geohash
6
+ 2. Query geohash index for candidate countries
7
+ 3. For each candidate:
8
+ a. Load polygon
9
+ b. Test point-in-polygon
10
+ c. If match, calculate confidence
11
+ 4. Return best match or handle ambiguity
12
+
13
+ Edge Cases Handled:
14
+ - Points in oceans (no country match)
15
+ - Border points (multiple candidates)
16
+ - Geohash boundary cases (check neighbors)
17
+ - Countries with holes (islands, lakes)
18
+ """
19
+
20
+ from typing import List, Tuple, Optional, Dict
21
+ from .geohash import encode, get_neighbors
22
+ from .pip import point_in_polygon_with_holes
23
+ from .confidence import calculate_confidence
24
+ from .data_loader import get_loader
25
+ from .modular_data_loader import ModularDataLoader
26
+
27
+
28
class ResolutionResult:
    """
    Result of a geo-intelligence resolution.

    Holds the matched country's identifier and descriptive fields together
    with a confidence score. An instance created with no arguments
    represents "no country found".
    """

    def __init__(
        self,
        country_id: Optional[int] = None,
        country_name: Optional[str] = None,
        iso2: Optional[str] = None,
        iso3: Optional[str] = None,
        continent: Optional[str] = None,
        timezone: Optional[str] = None,
        confidence: float = 0.0
    ):
        self.country_id = country_id
        self.country_name = country_name
        self.iso2 = iso2
        self.iso3 = iso3
        self.continent = continent
        self.timezone = timezone
        self.confidence = confidence

    def __repr__(self) -> str:
        """Return a readable representation listing every field (for logs/debugging)."""
        return (
            "ResolutionResult(country_id={!r}, country_name={!r}, iso2={!r}, "
            "iso3={!r}, continent={!r}, timezone={!r}, confidence={!r})"
        ).format(
            self.country_id,
            self.country_name,
            self.iso2,
            self.iso3,
            self.continent,
            self.timezone,
            self.confidence,
        )

    def to_dict(self) -> Dict:
        """
        Convert to dictionary.

        The country name is exposed under the key "country"; the internal
        country_id is not part of the dictionary.
        """
        return {
            "country": self.country_name,
            "iso2": self.iso2,
            "iso3": self.iso3,
            "continent": self.continent,
            "timezone": self.timezone,
            "confidence": self.confidence
        }

    def is_valid(self) -> bool:
        """Check if result is valid (has country), i.e. country_id is set."""
        return self.country_id is not None
63
+
64
+
65
def _iter_polygon_rings(polygon_data: Dict):
    """
    Yield (exterior, holes) ring pairs to test for one stored polygon record.

    A record flagged 'multi' with a non-empty 'exteriors' list yields one
    pair per exterior; any other record yields a single pair built from
    'exterior'. The record's 'holes' list is shared by every exterior, so a
    point inside any hole is excluded whichever exterior contains it.
    Coordinates are converted to 2-tuples from their first two components.
    """
    holes = polygon_data.get('holes', [])
    holes_tuples = [[(p[0], p[1]) for p in hole] for hole in holes] if holes else None

    exteriors = polygon_data.get('exteriors', [])
    if not (polygon_data.get('multi', False) and exteriors):
        exteriors = [polygon_data.get('exterior', [])]

    for exterior in exteriors:
        yield [(p[0], p[1]) for p in exterior], holes_tuples


def _find_containing_ring(point: Tuple[float, float], polygon_data: Dict):
    """Return the first (exterior, holes) pair of the record that contains point, or None."""
    for exterior_tuples, holes_tuples in _iter_polygon_rings(polygon_data):
        if point_in_polygon_with_holes(point, exterior_tuples, holes_tuples):
            return exterior_tuples, holes_tuples
    return None


def resolve(
    lat: float,
    lon: float,
    data_dir: Optional[str] = None,
    countries: Optional[List[str]] = None,
    continents: Optional[List[str]] = None,
    exclude_countries: Optional[List[str]] = None,
    loader: Optional[ModularDataLoader] = None
) -> ResolutionResult:
    """
    Resolve latitude/longitude to geo-intelligence.

    Main resolution function that orchestrates the entire pipeline:
    geohash lookup of candidate countries (widening to neighbouring cells
    when the primary cell is empty), point-in-polygon testing of every
    candidate, and selection of the match with the highest confidence.

    Args:
        lat: Latitude (-90 to 90)
        lon: Longitude (-180 to 180)
        data_dir: Optional custom data directory
        countries: Optional list of ISO2 codes to load (modular format)
        continents: Optional list of continent names to load (modular format)
        exclude_countries: Optional list of ISO2 codes to exclude (modular format)
        loader: Optional pre-configured loader instance

    Returns:
        ResolutionResult with country information and confidence. An empty
        ResolutionResult (is_valid() is False) is returned when no country
        contains the point.

    Raises:
        KeyError: If the best match's metadata has no 'name' entry.
    """
    if loader is None:
        # Use modular loader if filters specified, otherwise use default
        if countries or continents or exclude_countries:
            loader = ModularDataLoader(
                data_dir=data_dir,
                countries=countries,
                continents=continents,
                exclude_countries=exclude_countries
            )
        else:
            loader = get_loader(data_dir)
    point = (lat, lon)

    # Step 1: Encode to geohash
    geohash = encode(lat, lon)

    # Step 2: Get candidate countries.
    # Copy the result: the list is extended below and must not alias a list
    # owned (and possibly cached) by the loader.
    candidates = list(loader.get_candidate_countries(geohash) or [])

    # Step 3: If no candidates from primary geohash, try neighbors
    # This handles edge cases where point is on geohash boundaries
    if not candidates:
        neighbors = list(get_neighbors(geohash))
        for neighbor_hash in neighbors:
            candidates.extend(loader.get_candidate_countries(neighbor_hash) or [])
        # Deduplicate while keeping first-seen order, so that ties between
        # equally confident matches are broken the same way on every run.
        candidates = list(dict.fromkeys(candidates))

        # Step 3b: If still no candidates, try the neighbors of the neighbors
        # (the 5x5 block of cells around the point). This improves accuracy
        # for small countries/islands that may have sparse geohash coverage.
        if not candidates:
            # The primary cell and its direct neighbors already came back
            # empty, so only the cells not queried yet need a lookup.
            already_queried = {geohash}
            already_queried.update(neighbors)
            extended_hashes = dict.fromkeys(
                extended_hash
                for neighbor_hash in neighbors
                for extended_hash in get_neighbors(neighbor_hash)
                if extended_hash not in already_queried
            )

            for extended_hash in extended_hashes:
                candidates.extend(loader.get_candidate_countries(extended_hash) or [])
            candidates = list(dict.fromkeys(candidates))

    # Step 3c: Final fallback - if still no candidates, try checking all loaded countries
    # This catches edge cases where geohash indexing missed coverage for small countries
    if not candidates:
        try:
            # For monolithic loader, we can iterate through metadata
            if hasattr(loader, 'metadata') and loader.metadata:
                candidates = list(loader.metadata.keys())
            # For modular loader, check if we can get all loaded countries
            elif hasattr(loader, '_loaded_countries'):
                candidates = list(loader._loaded_countries.keys())
        except Exception:
            # Best-effort only: continue with empty candidates. Unlike a bare
            # except, this lets KeyboardInterrupt/SystemExit propagate.
            pass

    if not candidates:
        # No country found (likely ocean or unsupported area)
        return ResolutionResult()

    # Step 4: Test point-in-polygon for each candidate.
    # Matches are (confidence, country_id, metadata) tuples.
    candidate_count = len(candidates)
    matches = []

    for country_id in candidates:
        polygon_data = loader.get_polygon(country_id)
        if not polygon_data:
            continue

        ring = _find_containing_ring(point, polygon_data)
        if ring is None:
            continue

        # A geometric hit only counts when the country has metadata.
        metadata = loader.get_metadata(country_id)
        if not metadata:
            continue

        exterior_tuples, holes_tuples = ring
        confidence = calculate_confidence(
            point,
            exterior_tuples,
            holes_tuples,
            candidate_count=candidate_count
        )
        matches.append((confidence, country_id, metadata))

    # Step 5: If no matches from geohash candidates, try broader search
    # This handles cases where geohash index doesn't have complete coverage
    if not matches:
        # Fallback: Check all countries (expensive, but ensures accuracy)
        # This is a last resort when geohash indexing missed the country.
        # NOTE(review): step 3c treats 'metadata' as optional on the loader,
        # so both attributes are read defensively here; a loader without
        # them simply contributes nothing to this fallback - confirm against
        # the loader implementations.
        metadata_dict = getattr(loader, 'metadata', None) or {}
        polygons_dict = getattr(loader, 'polygons', None) or {}
        already_tested = set(candidates)

        for country_id, meta in metadata_dict.items():
            # Skip if already checked as candidate
            if country_id in already_tested:
                continue

            polygon_data = polygons_dict.get(country_id)
            if not polygon_data:
                continue

            ring = _find_containing_ring(point, polygon_data)
            if ring is None:
                continue

            exterior_tuples, holes_tuples = ring
            confidence = calculate_confidence(
                point,
                exterior_tuples,
                holes_tuples,
                candidate_count=1  # Single match in fallback
            )
            # Slightly lower confidence for fallback match
            matches.append((confidence * 0.95, country_id, meta))

    # Step 6: Return best match (highest confidence)
    if not matches:
        # No valid PIP matches found - return an empty result
        return ResolutionResult()

    # max() keeps the first entry among equal confidences, i.e. the same
    # winner a stable descending sort would put first.
    best_confidence, best_country_id, metadata = max(matches, key=lambda match: match[0])

    return ResolutionResult(
        country_id=best_country_id,
        country_name=metadata['name'],
        iso2=metadata.get('iso2'),
        iso3=metadata.get('iso3'),
        continent=metadata.get('continent'),
        timezone=metadata.get('timezone'),
        confidence=best_confidence
    )
305
+
306
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Geo Intelligence Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.