geo-intel-offline 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,528 @@
+ """
+ Data Builder - Process GeoJSON and generate optimized data files
+
+ Design Decisions:
+ 1. Accept GeoJSON input (standard format, widely available)
+ 2. Simplify polygons using Douglas-Peucker algorithm (reduces memory)
+ 3. Build geohash index by sampling polygon coverage
+ 4. Extract metadata from GeoJSON properties
+
+ Note: In production, you would download source data from Natural Earth
+ or similar authoritative sources. This script provides the processing pipeline.
+ """
+
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Dict, List, Tuple
+ from .geohash import encode
+ from .polygon_utils import (
+     calculate_bounding_box,
+     calculate_adaptive_step_size,
+     calculate_safe_iteration_limits,
+     get_polygon_centroid,
+     convert_geojson_coords_to_latlon
+ )
+
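To make design decision 3 concrete before the code: the index built below maps precision-6 geohash cell strings to lists of candidate country IDs, where IDs are simply 1-based feature order as assigned in process_geojson further down. A minimal sketch of that shape, assuming only that the package imports as geo_intel_offline and that encode has the (lat, lon, precision) signature used throughout this module; the ID 4 and the coordinates are purely illustrative:

    # Sketch of the index shape produced by this module (illustrative values only).
    from geo_intel_offline.geohash import encode

    cell = encode(48.8566, 2.3522, 6)   # precision-6 cell containing this (lat, lon)
    geohash_index = {cell: [4]}         # geohash string -> candidate country IDs
    print(cell, geohash_index[cell])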
+
+ def simplify_polygon(
+     polygon: List[Tuple[float, float]],
+     tolerance: float = 0.01
+ ) -> List[Tuple[float, float]]:
+     """
+     Simplify polygon using Douglas-Peucker algorithm.
+
+     For very small polygons, skip simplification to preserve validity.
+
+     Args:
+         polygon: List of (lat, lon) tuples
+         tolerance: Simplification tolerance in degrees
+
+     Returns:
+         Simplified polygon (at least 3 vertices whenever the input has at least 3)
+     """
+     if len(polygon) <= 2:
+         return polygon
+
+     # For very small polygons, skip simplification to preserve validity
+     if len(polygon) <= 10:
+         return polygon if len(polygon) >= 3 else []
+
+     # Douglas-Peucker algorithm
+     def douglas_peucker(points: List[Tuple[float, float]], tol: float) -> List[Tuple[float, float]]:
+         if len(points) <= 3:
+             return points if len(points) >= 3 else []
+
+         # Find point with maximum distance
+         max_dist = 0
+         max_idx = 0
+         end_idx = len(points) - 1
+
+         for i in range(1, end_idx):
+             dist = _point_to_line_distance(points[i], points[0], points[end_idx])
+             if dist > max_dist:
+                 max_dist = dist
+                 max_idx = i
+
+         # If max distance is greater than tolerance, recursively simplify
+         if max_dist > tol:
+             left = douglas_peucker(points[:max_idx + 1], tol)
+             right = douglas_peucker(points[max_idx:], tol)
+
+             if left and right:
+                 combined = left[:-1] + right
+                 if len(combined) < 3 and len(points) >= 3:
+                     return points
+                 return combined
+             elif left:
+                 return left if len(left) >= 3 else points
+             elif right:
+                 return right if len(right) >= 3 else points
+             return points
+         else:
+             if len(points) >= 3:
+                 mid_idx = len(points) // 2
+                 return [points[0], points[mid_idx], points[end_idx]]
+             return points
+
+     result = douglas_peucker(polygon, tolerance)
+
+     # Final validation
+     if len(result) < 3:
+         return polygon if len(polygon) >= 3 else result
+
+     return result
+
+
+ def _point_to_line_distance(
+     point: Tuple[float, float],
+     line_start: Tuple[float, float],
+     line_end: Tuple[float, float]
+ ) -> float:
+     """Calculate perpendicular distance from point to line segment."""
+     px, py = point
+     sx, sy = line_start
+     ex, ey = line_end
+
+     dx = ex - sx
+     dy = ey - sy
+
+     if dx == 0 and dy == 0:
+         # Degenerate line (point)
+         return ((px - sx) ** 2 + (py - sy) ** 2) ** 0.5
+
+     # Calculate t parameter
+     t = ((px - sx) * dx + (py - sy) * dy) / (dx * dx + dy * dy)
+
+     # Clamp to line segment
+     t = max(0, min(1, t))
+
+     # Projection point
+     proj_x = sx + t * dx
+     proj_y = sy + t * dy
+
+     # Distance
+     return ((px - proj_x) ** 2 + (py - proj_y) ** 2) ** 0.5
+
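For orientation, a minimal usage sketch of simplify_polygon on a hand-made ring (illustrative only; it assumes the module imports as geo_intel_offline.data_builder). The ring is padded with extra vertices along two edges so that simplification actually runs, since inputs of 10 or fewer vertices are returned unchanged:

    # Illustrative ring: a 1-degree square with extra vertices along two edges.
    from geo_intel_offline.data_builder import simplify_polygon

    ring = (
        [(0.0, lon / 10.0) for lon in range(10)] +   # bottom edge, 10 vertices
        [(lat / 10.0, 1.0) for lat in range(10)] +   # right edge, 10 vertices
        [(1.0, 1.0), (1.0, 0.0)]                     # remaining corners
    )
    simplified = simplify_polygon(ring, tolerance=0.005)
    print(f"{len(ring)} -> {len(simplified)} vertices")   # at least 3 vertices remain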
+
+ def build_geohash_index_from_polygons(
+     polygons: Dict[int, Dict],
+     geohash_precision: int = 6,
+     validate_with_pip: bool = True
+ ) -> Dict[str, List[int]]:
+     """
+     Build geohash index from processed polygons.
+
+     Optimized version that uses already-processed polygon data.
+
+     Args:
+         polygons: Dict mapping country_id to polygon data
+         geohash_precision: Geohash precision level
+         validate_with_pip: If True, validate points are in polygon
+
+     Returns:
+         Dict mapping geohash strings to lists of country IDs
+     """
+     from .pip import point_in_polygon_with_holes
+
+     index: Dict[str, List[int]] = {}
+     total_countries = len(polygons)
+
+     print(f"Building geohash index for {total_countries} countries...")
+
+     for idx, (country_id, polygon_data) in enumerate(polygons.items(), 1):
+         if idx % max(1, total_countries // 20) == 0 or idx == total_countries:
+             progress = (idx / total_countries) * 100
+             print(f" Progress: {progress:.1f}% ({idx}/{total_countries})", end='\r')
+
+         is_multi = polygon_data.get('multi', False)
+         exteriors_data = polygon_data.get('exteriors', [])
+         exterior = polygon_data.get('exterior', [])
+         holes = polygon_data.get('holes', [])
+
+         # Handle MultiPolygon
+         if is_multi and exteriors_data:
+             exteriors_to_process = exteriors_data
+         else:
+             exteriors_to_process = [exterior] if exterior else []
+
+         geohashes_added = set()
+
+         for exterior_coords in exteriors_to_process:
+             if not exterior_coords or len(exterior_coords) < 3:
+                 continue
+
+             # Calculate bounding box using shared utility
+             min_lat, max_lat, min_lon, max_lon = calculate_bounding_box(exterior_coords)
+
+             lat_range = max_lat - min_lat
+             lon_range = max_lon - min_lon
+
+             # Handle extremely small polygons - sample multiple points, not just centroid
+             if lat_range < 0.001 or lon_range < 0.001:
+                 # For tiny polygons, sample multiple points (centroid + corners + midpoints)
+                 # to ensure better geohash coverage
+                 sample_points = []
+                 sample_points.append(get_polygon_centroid(exterior_coords))
+                 # Add bounding box corners and midpoints
+                 sample_points.append((min_lat, min_lon))
+                 sample_points.append((min_lat, max_lon))
+                 sample_points.append((max_lat, min_lon))
+                 sample_points.append((max_lat, max_lon))
+                 sample_points.append((min_lat, (min_lon + max_lon) / 2))
+                 sample_points.append((max_lat, (min_lon + max_lon) / 2))
+                 sample_points.append(((min_lat + max_lat) / 2, min_lon))
+                 sample_points.append(((min_lat + max_lat) / 2, max_lon))
+
+                 if validate_with_pip:
+                     for point in sample_points:
+                         if point_in_polygon_with_holes(point, exterior_coords, holes):
+                             geohash = encode(point[0], point[1], geohash_precision)
+                             if geohash not in index:
+                                 index[geohash] = []
+                             if country_id not in index[geohash]:
+                                 index[geohash].append(country_id)
+                 continue
+
+             # Calculate adaptive step size using shared utility
+             step = calculate_adaptive_step_size(lat_range, lon_range)
+
+             # Calculate safe iteration limits (may adjust step for very large countries)
+             max_lat_iterations, max_lon_iterations, max_total_iterations, step = calculate_safe_iteration_limits(
+                 min_lat, max_lat, min_lon, max_lon, step
+             )
+
+             # Convert to tuples for PIP (already in lat,lon format)
+             exterior_tuples = [(p[0], p[1]) for p in exterior_coords]
+             holes_tuples = [[(p[0], p[1]) for p in hole] for hole in holes] if holes else None
+
+             # Sample points
+             lat = min_lat
+             iterations = 0
+
+             while lat <= max_lat and iterations < max_lat_iterations * 2:
+                 lon = min_lon
+                 lon_iterations = 0
+
+                 while lon <= max_lon and lon_iterations < max_lon_iterations * 2:
+                     point = (lat, lon)
+                     iterations += 1
+                     lon_iterations += 1
+
+                     # Safety: prevent infinite loops
+                     if iterations > max_total_iterations:
+                         break
+
+                     # Validate point is in polygon
+                     if validate_with_pip:
+                         if not point_in_polygon_with_holes(point, exterior_tuples, holes_tuples):
+                             lon += step
+                             continue
+
+                     # Add to index
+                     geohash = encode(lat, lon, geohash_precision)
+                     geohash_key = (geohash, country_id)
+
+                     if geohash_key not in geohashes_added:
+                         if geohash not in index:
+                             index[geohash] = []
+                         if country_id not in index[geohash]:
+                             index[geohash].append(country_id)
+                         geohashes_added.add(geohash_key)
+
+                     lon += step
+
+                 if iterations > max_total_iterations:
+                     break
+                 lat += step
+
+         # Fallback: ensure at least one geohash for very small countries
+         if len(geohashes_added) == 0 and exterior:
+             exterior_tuples = [(p[0], p[1]) for p in exterior]
+             centroid_lat, centroid_lon = get_polygon_centroid(exterior)
+
+             if not validate_with_pip or point_in_polygon_with_holes(
+                 (centroid_lat, centroid_lon), exterior_tuples,
+                 [[(p[0], p[1]) for p in hole] for hole in holes] if holes else None
+             ):
+                 geohash = encode(centroid_lat, centroid_lon, geohash_precision)
+                 if geohash not in index:
+                     index[geohash] = []
+                 if country_id not in index[geohash]:
+                     index[geohash].append(country_id)
+
+     print()  # New line after progress
+     print(f"✓ Completed: {total_countries} countries indexed")
+     return index
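The index above only narrows the search; a point-in-polygon test still decides membership. As a hypothetical consumer sketch (not the package's actual resolver), this is how the three structures fit together, reusing the same encode and point_in_polygon_with_holes signatures imported in this module:

    # Hypothetical lookup: the geohash cell yields candidates, PIP confirms the match.
    from geo_intel_offline.geohash import encode
    from geo_intel_offline.pip import point_in_polygon_with_holes

    def resolve(lat, lon, index, polygons, precision=6):
        for country_id in index.get(encode(lat, lon, precision), []):
            poly = polygons[country_id]
            exteriors = poly['exteriors'] if poly.get('multi') else [poly['exterior']]
            holes = poly.get('holes') or None
            for exterior in exteriors:
                if point_in_polygon_with_holes((lat, lon), exterior, holes):
                    return country_id
        return None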
+
+
+ def process_geojson(filepath: Path, polygon_tolerance: float = 0.005, geohash_precision: int = 6) -> Tuple[Dict[str, List[int]], Dict, Dict]:
+     """
+     Process GeoJSON file and generate data files.
+
+     Returns:
+         Tuple of (geohash_index, polygons_dict, metadata_dict)
+     """
+     with open(filepath, 'r', encoding='utf-8') as f:
+         geojson = json.load(f)
+
+     features = geojson.get('features', [])
+
+     print(f"Loading {len(features)} countries from GeoJSON...")
+
+     polygons: Dict[int, Dict] = {}
+     metadata: Dict[int, Dict] = {}
+     skipped_countries = []
+     total_features = len(features)
+
+     for idx, feature in enumerate(features):
+         country_id = idx + 1
+         progress_pct = ((idx + 1) / total_features) * 100
+
+         if (idx + 1) % max(1, total_features // 20) == 0 or (idx + 1) == total_features:
+             print(f" Processing polygons: {progress_pct:.1f}% ({idx + 1}/{total_features})", end='\r')
+
+         geometry = feature.get('geometry', {})
+         properties = feature.get('properties', {})
+         country_name = properties.get('NAME', properties.get('name', f'Country {country_id}'))
+
+         if geometry.get('type') not in ('Polygon', 'MultiPolygon'):
+             skipped_countries.append(f"{country_name} (type: {geometry.get('type', 'unknown')})")
+             continue
+
+         coords = geometry.get('coordinates', [])
+
+         # Process coordinates
+         if geometry['type'] == 'Polygon':
+             exterior_coords = coords[0] if coords else []
+             hole_coords = coords[1:] if len(coords) > 1 else []
+
+             if not exterior_coords or len(exterior_coords) < 3:
+                 skipped_countries.append(f"{country_name} (invalid polygon: {len(exterior_coords) if exterior_coords else 0} vertices)")
+                 continue
+
+             exterior = [(p[1], p[0]) for p in exterior_coords]
+             holes = [[(p[1], p[0]) for p in hole] for hole in hole_coords] if hole_coords else []
+
+             # Simplify (skip for small polygons)
+             if len(exterior) <= 10:
+                 exterior_simplified = exterior
+             else:
+                 exterior_simplified = simplify_polygon(exterior, tolerance=polygon_tolerance)
+                 if len(exterior_simplified) < 3:
+                     exterior_simplified = exterior
+
+             holes_simplified = []
+             for hole in holes:
+                 if len(hole) <= 10:
+                     holes_simplified.append(hole)
+                 else:
+                     hole_simpl = simplify_polygon(hole, tolerance=polygon_tolerance)
+                     if len(hole_simpl) >= 3:
+                         holes_simplified.append(hole_simpl)
+                     elif len(hole) >= 3:
+                         holes_simplified.append(hole)
+
+             if len(exterior_simplified) >= 3:
+                 polygons[country_id] = {
+                     'exterior': exterior_simplified,
+                     'holes': holes_simplified if holes_simplified else []
+                 }
+             else:
+                 skipped_countries.append(f"{country_name} (polygon invalid after processing)")
+         else:  # MultiPolygon
+             all_exteriors = []
+             all_holes = []
+
+             for poly_part in coords:
+                 if not poly_part:
+                     continue
+
+                 exterior_coords = poly_part[0] if poly_part else []
+                 hole_coords = poly_part[1:] if len(poly_part) > 1 else []
+
+                 if not exterior_coords or len(exterior_coords) < 3:
+                     continue
+
+                 exterior = [(p[1], p[0]) for p in exterior_coords]
+
+                 if len(exterior) <= 10:
+                     exterior_simplified = exterior
+                 else:
+                     exterior_simplified = simplify_polygon(exterior, tolerance=polygon_tolerance)
+                     if len(exterior_simplified) < 3:
+                         exterior_simplified = exterior
+
+                 if len(exterior_simplified) >= 3:
+                     all_exteriors.append(exterior_simplified)
+
+                 if hole_coords:
+                     for hole_coord in hole_coords:
+                         hole = [(p[1], p[0]) for p in hole_coord]
+                         if len(hole) >= 3:
+                             if len(hole) <= 10:
+                                 all_holes.append(hole)
+                             else:
+                                 hole_simpl = simplify_polygon(hole, tolerance=polygon_tolerance)
+                                 if len(hole_simpl) >= 3:
+                                     all_holes.append(hole_simpl)
+                                 elif len(hole) >= 3:
+                                     all_holes.append(hole)
+
+             if all_exteriors:
+                 polygons[country_id] = {
+                     'exterior': all_exteriors[0],
+                     'holes': all_holes if all_holes else [],
+                     'multi': True,
+                     'exteriors': all_exteriors
+                 }
+
+         # Extract metadata
+         metadata[country_id] = {
+             'name': country_name,
+             'iso2': properties.get('ISO_A2', properties.get('iso_a2', '')),
+             'iso3': properties.get('ISO_A3', properties.get('iso_a3', '')),
+             'continent': properties.get('CONTINENT', properties.get('continent', '')),
+             'timezone': properties.get('TIMEZONE', properties.get('timezone', ''))
+         }
+
+         # Validate stored polygon
+         if country_id in polygons:
+             polygon_data = polygons[country_id]
+             exterior = polygon_data.get('exterior', [])
+             is_multi = polygon_data.get('multi', False)
+
+             if is_multi:
+                 exteriors = polygon_data.get('exteriors', [])
+                 if not exteriors or all(len(ext) < 3 for ext in exteriors):
+                     skipped_countries.append(f"{country_name} (invalid MultiPolygon)")
+                     del polygons[country_id]
+             elif not exterior or len(exterior) < 3:
+                 skipped_countries.append(f"{country_name} (invalid polygon: {len(exterior)} vertices)")
+                 del polygons[country_id]
+
+     print()  # New line after progress
+     if skipped_countries:
+         print(f"\n⚠ Skipped {len(skipped_countries)} countries with invalid geometry:")
+         for country in skipped_countries[:15]:
+             print(f" - {country}")
+         if len(skipped_countries) > 15:
+             print(f" ... and {len(skipped_countries) - 15} more")
+
+     print(f"\n✓ Processed {len(polygons)} countries with valid polygons")
+
+     # Build geohash index from processed polygons (optimized)
+     print("\nBuilding geohash index with PIP validation...")
+     geohash_index = build_geohash_index_from_polygons(
+         polygons,
+         geohash_precision=geohash_precision,
+         validate_with_pip=True
+     )
+
+     return geohash_index, polygons, metadata
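process_geojson can also be driven directly from Python. A minimal sketch with a single hand-written feature (the file name, the feature, and the "Squareland" properties are made up for illustration; the module path follows the usage string in main() below):

    import json
    from pathlib import Path
    from geo_intel_offline.data_builder import process_geojson

    feature = {
        "type": "Feature",
        "properties": {"NAME": "Squareland", "ISO_A2": "SQ", "ISO_A3": "SQL"},
        "geometry": {
            "type": "Polygon",
            # GeoJSON order is (lon, lat); process_geojson flips it to (lat, lon).
            "coordinates": [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0]]],
        },
    }
    src = Path("squareland.geojson")
    src.write_text(json.dumps({"type": "FeatureCollection", "features": [feature]}), encoding="utf-8")

    index, polygons, metadata = process_geojson(src, polygon_tolerance=0.005, geohash_precision=6)
    print(len(index), "geohashes;", metadata[1]["name"])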
+
+
+ def main():
+     """CLI entry point for data builder."""
+     if len(sys.argv) < 3:
+         print("Usage: python -m geo_intel_offline.data_builder <source.geojson> <output_dir> [tolerance] [precision]")
+         sys.exit(1)
+
+     source_path = Path(sys.argv[1])
+     output_dir = Path(sys.argv[2])
+     polygon_tolerance = float(sys.argv[3]) if len(sys.argv) > 3 else 0.005
+     geohash_precision = int(sys.argv[4]) if len(sys.argv) > 4 else 6
+
+     if not source_path.exists():
+         print(f"Error: Source file not found: {source_path}")
+         sys.exit(1)
+
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     print("=" * 70)
+     print("GEO_INTEL_OFFLINE - DATA BUILDER")
+     print("=" * 70)
+     print(f"Source: {source_path}")
+     print(f"Output: {output_dir}")
+     print(f"Polygon tolerance: {polygon_tolerance}°")
+     print(f"Geohash precision: {geohash_precision}")
+     print()
+
+     # Process GeoJSON
+     geohash_index, polygons, metadata = process_geojson(
+         source_path,
+         polygon_tolerance=polygon_tolerance,
+         geohash_precision=geohash_precision
+     )
+
+     # Save data files (both uncompressed and compressed)
+     print("\nSaving data files...")
+
+     import gzip
+
+     # Save uncompressed (for compatibility)
+     print(" Saving uncompressed JSON files...")
+     with open(output_dir / 'geohash_index.json', 'w', encoding='utf-8') as f:
+         json.dump(geohash_index, f, separators=(',', ':'))
+
+     with open(output_dir / 'polygons.json', 'w', encoding='utf-8') as f:
+         json.dump(polygons, f, separators=(',', ':'))
+
+     with open(output_dir / 'metadata.json', 'w', encoding='utf-8') as f:
+         json.dump(metadata, f, separators=(',', ':'), ensure_ascii=False)
+
+     # Save compressed versions (smaller file size)
+     print(" Saving compressed JSON files (gzip)...")
+     with gzip.open(output_dir / 'geohash_index.json.gz', 'wt', encoding='utf-8', compresslevel=9) as f:
+         json.dump(geohash_index, f, separators=(',', ':'))
+
+     with gzip.open(output_dir / 'polygons.json.gz', 'wt', encoding='utf-8', compresslevel=9) as f:
+         json.dump(polygons, f, separators=(',', ':'))
+
+     with gzip.open(output_dir / 'metadata.json.gz', 'wt', encoding='utf-8', compresslevel=9) as f:
+         json.dump(metadata, f, separators=(',', ':'), ensure_ascii=False)
+
+     # Show file sizes
+     print()
+     print("File sizes:")
+     for filename in ['geohash_index.json', 'polygons.json', 'metadata.json']:
+         json_file = output_dir / filename
+         gzip_file = output_dir / f"{filename}.gz"
+         if json_file.exists() and gzip_file.exists():
+             json_size = json_file.stat().st_size / 1024 / 1024  # MB
+             gzip_size = gzip_file.stat().st_size / 1024 / 1024  # MB
+             ratio = (gzip_size / json_size) * 100 if json_size > 0 else 0
+             print(f" {filename}: {json_size:.2f} MB -> {gzip_size:.2f} MB ({ratio:.1f}%)")
+
+     print()
+     print("=" * 70)
+     print("BUILD COMPLETE")
+     print("=" * 70)
+     print(f"✓ Generated {len(geohash_index)} geohashes")
+     print(f"✓ Processed {len(polygons)} countries")
+     print(f"✓ Files saved to: {output_dir}")
+     print()
+
+
+ if __name__ == '__main__':
+     main()
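A usage note on the output: json.dump stringifies the integer country IDs used as dictionary keys, so anything reading these files back has to convert them again. A sketch of a consumer reading the gzip variants (the "data" directory stands in for whatever <output_dir> was passed; the package's own loader may do this differently):

    import gzip
    import json
    from pathlib import Path

    out = Path("data")   # stands in for the builder's <output_dir>

    with gzip.open(out / "geohash_index.json.gz", "rt", encoding="utf-8") as f:
        geohash_index = json.load(f)                             # {geohash: [country_id, ...]}

    with gzip.open(out / "metadata.json.gz", "rt", encoding="utf-8") as f:
        metadata = {int(k): v for k, v in json.load(f).items()}  # keys back to int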
@@ -0,0 +1,173 @@
+ """
+ Minimal test data generator for development/testing.
+
+ Creates a small test dataset with a few countries for testing without
+ requiring full Natural Earth data.
+
+ Usage:
+     python -m geo_intel_offline.data_builder_minimal <output_dir>
+ """
+
+ import json
+ import sys
+ from pathlib import Path
+ from .geohash import encode
+
+
+ def create_minimal_test_data():
+     """
+     Create minimal test data with a few countries.
+
+     Returns:
+         Tuple of (geohash_index, polygons, metadata)
+     """
+     # Define a few test countries with simple square polygons
+     # Coordinates: (lat, lon)
+
+     # United States (rough bounding box)
+     usa_exterior = [
+         (49.0, -125.0),  # NW
+         (49.0, -66.0),   # NE
+         (25.0, -66.0),   # SE
+         (25.0, -125.0),  # SW
+     ]
+
+     # United Kingdom (rough bounding box)
+     uk_exterior = [
+         (60.0, -8.0),  # NW
+         (60.0, 2.0),   # NE
+         (50.0, 2.0),   # SE
+         (50.0, -8.0),  # SW
+     ]
+
+     # Japan (rough bounding box)
+     japan_exterior = [
+         (45.0, 129.0),  # NW
+         (45.0, 146.0),  # NE
+         (31.0, 146.0),  # SE
+         (31.0, 129.0),  # SW
+     ]
+
+     # France (rough bounding box)
+     france_exterior = [
+         (51.0, -5.0),  # NW
+         (51.0, 10.0),  # NE
+         (42.0, 10.0),  # SE
+         (42.0, -5.0),  # SW
+     ]
+
+     # Germany (rough bounding box)
+     germany_exterior = [
+         (55.0, 6.0),   # NW
+         (55.0, 15.0),  # NE
+         (47.0, 15.0),  # SE
+         (47.0, 6.0),   # SW
+     ]
+
+     polygons = {
+         1: {"exterior": usa_exterior, "holes": []},
+         2: {"exterior": uk_exterior, "holes": []},
+         3: {"exterior": japan_exterior, "holes": []},
+         4: {"exterior": france_exterior, "holes": []},
+         5: {"exterior": germany_exterior, "holes": []},
+     }
+
+     metadata = {
+         1: {
+             "name": "United States",
+             "iso2": "US",
+             "iso3": "USA",
+             "continent": "North America",
+             "timezone": "America/New_York"
+         },
+         2: {
+             "name": "United Kingdom",
+             "iso2": "GB",
+             "iso3": "GBR",
+             "continent": "Europe",
+             "timezone": "Europe/London"
+         },
+         3: {
+             "name": "Japan",
+             "iso2": "JP",
+             "iso3": "JPN",
+             "continent": "Asia",
+             "timezone": "Asia/Tokyo"
+         },
+         4: {
+             "name": "France",
+             "iso2": "FR",
+             "iso3": "FRA",
+             "continent": "Europe",
+             "timezone": "Europe/Paris"
+         },
+         5: {
+             "name": "Germany",
+             "iso2": "DE",
+             "iso3": "DEU",
+             "continent": "Europe",
+             "timezone": "Europe/Berlin"
+         },
+     }
+
+     # Build geohash index by sampling bounding boxes
+     geohash_index = {}
+
+     for country_id, poly_data in polygons.items():
+         exterior = poly_data["exterior"]
+
+         # Get bounding box
+         lats = [p[0] for p in exterior]
+         lons = [p[1] for p in exterior]
+
+         min_lat, max_lat = min(lats), max(lats)
+         min_lon, max_lon = min(lons), max(lons)
+
+         # Sample points
+         step = 1.0  # Larger step for test data
+         lat = min_lat
+         while lat <= max_lat:
+             lon = min_lon
+             while lon <= max_lon:
+                 geohash = encode(lat, lon, precision=6)
+                 if geohash not in geohash_index:
+                     geohash_index[geohash] = []
+                 if country_id not in geohash_index[geohash]:
+                     geohash_index[geohash].append(country_id)
+                 lon += step
+             lat += step
+
+     return geohash_index, polygons, metadata
+
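A quick probe against this test data (a sketch, assuming the package imports as geo_intel_offline). Because the generator samples on a 1-degree grid but encodes at precision 6, only the exact grid points land in the index, so the probe reuses one of them; country 4 is the France box defined above:

    from geo_intel_offline.data_builder_minimal import create_minimal_test_data
    from geo_intel_offline.geohash import encode

    geohash_index, polygons, metadata = create_minimal_test_data()
    cell = encode(49.0, 2.0, precision=6)        # a sampled grid point inside the France box
    candidates = geohash_index.get(cell, [])
    print(cell, [metadata[c]["name"] for c in candidates])   # expected: ['France']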
+
+ def main():
+     """CLI entry point."""
+     if len(sys.argv) < 2:
+         print("Usage: python -m geo_intel_offline.data_builder_minimal <output_dir>")
+         sys.exit(1)
+
+     output_dir = Path(sys.argv[1])
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     print("Generating minimal test data...")
+     geohash_index, polygons, metadata = create_minimal_test_data()
+
+     # Save files
+     print(f"Writing data files to {output_dir}...")
+
+     with open(output_dir / "geohash_index.json", 'w', encoding='utf-8') as f:
+         json.dump(geohash_index, f, separators=(',', ':'))
+
+     with open(output_dir / "polygons.json", 'w', encoding='utf-8') as f:
+         json.dump(polygons, f, separators=(',', ':'))
+
+     with open(output_dir / "metadata.json", 'w', encoding='utf-8') as f:
+         json.dump(metadata, f, separators=(',', ':'), ensure_ascii=False)
+
+     print(f"Done! Generated {len(geohash_index)} geohashes, {len(polygons)} countries.")
+     print("\nNote: This is minimal test data. For production, use data_builder.py")
+     print(" with Natural Earth or similar authoritative source data.")
+
+
+ if __name__ == '__main__':
+     main()